HashMap底层实现原理 0.样例数据 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 import java.util.HashMap;import java.util.Iterator;import java.util.Map;import java.util.Set;public class CollectionTest { public static void main (String[] args) { Map<String,Integer> map = new HashMap<>(); int count = 1 ; for (char i = 65 ; i < 91 ; i++) { map.put(String.valueOf(i),count); count++; } Set<String> keySet = map.keySet(); Iterator<String> iterator = keySet.iterator(); while (iterator.hasNext()) { String key = iterator.next(); System.out.println(key+ " => " + map.get(key)); } System.out.println("******************************" ); Iterator<Map.Entry<String, Integer>> iteratorMap = map.entrySet().iterator(); while (iteratorMap.hasNext()) { Map.Entry<String, Integer> mapEntry = iteratorMap.next(); System.out.println(mapEntry); } System.out.println("******************************" ); for (Map.Entry<String, Integer> entry : map.entrySet()) { System.out.println(entry.getKey() + " => " + entry.getValue()); } } }
1. 类信息 1 public class HashMap <K ,V > extends AbstractMap <K ,V > implements Map <K ,V >, Cloneable , Serializable {
2. 基本属性 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 private static final long serialVersionUID = 362498820763181265L ; static final int DEFAULT_INITIAL_CAPACITY = 1 << 4 ; static final int MAXIMUM_CAPACITY = 1 << 30 ;static final float DEFAULT_LOAD_FACTOR = 0.75f ;static final int TREEIFY_THRESHOLD = 8 ; static final int UNTREEIFY_THRESHOLD = 6 ; static final int MIN_TREEIFY_CAPACITY = 64 ;static class Node <K ,V > implements Map .Entry <K ,V > { final int hash; final K key; V value; Node<K,V> next; Node(int hash, K key, V value, Node<K,V> next) { this .hash = hash; this .key = key; this .value = value; this .next = next; } public final K getKey () { return key; } public final V getValue () { return value; } public final String toString () { return key + "=" + value; } public final int hashCode () { return Objects.hashCode(key) ^ Objects.hashCode(value); } public final V setValue (V newValue) { V oldValue = value; value = newValue; return oldValue; } public final boolean equals (Object o) { if (o == this ) return true ; if (o instanceof Map.Entry) { Map.Entry<?,?> e = (Map.Entry<?,?>)o; if (Objects.equals(key, e.getKey()) && Objects.equals(value, e.getValue())) return true ; } return false ; } } transient Node<K,V>[] table; transient Set<Map.Entry<K,V>> entrySet; transient int size; transient int modCount; int threshold; final float loadFactor; public HashMap (int initialCapacity, float loadFactor) { if (initialCapacity < 0 ) throw new IllegalArgumentException("Illegal initial capacity: " + initialCapacity); if (initialCapacity > MAXIMUM_CAPACITY) initialCapacity = MAXIMUM_CAPACITY; if (loadFactor <= 0 || Float.isNaN(loadFactor)) throw new IllegalArgumentException("Illegal load factor: " + loadFactor); this .loadFactor = loadFactor; this .threshold = tableSizeFor(initialCapacity); } static final int tableSizeFor (int cap) { int n = cap - 1 ; n |= n >>> 1 ; n |= n >>> 2 ; n |= n >>> 4 ; n |= n >>> 8 ; n |= n >>> 16 ; return (n < 0 ) ? 1 : (n >= 1 << 30 ) ? 1 << 30 : n + 1 ; } public HashMap (int initialCapacity) { this (initialCapacity, DEFAULT_LOAD_FACTOR); } public HashMap () { this .loadFactor = DEFAULT_LOAD_FACTOR; } static final class TreeNode <K ,V > extends LinkedHashMap .Entry <K ,V > { TreeNode<K,V> parent; TreeNode<K,V> left; TreeNode<K,V> right; TreeNode<K,V> prev; boolean red; TreeNode(int hash, K key, V val, Node<K,V> next) { super (hash, key, val, next); } final TreeNode<K,V> root () { for (TreeNode<K,V> r = this , p;;) { if ((p = r.parent) == null ) return r; r = p; } } }
3. hash算法 HashMap定位数组索引位置,直接决定了hash方法的离散性能。下面是定位哈希桶数组的源码:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 static final int hash (Object key) { int h; return (key == null ) ? 0 : (h = key.hashCode()) ^ (h >>> 16 ); } int index = (tab.length - 1 ) & hash; } public int hashCode () { int h = hash; if (h == 0 && value.length > 0 ) { char val[] = valu for (int i = 0 ; i < value.length; i++) { h = 31 * h + val[i]; } hash = h; } return h; }
HashMap底层数组的长度总是2的n次方,并且取模运算为“h mod table.length”,对应上面的公式,可以得到该运算等同于“h & (table.length - 1)”。这是HashMap在速度上的优化,因为&比%具有更高的效率。
在JDK1.8的实现中,还优化了高位运算的算法,将hashCode的高16位与hashCode进行异或运算,主要是为了在table的length较小的时候,让高位也参与运算,并且不会有太大的开销。
4. get方法 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 public V get (Object key) { Node<K,V> e; return (e = getNode(hash(key), key)) == null ? null : e.value; } final Node<K,V> getNode (int hash, Object key) { Node<K,V>[] tab; Node<K,V> first, e; int n; K k; if ((tab = table) != null && (n = tab.length) > 0 && (first = tab[(n - 1 ) & hash]) != null ) { if (first.hash == hash && ((k = first.key) == key || (key != null && key.equals(k)))) return first; if ((e = first.next) != null ) { if (first instanceof TreeNode) return ((TreeNode<K,V>)first).getTreeNode(hash, key); do { if (e.hash == hash && ((k = e.key) == key || (key != null && key.equals(k)))) return e; } while ((e = e.next) != null ); } } return null ; }
5. put方法 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 public V put (K key, V value) { return putVal(hash(key), key, value, false , true ); } final V putVal (int hash, K key, V value, boolean onlyIfAbsent, boolean evict) { Node<K,V>[] tab; Node<K,V> p; int n, i; if ((tab = table) == null || (n = tab.length) == 0 ) n = (tab = resize()).length; if ((p = tab[i = (n - 1 ) & hash]) == null ) tab[i] = newNode(hash, key, value, null ); else { Node<K,V> e; K k; if (p.hash == hash && ((k = p.key) == key || (key != null && key.equals(k)))) e = p; else if (p instanceof TreeNode) e = ((TreeNode<K,V>)p).putTreeVal(this , tab, hash, key, value); else { for (int binCount = 0 ; ; ++binCount) { if ((e = p.next) == null ) { p.next = newNode(hash, key, value, null ); if (binCount >= TREEIFY_THRESHOLD - 1 ) treeifyBin(tab, hash); break ; } if (e.hash == hash && ((k = e.key) == key || (key != null && key.equals(k)))) break ; p = e; } } if (e != null ) { V oldValue = e.value; if (!onlyIfAbsent || oldValue == null ) e.value = value; afterNodeAccess(e); return oldValue; } } ++modCount; if (++size > threshold) resize(); afterNodeInsertion(evict); return null ; }
从上面的源码分析可以看出
1、如果节点已经存在,则更新原值
2、如果节点不存在,则插入数组中,如果数组已经有值,则判断是非是红黑树,如果是,则调用红黑树方法插入
3、如果插入的是链表,插入尾部,然后判断节点数是否超过8,如果超过,则转换为红黑树
4、先插入的数据,后面判断是否超过阀值再进行的扩容
putTreeVal,插入红黑树方法就不看了,看下treeifyBin方法,该方法是将链表转化为红黑树,
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 final void treeifyBin (Node<K,V>[] tab, int hash) { int n, index; Node<K,V> e; if (tab == null || (n = tab.length) < MIN_TREEIFY_CAPACITY) resize(); else if ((e = tab[index = (n - 1 ) & hash]) != null ) { TreeNode<K,V> hd = null , tl = null ; do { TreeNode<K,V> p = replacementTreeNode(e, null ); if (tl == null ) hd = p; else { p.prev = tl; tl.next = p; } tl = p; } while ((e = e.next) != null ); if ((tab[index] = hd) != null ) hd.treeify(tab); } }
可以看到,会先判断tab的节点数是否超过64,如果没超过,则进行扩容,如果超过了才会转换为红黑树
可以得到两个结论
1、什么时候转换为红黑树
当链表数目超过8,并且map节点数量超过64,才会转换为红黑树
2、什么时候扩容(前提是map数目没有超过最大容量值 1<<30 )
新增节点时,发生了碰撞,并且节点数目超过阀值
新增节点时,发生了碰撞,节点数量木有超过阀值,但是链表数目>8,map节点<64时
再看下resize()方法
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 final Node<K,V>[] resize() { Node<K,V>[] oldTab = table; int oldCap = (oldTab == null ) ? 0 : oldTab.length; int oldThr = threshold; int newCap, newThr = 0 ; if (oldCap > 0 ) { if (oldCap >= MAXIMUM_CAPACITY) { threshold = Integer.MAX_VALUE; return oldTab; else if ((newCap = oldCap << 1 ) < MAXIMUM_CAPACITY && oldCap >= DEFAULT_INITIAL_CAPACITY) newThr = oldThr << 1 ; } else if (oldThr > 0 ) newCap = oldThr; else { newCap = DEFAULT_INITIAL_CAPACITY; newThr = (int )(DEFAULT_LOAD_FACTOR * DEFAULT_INITIAL_CAPACITY); } if (newThr == 0 ) { float ft = (float )newCap * loadFactor; newThr = (newCap < MAXIMUM_CAPACITY && ft < (float )MAXIMUM_CAPACITY ? (int )ft : Integer.MAX_VALUE); } threshold = newThr; @SuppressWarnings ({"rawtypes" ,"unchecked" }) Node<K,V>[] newTab = (Node<K,V>[])new Node[newCap]; table = newTab; if (oldTab != null ) { for (int j = 0 ; j < oldCap; ++j) { Node<K,V> e; if ((e = oldTab[j]) != null ) { oldTab[j] = null ; if (e.next == null ) newTab[e.hash & (newCap - 1 )] = e; else if (e instanceof TreeNode) ((TreeNode<K,V>)e).split(this , newTab, j, oldCap); else { Node<K,V> loHead = null , loTail = null ; Node<K,V> hiHead = null , hiTail = null ; Node<K,V> next; do { next = e.next; if ((e.hash & oldCap) == 0 ) { if (loTail == null ) loHead = e; else loTail.next = e; loTail = e; } else { if (hiTail == null ) hiHead = e; else hiTail.next = e; hiTail = e; } } while ((e = next) != null ); if (loTail != null ) { loTail.next = null ; newTab[j] = loHead; } if (hiTail != null ) { hiTail.next = null ; newTab[j + oldCap] = hiHead; } } } } } return newTab; }
可以看出,扩容时,节点重hash只分布在原索引位置与原索引+oldCap位置,为什么呢
假设老表的容量为16,即oldCap=16,则新表容量为16*2=32,假设节点1的hash值为0000 0000 0000 0000 0000 1111 0000 1010,节点2的hash值为0000 0000 0000 0000 0000 1111 0001 1010,则节点1和节点2在老表的索引位置计算如下图计算1,由于老表的长度限制,节点1和节点2的索引位置只取决于节点hash值的最后4位。再看计算2,计算2为新表的索引计算,可以知道如果两个节点在老表的索引位置相同,则新表的索引位置只取决于节点hash值倒数第5位的值,而此位置的值刚好为老表的容量值16,此时节点在新表的索引位置只有两种情况:原索引位置和原索引+oldCap位置(在此例中即为10和10+16=26)。由于结果只取决于节点hash值的倒数第5位,而此位置的值刚好为老表的容量值16,因此此时新表的索引位置的计算可以替换为计算3,直接使用节点的hash值与老表的容量16进行位于运算,如果结果为0则该节点在新表的索引位置为原索引位置,否则该节点在新表的索引位置为原索引+oldCap位置。
6. remove()方法 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 public V remove (Object key) { Node<K,V> e; return (e = removeNode(hash(key), key, null , false , true )) == null ? null : e.value; } final Node<K,V> removeNode (int hash, Object key, Object value, boolean matchValue, boolean movable) { Node<K,V>[] tab; Node<K,V> p; int n, index; if ((tab = table) != null && (n = tab.length) > 0 && (p = tab[index = (n - 1 ) & hash]) != null ) { Node<K,V> node = null , e; K k; V v; if (p.hash == hash && ((k = p.key) == key || (key != null && key.equals(k)))) node = p; else if ((e = p.next) != null ) { if (p instanceof TreeNode) node = ((TreeNode<K,V>)p).getTreeNode(hash, key); else { do { if (e.hash == hash && ((k = e.key) == key || (key != null && key.equals(k)))) { node = e; break ; } p = e; } while ((e = e.next) != null ); } } if (node != null && (!matchValue || (v = node.value) == value || (value != null && value.equals(v)))) { if (node instanceof TreeNode) ((TreeNode<K,V>)node).removeTreeNode(this , tab, movable); else if (node == p) tab[index] = node.next; else p.next = node.next; ++modCount; --size; afterNodeRemoval(node); return node; } } return null ; }
7. JDK1.7和1.8的区别 1、JDK1.7的时候使用的是数组+ 单链表的数据结构。但是在JDK1.8及之后时,使用的是数组+链表+红黑树的数据结构(当链表的深度达到8的时候,也就是默认阈值,就会自动扩容把链表转成红黑树的数据结构来把时间复杂度从O(n)变成O(logN)提高了效率)
2、JDK1.7用的是头插法,而JDK1.8及之后使用的都是尾插法,那么他们为什么要这样做呢?因为JDK1.7是用单链表进行的纵向延伸,当采用头插法时会容易出现逆序且环形链表死循环问题。但是在JDK1.8之后是因为加入了红黑树使用尾插法,能够避免出现逆序且链表死循环的问题。
3、扩容后数据存储位置的计算方式也不一样:1. 在JDK1.7的时候是直接用hash值和需要扩容的二进制数进行&(这里就是为什么扩容的时候为啥一定必须是2的多少次幂的原因所在,因为如果只有2的n次幂的情况时最后一位二进制数才一定是1,这样能最大程度减少hash碰撞)(hash值 & length-1),而在JDK1.8的时候直接用了JDK1.7的时候计算的规律,也就是扩容前的原始位置+扩容的大小值=JDK1.8的计算方式,而不再是JDK1.7的那种异或的方法。但是这种方式就相当于只需要判断Hash值的新增参与运算的位是0还是1就直接迅速计算出了扩容后的储存方式。
4、jdk1.7 先扩容再put ,jdk1.8 先put再扩容