文中提到的JDK7版HashMap,参见博客 http://www.javashuo.com/article/p-qcwimtzs-nv.html
红黑树、TreeMap的分析详见 http://www.javashuo.com/article/p-weyeglqm-nv.html
//同jdk7 static final int DEFAULT_INITIAL_CAPACITY = 1 << 4; // aka 16 static final int MAXIMUM_CAPACITY = 1 << 30; static final float DEFAULT_LOAD_FACTOR = 0.75f; //树化阈值,也就是说链表长度超过8才会进行树化 static final int TREEIFY_THRESHOLD = 8; //链表化阈值,也就是说红黑树的节点个数少于6才会退化成链表 static final int UNTREEIFY_THRESHOLD = 6; //最小树化容量,也就是说链表长度超过64才会树化 static final int MIN_TREEIFY_CAPACITY = 64; //仍是熟悉的味道,Node数组,数组加链表的存储结构 transient Node<K,V>[] table;
简单来讲,按直觉树化阈值和链表化阈值似乎应该相等,统一为一个阈值,超过则树化,低于则链表化。但假设统一规定为8,就会出现这样的问题:如果一个链表长度从7变到8,那么就树化,可是过一会儿又从8变回7,又需要变回链表。而无论链表转化成树还是树转化成链表,都是非常费时的,这就大大降低了HashMap的效率;此外在树化、链表化的过程中有大量的垃圾对象产生,从而加快触发GC。因此两个阈值之间留出了间隔(8和6),避免在临界点附近反复转换。
等下揭晓
/**
 * Basic bin node: one key/value entry plus a link to the next entry in the
 * same bucket. Excerpt only -- the Map.Entry accessor methods are omitted.
 */
static class Node<K,V> implements Map.Entry<K,V> {
    final int hash;     // hash stored with the entry; putVal and resize index and compare by it
    final K key;
    V value;            // mutable: putVal overwrites it for an existing key
    Node<K,V> next;     // next node in the same bin, or null at the tail

    /** Stores the four arguments into the corresponding fields. */
    Node(int hash, K key, V value, Node<K,V> next) {
        this.hash = hash;
        this.key = key;
        this.value = value;
        this.next = next;
    }
}
等同于JDK7的Entry节点,只是换了个名字,还是熟悉的链表
/**
 * Tree bin node. Adds red-black tree links and a back pointer on top of the
 * fields inherited from its superclass. Excerpt only -- the tree operations
 * (treeify, putTreeVal, split, ...) are omitted here.
 */
static final class TreeNode<K,V> extends LinkedHashMap.Entry<K,V> {
    TreeNode<K,V> parent;  // red-black tree links
    TreeNode<K,V> left;
    TreeNode<K,V> right;
    TreeNode<K,V> prev;    // needed to unlink next upon deletion
    boolean red;           // node colour: true = red, false = black

    /** Passes all four arguments straight to the superclass constructor. */
    TreeNode(int hash, K key, V val, Node<K,V> next) {
        super(hash, key, val, next);
    }
}
boolean red,红黑树它来了
HashMap向外提供的功能就是时间复杂度为O(1)的查询,可是基于数组加链表的冲突解决方式,以及HashMap通过位运算计算index的方式,如果hashCode的实现不能达到很好的分散效果,比如自己的类中重写了hashCode方法,就可能导致某一个链表过长,从而使得HashMap的查询速度退化到O(n),这是没有办法接受的,所以需要选择一种支持快速查找的结构——有序的二叉树
为什么是红黑树
这一点在关于TreeMap的文章中已经分析清楚了:如果选择普通的二叉搜索树,在一定的情况下,二叉搜索树会退化成链表;而AVL树的实现复杂,插入删除效率不及红黑树,所以选择综合性能不错的红黑树。
/**
 * JDK 8 constructor: validates the arguments, stores the load factor and
 * parks the rounded initial capacity in {@code threshold} until the table
 * is actually allocated.
 *
 * @param initialCapacity requested capacity; values above MAXIMUM_CAPACITY are clamped
 * @param loadFactor      load factor; must be positive and not NaN
 * @throws IllegalArgumentException if initialCapacity is negative or loadFactor is
 *         non-positive or NaN
 */
public HashMap(int initialCapacity, float loadFactor) {
    if (initialCapacity < 0) {
        throw new IllegalArgumentException("Illegal initial capacity: " +
                                           initialCapacity);
    }
    if (loadFactor <= 0 || Float.isNaN(loadFactor)) {
        throw new IllegalArgumentException("Illegal load factor: " +
                                           loadFactor);
    }
    this.loadFactor = loadFactor;
    // tableSizeFor is defined elsewhere; presumably it returns the smallest power
    // of two that is AT LEAST the argument (not strictly greater) -- TODO confirm.
    this.threshold = tableSizeFor(Math.min(initialCapacity, MAXIMUM_CAPACITY));
}
public HashMap(int initialCapacity, float loadFactor) { //作一些范围检查 if (initialCapacity < 0) throw new IllegalArgumentException("Illegal initial capacity: " + initialCapacity); if (initialCapacity > MAXIMUM_CAPACITY) initialCapacity = MAXIMUM_CAPACITY; if (loadFactor <= 0 || Float.isNaN(loadFactor)) throw new IllegalArgumentException("Illegal load factor: " + loadFactor); //对loadFactor赋值以及threshold赋值 this.loadFactor = loadFactor; threshold = initialCapacity; //空方法,交由子类实现,在HashMap中无用 init(); }
区别:JDK7中借助Integer的highestOneBit()、bitCount()方法计算二次幂,JDK8中自己实现了(即tableSizeFor方法)。
public V put(K key, V value) { return putVal(hash(key), key, value, false, true); }
新增两个参数:
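@param onlyIfAbsent if true, don't change existing value —— 对应第四个参数,put中传入false;如果为true,则插入已经存在的key时不修改value(原value为null时除外)
@param evict if false, the table is in creation mode. —— 对应第五个参数,put中传入true;它只是被原样传给afterNodeInsertion(evict),而该方法在HashMap中是空方法(留给LinkedHashMap等子类使用),所以在HashMap里可以暂时忽略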
/**
 * Core of put: inserts the mapping or updates an existing one.
 *
 * @param hash         hash of the key, as produced by hash(key) in put
 * @param key          the key
 * @param value        the value to store
 * @param onlyIfAbsent if true, an existing non-null value is left untouched
 * @param evict        passed through unchanged to afterNodeInsertion
 * @return the previous value for the key, or null if a new node was added
 *         (or the previous value was null)
 */
final V putVal(int hash, K key, V value, boolean onlyIfAbsent,
               boolean evict) {
    Node<K,V>[] tab; Node<K,V> p; int n, i;
    // Lazy initialisation: the table is allocated by resize() on first use.
    if ((tab = table) == null || (n = tab.length) == 0)
        n = (tab = resize()).length;
    // (n - 1) & hash: JDK 8 has no indexFor method, but the index is computed
    // by the same logic. An empty bucket takes the new node directly.
    if ((p = tab[i = (n - 1) & hash]) == null)
        tab[i] = newNode(hash, key, value, null);
    else {
        // Hash collision: the bucket is already occupied.
        Node<K,V> e; K k;
        // The first node has the same hash and an equal key.
        if (p.hash == hash &&
            ((k = p.key) == key || (key != null && key.equals(k))))
            e = p;
        // The bin is already a tree: insert the red-black tree way
        // (putTreeVal is not shown in this excerpt).
        else if (p instanceof TreeNode)
            e = ((TreeNode<K,V>)p).putTreeVal(this, tab, hash, key, value);
        else {
            // Unlike the head insertion of JDK 7, the new node is appended
            // at the tail of the list.
            for (int binCount = 0; ; ++binCount) {
                if ((e = p.next) == null) {
                    p.next = newNode(hash, key, value, null);
                    // Treeify once the list is past the treeify threshold;
                    // the -1 is because the first node was not counted.
                    if (binCount >= TREEIFY_THRESHOLD - 1) // -1 for 1st
                        treeifyBin(tab, hash);
                    break;
                }
                // Found an existing node with an equal key: stop, e points at it.
                if (e.hash == hash &&
                    ((k = e.key) == key || (key != null && key.equals(k))))
                    break;
                p = e;
            }
        }
        if (e != null) { // existing mapping for key
            V oldValue = e.value;
            // onlyIfAbsent decides whether the value of an existing key is
            // replaced; a null old value is always replaced.
            if (!onlyIfAbsent || oldValue == null)
                e.value = value;
            afterNodeAccess(e);
            return oldValue;
        }
    }
    ++modCount;
    // Grow the table once size exceeds the threshold.
    if (++size > threshold)
        resize();
    // Per the original note, an empty method in HashMap itself.
    afterNodeInsertion(evict);
    return null;
}
从上面的代码可以看出,数组加链表部分的逻辑与JDK7基本相似,但是JDK8的实现中新结点的插入采用了尾插法
头插法的问题明天再补!
/**
 * Spreads a key's hashCode by XOR-ing its upper 16 bits into the lower 16,
 * so the high bits also influence the bucket index.
 *
 * @param key the key, may be null
 * @return 0 for a null key, otherwise {@code hashCode ^ (hashCode >>> 16)}
 */
static final int hash(Object key) {
    if (key == null) {
        return 0;
    }
    final int code = key.hashCode();
    return code ^ (code >>> 16);
}
相较于JDK7的多次扰动,JDK8的扰动次数减少了,只做一次:把hashCode的高16位与低16位进行异或,让高位的信息也参与到index的计算中
/**
 * Initialises the table or doubles its size, then redistributes the
 * existing nodes into the new table.
 *
 * @return the new table, or the old one unchanged if it was already at
 *         MAXIMUM_CAPACITY
 */
final Node<K,V>[] resize() {
    Node<K,V>[] oldTab = table;
    int oldCap = (oldTab == null) ? 0 : oldTab.length;
    int oldThr = threshold;
    int newCap, newThr = 0;
    if (oldCap > 0) {
        // Already at the maximum: stop growing, just lift the threshold.
        if (oldCap >= MAXIMUM_CAPACITY) {
            threshold = Integer.MAX_VALUE;
            return oldTab;
        }
        // newCap = oldCap << 1: double the capacity.
        else if ((newCap = oldCap << 1) < MAXIMUM_CAPACITY &&
                 oldCap >= DEFAULT_INITIAL_CAPACITY)
            newThr = oldThr << 1; // double threshold
    }
    // oldCap == 0 from here on (table not yet allocated).
    else if (oldThr > 0) // initial capacity was placed in threshold
        // The threshold computed in the constructor becomes the new capacity.
        newCap = oldThr;
    else {               // zero initial threshold signifies using defaults
        // Default constructor: capacity and threshold differ (16 vs 16 * 0.75).
        newCap = DEFAULT_INITIAL_CAPACITY;
        newThr = (int)(DEFAULT_LOAD_FACTOR * DEFAULT_INITIAL_CAPACITY);
    }
    // Threshold not set by any branch above: derive it from capacity * loadFactor.
    if (newThr == 0) {
        float ft = (float)newCap * loadFactor;
        newThr = (newCap < MAXIMUM_CAPACITY && ft < (float)MAXIMUM_CAPACITY ?
                  (int)ft : Integer.MAX_VALUE);
    }
    threshold = newThr;
    @SuppressWarnings({"rawtypes","unchecked"})
    Node<K,V>[] newTab = (Node<K,V>[])new Node[newCap];
    table = newTab;
    if (oldTab != null) {
        // Move every bucket of the old array into the new one.
        for (int j = 0; j < oldCap; ++j) {
            Node<K,V> e;
            if ((e = oldTab[j]) != null) {
                oldTab[j] = null;
                // The bucket holds a single node: re-index it directly.
                if (e.next == null)
                    newTab[e.hash & (newCap - 1)] = e;
                // The bucket is a red-black tree: delegate to split (not shown here).
                else if (e instanceof TreeNode)
                    ((TreeNode<K,V>)e).split(this, newTab, j, oldCap);
                else {
                    // Split the list into a "lo" list (stays at index j) and a
                    // "hi" list (moves to j + oldCap), decided by hash & oldCap.
                    Node<K,V> loHead = null, loTail = null;
                    Node<K,V> hiHead = null, hiTail = null;
                    Node<K,V> next;
                    do {
                        next = e.next;
                        if ((e.hash & oldCap) == 0) {
                            // Tail insertion: nodes are appended in their original order.
                            if (loTail == null)
                                loHead = e;
                            else
                                loTail.next = e;
                            loTail = e;
                        }
                        else {
                            if (hiTail == null)
                                hiHead = e;
                            else
                                hiTail.next = e;
                            hiTail = e;
                        }
                    } while ((e = next) != null);
                    // Only the two list heads need to be stored at their
                    // positions in the new array.
                    if (loTail != null) {
                        loTail.next = null;
                        newTab[j] = loHead;
                    }
                    if (hiTail != null) {
                        hiTail.next = null;
                        newTab[j + oldCap] = hiHead;
                    }
                }
            }
        }
    }
    return newTab;
}
抛开红黑树来看,这里利用了一个特性
假设hashcode = 0010 1111,初始容量为8,index = hashcode & (length-1) = 0010 1111 & 0000 0111 = 0000 0111 = 7。此外还有一个hashcode2 = 0000 0111,按照相同的index计算方法,二者发生了冲突。此时如果发生扩容,新的容量为16,16-1 = 15 = 0000 1111,此时二者再去运算,结果分别为:index1 = 1111 = 15,index2 = 0111 = 7
通过上面的举例可以看出,容量左移一位之后,hash值中对应新增的那一位(即 hash & oldCap)是否为1,导致旧链分裂成两条新链,而这两条新链在新数组中的下标之差就是这一位所表示的大小(1000=8),也就是旧的容量
其中初始化也会调用到resize方法,分别走两个分支
else if (oldThr > 0) // initial capacity was placed in threshold
    // The threshold computed in the constructor becomes the new capacity.
    newCap = oldThr;
else {               // zero initial threshold signifies using defaults
    // Default constructor: capacity and threshold differ (16 vs 16 * 0.75).
    newCap = DEFAULT_INITIAL_CAPACITY;
    newThr = (int)(DEFAULT_LOAD_FACTOR * DEFAULT_INITIAL_CAPACITY);
}
与JDK7中的实现不大相同:第一个分支中,构造函数暂存在threshold里的值被直接用作capacity,所以初始化之前的threshold与初始化之后的capacity是相同的,通过简单的实验验证一下
public static void main(String[] args) throws NoSuchFieldException { HashMap<Integer, Integer> map = new HashMap<>(8); Class<? extends HashMap> mapClass = map.getClass(); //threshold Field threshold = mapClass.getDeclaredField("threshold"); threshold.setAccessible(true); try { Integer num = (Integer)threshold.get(map); System.out.println(num); } catch (IllegalAccessException e) { e.printStackTrace(); } //capacity try { map.put(1,1); Method capacity = map.getClass().getDeclaredMethod("capacity"); capacity.setAccessible(true); Integer c = (Integer)capacity.invoke(map); System.out.println(c); } catch (NoSuchMethodException e) { e.printStackTrace(); } catch (IllegalAccessException e) { e.printStackTrace(); } catch (InvocationTargetException e) { e.printStackTrace(); } }
两个输出都是8(注意threshold是在put之前、table尚未初始化时读取的;首次resize之后,threshold会被重新计算为capacity*loadFactor=8*0.75=6)。而初始化时如果不传入容量,则会发现初始化后capacity为16,threshold为12=16*0.75,这与JDK7还是略有不同的
final void treeifyBin(Node<K,V>[] tab, int hash) { int n, index; Node<K,V> e; if (tab == null || (n = tab.length) < MIN_TREEIFY_CAPACITY) //若是length<64,不进行树化,进行扩容,扩容一样可能致使链的分裂从而缩短链的长度 resize(); else if ((e = tab[index = (n - 1) & hash]) != null) { TreeNode<K,V> hd = null, tl = null; //把Node链表转换成TreeNode链表 do { //replacementTreeNode把Node转成TreeNode,new一个新的出来赋值便可 TreeNode<K,V> p = replacementTreeNode(e, null); if (tl == null) hd = p; else { p.prev = tl; //你可能比较差别,TreeNode结构里面没有声明next变量,可是你顺着TreeNode的继承结构会发现它实际继承了Node,天然就会有next成员变量 tl.next = p; } tl = p; } while ((e = e.next) != null); if ((tab[index] = hd) != null) hd.treeify(tab); } }
/**
 * Creates a TreeNode carrying the hash, key and value of {@code p}.
 *
 * @param p    the plain node to copy from
 * @param next the node to use as the new TreeNode's successor
 * @return the newly created TreeNode
 */
TreeNode<K,V> replacementTreeNode(Node<K,V> p, Node<K,V> next) {
    final TreeNode<K,V> copy = new TreeNode<>(p.hash, p.key, p.value, next);
    return copy;
}
/**
 * Builds a red-black tree out of the TreeNodes linked from this node via
 * {@code next}, then hands the resulting root to moveRootToFront.
 *
 * @param tab the bucket array, passed on to moveRootToFront
 */
final void treeify(Node<K,V>[] tab) {
    TreeNode<K,V> root = null;
    for (TreeNode<K,V> x = this, next; x != null; x = next) {
        next = (TreeNode<K,V>)x.next;
        x.left = x.right = null;
        // No root yet: x becomes the root and is coloured black.
        if (root == null) {
            x.parent = null;
            x.red = false;
            root = x;
        }
        else {
            K k = x.key;
            int h = x.hash;
            Class<?> kc = null;
            for (TreeNode<K,V> p = root;;) {
                int dir, ph;
                K pk = p.key;
                // Order primarily by hash.
                if ((ph = p.hash) > h)
                    dir = -1;
                else if (ph < h)
                    dir = 1;
                // Equal hashes: try the key's own ordering, otherwise fall back
                // to tieBreakOrder (helpers not shown; not examined further here).
                else if ((kc == null &&
                          (kc = comparableClassFor(k)) == null) ||
                         (dir = compareComparables(kc, k, pk)) == 0)
                    dir = tieBreakOrder(k, pk);

                TreeNode<K,V> xp = p;
                // Binary search step: go left when dir <= 0, right otherwise.
                // A non-null child means the insertion point is not reached yet.
                if ((p = (dir <= 0) ? p.left : p.right) == null) {
                    x.parent = xp;
                    if (dir <= 0)
                        xp.left = x;
                    else
                        xp.right = x;
                    // Rebalance after insertion; per the original note this is
                    // essentially the same as TreeMap's red-black tree.
                    root = balanceInsertion(root, x);
                    break;
                }
            }
        }
    }
    moveRootToFront(tab, root);
}
/**
 * Restores the red-black colouring after {@code x} has been linked into
 * the tree, recolouring and rotating as needed.
 *
 * @param root the current root of the tree
 * @param x    the newly inserted node
 * @return the root of the tree after rebalancing
 */
static <K,V> TreeNode<K,V> balanceInsertion(TreeNode<K,V> root,
                                            TreeNode<K,V> x) {
    x.red = true;
    for (TreeNode<K,V> xp, xpp, xppl, xppr;;) {
        // x has no parent: it is the root, simply colour it black.
        if ((xp = x.parent) == null) {
            x.red = false;
            return x;
        }
        // Parent is black, or parent has no parent (it is the root): done.
        else if (!xp.red || (xpp = xp.parent) == null)
            return root;
        // x's parent is the left child of the grandparent.
        if (xp == (xppl = xpp.left)) {
            // Uncle is red: colour parent and uncle black, grandparent red,
            // and continue from the grandparent.
            if ((xppr = xpp.right) != null && xppr.red) {
                xppr.red = false;
                xp.red = false;
                xpp.red = true;
                x = xpp;
            }
            // Uncle is nil or black.
            else {
                // x is a right child: rotate left around the parent.
                if (x == xp.right) {
                    root = rotateLeft(root, x = xp);
                    xpp = (xp = x.parent) == null ? null : xp.parent;
                }
                // x is a left child: parent black, grandparent red,
                // rotate right around the grandparent.
                if (xp != null) {
                    xp.red = false;
                    if (xpp != null) {
                        xpp.red = true;
                        root = rotateRight(root, xpp);
                    }
                }
            }
        }
        // Mirror image of the case above.
        else {
            if (xppl != null && xppl.red) {
                xppl.red = false;
                xp.red = false;
                xpp.red = true;
                x = xpp;
            }
            else {
                if (x == xp.left) {
                    root = rotateRight(root, x = xp);
                    xpp = (xp = x.parent) == null ? null : xp.parent;
                }
                if (xp != null) {
                    xp.red = false;
                    if (xpp != null) {
                        xpp.red = true;
                        root = rotateLeft(root, xpp);
                    }
                }
            }
        }
    }
}
红黑树的插入(putTreeVal)就不贴代码了,一样的操作:先定位再插入,最后平衡红黑树
这里贴一段HashMap中官方的注释即可
Because TreeNodes are about twice the size of regular nodes, we use them only when bins contain enough nodes to warrant use (see TREEIFY_THRESHOLD). And when they become too small (due to removal or resizing) they are converted back to plain bins. In usages with well-distributed user hashCodes, tree bins are rarely used. Ideally, under random hashCodes, the frequency of nodes in bins follows a Poisson distribution. The first values are:
0: 0.60653066
1: 0.30326533
2: 0.07581633
3: 0.01263606
4: 0.00157952
5: 0.00015795
6: 0.00001316
7: 0.00000094
8: 0.00000006
简单翻译一下就是,TreeNode的大小大约为普通Node的2倍,比较占内存;如果使用well-distributed(也就是分布合理)的hashCode方法,很难用到红黑树,因为如果完全分布合理,通常只会触发扩容。
因此JDK的意思就是能不用红黑树就不用
under random hashCodes, the frequency of nodes in bins follows a Poisson distribution.
如果在足够random的hashCode下,每个链表的大小服从泊松分布,可以看到链表长度达到8的概率已经很小了(约0.00000006)。设置成8的意思就是说,在足够random的hashCode方法下尽量不使用红黑树,那么设置成8就足够了
你可能有问题:既然JDK要极力避免使用红黑树,为什么还要作为一种实现添加进来呢?
上面的前提是足够随机的hashCode计算,架不住有些同志的类自己重写了hashCode方法,那么就有可能导致分布不均匀,导致链表过长,如果不树化,就枉为HashMap查询时间复杂度O(1)的名号了!!