ConcurrentHashMap源码分析

时间 2019-12-02
原文链接
/** JDK7 */
public class ConcurrentHashMap<K, V> extends AbstractMap<K, V> implements ConcurrentMap<K, V>, Serializable {

private static final long serialVersionUID = 7249069246763182397L;

/**
 * Default initial capacity of the map: 16.
 */
static final int DEFAULT_INITIAL_CAPACITY = 16;

/**
 * Default load factor: 0.75.
 */
static final float DEFAULT_LOAD_FACTOR = 0.75f;

/**
 * Default concurrency level: 16.
 */
static final int DEFAULT_CONCURRENCY_LEVEL = 16;

/**
 * The maximum capacity, used if a higher value is implicitly specified by either of the constructors with arguments.
 * MUST be a power of two <= 1<<30 to ensure that entries are indexable using ints.
 */
static final int MAXIMUM_CAPACITY = 1 << 30;

/**
 * The minimum capacity for per-segment tables.
 * Must be a power of two, at least two to avoid immediate resizing on next use after lazy construction.
 */
static final int MIN_SEGMENT_TABLE_CAPACITY = 2;

/**
 * The maximum number of segments to allow; used to bound constructor arguments.
 * Must be power of two less than 1 << 24.
 */
static final int MAX_SEGMENTS = 1 << 16; // slightly conservative

/**
 * Number of unsynchronized retries in size and containsValue methods before resorting to locking. 
 * This is used to avoid unbounded retries if tables undergo continuous modification which would make it impossible to obtain an accurate result.
 */
static final int RETRIES_BEFORE_LOCK = 2;

/**
 * Mask used when computing a segment index from a hash:
 * (hash >>> segmentShift) & segmentMask.
 * 
 * The constructor sets it to (length of the segment array - 1).
 */
final int segmentMask;

/**
 * Shift value for indexing within segments.
 * The constructor sets it to 32 minus the base-2 logarithm of the segment array length.
 */
final int segmentShift;

/**
 * The segment array. Each Segment plays a role similar to a small, independently locked hash table.
 * 
 */
final Segment<K,V>[] segments;

/**
 * Creates an empty map sized from the given hints.
 *
 * <p>The segment array length is the smallest power of two that is not less than
 * {@code concurrencyLevel} (capped at {@code MAX_SEGMENTS}); for example a level of 17
 * yields 32 segments. The segment array itself is never resized afterwards: the map grows
 * only by growing the tables of individual segments.
 *
 * @param initialCapacity  initial capacity of the whole map; values above
 *                         {@code MAXIMUM_CAPACITY} are clamped to it
 * @param loadFactor       load factor applied to every segment's table
 * @param concurrencyLevel hint used to derive the number of segments
 * @throws IllegalArgumentException if {@code loadFactor} is not greater than zero,
 *         {@code initialCapacity} is negative, or {@code concurrencyLevel} is not positive
 */
@SuppressWarnings("unchecked")
public ConcurrentHashMap(int initialCapacity, float loadFactor, int concurrencyLevel) {
    // Written as !(x > 0) so that a NaN load factor is rejected as well.
    if (!(loadFactor > 0) || initialCapacity < 0 || concurrencyLevel <= 0) {
        throw new IllegalArgumentException();
    }
    final int boundedLevel = (concurrencyLevel > MAX_SEGMENTS) ? MAX_SEGMENTS : concurrencyLevel;

    // Round the concurrency level up to a power of two: segmentCount == 2^log2Segments.
    int log2Segments = 0;
    int segmentCount = 1;
    for (; segmentCount < boundedLevel; segmentCount <<= 1) {
        ++log2Segments;
    }
    this.segmentShift = 32 - log2Segments;
    this.segmentMask = segmentCount - 1;

    // Work out the table length of the first segment: the per-segment share of the
    // requested capacity (ceiling division), rounded up to a power of two.
    final int boundedCapacity = (initialCapacity > MAXIMUM_CAPACITY) ? MAXIMUM_CAPACITY : initialCapacity;
    int perSegment = boundedCapacity / segmentCount;
    if (perSegment * segmentCount < boundedCapacity) {
        ++perSegment;
    }
    // Never below MIN_SEGMENT_TABLE_CAPACITY, so the first insertion does not trigger a resize.
    int tableLength = MIN_SEGMENT_TABLE_CAPACITY;
    while (tableLength < perSegment) {
        tableLength <<= 1;
    }

    // Only segments[0] is created eagerly; put() creates the others on demand via ensureSegment().
    final HashEntry<K,V>[] firstTable = (HashEntry<K,V>[]) new HashEntry[tableLength];
    final int firstThreshold = (int) (tableLength * loadFactor);
    final Segment<K,V> firstSegment = new Segment<K,V>(loadFactor, firstThreshold, firstTable);

    final Segment<K,V>[] segmentArray = (Segment<K,V>[]) new Segment[segmentCount];
    UNSAFE.putOrderedObject(segmentArray, SBASE, firstSegment); // ordered write of segments[0]
    this.segments = segmentArray;
}


/**
 * Maps {@code key} to {@code value} in this map.
 *
 * <p>A {@code null} value is rejected up front. A {@code null} key is expected to fail as
 * well, inside {@code hash(key)} (defined outside this excerpt).
 *
 * @param key   key with which the value is to be associated
 * @param value value to store; must not be {@code null}
 * @return whatever the target segment's {@code put} returns (the previous value, or
 *         {@code null} if there was none)
 * @throws NullPointerException if {@code value} is {@code null}
 */
@SuppressWarnings("unchecked")
public V put(K key, V value) {
    if (value == null) {
        throw new NullPointerException();
    }

    // Pick the segment from the high bits of the hash, much like HashMap's h & (length - 1).
    final int hash = hash(key);
    final int segmentIndex = (hash >>> segmentShift) & segmentMask;

    // Plain (non-volatile) read first; ensureSegment() rechecks with volatile reads.
    Segment<K,V> segment = (Segment<K,V>) UNSAFE.getObject(segments, (segmentIndex << SSHIFT) + SBASE);
    if (segment == null) {
        segment = ensureSegment(segmentIndex);
    }

    // Delegate the actual insertion to the segment's own table.
    return segment.put(key, hash, value, false);
}



/**
 * Returns the segment for the given index, creating it and
 * recording in segment table (via CAS) if not already present.
 *
 * @param k the index
 * @return the segment
 */
@SuppressWarnings("unchecked")
private Segment<K,V> ensureSegment(int k) {
    final Segment<K,V>[] ss = this.segments;
    long u = (k << SSHIFT) + SBASE; // raw offset
    Segment<K,V> seg;
    if ((seg = (Segment<K,V>)UNSAFE.getObjectVolatile(ss, u)) == null) {
		
        Segment<K,V> proto = ss[0]; // use segments[0] as the prototype for sizing
        int cap = proto.table.length;
        float lf = proto.loadFactor;
        int threshold = (int)(cap * lf);
        HashEntry<K,V>[] tab = (HashEntry<K,V>[])new HashEntry[cap];
		
        if ((seg = (Segment<K,V>)UNSAFE.getObjectVolatile(ss, u)) == null) { // recheck
		
			// Build the new Segment.
			// Its load factor equals that of segments[0], and its table gets the CURRENT length of segments[0]'s table.
			// Note: segments[0] may already have been resized several times by this point.
            Segment<K,V> s = new Segment<K,V>(lf, threshold, tab);
			
			// Install the new Segment into its slot; the loop plus CAS ensures that only one thread's
			// Segment is published and that every caller returns the published one.
            while ((seg = (Segment<K,V>)UNSAFE.getObjectVolatile(ss, u)) == null) {
                if (UNSAFE.compareAndSwapObject(ss, u, null, seg = s))
                    break;
            }
        }
    }
    return seg;
}

/**
 * Looks up the value mapped to {@code key} without taking any lock.
 *
 * <p>The segment and the bucket head are both fetched with volatile reads, then the
 * bucket's chain is walked until a matching key is found.
 *
 * @param key the key whose associated value is wanted
 * @return the value to which the key is mapped, or {@code null} if this map contains no
 *         mapping for the key
 */
public V get(Object key) {
    final int keyHash = hash(key);
    final long segmentOffset = (((keyHash >>> segmentShift) & segmentMask) << SSHIFT) + SBASE;

    final Segment<K,V> segment = (Segment<K,V>) UNSAFE.getObjectVolatile(segments, segmentOffset);
    if (segment == null) {
        return null;
    }
    final HashEntry<K,V>[] table = segment.table;
    if (table == null) {
        return null;
    }

    final long bucketOffset = ((long) ((table.length - 1) & keyHash) << TSHIFT) + TBASE;
    HashEntry<K,V> entry = (HashEntry<K,V>) UNSAFE.getObjectVolatile(table, bucketOffset);
    while (entry != null) {
        final K candidate = entry.key;
        // Identity match first; otherwise require equal hash before calling equals().
        if (candidate == key || (entry.hash == keyHash && key.equals(candidate))) {
            return entry.value;
        }
        entry = entry.next;
    }
    return null;
}


// ************************************************ 补充：jdk1.6中ConcurrentHashMap的get方法 ************************************************
	
	/**
	 * ConcurrentHashMap.get as implemented in JDK 1.6.
	 *
	 * <p>The lookup is delegated to the owning segment, which works as follows:
	 * <ol>
	 *   <li>Find the HashEntry for the key; if there is none, return null.</li>
	 *   <li>If an entry is found, read its value without locking and return it when it is
	 *       non-null (the value field is volatile, so a non-null read can be returned as is).</li>
	 *   <li>If the value reads as null, read it again while holding the segment lock.
	 *       An entry that is visible but whose value is still null can only be the result of
	 *       the entry's initialization being reordered with its publication in the table.</li>
	 * </ol>
	 *
	 * @param key the key to look up
	 * @return the mapped value, or null if there is no mapping
	 */
	public V get(Object key) {
		final int spreadHash = hash(key.hashCode());
		final Segment<K,V> owner = segmentFor(spreadHash);
		return owner.get(key, spreadHash);
	}

	/**
	 * ConcurrentHashMap.Segment.get (JDK 1.6): an optimistic, lock-free read with a locked fallback.
	 *
	 * <p>Why the fallback exists: put() publishes a new entry with
	 * {@code tab[index] = new HashEntry<K,V>(key, hash, first, value);}.
	 * Conceptually that statement allocates the object, initializes its fields, and stores the
	 * reference into {@code tab[index]}. A compiler may reorder the HashEntry initialization with
	 * its table assignment, so the reference can become visible before the value field has been
	 * written. A concurrent reader may then observe an entry whose hash and key match but whose
	 * value is still null. For that reason a null value is not trusted: it is re-read under the
	 * segment lock.
	 *
	 * @param key  the key to look up
	 * @param hash the (already spread) hash of the key
	 * @return the mapped value, or null if the segment has no entry for the key
	 */
	V get(Object key, int hash) {
		if (count == 0) { 		// read-volatile (the original note gives: transient volatile int count;)
			return null;
		}
		for (HashEntry<K,V> e = getFirst(hash); e != null; e = e.next) {
			if (e.hash != hash || !key.equals(e.key)) {
				continue;	// not the entry for this key
			}
			V v = e.value;
			// A non-null value can be returned directly; a null one is re-read under the lock.
			return (v != null) ? v : readValueUnderLock(e);
		}
		return null;
	}

	/**
	 * ConcurrentHashMap.Segment.readValueUnderLock (JDK 1.6).
	 *
	 * <p>Reads the value field of an entry while holding the segment lock. Called if the value
	 * field ever appears to be null, which is possible only if a compiler happens to reorder a
	 * HashEntry initialization with its table assignment; that is legal under the memory model
	 * but is not known to ever occur.
	 *
	 * @param e the entry whose value should be re-read
	 * @return the value observed under the lock
	 */
	V readValueUnderLock(HashEntry<K,V> e) {
		lock();
		try {
			final V lockedRead = e.value;
			return lockedRead;
		} finally {
			unlock();	// always release, even if the read throws
		}
	}
	
	/**
	 * ConcurrentHashMap.segmentFor (JDK 1.6): returns the segment responsible for the given hash.
	 *
	 * @param hash the (already spread) hash of a key
	 * @return the segment at index {@code (hash >>> segmentShift) & segmentMask}
	 */
	final Segment<K,V> segmentFor(int hash) {
		final int segmentIndex = (hash >>> segmentShift) & segmentMask;
		return segments[segmentIndex];
	}
	
	/**
	 * ConcurrentHashMap list entry (JDK 1.6 version). Note that this is never exported out as a user-visible Map.Entry.
	 *
	 * Because the value field is volatile, not final, it is legal wrt the Java Memory Model for an unsynchronized reader to see null instead of initial value when read via a data race.  
	 * Although a reordering leading to this is not likely to ever actually occur, 
	 * the Segment.readValueUnderLock method is used as a backup in case a null (pre-initialized) value is ever seen in an unsynchronized access method.
	 */
	static final class HashEntry<K,V> {
		final K key;
		final int hash;
		volatile V value;			// volatile: when another thread replaces this entry's value, readers in get() observe the latest value.
		final HashEntry<K,V> next; 

		/**
		 * Creates an entry; every field is assigned exactly once here.
		 */
		HashEntry(K key, int hash, HashEntry<K,V> next, V value) {
			this.key = key;
			this.hash = hash;
			this.next = next;
			this.value = value;
		}

		/**
		 * Creates a HashEntry array of the given length (the generic array creation is unchecked).
		 */
		@SuppressWarnings("unchecked")
		static final <K,V> HashEntry<K,V>[] newArray(int i) {
			return new HashEntry[i];
		}
	}
	
// ************************************************ jdk1.6中ConcurrentHashMap的get方法 ************************************************


/**
 * Segment类似于一个Hashtable
 * 
 * Segments are specialized versions of hash tables.  
 * This subclasses from ReentrantLock opportunistically, just to simplify some locking and avoid separate construction.
 */
static final class Segment<K,V> extends ReentrantLock implements Serializable {

    private static final long serialVersionUID = 2249069246763182397L;

    /**
     * The maximum number of times to tryLock in a prescan before possibly blocking on acquire in preparation for a locked segment operation. 
     * On multiprocessors, using a bounded number of retries maintains cache acquired while locating nodes.
     * Evaluates to 64 when more than one processor is available, otherwise 1.
     */
    static final int MAX_SCAN_RETRIES =
        Runtime.getRuntime().availableProcessors() > 1 ? 64 : 1;

    /**
     * The entry array that stores this segment's data.
     * The per-segment table. Elements are accessed via entryAt/setEntryAt providing volatile semantics.
     */
    transient volatile HashEntry<K,V>[] table;

    /**
     * The number of elements in this segment.
     * 
     * The number of elements. 
     * Accessed only either within locks or among other volatile reads that maintain visibility.
     */
    transient int count;

    /**
     * The number of mutating operations (e.g. put, remove) performed on this segment.
     * 
     * The total number of mutative operations in this segment.
     * Even though this may overflows 32 bits, it provides sufficient accuracy for stability checks in CHM isEmpty() and size() methods.  
     * Accessed only either within locks or among other volatile reads that maintain visibility.
     */
    transient int modCount;

    /**
     * The resize threshold of this segment: threshold = capacity * loadFactor.
     */
    transient int threshold;

    /**
     * The load factor of this segment.
     */
    final float loadFactor;

    /**
     * Creates a segment with the given load factor, resize threshold and initial table.
     */
    Segment(float lf, int threshold, HashEntry<K,V>[] tab) {
        this.loadFactor = lf;
        this.threshold = threshold;
        this.table = tab; // table is volatile; it is written last
    }

    /**
     * Inserts or updates the mapping for key in this segment while holding the segment lock.
     *
     * @param key          the key
     * @param hash         the hash of the key
     * @param value        the value to store
     * @param onlyIfAbsent if true, an existing mapping is left untouched
     * @return the previous value for the key, or null if there was none
     */
    final V put(K key, int hash, V value, boolean onlyIfAbsent) {
		
		// Acquire the segment's exclusive lock. If tryLock() succeeds at once, node stays null.
		// Otherwise scanAndLockForPut() scans the bucket while retrying the lock and returns either
		// null or a speculatively created HashEntry for this key.
        HashEntry<K,V> node = tryLock() ? null : scanAndLockForPut(key, hash, value);
        V oldValue;
        try {
            HashEntry<K,V>[] tab = table;
            int index = (tab.length - 1) & hash;
            HashEntry<K,V> first = entryAt(tab, index);
            for (HashEntry<K,V> e = first;;) {
                if (e != null) {
                    K k;
                    if ((k = e.key) == key ||
                        (e.hash == hash && key.equals(k))) {
                        oldValue = e.value;
                        if (!onlyIfAbsent) {
                            e.value = value;
                            ++modCount;
                        }
                        break;
                    }
                    e = e.next;
                } else {
                    if (node != null)			// a node was pre-built by scanAndLockForPut(): link it in front of the current head
                        node.setNext(first);
                    else						// no pre-built node (lock was taken directly, or the prescan had found the key): create it now
                        node = new HashEntry<K,V>(hash, key, value, first); 
						
                    int c = count + 1;
                    if (c > threshold && tab.length < MAXIMUM_CAPACITY)	// new count exceeds the threshold: grow the table (rehash also inserts node)
                        rehash(node);
                    else
                        setEntryAt(tab, index, node);					// otherwise make node the new head of the bucket's chain
                    ++modCount;
                    count = c;
                    oldValue = null;
                    break;
                }
            }
        } finally {
			// Release the segment's exclusive lock.
            unlock();
        }
        return oldValue;
    }

    /**
     * Scans for a node containing given key while trying to acquire lock, creating and returning one if not found. 
     * Upon return, guarantees that lock is held. 
     *
     * While the lock is unavailable the bucket is walked; if the end of the chain is reached
     * without finding the key, a new HashEntry is created speculatively so that the caller
     * does not have to allocate it under the lock.
     *
     * @return a new node if key not found, else null
     */
    private HashEntry<K,V> scanAndLockForPut(K key, int hash, V value) {
        HashEntry<K,V> first = entryForHash(this, hash); // 'this' is the current Segment
        HashEntry<K,V> e = first;
        HashEntry<K,V> node = null;
        int retries = -1; // negative while locating node
		
        while (!tryLock()) {	// keep calling tryLock() until the segment lock is obtained
            HashEntry<K,V> f; // to recheck first below
            if (retries < 0) {
                if (e == null) {
                    if (node == null) // speculatively create node
                        node = new HashEntry<K,V>(hash, key, value, null);
                    retries = 0;
                }
                else if (key.equals(e.key))
                    retries = 0;
                else
                    e = e.next;
            }
			// Once retries exceeds MAX_SCAN_RETRIES (1 on a single processor, 64 otherwise),
			// fall back to the blocking lock().
            else if (++retries > MAX_SCAN_RETRIES) {
                lock();
                break;
            }
			// On every other retry, check whether the head of the bucket has changed; if so, scan again.
            else if ((retries & 1) == 0 && (f = entryForHash(this, hash)) != first) { 
                e = first = f; // re-traverse if entry changed
                retries = -1;
            }
        }
        return node;
    }
	

    /**
     * Doubles size of table and repacks entries, also adding the given node to new table
     *
     * @param node the new node to insert into the enlarged table
     */
    @SuppressWarnings("unchecked")
    private void rehash(HashEntry<K,V> node) {
        /*
         * Reclassify nodes in each list to new table.
         * Because we are using power-of-two expansion, the elements from each bin must either stay at same index, or move with a power of two offset. 
		 * We eliminate unnecessary node
         * creation by catching cases where old nodes can be
         * reused because their next fields won't change.
         * Statistically, at the default threshold, only about
         * one-sixth of them need cloning when a table
         * doubles. The nodes they replace will be garbage
         * collectable as soon as they are no longer referenced by
         * any reader thread that may be in the midst of
         * concurrently traversing table. Entry accesses use plain
         * array indexing because they are followed by volatile
         * table write.
         */
        HashEntry<K,V>[] oldTable = table;
        int oldCapacity = oldTable.length;
        int newCapacity = oldCapacity << 1;		// twice the previous capacity
        threshold = (int)(newCapacity * loadFactor);
        HashEntry<K,V>[] newTable = (HashEntry<K,V>[]) new HashEntry[newCapacity];
        int sizeMask = newCapacity - 1;
        for (int i = 0; i < oldCapacity ; i++) {
            HashEntry<K,V> e = oldTable[i];
            if (e != null) {
                HashEntry<K,V> next = e.next;
                int idx = e.hash & sizeMask;
                if (next == null)   //  the chain holds a single entry: move it as is
                    newTable[idx] = e;
                else { // Reuse consecutive sequence at same slot
                    // Find lastRun: the start of the trailing run of entries that all map to the same new index.
                    HashEntry<K,V> lastRun = e;
                    int lastIdx = idx;
                    for (HashEntry<K,V> last = next; last != null; last = last.next) {
                        int k = last.hash & sizeMask;
                        if (k != lastIdx) {
                            lastIdx = k;
                            lastRun = last;
                        }
                    }
                    newTable[lastIdx] = lastRun;
                    // Clone remaining nodes
                    for (HashEntry<K,V> p = e; p != lastRun; p = p.next) {
                        V v = p.value;
                        int h = p.hash;
                        int k = h & sizeMask;
                        HashEntry<K,V> n = newTable[k];
                        newTable[k] = new HashEntry<K,V>(h, p.key, v, n);
                    }
                }
            }
        }
        int nodeIndex = node.hash & sizeMask; // add the new node
        node.setNext(newTable[nodeIndex]);
        newTable[nodeIndex] = node;
        table = newTable; // volatile write of the new table
    }


    /**
     * Scans for a node containing the given key while trying to acquire lock for a remove or replace operation. 
	 * Upon return, guarantees that lock is held.  
	 * Note that we must lock even if the key is not found, to ensure sequential consistency of updates.
     *
     * @param key  the key being searched for
     * @param hash the hash of the key
     */
    private void scanAndLock(Object key, int hash) {
        // similar to but simpler than scanAndLockForPut
        HashEntry<K,V> first = entryForHash(this, hash);
        HashEntry<K,V> e = first;
        int retries = -1; // negative while still walking the chain
        while (!tryLock()) {
            HashEntry<K,V> f;
            if (retries < 0) {
                if (e == null || key.equals(e.key))
                    retries = 0;
                else
                    e = e.next;
            }
            else if (++retries > MAX_SCAN_RETRIES) {
                // too many failed attempts: block until the lock is available
                lock();
                break;
            }
            else if ((retries & 1) == 0 &&
                     (f = entryForHash(this, hash)) != first) {
                // head of the bucket changed: restart the scan from the new head
                e = first = f;
                retries = -1;
            }
        }
    }
}

/**
 * ConcurrentHashMap list entry.
 * 
 * The value field is volatile: when another thread replaces this entry's value,
 * readers in get() observe the latest value.
 */
static final class HashEntry<K,V> {
    final int hash;
    final K key;
    volatile V value;
    volatile HashEntry<K,V> next;

    HashEntry(int hash, K key, V value, HashEntry<K,V> next) {
        this.hash = hash;
        this.key = key;
        this.value = value;
        this.next = next;
    }

    /**
     * Sets the next field through UNSAFE.putOrderedObject (an ordered write)
     * rather than through a plain assignment to the volatile field.
     */
    final void setNext(HashEntry<K,V> n) {
        UNSAFE.putOrderedObject(this, nextOffset, n);
    }

    // Unsafe mechanics
    static final sun.misc.Unsafe UNSAFE;
    static final long nextOffset; // field offset of "next", used by setNext
    static {
        try {
            UNSAFE = sun.misc.Unsafe.getUnsafe();
            Class k = HashEntry.class;
            nextOffset = UNSAFE.objectFieldOffset
                (k.getDeclaredField("next"));
        } catch (Exception e) {
            throw new Error(e);
        }
    }
}


/**
 * Returns the number of key-value mappings in this map.  
 * If the map contains more than <tt>Integer.MAX_VALUE</tt> elements, returns Integer.MAX_VALUE.
 * 
 * Up to three passes are made without locking (the original author notes that JDK 6 used two).
 * As soon as a pass observes the same sum of modCounts as the previous pass, its result is returned.
 * If no such pass occurs, every segment is locked and the count is taken again.
 *
 * @return the number of key-value mappings in this map
 */
public int size() {
    // Try a few times to get accurate count. On failure due to continuous async changes in table, resort to locking.
    final Segment<K,V>[] segments = this.segments;
    int size;
    boolean overflow; // true if size overflows 32 bits
    long sum;         // sum of modCounts
    long last = 0L;   // sum observed by the previous pass
    int retries = -1; // pass counter; reads -1, 0, 1 on the three unlocked passes before being incremented
    try {
        for (;;) {
			
			// After three unlocked passes, lock every Segment in the array (creating missing ones).
            if (retries++ == RETRIES_BEFORE_LOCK) {  // RETRIES_BEFORE_LOCK=2
                for (int j = 0; j < segments.length; ++j)
                    ensureSegment(j).lock(); 
            }
			
            sum = 0L;
            size = 0;
            overflow = false;
			
            for (int j = 0; j < segments.length; ++j) {
                Segment<K,V> seg = segmentAt(segments, j);
                if (seg != null) {
                    sum += seg.modCount;			// the map-wide modCount is the sum of all segments' modCounts
                    int c = seg.count;
                    if (c < 0 || (size += c) < 0)	// the map size is the sum of all segments' counts (size += c); a negative result means overflow
                        overflow = true;
                }
            }
			
			// If this pass saw the same modCount sum as the previous one, the result is stable: stop.
            if (sum == last)  break;
            last = sum;
        }
    } finally {
		// If the locking pass was reached, unlock every Segment in the array.
        if (retries > RETRIES_BEFORE_LOCK) {	
            for (int j = 0; j < segments.length; ++j)
                segmentAt(segments, j).unlock();
        }
    }
    return overflow ? Integer.MAX_VALUE : size;
}

// ...
}