Sizzle 源码分析(四)：Sizzle是如何选择元素的

前言

这篇文章我会将Sizzle整个筛选元素的流程全部讲解一遍：从它是如何找出种子集seed，到如何将token转换为筛选规则，再到如何通过规则进行筛选。这里我会通过一个例子来进行说明。由token转换为筛选规则那里非常绕，尤其是Sizzle还有缓存的逻辑夹杂在其中，而且最复杂的实际上是缓存。我个人的描述可能并不能让人听得很明白，所以有兴趣的人，可以结合我的说明去看一下源码。我到现在也只看懂了百分之八十多，缓存的相关代码我并没有理解得特别透彻，所以这里我只给大家分析一下我自己所理解到的、整个选择元素的主流程。

例子: Sizzle('.container input[type=text]')

Sizzle的选择原理

Sizzle并不是从左向右依次进行选择的，并不是先选择出'.container'然后再去找其下的input。这样虽然看似合理，但实际上是很消耗时间的，因为根据DOM树的结构，越往下分支越多。所以Sizzle会先在选择器的末尾找到一个种子集(也就是seed)，然后通过种子集一层一层往上判断是否符合条件。

那么如何选择seed呢？这就是select函数干的事情了。

Sizzle.select

这个函数，主要就做了两件事。

将选择器字符串 tokenize
找出seed

一个选择字符串可能会存在多个关系选择器，比如body p>input:disabled。如果使用这些关系选择器来作为分割，我们可以得到几组选择器，seed就是根据最后一组选择器中的元素选择器、ID选择器或者class选择器找到的元素集合；如果最后一组选择器没有这三种选择器，那么就没有seed。

以上述例子为例，seed就是整个document中的全部input。

如果在setDocument的时候，support.getElementsByClassName = false的话，那么`seed`不包括class选择器。

例子的tokenize

函数

/**
 * Low-level selection routine: finds a seed set from the right-most compound
 * selector, then runs the compiled matcher over that seed.
 *
 * @param {String|Function} selector A selector string, or a precompiled
 *     function whose `selector` property holds the original string.
 * @param {Element} context Node the search starts from.
 * @param {Array} [results] Collection that matches are appended to.
 * @param {Array} [seed] Candidate elements to filter instead of searching.
 * @returns {Array} `results`, with every match appended.
 */
select = Sizzle.select = function(selector, context, results, seed) {
    var i, tokens, token, type, find,
        compiled = typeof selector === 'function' && selector,
        // Tokenizing is skipped when the caller already supplies a seed.
        match = !seed && tokenize( (selector = compiled.selector || selector) );
    results = results || [];

    // Only a single selector group (no comma) can take the shortcuts below.
    if (match.length === 1) {
        // Work on a copy, because tokens are shifted/spliced further down.
        tokens = match[0] = match[0].slice(0);

        // A leading ID token followed by a combinator lets us narrow the
        // context to that element. `token` has to be assigned in this
        // condition, because it is read immediately below.
        if (tokens.length > 2 && (token = tokens[0]).type === 'ID' &&
                context.nodeType === 9 && documentIsHTML &&
                Expr.relative[tokens[1].type]) {
            context = (Expr.find["ID"](token.matches[0]
                .replace(runescape, funescape), context) || [])[0];
            if (!context) {
                return results;
            } else if (compiled) {
                // NOTE(review): upstream Sizzle steps up one level here because
                // a precompiled matcher still verifies ancestry itself.
                context = context.parentNode;
            }
            // Drop the consumed ID token from the token list and the string.
            selector = selector.slice(tokens.shift().value.length);
        }

        // When the selector matches `needsContext`, no seed is looked up.
        i = matchExpr['needsContext'].test(selector) ? 0 : tokens.length;

        // Look for a seed, walking the tokens from right to left.
        while (i--) {
            token = tokens[i];
            // Stop at the first combinator: the seed must come from the
            // right-most compound selector.
            if (Expr.relative[(type = token.type)]) {
                break;
            }

            // Expr.find has at most three entries (TAG, CLASS, ID); they are
            // installed by setDocument.
            if ((find = Expr.find[type])) {
                if ( (seed = find(
                    token.matches[0].replace(runescape, funescape),
                    rsibling.test(tokens[0].type) && testContext(context.parentNode) ||
                        context
                ) ) ) {
                    // The seed token has been consumed, so rebuild the selector
                    // from the remaining tokens. For the article's example this
                    // yields '.container [type=text]'.
                    tokens.splice(i, 1);
                    selector = seed.length && toSelector(tokens);
                    // Empty seed or nothing left to filter: we are done.
                    if (!selector) {
                        push.apply(results, seed);
                        return results;
                    }
                    break;
                }
            }
        }
    }

    // Use the precompiled matcher if one was passed in, otherwise compile one.
    // `match` is handed over so `compile` does not have to tokenize again.
    (compiled || compile(selector, match)) (
        seed,
        context,
        !documentIsHTML,
        results,
        !context || rsibling.test(selector) && testContext(context.parentNode) || context
    );

    // The matcher's own return value is ignored; matches are collected in the
    // `results` array that was passed in as an argument.
    return results;
};
复制代码

compile

compile其实并不是生成规则的函数，它算是一个总入口，主要的功能是先从缓存中查找是否已经有对应的规则，没有的话就生成规则并将其缓存，最后返回一个superMatcher函数，superMatcher函数是做筛选的函数。

函数

/**
 * Entry point for turning a selector into a matcher function. Looks the
 * selector up in `compilerCache` first and only builds (and caches) a new
 * matcher when nothing is cached.
 *
 * @param {String} selector The selector string; also used as the cache key.
 * @param {Array} [match] Token lists for `selector`; produced with `tokenize`
 *     when omitted.
 * @returns {Function} The cached or freshly built matcher, which `select`
 *     invokes as fn(seed, context, xml, results, outermost).
 */
compile = Sizzle.compile = function(selector, match) {
    var i,
        setMatchers = [],
        elementMatchers = [],
        cached = compilerCache[selector + ' '];

    if (!cached) {
        if (!match) {
            match = tokenize(selector);
        }
        i = match.length;
        // `match` is a two-dimensional array with one token list per selector
        // group, so a selector without commas runs this loop exactly once.
        while (i--) {
            cached = matcherFromTokens(match[i]);
            // Matchers tagged with `expando` are collected as set matchers;
            // all others are per-element matchers.
            if (cached[expando]) {
                setMatchers.push(cached);
            } else {
                elementMatchers.push(cached);
            }
        }

        // Store the combined matcher in the cache.
        // NOTE(review): assumes compilerCache(key, value) returns the stored
        // value, as the cache helper in upstream Sizzle does — confirm.
        cached = compilerCache(
            selector,
            // This call returns the superMatcher function.
            matcherFromGroupMatchers(elementMatchers, setMatchers)
        );

        // `select` reads `compiled.selector` when it is given a precompiled
        // function, so keep the selector string on the matcher.
        cached.selector = selector;
    }
    return cached;
};
复制代码

matcherFromTokens

matcherFromTokens会通过token生成规则，流程是这样的。它会先创建一个matchers数组，并创建一个baseMatcher函数，这个baseMatcher一般情况都返回true。之后遍历整个token，只要没有遇到关系操作符，就将对应的filter函数推入matchers中；当遇到了关系操作符，会先将已经在matchers中的全部筛选函数，用elementMatcher函数包裹在一起，再使用addCombinator作为纽带返回一个函数，取代之前的matchers。如此循环，直到将整个token全部遍历结束。addCombinator主要的功能就是根据关系操作符来查找兄弟元素和父级元素。

我会把matcherFromTokens，elementMatcher，addCombinator这三个函数都放在下面。

函数

/**
 * Builds a single matcher function from the token list of one selector group.
 *
 * Filter functions are collected in `matchers`; every combinator token folds
 * the matchers gathered so far into one function via elementMatcher and
 * addCombinator, which then replaces them.
 *
 * @param {Array} tokens Token objects; `type`, `matches` and `value` are read.
 * @returns {Function} A matcher (elem, context, xml), or the result of
 *     setMatcher when a filter tagged with `expando` is encountered.
 */
function matcherFromTokens(tokens) {
    var checkContext, matcher, j,
        len = tokens.length,
        // Truthy when the selector starts with a combinator.
        leadingRelative = Expr.relative[ tokens[0].type ],
        // Without a leading combinator the descendant relation (' ') is used.
        implicitRelative = leadingRelative || Expr.relative[' '],
        i = leadingRelative ? 1 : 0,

        // The two helpers below back the base matcher: they test whether the
        // context (a single node, or a collection) is reachable from `elem`.
        matchContext = addCombinator(function(elem) {
            return elem === checkContext;
        }, implicitRelative, true),
        matchAnyContext = addCombinator(function(elem) {
            return indexOf(checkContext, elem) > -1;
        }, implicitRelative, true),

        // The rule list starts out with the base matcher. When there is no
        // leading combinator and either `xml` is set or `context` differs from
        // outermostContext, it passes without walking up to the context.
        matchers = [ function(elem, context, xml) {
            var ret = (!leadingRelative && (xml || context !== outermostContext)) || (
                ( checkContext = context ).nodeType ?
                    matchContext(elem, context, xml) :
                    matchAnyContext(elem, context, xml) );
            // Release the reference so the closure does not keep the node alive.
            checkContext = null;
            return ret;
        } ];

    // Walk the tokens from left to right.
    for (; i < len; i++) {
        if ((matcher = Expr.relative[tokens[i].type])) {
            // Combinator: bundle the rules collected so far with elementMatcher,
            // link them through addCombinator, and let the resulting function
            // replace all previous matchers.
            matchers = [addCombinator(elementMatcher(matchers), matcher)];
        } else {
            // Any other token type is looked up in Expr.filter.
            matcher = Expr.filter[tokens[i].type].apply(null, tokens[i].matches);

            // A filter tagged with `expando` cannot simply be pushed onto the
            // list; the selector is split around it and handed to setMatcher.
            // NOTE(review): upstream Sizzle describes this as the positional
            // matcher case — confirm against the Sizzle source.
            if (matcher[expando]) {
                // Find the next combinator (if any) after this token.
                j = ++i;
                for (; j < len; j++) {
                    if (Expr.relative[tokens[j].type]) {
                        break;
                    }
                }
                return setMatcher(
                    i > 1 && elementMatcher(matchers),
                    i > 1 && toSelector(
                    // If the preceding token was ' ', append '*' in its place.
                    tokens
                        .slice(0, i - 1)
                        .concat({value: tokens[i - 2].type === ' ' ? '*' : ''})
                    ).replace(rtrim, '$1'),
                    matcher,
                    i < j && matcherFromTokens(tokens.slice(i, j)),
                    j < len && matcherFromTokens((tokens = tokens.slice(j))),
                    j < len && toSelector(tokens)
                );
            }
            matchers.push(matcher);
        }
    }
    // Wrap whatever is left once more so a single function is returned.
    return elementMatcher(matchers);
}

/**
 * Wraps `matcher` so that it is applied to the nodes reached from a candidate
 * element by repeatedly following `combinator.dir`. This is the link that
 * makes matching walk from the seed towards its ancestors/siblings.
 *
 * @param {Function} matcher Called as matcher(elem, context, xml) on the
 *     related nodes.
 * @param {Object} combinator Descriptor; `dir`, `next` and `first` are read.
 * @param {Boolean} [base] When truthy and the walk follows `parentNode`,
 *     non-element nodes are tested as well.
 * @returns {Function} A matcher with the signature (elem, context, xml).
 */
function addCombinator(matcher, combinator, base) {
    var dir = combinator.dir,
        skip = combinator.next,
        key = skip || dir,
        checkNonElements = base && key === 'parentNode',
        // Identifier of this combinator instance, used in the cache entries.
        doneName = done++;

    // `first` marks combinators that only look at the closest related node
    // (in Sizzle these are `>` and `+`).
    return combinator.first ?

    // Test only the closest ancestor / preceding node.
    function(elem, context, xml) {
        // `elem` is reassigned on every step, so the wrapped matcher receives
        // the related node rather than the node we started from.
        while ((elem = elem[dir])) {
            if (elem.nodeType === 1 || checkNonElements) {
                return matcher(elem, context, xml);
            }
        }
        return false;
    } :

    // Test every ancestor / preceding node until one matches.
    function(elem, context, xml) {
        var oldCache, uniqueCache, outerCache,
            newCache = [dirruns, doneName];

        if (xml) {
            // No expando data is stored in this branch, so every node is
            // tested directly.
            while ((elem = elem[dir])) {
                if (elem.nodeType === 1 || checkNonElements) {
                    if (matcher(elem, context, xml)) {
                        return true;
                    }
                }
            }
        } else {
            while ((elem = elem[dir])) {
                if (elem.nodeType === 1 || checkNonElements) {
                    // Cache layout: elem[expando][elem.uniqueID][key] holds
                    // [dirruns, doneName, result].
                    outerCache = elem[expando] || (elem[expando] = {});
                    uniqueCache = outerCache[elem.uniqueID] ||
                        (outerCache[elem.uniqueID] = {} );
                    if (skip && skip === elem.nodeName.toLowerCase()) {
                        // The node name equals `combinator.next`: step over
                        // this node without testing or caching it.
                        elem = elem[dir] || elem;
                    } else if ( (oldCache = uniqueCache[key]) &&
                        oldCache[0] === dirruns && oldCache[1] === doneName) {
                        // This node was already visited for the same run and
                        // the same combinator: reuse the stored result.
                        return (newCache[2] = oldCache[2]);
                    } else {
                        // Cache miss. Every node visited during this walk gets
                        // the same `newCache` array, so the final result
                        // becomes visible on all of them.
                        uniqueCache[key] = newCache;
                        // A match ends the walk; a failure keeps it going.
                        if ( (newCache[2] = matcher(elem, context, xml)) ) {
                            return true;
                        }
                    }
                }
            }
        }
        return false;
    };
}

// 这个方法就是把一堆matacher 揉成一个
/**
 * Combines a list of matchers into a single matcher that passes only when
 * every matcher in the list passes.
 *
 * @param {Function[]} matchers Functions called as fn(elem, context, xml).
 * @returns {Function} `matchers[0]` itself when the list holds at most one
 *     entry, otherwise a function that evaluates all entries.
 */
function elementMatcher(matchers) {
    return matchers.length > 1 ?
        function(elem, context, xml) {
            var i = matchers.length;
            // Counting down means the rules are checked from last to first,
            // peeling the selector like the layers of an onion.
            while (i--) {
                // A single failing rule rejects the element.
                if (!matchers[i](elem, context, xml)) {
                    return false;
                }
            }
            // Success may only be reported after every rule has been checked.
            return true;
        } :
        matchers[0];
}
复制代码

流程图

matcherFromGroupMatchers

在跑完了matcherFromTokens之后，我们再回过头来继续看compile。当compile的全部matcherFromTokens都跑完以后，就只剩下做缓存和返回matcherFromGroupMatchers的结果了。matcherFromGroupMatchers函数返回superMatcher函数，superMatcher函数是用来遍历seed的，它通过之前matcherFromTokens运行得到的规则，对seed进行筛选。

函数

/**
 * Builds the superMatcher: the function that iterates over the seed (or over
 * every element when there is no seed) and filters it with the matchers
 * produced by matcherFromTokens.
 *
 * @param {Function[]} elementMatchers Per-element matchers, one per selector
 *     group; each is called as fn(elem, context, xml).
 * @param {Function[]} setMatchers Set matchers, called as
 *     fn(unmatched, setMatched, context, xml).
 * @returns {Function} superMatcher(seed, context, xml, results, outermost);
 *     passed through markFunction when set matchers are present.
 */
function matcherFromGroupMatchers(elementMatchers, setMatchers) {
    var bySet = setMatchers.length > 0,
        byElement = elementMatchers.length > 0,
        superMatcher = function(seed, context, xml, results, outermost) {
            var elem, j, matcher,
                matchedCount = 0,
                // Deliberately a string: see the `i !== matchedCount` check.
                i = '0',
                unmatched = seed && [],
                setMatched = [],
                contextBackup = outermostContext,
                // Without a seed, every element found for TAG '*' under
                // `outermost` becomes a candidate (only when there are
                // element matchers).
                elems = seed || byElement && Expr.find["TAG"]('*', outermost),
                // Run identifier compared by the combinator cache.
                dirrunsUnique = (dirruns += contextBackup == null ? 1 : Math.random() || 0.1),
                len = elems.length;

            if (outermost) {
                // outermostContext is read by the base matcher created in
                // matcherFromTokens. The loose `==` is kept as in upstream.
                outermostContext = context == document || context || outermost;
            }

            // Elements that pass an element matcher go straight to `results`.
            for (; i !== len && (elem = elems[i]) != null; i++) {
                if (byElement && elem) {
                    j = 0;
                    if (!context && elem.ownerDocument != document) {
                        setDocument(elem);
                        xml = !documentIsHTML;
                    }
                    // There are several element matchers when the selector has
                    // several groups; satisfying one group is enough for the
                    // element to be accepted. The article's example has one.
                    while ( (matcher = elementMatchers[j++]) ) {
                        if (matcher(elem, context || document, xml)) {
                            results.push(elem);
                            break;
                        }
                    }
                    // Restore the run identifier used by the combinator cache.
                    if (outermost) {
                        dirruns = dirrunsUnique;
                    }
                }
                // Keep track of the elements no element matcher accepted.
                if (bySet) {
                    // `matcher` is falsy here only if the loop above ran out of
                    // matchers, i.e. nothing accepted `elem`.
                    if ((elem = !matcher && elem)) {
                        matchedCount--;
                    }
                    // One slot per visited element, matched or not.
                    if (seed) {
                        unmatched.push(elem);
                    }
                }
            }
            // `i` now holds the number of visited elements, so after this
            // addition matchedCount is the number of matched elements.
            matchedCount += i;

            // If the loop above never ran, `i` is still the string '0' and
            // `matchedCount += i` produced the string '00', which differs from
            // `i`, so the set matchers still run. Otherwise both are numbers
            // and this block is skipped when every element was matched.
            if (bySet && i !== matchedCount) {
                j = 0;
                while ((matcher = setMatchers[j++])) {
                    matcher(unmatched, setMatched, context, xml);
                }
                if (seed) {
                    // Move the element matches back into their original slots
                    // wherever neither array holds an entry.
                    if (matchedCount > 0) {
                        while (i--) {
                            if ( !(unmatched[i] || setMatched[i]) ) {
                                setMatched[i] = pop.call(results);
                            }
                        }
                    }
                    // NOTE(review): presumably drops the empty placeholder
                    // slots — confirm against condense's implementation.
                    setMatched = condense(setMatched);
                }
                push.apply(results, setMatched);

                // Sort only when there is no seed and more than one matcher
                // contributed to the results.
                if (outermost && !seed && setMatched.length > 0 &&
                    (matchedCount + setMatchers.length) > 1) {
                        Sizzle.uniqueSort(results);
                }
            }
            // Undo any changes nested matchers made to the shared state.
            if (outermost) {
                dirruns = dirrunsUnique;
                outermostContext = contextBackup;
            }
            // `unmatched` is returned here, but the final matches live in
            // `results`, which is what `select` hands back to its caller.
            return unmatched;
        };
    return bySet ?
        // markFunction tags the function it is given with `expando`.
        markFunction(superMatcher) :
        superMatcher;
}

// Alias kept for callers that use the older spelling of the function name.
var matcherFromGroupsMatchers = matcherFromGroupMatchers;
复制代码

总结

看Sizzle大概看了2个月，在2020年之前把大概流程全部都看通了，算是过年了。在最后这段查找中，Sizzle用了大量的闭包、大量的柯里化函数，为的就是保证全部的filter函数的入参都为elem, context, xml。这是我看的第一个库，看完了真的收获很多。最开始是因为看司徒大大的书，一时兴起想把Sizzle看完，期间也觉得太难了想放弃，但是最后磕磕绊绊终于是看下来了。这次看完了，等把《JavaScript框架设计》都看完，再把jquery源码撸了，再撸vue，然后再过年。哈哈哈哈哈。