Sizzle 源码分析(三)：tokenize函数与Sizzle函数

时间 2019-12-17

标签 sizzle 源码分析 tokenize 函数繁體版

原文链接

前言

已经写了两篇文章了，个人思路就是按照整个函数的运行顺序进行分析，那么这次就轮到tokenize和Sizzle的主方法了。我还是会按照原来的方式，先把这两个函数用到的一些内部的属性或者方法放在最前面，然后再分析这两个函数。一些已经出现过的函数或者变量（包括之前的正则），我就不重复列举了，不清楚的同学可以去我之前的文章里查。Sizzle源码虽然看了很长时间了，但是进度真的好慢，好多东西需要打断点一点点地看，有一些兼容处理也不是很清楚，要换浏览器去测试。OK，牢骚发到这里，开始撸源码。

用到的全局变量与方法

这里的全局指的是IIFE中的作用域

变量

tokenCache = createCache() tokenize预编译之后的缓存，下一次如果遇到相同的选择器，直接返回结果
nonnativeSelectorCache = createCache() 原生的querySelectorAll无法返回结果的选择器字符串的缓存，下次如果再出现，直接走Sizzle的方法，不再走原生方法
Expr.preFilter 由于ATTR、PSEUDO、CHILD这几个正则的捕获组比较多，需要用它对匹配到的捕获组进行二次加工。
Expr.cacheLength 常量，数字，控制缓存的数量，值是50。

方法

createCache 这个方法用来创建缓存

/**
 * Create a bounded key/value cache.
 *
 * The returned function stores `value` as a property on itself under
 * `key + ' '` and returns it; callers read entries back through the same
 * property (e.g. `cache[selector + ' ']`). Once more than
 * `Expr.cacheLength` keys have been written, the oldest key is evicted.
 *
 * @returns {function(string, *): *} cache(key, value) - stores and returns value
 */
function createCache() {
    var keys = [];
    function cache(key, value) {
        // Record the key in its stored form (with the trailing space) so the
        // eviction below deletes the property that was actually written.
        // The trailing space keeps entries from colliding with the function's
        // own properties such as `name` or `length`.
        // Array.prototype.push returns the new length of the array.
        if (keys.push(key + ' ') > Expr.cacheLength) {
            // Array.prototype.shift returns the removed (oldest) key.
            delete cache[keys.shift()];
        }
        return (cache[key + ' '] = value);
    }
    return cache;
}
复制代码

push

// Keep a handle on the native push: `push` itself may be replaced by a
// wrapper object in the catch branch below.
pushNative = arr.push;
push = arr.push;
slice = arr.slice;
// Probe whether push.apply can take a NodeList and whether slice.call can
// turn one into a real array.
try {
  push.apply(
    (arr = slice.call(preferredDoc.childNodes)),
    preferredDoc.childNodes
  );
  // If the push silently did nothing, this index is undefined and the
  // property access throws, which sends us to the fallback.
  arr[preferredDoc.childNodes.length].nodeType;
} catch (e) {
  push = { apply: arr.length ?
    // slice produced a usable array: convert first, then push natively.
    function (target, els) {
      pushNative.apply(target, slice.call(els));
    } :
    // slice is unusable too: append element by element.
    function (target, els) {
      var j = target.length,
          i = 0;
      // Append from the current end of target; the loop stops on the first
      // falsy entry, i.e. one slot past the end of els.
      while ((target[j++] = els[i++])) {}
      // Drop that trailing falsy slot.
      target.length = j - 1;
    }
  };
}
复制代码

Expr.preFilter

Expr = {
    // ...
    // Post-processors for regex matches, keyed by token type. tokenize reads
    // this table as `Expr.preFilter[type]` and passes the raw match array.
    preFilter: {
        /**
         * Normalize an attribute-selector match.
         * Captures: $1 attribute name, $2 operator, $3-$5 the attribute value
         * (one capture per quoting style; only one of them is set).
         * @param {Array} match - match array from the ATTR regex
         * @returns {Array} [whole match, name, operator, value]
         */
        'ATTR': function(match) {
            // Unescape the attribute name.
            match[1] = match[1].replace(runescape, funescape);
            // Whichever capture held the value, move it to $3 (unescaped).
            match[3] = (match[3] || match[4] || match[5] || '').replace(runescape, funescape);
            // '~=' matches a whitespace-separated word, so pad the value.
            if (match[2] === '~=') {
                match[3] = ' ' + match[3] + ' ';
            }
            return match.slice(0, 4);
        },
        /**
         * Normalize a child-pseudo match (:nth-child(2n+1), :first-child, ...).
         * Captures: $1 type (only|first|last|nth|nth-last), $2 what
         * (child|of-type), $3 whole argument, $4 xn-component of xn+y,
         * $5 sign of xn, $6 x, $7 sign of y, $8 y.
         * On return $4 holds the numeric x and $5 the numeric y.
         * @param {Array} match - match array from the CHILD regex
         * @returns {Array} the same array, mutated
         */
        'CHILD': function(match) {
            match[1] = match[1].toLowerCase();
            if (match[1].slice(0, 3) === 'nth') {
                // nth-* requires an argument.
                if (!match[3]) {
                    Sizzle.error(match[0]);
                }
                // x of xn+y; even/odd both mean x = 2.
                match[4] = +(match[4] ?
                    match[5] + (match[6] || 1) :
                    2 * (match[3] === 'even' || match[3] === 'odd'));
                // y of xn+y; odd means y = 1.
                match[5] = +((match[7] + match[8]) || match[3] === 'odd');
            // Every other child pseudo forbids an argument.
            } else if (match[3]) {
                Sizzle.error(match[0]);
            }

            return match;
        },
        /**
         * Normalize a pseudo-class match.
         * Captures: $1 pseudo name, $2 whole parenthesized content, $3 quoted
         * value, $4 and $5 the content inside the quotes, $6 unquoted value.
         * @param {Array} match - match array from the PSEUDO regex
         * @returns {Array|null} [whole match, name, argument], or null when
         *     the text is really a CHILD pseudo
         */
        'PSEUDO': function(match) {
            var excess,
                // $6 empty while $2 is set, e.g. :not(:nth-child(2))
                unquoted = !match[6] && match[2];

            // Leave child pseudos to the CHILD handler.
            if (matchExpr['CHILD'].test(match[0])) {
                return null;
            }

            // Quoted argument: take the content inside the quotes as-is.
            if (match[3]) {
                match[2] = match[4] || match[5] || '';
            // Unquoted argument that itself contains a pseudo: trim whatever
            // was captured past the matching closing parenthesis.
            } else if (unquoted && rpseudo.test(unquoted) &&
                // Recursive tokenize in parseOnly mode returns the length of
                // the part it could not parse.
                (excess = tokenize(unquoted, true)) &&
                // Advance to the next ')' and convert to a negative index.
                (excess = unquoted.indexOf(')', unquoted.length - excess) - unquoted.length)) {
                match[0] = match[0].slice(0, excess);
                match[2] = unquoted.slice(0, excess);
            }

            // Only the whole match, the name and the argument are returned.
            return match.slice(0, 3);
        }
    }
    // ...
};
复制代码

testContext 检测一个节点作为Sizzle上下文的有效性

/**
 * Check whether a node can serve as a Sizzle context.
 *
 * @param {*} context - candidate node
 * @returns {*} the context itself when it exposes getElementsByTagName,
 *     false when it is truthy but lacks that member, or the falsy input unchanged
 */
function testContext( context ) {
	if ( !context ) {
		return context;
	}
	return typeof context.getElementsByTagName === "undefined" ? false : context;
}
复制代码

tokenize

预编译

Sizzle在1.7之后加入了预编译的思想，其实大家都这么说，我就也跟着这么说了。我理解的预编译其实就是将所输入的东西，通过某种规则进行转换，转换成另一种格式。这也有另一种说法，也就是AST，我想这个词大家应该更清楚一点，不清楚AST是什么的同学可以去看一下这篇文章。

Sizzle中的预编译就是tokenize函数，它将各种选择器按照一定的规则转换成了一个对象，举个简单的例子：

var selector = 'a#link > p, .expando';
// tokenize(selector) produces the structure below: one inner array per
// comma-separated group, each holding that group's tokens in order.
// `value` is the matched text and `matches` holds the regex captures.
// Combinator tokens carry no `matches`, and their `value` is the full
// matched text including the surrounding whitespace.
var tSelector = [
    [
        {
            type: 'TAG',
            value: 'a',
            matches: ['a']
        },
        {
            type: 'ID',
            value: '#link',
            matches: ['link']
        },
        {
            type: '>',
            value: ' > '
        },
        {
            type: 'TAG',
            value: 'p',
            matches: ['p']
        }
    ],
    [
        {
            type: 'CLASS',
            value: '.expando',
            matches: ['expando']
        }
    ]
];
复制代码

函数源码

/**
 * Split a selector string into groups of tokens.
 *
 * @param {string} selector - selector text to tokenize
 * @param {boolean} [parseOnly] - when truthy, nothing is cached and the
 *     function returns the length of the unparsed remainder (0 if the whole
 *     string was consumed or a cached result exists)
 * @returns {Array|number} a copy of the token groups (one array per
 *     comma-separated group), or the remainder length in parseOnly mode;
 *     for an unparsable selector outside parseOnly mode, whatever
 *     Sizzle.error returns
 */
tokenize = Sizzle.tokenize = function(selector, parseOnly) {
    var matched, match, tokens, type,
        soFar, groups, preFilters,
        cached = tokenCache[selector + ' '];

    // Cache hit: only fully parsed selectors are cached, so parseOnly has
    // no remainder to report; otherwise hand back a shallow copy.
    if (cached) {
        return parseOnly ? 0 : cached.slice(0);
    }

    soFar = selector;
    groups = [];
    preFilters = Expr.preFilter;

    while (soFar) {
        // Start a new group on the first pass or after a comma.
        if (!matched || (match = rcomma.exec(soFar))) {
            if (match) {
                // Strip the comma; a trailing comma is left in place so the
                // selector is reported as invalid below.
                soFar = soFar.slice(match[0].length) || soFar;
            }
            groups.push((tokens = []));
        }

        matched = false;

        // Combinators
        if ((match = rcombinators.exec(soFar))) {
            matched = match.shift();
            tokens.push({
                value: matched,
                // After shift(), match[0] is the captured combinator;
                // whitespace runs are normalized to a single space.
                type: match[0].replace(rtrim, ' ')
            });
            // Drop the consumed text.
            soFar = soFar.slice(matched.length);
        }

        // Filters: try every type registered in Expr.filter
        // (TAG, CLASS, ID, ATTR, CHILD, PSEUDO).
        for (type in Expr.filter) {
            if ((match = matchExpr[type].exec(soFar)) && (!preFilters[type] ||
                (match = preFilters[type](match)))) {
                matched = match.shift();
                tokens.push({
                    value: matched,
                    type: type,
                    matches: match
                });
                soFar = soFar.slice(matched.length);
            }
        }

        // Nothing matched in this pass: stop and report the remainder.
        if (!matched) {
            break;
        }
    }

    // parseOnly: report the unparsed length. Otherwise any leftover text
    // means the selector is invalid; a clean parse is cached and a copy
    // is returned.
    return parseOnly ?
        soFar.length :
        soFar ?
            Sizzle.error(selector) :
            tokenCache(selector, groups).slice(0);
};
复制代码

这里会出现一种情况，比如选择字符串是':not(:nth-child(2))'这样的，进入了tokenize，这个时候匹配到PSEUDO，那么就要走preFilter['PSEUDO']方法，这个字符串被匹配的时候是$2有值而$6没值的情况，所以会再次走tokenize函数(parseOnly = true)，形成递归。

Sizzle函数

Sizzle函数就是我们调用选择器时走的第一个函数，前两篇文章说的方法都是在引入Sizzle时运行的。该方法会判断字符串是不是简单的class tag id，比如p，#id，.container；或者是否可以直接使用querySelectorAll。如果都不可以的话，就进入select方法。

函数源码

/**
 * Main entry point: find the elements matching `selector`.
 *
 * When no seed is given and the document is HTML, it tries in order:
 * the getElementById / getElementsByTagName / getElementsByClassName
 * shortcuts for simple selectors, then native querySelectorAll. Anything
 * else falls through to select().
 *
 * @param {string} selector - CSS selector
 * @param {Object} [context] - element, document or document fragment to
 *     search in; `document` is used when omitted
 * @param {Array} [results] - array-like to append matches to
 * @param {Array} [seed] - candidate elements; when given, every shortcut is
 *     skipped and select() does the work
 * @returns {Array} `results` (or a new array) with the matches appended
 */
function Sizzle(selector, context, results, seed) {
    var m, i, elem, nid, match, groups, newSelector,
        newContext = context && context.ownerDocument,
        // Without a context, behave as if it were a document (nodeType 9).
        nodeType = context ? context.nodeType : 9;

    results = results || [];

    // Bail out early for an empty or non-string selector, or a context that
    // is not an element (1), document (9) or document fragment (11).
    if (typeof selector !== 'string' || !selector ||
        nodeType !== 1 && nodeType !== 9 && nodeType !== 11) {
        return results;
    }

    if (!seed) {
        setDocument(context);
        context = context || document;

        if (documentIsHtml) {
            // Shortcuts for simple #id, tag and .class selectors.
            // Document fragments lack the get*By* methods, so skip them.
            if (nodeType !== 11 && (match = rquickExpr.exec(selector))) {
                // ID selector
                if ((m = match[1])) {
                    // Document context
                    if (nodeType === 9) {
                        if ((elem = context.getElementById(m))) {
                            // Re-check the id before accepting the element;
                            // on a mismatch fall through to the slower paths.
                            if (elem.id === m) {
                                results.push(elem);
                                return results;
                            }
                        } else {
                            return results;
                        }
                    // Element context: elements have no getElementById, so
                    // look the id up on the owner document and then verify
                    // that the hit lives inside the context.
                    } else {
                        if (newContext && (elem = newContext.getElementById(m)) &&
                            contains(context, elem) &&
                            elem.id === m) {
                            results.push(elem);
                            return results;
                        }
                    }
                // Tag selector. rquickExpr never matches '*', so the result
                // needs no filtering.
                } else if (match[2]) {
                    push.apply(results, context.getElementsByTagName(selector));
                    return results;
                // Class selector
                } else if ((m = match[3]) && support.getElementsByClassName &&
                    context.getElementsByClassName) {
                    push.apply(results, context.getElementsByClassName(m));
                    return results;
                }
            }

            // Native querySelectorAll, unless this selector already failed
            // natively once, is known to be buggy, or the context is an
            // <object> element.
            if (support.qsa &&
                !nonnativeSelectorCache[selector + ' '] &&
                (!rbuggyQSA || !rbuggyQSA.test(selector)) &&
                (nodeType !== 1 || context.nodeName.toLowerCase() !== 'object')) {
                newSelector = selector;
                newContext = context;

                // element.querySelectorAll matches the selector against the
                // whole document and only then filters by descendants, so
                // selectors with descendant or other combinators can match
                // through ancestors outside the context. Work around it by
                // prefixing every group with the context's id (or :scope).
                if (nodeType === 1 &&
                    (rdescend.test(selector) || rcombinators.test(selector))) {
                    // Sibling combinators have to be evaluated from the
                    // parent, when it is a valid context.
                    newContext = rsibling.test(selector) && testContext(context.parentNode) ||
                        context;

                    // ':scope' refers to the element querySelectorAll is
                    // called on, so it only stands in for the id when the
                    // browser supports it and the query still runs on
                    // `context`. If the query moved to the parent, or
                    // :scope is unsupported, the id is required.
                    if (newContext !== context || !support.scope) {
                        if ((nid = context.getAttribute('id'))) {
                            // Reuse the existing id, escaped for selector use.
                            nid = nid.replace(rcssescape, fcssescape);
                        } else {
                            // Temporary id; removed in the finally block.
                            context.setAttribute('id', (nid = expando));
                        }
                    }

                    // Prefix every comma-separated group.
                    groups = tokenize(selector);
                    i = groups.length;
                    while (i--) {
                        groups[i] = (nid ? '#' + nid : ':scope') + ' ' +
                            toSelector(groups[i]);
                    }
                    newSelector = groups.join(',');
                }

                try {
                    push.apply(results,
                        newContext.querySelectorAll(newSelector)
                    );
                    return results;
                } catch (e) {
                    // Remember selectors the native engine cannot handle so
                    // they skip this path next time.
                    nonnativeSelectorCache(selector, true);
                } finally {
                    // Remove the temporary id if we added one.
                    if (nid === expando) {
                        context.removeAttribute('id');
                    }
                }
            }
        }
    }

    // Everything else goes through select().
    return select(selector.replace(rtrim, '$1'), context, results, seed);
}
复制代码

总结

到此Sizzle的前置函数应该就都看完了，之后应该就是如何选择元素了。下一章应该会写select方法吧，就这样。