Ruby's Louvre

每天学习一点点算法

导航

选择器切割正则的进化

// Alternatives, in order: a tag name; the marker character of an ID / class / pseudo-class /
// attribute selector followed by its body characters; a child, adjacent-sibling,
// general-sibling, grouping or universal symbol with optional surrounding whitespace;
// a run of whitespace.
var reg = /[\w\u00a1-\uFFFF][\w\u00a1-\uFFFF-]*|[#.:\[][\w\(\)\]]+|\s*[>+~,*]\s*|\s+/g


        
// Keep whatever is inside parentheses from being split apart: a parenthesised group
// is now consumed as one unit of the selector body.
var reg = /[\w\u00a1-\uFFFF][\w\u00a1-\uFFFF-]*|[#.:\[](?:[\w\u00a1-\uFFFF-]|\([^\)]*\)|\])+|(?:\s*)[>+~,*](?:\s*)|\s+/g
        
// Make sure an attribute selector stays a single token: "[...]" gets its own alternative
// that runs from the opening bracket to the first closing bracket.
var reg = /[\w\u00a1-\uFFFF][\w\u00a1-\uFFFF-]*|[#.:](?:[\w\u00a1-\uFFFF-]|\S*\([^\)]*\))+|\[[^\]]*\]|(?:\s*)[>+~,*](?:\s*)|\s+/g
        
// Narrow what counts as a descendant combinator: a whitespace character only matches
// when it is immediately followed by a character that can start another selector.
var reg = /[\w\u00a1-\uFFFF][\w\u00a1-\uFFFF-]*|[#.:](?:[\w\u00a1-\uFFFF-]|\S+\([^\)]*\))+|\[[^\]]*\]|(?:\s*)[>+~,*](?:\s*)|\s(?=[\w\u00a1-\uFFFF*#.[:])/g
        

解决左边没用的空白字符串

        // Token pattern, alternatives in order: leading whitespace; a tag name; an
        // #id / .class / :pseudo token (a parenthesised argument is kept inside the token);
        // an [attribute] token; a combinator symbol with optional surrounding whitespace;
        // whitespace that is followed by the start of another selector.
        var reg_split = /^\s+|[\w\u00a1-\uFFFF][\w\u00a1-\uFFFF-]*|[#.:](?:[\w\u00a1-\uFFFF-]|\S+\([^\)]*\))+|\[[^\]]*\]|(?:\s*)[>+~,*](?:\s*)|\s+(?=[\w\u00a1-\uFFFF*#.[:])/g;

        /**
         * Splits a selector string into tokens using reg_split.
         *
         * Whitespace-only tokens become the descendant combinator " ", except when the
         * whitespace is the very first token matched (useless leading whitespace).
         * Combinator symbols (> + ~ , *) are emitted without their surrounding whitespace.
         * Text that reg_split does not match (for example trailing whitespace) is skipped.
         *
         * @param {string} selectors - the selector text to split
         * @returns {string[]} the tokens in source order
         */
        function splitSelectorTokens(selectors) {
          var tokens = [];
          var seen = 0; // number of tokens matched so far, of any kind
          // replace() is used purely as an iterator over the matches; its result is discarded.
          selectors.replace(reg_split, function (token) {
            var combinator = /^\s*([>+~,*])\s*$/.exec(token);
            if (combinator) {
              tokens.push(combinator[1]);
            } else if (/^\s+$/.test(token)) {
              // Count every token, not only whitespace ones; otherwise the first
              // whitespace run is dropped even when it separates two selectors ("p span").
              if (seen > 0) {
                tokens.push(" ");
              }
            } else {
              tokens.push(token);
            }
            seen++;
          });
          return tokens;
        }

        var selectors = "  [aaa=99] > dfdsf ,ggg   eee  iii ";
        // Other inputs to try:
        //   "  [aaa=eee] jjjj"
        //   " p span"
        var array = splitSelectorTokens(selectors);
        alert("|" + array.join("|"));

修正无用空白字符的判定与伪类选择器的匹配正则

        // Token pattern, alternatives in order: leading whitespace; a tag name; an
        // #id / .class / :pseudo name with an optional parenthesised argument; an [attribute]
        // token; a combinator symbol with optional surrounding whitespace; a single whitespace
        // character that is followed by the start of another selector.
        var reg_split =/^\s+|[\w\u00a1-\uFFFF][\w\u00a1-\uFFFF-]*|[#.:][\w\u00a1-\uFFFF-]+(?:\([^\)]*\))?|\[[^\]]*\]|(?:\s*)[>+~,*](?:\s*)|\s(?=[\w\u00a1-\uFFFF*#.[:])/g;
        var selectors = "  div:eq(0) ";
        var parts = [], i = 0, ri = 0; // i: tokens matched so far, ri: next write index in parts
        // replace() is used purely as an iterator over the matches; its result is discarded.
        selectors.replace(reg_split, function (part) {
            var combinator = /^\s*([>+~,*])\s*$/.exec(part);
            if (/^\s+$/.test(part)) {
                // Whitespace-only token: a descendant combinator, unless it is the first
                // token matched (leading whitespace carries no meaning). The counter is
                // advanced after this check so the first token really is skipped.
                if (i > 0) {
                    parts[ri++] = " ";
                }
            } else if (combinator) {
                // Combinator symbol: keep only the symbol, dropping surrounding whitespace.
                parts[ri++] = combinator[1];
            } else {
                parts[ri++] = part;
            }
            i++;
        });
// Matches exactly one token at the START of the remaining selector text. Alternatives, in
// order: a tag name or "*"; an #id / .class / :pseudo name with an optional parenthesised
// argument; an [attribute] token; a combinator or grouping comma with optional surrounding
// whitespace; a run of whitespace. Every alternative is anchored so that unrecognized
// characters can never be silently skipped.
var reg_split = /^(?:[\w\u00a1-\uFFFF\-\*]+|[#.:][\w\u00a1-\uFFFF-]+(?:\([^\)]*\))?|\[[^\]]*\]|\s*[>+~,]\s*|\s+)/;
// Recognizes a token that is a separator as a whole: pure whitespace, or one of > + ~ , *
// with optional surrounding whitespace. Anchored at both ends so that tokens which merely
// contain whitespace (e.g. [title="x y"]) are not mistaken for separators.
var slim = /^(?:\s+|\s*[>+~,*]\s*)$/

/**
 * Tokenizes the first selector group of `expr`, i.e. everything before the first
 * grouping comma.
 *
 * The result is a flat array. The tokens of each compound selector are emitted in
 * reverse source order ("div#aaa" yields "#aaa", "div"), followed by the separator
 * that ended the compound. Separators have all whitespace removed, so the descendant
 * combinator (plain whitespace) appears as the empty string "". A "*" token is
 * treated as a separator as well.
 *
 * Leading and trailing whitespace of `expr` is ignored.
 *
 * @param {string} expr - selector text; may contain several comma-separated groups
 * @returns {string[]} tokens of the first group
 * @throws {SyntaxError} if the text contains something reg_split cannot tokenize
 */
function spliter(expr) {
    var full = [];  // finished tokens of the group
    var parts = []; // tokens of the compound selector currently being read
    var rest = expr.replace(/^\s+|\s+$/g, "");
    var match, part, separator;
    while (rest) {
        match = reg_split.exec(rest);
        if (!match) {
            // Without this check an unrecognized character would never be consumed
            // and the loop would spin forever.
            throw new SyntaxError("spliter: unrecognized selector syntax at \"" + rest + "\"");
        }
        part = match[0];
        rest = rest.slice(part.length); // drop the part that has been handled
        if (slim.test(part)) {
            separator = part.replace(/\s/g, "");
            if (separator === ",") {
                // Only the first group is handled; whitespace around the comma must
                // not prevent it from being recognized.
                break;
            }
            // Reverse the compound so that for "div.aaa" the ".aaa" comes first.
            full = full.concat(parts.reverse(), separator);
            parts = [];
        } else {
            parts.push(part);
        }
    }
    return full.concat(parts.reverse());
}
// Demo input with two comma-separated groups; spliter stops at the first comma,
// so "span" does not appear in the output.
var expr = "  div  >  div#aaa,span"
// The tokens of each compound selector come out reversed, hence "#aaa" before "div".
console.log(spliter(expr));//["div",">","#aaa", "div"]
 

posted on 2010-12-05 21:02  司徒正美  阅读(1246)  评论(0)  编辑  收藏  举报