代码改变世界

嵌套结构的正则匹配

2010-09-02 00:04  BlueDream  阅读(802)  评论(1)  编辑  收藏  举报

/**
 * Extracts the contents of every outermost opener/closer pair found in a
 * string, allowing the delimiters to nest.
 *
 * @param {string} str    Text to scan.
 * @param {string} format Opening token, a literal "...", then the closing
 *                        token, e.g. "[...]" or "<div>...</div>". Tokens are
 *                        matched literally (regex metacharacters are escaped).
 * @returns {string[]} The text between each balanced outermost pair, in order
 *                     of appearance, without the delimiters themselves.
 * @throws {Error} If `format` has no "..." separator with a token on each
 *                 side, or if the two tokens are identical.
 */
var matchRecursive = function () {
    var formatParts = /^([\S\s]+?)\.\.\.([\S\s]+)/,
        metaChar = /[-[\]{}()*+?.\\^$|,]/g,
        // Named escapeRegex so the deprecated global escape() is not shadowed.
        escapeRegex = function (str) {
            return str.replace(metaChar, "\\$&");
        };

    return function (str, format) {
        var p = formatParts.exec(format);
        if (!p) throw new Error("format must include start and end tokens separated by '...'");
        if (p[1] === p[2]) throw new Error("start and end format tokens cannot be identical");

        var opener = p[1],
            closer = p[2],
            // Alternation picks the first alternative that matches, so the
            // longer token must come first; otherwise a token that is a prefix
            // of the other (e.g. "[" vs "[/]") would always win and the longer
            // token could never be recognised.
            first = opener.length >= closer.length ? opener : closer,
            second = first === opener ? closer : opener,
            iterator = new RegExp(escapeRegex(first) + "|" + escapeRegex(second), "g"),
            results = [],
            openTokens, matchStartIndex, match;

        do {
            openTokens = 0;
            while ((match = iterator.exec(str)) !== null) {
                if (match[0] === opener) {
                    // Remember where the content of an outermost pair begins.
                    if (openTokens === 0) {
                        matchStartIndex = iterator.lastIndex;
                    }
                    openTokens++;
                } else if (openTokens > 0) {
                    // Closers seen while nothing is open are ignored.
                    openTokens--;
                    if (openTokens === 0) {
                        results.push(str.slice(matchStartIndex, match.index));
                    }
                }
            }
            // Reaching the end with openers still pending means the outermost
            // opener was never closed. Rescan from just after it so balanced
            // pairs nested inside it are still reported.
            if (openTokens > 0) {
                iterator.lastIndex = matchStartIndex;
            }
        } while (openTokens > 0);

        return results;
    };
}();
// Demo: print the contents of each outermost [...] group in the sample string.
console.log(
    matchRecursive("12[3[12123]ghgjh][asd][asdas]123[!@3asd234]", "[...]")
); // ["3[12123]ghgjh", "asd", "asdas", "!@3asd234"]