JavaScript 上万关键字瞬间匹配 (转 整理)

// Trie (prefix tree) module. Builds a character trie from a list of keywords
// and publishes the TrieTree constructor on the namespace object `ns`
// (the global object: `window` in browsers).
;(function (window, document, ns, undefined) {
    var hasOwn = Object.prototype.hasOwnProperty;

    /**
     * A single trie node.
     * @param {string} data         Character stored at this node ('' for the root).
     * @param {Node}   [parentNode] Parent node; defaults to null (root).
     */
    var Node = function (data, parentNode) {
        this._childCount = 0;
        this.data = data;
        this.parentNode = parentNode || null;
        this.childNodes = {};
        // True when the path from the root to this node spells a complete keyword.
        this.isEnd = false;
    };
    Node.prototype = {
        // Replacing the prototype object drops the default `constructor`
        // link, so it is restored explicitly.
        constructor : Node,

        /**
         * Returns the child keyed by `data`, creating it first if needed.
         * @param {string} data
         * @returns {Node}
         */
        appendChild : function (data) {
            var child = this.getChild(data);
            if (!child) {
                child = new Node(data, this);
                this.childNodes[data] = child;
                this._childCount++;
            }
            return child;
        },

        /**
         * @param {string} data
         * @returns {Node|null} The child keyed by `data`, or null if absent.
         */
        getChild : function (data) {
            // Borrow hasOwnProperty so inherited Object.prototype members
            // are never mistaken for children.
            return hasOwn.call(this.childNodes, data) ? this.childNodes[data] : null;
        },

        /**
         * Detaches the child keyed by `data`.
         * @param {string} data
         * @returns {Node|null} The removed child, or null if there was none.
         */
        removeChild : function (data) {
            var child = this.getChild(data);
            if (child) {
                delete this.childNodes[data];
                this._childCount--;
            }
            return child;
        },

        /**
         * Removes every child.
         * @returns {Node} this, for chaining.
         */
        clearChild : function () {
            this.childNodes = {};
            this._childCount = 0;
            return this;
        },

        /**
         * With no arguments: whether this node has any child at all.
         * With arguments: whether a child exists for every given key.
         * @returns {boolean}
         */
        hasChild : function (data) {
            if (!this._childCount) {
                return false;
            }
            for (var i = arguments.length; i--; ) {
                if (!this.getChild(arguments[i])) {
                    return false;
                }
            }
            return true;
        }
    };

    /**
     * Walks the trie along the characters of `str`.
     * @param {Node}   root
     * @param {string} str
     * @returns {Node|null} The node reached, or null if the path does not exist.
     */
    function findNode(root, str) {
        var node = root;
        for (var i = 0, l = str.length; i < l; i++) {
            node = node.getChild(str.charAt(i));
            if (!node) {
                return null;
            }
        }
        return node;
    }

    /**
     * @param {string[]} [aData] Keywords to index. Empty and non-string
     *                           entries are skipped.
     */
    var TrieTree = function (aData) {
        var root = new Node('');
        var words = aData || [];
        var word;
        var node;
        for (var i = 0, l = words.length; i < l; i++) {
            word = words[i];
            if (typeof word !== 'string' || !word) {
                continue;
            }
            node = root;
            for (var j = 0, wl = word.length; j < wl; j++) {
                // appendChild reuses an existing child, so no pre-check is needed.
                node = node.appendChild(word.charAt(j));
            }
            // Mark the terminal node so a keyword that is also a prefix of a
            // longer keyword (e.g. "ab" next to "abc") is still recognised.
            node.isEnd = true;
        }

        this._root = root;
    };

    TrieTree.prototype = {
        constructor : TrieTree,

        /**
         * @param {string} str
         * @returns {boolean} true only if `str` is exactly one of the indexed keywords.
         */
        exactMatch : function (str) {
            var node = findNode(this._root, str);
            return node !== null && node.isEnd;
        },

        /**
         * @param {string} str
         * @returns {boolean} true if `str` is a prefix of (or equal to) some
         *                    indexed keyword; the empty string always matches.
         */
        mmFetch : function (str) {
            return findNode(this._root, str) !== null;
        }
    };

    ns.TrieTree = TrieTree;

}(
    // Guarded lookups keep the module loadable where `window`/`document`
    // do not exist (workers, Node.js); in browsers the arguments are unchanged.
    typeof window !== 'undefined' ? window : undefined,
    typeof document !== 'undefined' ? document : undefined,
    typeof window !== 'undefined' ? window
        : typeof self !== 'undefined' ? self
        : typeof global !== 'undefined' ? global
        : this
));
 
 
 
 
/*
    test case
    var dataSource = [
          '你好么',
          '你好呀',
          '你吃了么',
          '中国',
          '中南海',
          '中心思想',
          '上海',
          '上海人',
          '上证指数',
          '上海滩',
          '北京',
          '北京人',
          '北京你好',
          '北京猿人',
          '北京人在纽约'
      ];
      var tt  = new TrieTree(dataSource);
      document.write('<h1>数据源 : </h1></br>', dataSource.join('</br>'))
 
 
      var str = '上海滩';
      console.log(str, '\n前缀匹配 : ' + tt.mmFetch(str), '\n完全匹配 : ' + tt.exactMatch(str));
*/

 

 

/**
 * Multi-keyword matcher backed by a trie of nested plain objects.
 *
 * makeTree() builds the trie once; search() then scans a text and reports,
 * left to right, the longest keyword starting at each position
 * (non-overlapping matches).
 */
var treeSearch = {
    /**
     * Builds the keyword trie.
     *
     * Every node is a plain object whose own single-character properties are
     * its children. A node that terminates a keyword additionally carries
     * `end: true`; the three-character name "end" cannot collide with a
     * child key, because child keys are always single characters.
     *
     * @param {string[]} strKeys Keywords to index.
     * @returns {Object} Root node of the trie.
     */
    makeTree: function(strKeys) {
        "use strict";
        var tblRoot = {},
            tblCur,
            str_key,
            key,
            i,
            j;
        for (j = strKeys.length - 1; j >= 0; j -= 1) {
            str_key = strKeys[j];
            tblCur = tblRoot; // every keyword is inserted starting from the root
            for (i = 0; i < str_key.length; i += 1) {
                key = str_key.charAt(i);
                if (!tblCur.hasOwnProperty(key)) {
                    tblCur[key] = {}; // create the missing child node
                }
                tblCur = tblCur[key];
            }
            tblCur.end = true; // marks the end of a complete keyword
        }
        return tblRoot;
    },

    /**
     * Finds keyword occurrences in `content`.
     *
     * At each start position the trie is followed as far as the text allows,
     * remembering the last position at which a complete keyword ended. Only
     * that longest complete keyword is reported, never the characters walked
     * past it while probing for a longer keyword, and scanning resumes right
     * after the reported match.
     *
     * @param {string} content Text to scan.
     * @param {Object} tblRoot Trie produced by makeTree().
     * @returns {{key: string, begin: number, end: number}[]} Matches in text
     *          order; `begin` is inclusive and `end` exclusive, so that
     *          content.substring(begin, end) === key.
     */
    search: function(content, tblRoot) {
        "use strict";
        var n = content.length,
            p_star = 0,     // start index of the current attempt
            p_end,          // index of the next character to examine
            match_end,      // end index of the longest keyword found, or -1
            tblCur,
            arrMatch = [];

        while (p_star < n) {
            tblCur = tblRoot; // restart from the root for every start position
            p_end = p_star;
            match_end = -1;

            // Descend while the text keeps following a path in the trie.
            while (p_end < n && (tblCur = tblCur[content.charAt(p_end)])) {
                p_end += 1;
                if (tblCur.end === true) {
                    match_end = p_end; // longest complete keyword so far
                }
            }

            if (match_end !== -1) {
                arrMatch.push({
                    key: content.substring(p_star, match_end),
                    begin: p_star,
                    end: match_end
                });
                p_star = match_end; // continue right after the match
            } else {
                p_star += 1; // nothing starts here; try the next character
            }
        }
        return arrMatch;
    }
};
/**
 * Demo driver: builds a keyword trie from `strKeys`, searches `strContent`
 * with it, then logs the time spent in the search and the list of matches.
 *
 * @param {string}   strContent Text to scan.
 * @param {string[]} strKeys    Keywords to look for.
 */
function test(strContent, strKeys) {
    // The trie is built before the timer starts, so only search() is timed.
    var tblRoot = treeSearch.makeTree(strKeys),
        t = new Date(),
        arrMatch = treeSearch.search(strContent, tblRoot);

    // Subtracting two Date objects yields milliseconds, hence "ms".
    console.log("time is: " + (new Date() - t) + "ms");

    console.log(arrMatch);
}
// Random sample text (exactly 1,000,000 characters) used as the search corpus.
var s = (function() {
    // NOTE(review): 'P' and 'p' are absent from this alphabet — confirm whether that is intentional.
    var Things = [' ', '\n', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'];
    var LENGTH = 1000000;
    var text = "";
    for (var i = 0; i < LENGTH; i++) {
        // Math.floor, not parseInt: parseInt stringifies its argument first,
        // so a tiny value such as 5e-7 would be parsed as 5 rather than 0.
        // Math.random() is always < 1, so the index stays within bounds.
        text += Things[Math.floor(Math.random() * Things.length)];
    }
    return text;
})();
// Run the demo: search the random text `s` for four sample keywords.
test(s, ["abc", "efge", "fun", "tree"]);

 

 

 

原文:https://www.cnblogs.com/index-html/archive/2013/04/17/js_keyword_match.html

 

https://www.cnblogs.com/ry123/archive/2013/04/24/3039720.html

posted @ 2020-06-17 11:34  Shikyoh  阅读(387)  评论(0)  编辑  收藏  举报