语法树

 

个人感觉有点类似于有限状态机
package
com.smart.enumcompareto.test; import com.smart.enumcompareto.test.TernarySearchTrie.TSTNode; /** * 语法树匹配到的结果,记录最终节点和最终节点的index * * @author dell * */ public class MatchRet { private int index; private TSTNode node; public MatchRet(TSTNode node,int index){ this.index=index; this.node=node; } public int getIndex() { return index; } public void setIndex(int index) { this.index = index; } public TSTNode getNode() { return node; } public void setNode(TSTNode node) { this.node = node; } public String toString(){ return node.data; } }
package com.smart.enumcompareto.test;

public enum TokenType {
    Unknown,
    Basic,
    Suffix,
    Start,
    End
}
package com.smart.enumcompareto.test;

import java.util.ArrayList;

/**
 * 语法树的测试
 * 
 * @author dell
 * 
 */
public class TernarySearchTrie {

    public final class TSTNode {
        public String data = null;
        protected TSTNode loNode;
        protected TSTNode eqNode;
        protected TSTNode hiNode;
        protected TokenType splitchar;

        public TSTNode(TokenType type) {
            this.splitchar = type;
        }
    }

    public TSTNode rootNode;

    public TSTNode add(ArrayList<TokenType> word) {
        if (null == word) {
            throw new NullPointerException("空指针异常");
        }

        int charIndex = 0;
        if (null == rootNode) {
            rootNode = new TSTNode(word.get(0));
        }
        TSTNode currentNode = rootNode;
        while (true) {
            int charComp = word.get(charIndex).compareTo(currentNode.splitchar);
            if (charComp == 0) {
                charIndex++;
                if (charIndex == word.size()) {
                    return currentNode;
                }
                if (null == currentNode.eqNode) {
                    currentNode.eqNode = new TSTNode(word.get(charIndex));
                }
                currentNode = currentNode.eqNode;
            } else if (charComp < 0) {
                if (null == currentNode.loNode) {
                    currentNode.loNode = new TSTNode(word.get(charIndex));
                }
                currentNode = currentNode.loNode;
            } else {
                if (null == currentNode.hiNode) {
                    currentNode.hiNode = new TSTNode(word.get(charIndex));
                }
                currentNode = currentNode.hiNode;
            }
        }
    }

    protected TSTNode getNode(ArrayList<TokenType> word) {
        if (null == word) {
            return null;
        }
        int len = word.size();
        if (len == 0)
            return null;
        TSTNode currentNode = rootNode; // 匹配过程中的当前节点的位置
        int charIndex = 0; // 表示当前要比较的字符在Key中的位置
        TokenType cmpChar = word.get(charIndex);
        int charComp;
        while (true) {
            if (currentNode == null) {// 没找到
                return null;
            }
            charComp = cmpChar.compareTo(currentNode.splitchar);
            if (charComp == 0) {// 相等往下走
                charIndex++;
                if (charIndex == len) {// 找到了
                    return currentNode;
                } else {
                    cmpChar = word.get(charIndex);// 词往下走
                }
                currentNode = currentNode.eqNode;
            } else if (charComp < 0) {// 小于往左走
                currentNode = currentNode.loNode;
            } else {// 大于往右走
                currentNode = currentNode.hiNode;
            }
        }
    }

    public void tag(ArrayList<TokenType> tokens) {
        if (tokens == null || rootNode == null) {
            return;
        }
        for (int i = 0; i < tokens.size();) {
            MatchRet ret = matchLong(tokens, i);
            if (null != ret) {
                System.out.println(ret.toString());
                i = ret.getIndex();
            } else {
                System.out.println("null");
                i++;
            }
        }
    }

    public MatchRet matchLong(ArrayList<TokenType> tokens, int offset) {
        if (tokens == null || rootNode == null) {
            return null;
        }

        MatchRet ret = null;
        TSTNode currentNode = rootNode;
        int index = offset;
        while (currentNode != null) {
            int charComp = tokens.get(index).compareTo(currentNode.splitchar);
            if (charComp == 0) {
                index++;
                if (currentNode.data != null) {
                    ret = new MatchRet(currentNode, index);
                }
                if (index == tokens.size()) {
                    return ret;
                }
                currentNode = currentNode.eqNode;
            } else if (charComp < 0) {
                currentNode = currentNode.loNode;
            } else {
                currentNode = currentNode.hiNode;
            }
        }
        return ret;
    }

    public static void main(String[] args) {
        //testTag();
        testAddGet();
    }

    private static void testAddGet(){
        TernarySearchTrie tree=new TernarySearchTrie();
        ArrayList<TokenType> list=new ArrayList<TokenType>();
        list.add(TokenType.Start);
        list.add(TokenType.Suffix);
        list.add(TokenType.Basic);
        list.add(TokenType.End);
        
        TSTNode node=tree.add(list);
        node.data="我是start-suffix-basic-end组合";
        
        TSTNode ret=tree.getNode(list);
        if(ret==null){
            System.out.println("未匹配到");
        }
        else{
            System.out.println(ret.data);
        }
    }
    
    private static void testTag() {
        TernarySearchTrie tree = new TernarySearchTrie();
        ArrayList<TokenType> list = new ArrayList<TokenType>();

        list.add(TokenType.Basic);
        list.add(TokenType.Suffix);

        TSTNode node = tree.add(list);
        node.data = "我是basic-suffix组合";
        
        list = new ArrayList<TokenType>();
        list.add(TokenType.Unknown);
        list.add(TokenType.Suffix);
        
        TSTNode node_1 = tree.add(list);
        node_1.data = "我是unknown-suffix组合";

        list = new ArrayList<TokenType>();
        list.add(TokenType.Start);
        list.add(TokenType.Basic);
        list.add(TokenType.Suffix);
        list.add(TokenType.Unknown);
        list.add(TokenType.Suffix);
        list.add(TokenType.End);

        tree.tag(list);
    }

}

posted on 2012-10-19 09:07  雨渐渐  阅读(230)  评论(0编辑  收藏  举报

导航