|NO.Z.00042|——————————|BigDataEnd|——|Hadoop&ElasticSearch.V42|——|ELK.v42|原理剖析|数据结构.V2|

一、Trie(扩展)
### --- Trie 被称作字典树、前缀树(Prefix Tree)、单词查找树

~~~     Trie 搜索字符串的效率主要跟字符串的长度有关(O(len(单词)))
~~~     使用 Trie 存储 cat->1、dog->2、doggy->3、does->4、cast->5、add->6 六个单词映射
### --- Trie时间复杂度:O(len(key))

~~~     FST,不但能共享前缀还能共享后缀。不但能判断查找的key是否存在,还能给出相应的输出output。
~~~     它在时间复杂度和空间复杂度上都做了最大程度的优化,
~~~     使得Lucene能够将Term Index完全加载到内存,
~~~     快速地定位Term,找到相应的output(posting倒排列表)。
二、编程代码实现
### --- 相关方法

~~~     # 相关方法
    int size();
    boolean isEmpty();
    void clear();
    boolean contains(String str);
    V add(String str, V value);
    V remove(String str);
    boolean starsWith(String prefix);
### --- 编程实现

package com.yanqi.es;

import java.util.HashMap;

/**
 * A trie (prefix tree) that maps non-empty string keys ("words") to values.
 *
 * <p>Every node below the root stands for one {@code char} of a key, so a lookup,
 * insert or removal walks one node per character of the key. Keys that are
 * {@code null} or empty are rejected with {@link IllegalArgumentException} by every
 * operation that takes a key.
 *
 * @param <E> type of the value stored with each word, chosen when the trie is created
 */
public class Trie<E> {

    /** Number of words currently stored. */
    private int size;

    /** Root node; created lazily by the first {@code add} and dropped by {@code clear}. */
    private Node<E> root;

    /**
     * Returns the number of words currently stored.
     *
     * @return the word count
     */
    int size() {
        return size;
    }

    /**
     * Tells whether the trie holds no words.
     *
     * @return {@code true} when the word count is zero
     */
    boolean isEmpty() {
        return size == 0;
    }

    /** Removes every word by resetting the count and dropping the whole node tree. */
    void clear() {
        size = 0;
        root = null;
    }

    /**
     * Tells whether {@code str} is stored as a complete word (not merely as a prefix).
     *
     * @param str the word to look for
     * @return {@code true} if the exact word is present
     * @throws IllegalArgumentException if {@code str} is {@code null} or empty
     */
    boolean contains(String str) {
        final Node<E> node = search(str);
        return node != null && node.word;
    }

    /**
     * Validates a key: {@code null} and the empty string are not accepted.
     *
     * @param key the key to validate
     * @throws IllegalArgumentException if {@code key} is {@code null} or empty
     */
    public void checkKeyIsNull(String key) {
        if (key == null || key.isEmpty()) {
            throw new IllegalArgumentException("key should not be null or empty");
        }
    }

    /**
     * Stores {@code value} under the word {@code str}, creating nodes as needed.
     *
     * <p>Example: {@code add("a", 1)} returns {@code null}; a following
     * {@code add("a", 2)} overwrites the value and returns {@code 1}.
     *
     * @param str   the word to insert
     * @param value the value to associate with the word
     * @return the value previously stored for the word, or {@code null} if the word is new
     * @throws IllegalArgumentException if {@code str} is {@code null} or empty
     */
    E add(String str, E value) {
        // An invalid key throws here, before anything is modified.
        checkKeyIsNull(str);

        if (root == null) {
            root = new Node<>(null);
        }

        Node<E> node = root;
        final int len = str.length();
        for (int i = 0; i < len; i++) {
            final char c = str.charAt(i);
            // The child map is allocated only once a node actually gets a child.
            if (node.childs == null) {
                node.childs = new HashMap<>();
            }
            Node<E> childNode = node.childs.get(c);
            if (childNode == null) {
                // No branch for this character yet: create it and link it both ways.
                childNode = new Node<>(node);
                childNode.character = c;
                node.childs.put(c, childNode);
            }
            node = childNode;
        }

        if (node.word) {
            // The word already existed: overwrite its value, size is unchanged.
            final E oldValue = node.value;
            node.value = value;
            return oldValue;
        }

        // Brand-new word.
        node.word = true;
        node.value = value;
        size++;
        return null;
    }

    /**
     * Removes the word {@code str} and returns the value it mapped to.
     *
     * <p>If the word's last node still has children (it is a prefix of longer words)
     * the node is only unmarked. Otherwise the node is unlinked and the now-useless
     * chain above it is pruned, stopping at the first ancestor that still has other
     * children or is itself a word.
     *
     * @param str the word to remove
     * @return the removed word's value, or {@code null} if the word was not stored
     * @throws IllegalArgumentException if {@code str} is {@code null} or empty
     */
    E remove(String str) {
        Node<E> node = search(str);
        if (node == null || !node.word) {
            return null;
        }

        size--;
        final E oldValue = node.value;

        if (node.childs != null && !node.childs.isEmpty()) {
            // Longer words pass through this node: keep it (and its children), just unmark it.
            node.word = false;
            node.value = null;
            return oldValue;
        }

        // Leaf node: unlink it, then keep pruning upwards while ancestors become useless.
        Node<E> parent;
        while ((parent = node.parent) != null) {
            parent.childs.remove(node.character);
            if (!parent.childs.isEmpty() || parent.word) {
                break;
            }
            node = parent;
        }
        return oldValue;
    }

    /**
     * Follows {@code key} character by character from the root.
     *
     * @param key the string to follow
     * @return the node reached by the last character, or {@code null} if the path does not exist
     * @throws IllegalArgumentException if {@code key} is {@code null} or empty
     */
    private Node<E> search(String key) {
        checkKeyIsNull(key);
        final int len = key.length();
        Node<E> node = root;
        for (int i = 0; i < len; i++) {
            if (node == null || node.childs == null || node.childs.isEmpty()) {
                return null;
            }
            node = node.childs.get(key.charAt(i));
        }
        // May still be null when the last character had no matching branch.
        return node;
    }

    /**
     * Tells whether the path spelled by {@code prefix} exists in the trie.
     *
     * @param prefix the prefix to test
     * @return {@code true} if the path for {@code prefix} exists
     * @throws IllegalArgumentException if {@code prefix} is {@code null} or empty
     */
    boolean startsWith(String prefix) {
        return search(prefix) != null;
    }

    /**
     * Misspelled original name of {@link #startsWith(String)}, kept so existing callers compile.
     *
     * @param prefix the prefix to test
     * @return {@code true} if the path for {@code prefix} exists
     * @throws IllegalArgumentException if {@code prefix} is {@code null} or empty
     * @deprecated use {@link #startsWith(String)}
     */
    @Deprecated
    boolean starsWith(String prefix) {
        return startsWith(prefix);
    }

    /** One node of the trie. */
    private static class Node<E> {
        /** Value of the word ending at this node; only meaningful while {@link #word} is true. */
        E value;
        /** Whether a stored word ends at this node. */
        boolean word;
        /** Branches to child nodes keyed by character; {@code null} until the first child is added. */
        HashMap<Character, Node<E>> childs;
        /** Parent node; {@code null} for the root. */
        Node<E> parent;
        /** Character under which this node is registered in its parent's {@link #childs}. */
        Character character;

        /**
         * Creates a node attached to the given parent.
         *
         * @param parent the parent node, or {@code null} for the root
         */
        public Node(Node<E> parent) {
            this.parent = parent;
        }
    }
}
### --- 编程实现测试类

package com.yanqi.es;

/**
 * Manual smoke run for {@code Trie}: inserts a few words, then prints the result of
 * removing an existing word, looking it up again, and removing a word that was never added.
 */
public class TrieTest {
    public static void main(String[] args) {
        final Trie<Integer> words = new Trie<>();

        // add() returns the previous value for the key; the results are not needed here.
        words.add("cat", 10);
        words.add("dog", 20);
        words.add("cat", 100); // overwrites the value stored for "cat"

        // Removing an existing word yields its current value: prints 100.
        System.out.println(words.remove("cat"));
        // The word is gone afterwards: prints false.
        System.out.println(words.contains("cat"));
        // Removing a word that was never added yields null: prints null.
        System.out.println(words.remove("doggy"));
    }
}
### --- 编译打印

D:\JAVA\jdk1.8.0_231\bin\java.exe "-javaagent:D:\IntelliJIDEA\IntelliJ IDEA 2019.3.3\lib\idea_rt.jar=58442:D:\IntelliJIDEA\IntelliJ IDEA 2019.3.3\bin" -Dfile.encoding=UTF-8 -classpath D:\JAVA\jdk1.8.0_231\jre\lib\charsets.jar;D:\JAVA\jdk1.8.0_231\jre\lib\deploy.jar;D:\JAVA\jdk1.8.0_231\jre\lib\ext\access-bridge-64.jar;D:\JAVA\jdk1.8.0_231\jre\lib\ext\cldrdata.jar;D:\JAVA\jdk1.8.0_231\jre\lib\ext\dnsns.jar;D:\JAVA\jdk1.8.0_231\jre\lib\ext\jaccess.jar;D:\JAVA\jdk1.8.0_231\jre\lib\ext\jfxrt.jar;D:\JAVA\jdk1.8.0_231\jre\lib\ext\localedata.jar;D:\JAVA\jdk1.8.0_231\jre\lib\ext\nashorn.jar;D:\JAVA\jdk1.8.0_231\jre\lib\ext\sunec.jar;D:\JAVA\jdk1.8.0_231\jre\lib\ext\sunjce_provider.jar;D:\JAVA\jdk1.8.0_231\jre\lib\ext\sunmscapi.jar;D:\JAVA\jdk1.8.0_231\jre\lib\ext\sunpkcs11.jar;D:\JAVA\jdk1.8.0_231\jre\lib\ext\zipfs.jar;D:\JAVA\jdk1.8.0_231\jre\lib\javaws.jar;D:\JAVA\jdk1.8.0_231\jre\lib\jce.jar;D:\JAVA\jdk1.8.0_231\jre\lib\jfr.jar;D:\JAVA\jdk1.8.0_231\jre\lib\jfxswt.jar;D:\JAVA\jdk1.8.0_231\jre\lib\jsse.jar;D:\JAVA\jdk1.8.0_231\jre\lib\management-agent.jar;D:\JAVA\jdk1.8.0_231\jre\lib\plugin.jar;D:\JAVA\jdk1.8.0_231\jre\lib\resources.jar;D:\JAVA\jdk1.8.0_231\jre\lib\rt.jar;E:\NO.Z.80000.Hadoop.project\elasticsearch\target\classes;C:\Users\Administrator\.m2\repository\org\elasticsearch\client\elasticsearch-rest-high-level-client\7.9.0\elasticsearch-rest-high-level-client-7.9.0.jar;C:\Users\Administrator\.m2\repository\org\elasticsearch\client\elasticsearch-rest-client\7.9.0\elasticsearch-rest-client-7.9.0.jar;C:\Users\Administrator\.m2\repository\org\apache\httpcomponents\httpclient\4.5.10\httpclient-4.5.10.jar;C:\Users\Administrator\.m2\repository\org\apache\httpcomponents\httpcore\4.4.12\httpcore-4.4.12.jar;C:\Users\Administrator\.m2\repository\org\apache\httpcomponents\httpasyncclient\4.1.4\httpasyncclient-4.1.4.jar;C:\Users\Administrator\.m2\repository\org\apache\httpcomponents\httpcore-nio\4.4.12\httpcore-nio-4.4.12.jar;C:\Users\Administrator\.m2\repository\commons-code
c\commons-codec\1.11\commons-codec-1.11.jar;C:\Users\Administrator\.m2\repository\commons-logging\commons-logging\1.1.3\commons-logging-1.1.3.jar;C:\Users\Administrator\.m2\repository\org\elasticsearch\plugin\mapper-extras-client\7.9.0\mapper-extras-client-7.9.0.jar;C:\Users\Administrator\.m2\repository\org\elasticsearch\plugin\parent-join-client\7.9.0\parent-join-client-7.9.0.jar;C:\Users\Administrator\.m2\repository\org\elasticsearch\plugin\aggs-matrix-stats-client\7.9.0\aggs-matrix-stats-client-7.9.0.jar;C:\Users\Administrator\.m2\repository\org\elasticsearch\plugin\rank-eval-client\7.9.0\rank-eval-client-7.9.0.jar;C:\Users\Administrator\.m2\repository\org\elasticsearch\plugin\lang-mustache-client\7.9.0\lang-mustache-client-7.9.0.jar;C:\Users\Administrator\.m2\repository\com\github\spullara\mustache\java\compiler\0.9.6\compiler-0.9.6.jar;C:\Users\Administrator\.m2\repository\org\elasticsearch\elasticsearch\7.9.0\elasticsearch-7.9.0.jar;C:\Users\Administrator\.m2\repository\org\elasticsearch\elasticsearch-core\7.9.0\elasticsearch-core-7.9.0.jar;C:\Users\Administrator\.m2\repository\org\elasticsearch\elasticsearch-secure-sm\7.9.0\elasticsearch-secure-sm-7.9.0.jar;C:\Users\Administrator\.m2\repository\org\elasticsearch\elasticsearch-x-content\7.9.0\elasticsearch-x-content-7.9.0.jar;C:\Users\Administrator\.m2\repository\org\yaml\snakeyaml\1.26\snakeyaml-1.26.jar;C:\Users\Administrator\.m2\repository\com\fasterxml\jackson\core\jackson-core\2.10.4\jackson-core-2.10.4.jar;C:\Users\Administrator\.m2\repository\com\fasterxml\jackson\dataformat\jackson-dataformat-smile\2.10.4\jackson-dataformat-smile-2.10.4.jar;C:\Users\Administrator\.m2\repository\com\fasterxml\jackson\dataformat\jackson-dataformat-yaml\2.10.4\jackson-dataformat-yaml-2.10.4.jar;C:\Users\Administrator\.m2\repository\com\fasterxml\jackson\dataformat\jackson-dataformat-cbor\2.10.4\jackson-dataformat-cbor-2.10.4.jar;C:\Users\Administrator\.m2\repository\org\elasticsearch\elasticsearch-geo\7.9.0\elasticsearch-
geo-7.9.0.jar;C:\Users\Administrator\.m2\repository\org\apache\lucene\lucene-core\8.6.0\lucene-core-8.6.0.jar;C:\Users\Administrator\.m2\repository\org\apache\lucene\lucene-analyzers-common\8.6.0\lucene-analyzers-common-8.6.0.jar;C:\Users\Administrator\.m2\repository\org\apache\lucene\lucene-backward-codecs\8.6.0\lucene-backward-codecs-8.6.0.jar;C:\Users\Administrator\.m2\repository\org\apache\lucene\lucene-grouping\8.6.0\lucene-grouping-8.6.0.jar;C:\Users\Administrator\.m2\repository\org\apache\lucene\lucene-highlighter\8.6.0\lucene-highlighter-8.6.0.jar;C:\Users\Administrator\.m2\repository\org\apache\lucene\lucene-join\8.6.0\lucene-join-8.6.0.jar;C:\Users\Administrator\.m2\repository\org\apache\lucene\lucene-memory\8.6.0\lucene-memory-8.6.0.jar;C:\Users\Administrator\.m2\repository\org\apache\lucene\lucene-misc\8.6.0\lucene-misc-8.6.0.jar;C:\Users\Administrator\.m2\repository\org\apache\lucene\lucene-queries\8.6.0\lucene-queries-8.6.0.jar;C:\Users\Administrator\.m2\repository\org\apache\lucene\lucene-queryparser\8.6.0\lucene-queryparser-8.6.0.jar;C:\Users\Administrator\.m2\repository\org\apache\lucene\lucene-sandbox\8.6.0\lucene-sandbox-8.6.0.jar;C:\Users\Administrator\.m2\repository\org\apache\lucene\lucene-spatial-extras\8.6.0\lucene-spatial-extras-8.6.0.jar;C:\Users\Administrator\.m2\repository\org\apache\lucene\lucene-spatial3d\8.6.0\lucene-spatial3d-8.6.0.jar;C:\Users\Administrator\.m2\repository\org\apache\lucene\lucene-suggest\8.6.0\lucene-suggest-8.6.0.jar;C:\Users\Administrator\.m2\repository\org\elasticsearch\elasticsearch-cli\7.9.0\elasticsearch-cli-7.9.0.jar;C:\Users\Administrator\.m2\repository\net\sf\jopt-simple\jopt-simple\5.0.2\jopt-simple-5.0.2.jar;C:\Users\Administrator\.m2\repository\com\carrotsearch\hppc\0.8.1\hppc-0.8.1.jar;C:\Users\Administrator\.m2\repository\joda-time\joda-time\2.10.4\joda-time-2.10.4.jar;C:\Users\Administrator\.m2\repository\com\tdunning\t-digest\3.2\t-digest-3.2.jar;C:\Users\Administrator\.m2\repository\org\hdrhistogram\
HdrHistogram\2.1.9\HdrHistogram-2.1.9.jar;C:\Users\Administrator\.m2\repository\org\elasticsearch\jna\5.5.0\jna-5.5.0.jar;C:\Users\Administrator\.m2\repository\junit\junit\4.12\junit-4.12.jar;C:\Users\Administrator\.m2\repository\org\hamcrest\hamcrest-core\1.3\hamcrest-core-1.3.jar;C:\Users\Administrator\.m2\repository\org\apache\logging\log4j\log4j-core\2.5\log4j-core-2.5.jar;C:\Users\Administrator\.m2\repository\org\apache\logging\log4j\log4j-api\2.5\log4j-api-2.5.jar com.yanqi.es.TrieTest
100
false
null

Process finished with exit code 0

 
 
 
 
 
 
 
 
 

Walter Savage Landor: I strove with none, for none was worth my strife. Nature I loved and, next to Nature, Art: I warm'd both hands before the fire of life. It sinks, and I am ready to depart.
                                                                                                                                                   ——W.S.Landor

 

 

posted on   yanqi_vip  阅读(24)  评论(0编辑  收藏  举报

相关博文:
阅读排行:
· 无需6万激活码!GitHub神秘组织3小时极速复刻Manus,手把手教你使用OpenManus搭建本
· Manus爆火,是硬核还是营销?
· 终于写完轮子一部分:tcp代理 了,记录一下
· 别再用vector<bool>了!Google高级工程师:这可能是STL最大的设计失误
· 单元测试从入门到精通
< 2025年3月 >
23 24 25 26 27 28 1
2 3 4 5 6 7 8
9 10 11 12 13 14 15
16 17 18 19 20 21 22
23 24 25 26 27 28 29
30 31 1 2 3 4 5

导航

统计

点击右上角即可分享
微信分享提示