Trie tree实践
1、Trie树
Trie树即字典树或前缀树,是一种利用字符串的公共前缀来存储和检索字符串集合的树形结构。
2、实践
代码实践如下:
import java.util.HashSet;

/**
 * Trie (prefix tree) for words made only of the lowercase letters {@code 'a'..'z'}.
 *
 * <p>Every node on a word's path stores the id of that word, so the ids of all words
 * sharing a prefix can be read from the single node that ends the prefix. The node that
 * ends a word additionally counts how many times that word was added.
 *
 * <p>Ids are expected to be unique per added word occurrence: the per-node id set cannot
 * tell apart two words (or two occurrences) registered under the same id, and deleting one
 * of them removes the id from the shared prefix nodes as well.
 *
 * <p>No synchronization is performed by this class.
 *
 * <p>This listing carries no {@code package} declaration; put it in the same package as
 * the code that uses it (the article's driver {@code Main_Trie} uses
 * {@code cn.edu.buaa.trie}).
 *
 * @author zsm
 * @version 1.0
 */
public class Trie {
    private TrieNode trieRoot;
    private int treeSize;

    /** Creates an empty trie consisting of a character-less root node only. */
    public Trie() {
        trieRoot = new TrieNode();
        treeSize = 0;
    }

    /** @return the root node, which holds no character itself */
    public TrieNode getRoot() {
        return trieRoot;
    }

    /** @return the number of nodes currently in the trie, not counting the root */
    public int getTreeSize() {
        return treeSize;
    }

    /**
     * Adds one occurrence of {@code word}, registered under {@code wordId}.
     *
     * @param word   the word to add; {@code null} or empty is ignored
     * @param wordId the id recorded on every node along the word's path
     * @throws IllegalArgumentException if the word contains a character outside 'a'..'z'
     */
    public void addWord(String word, int wordId) {
        addWord(trieRoot, word, wordId);
    }

    /**
     * Adds one occurrence of {@code word} below {@code root}.
     *
     * @param root   the node to start from
     * @param word   the word to add; {@code null} or empty is ignored
     * @param wordId the id recorded on every node along the word's path
     * @throws IllegalArgumentException if the word contains a character outside 'a'..'z';
     *                                  the trie is left untouched in that case
     */
    public void addWord(TrieNode root, String word, int wordId) {
        if (null == word || word.length() == 0) {
            return;
        }
        // Validate the whole word first so a bad character cannot leave a half-inserted path.
        requireSupported(word);

        TrieNode node = root;
        for (int i = 0; i < word.length(); i++) {
            char ch = word.charAt(i);
            int k = TrieNode.getCharPosition(ch);
            TrieNode child = node.childNodes[k];
            if (child == null) {
                child = new TrieNode();
                child.nodeChar = ch;
                node.childNodes[k] = child;
                treeSize++;
            }
            // The word passes through this node.
            child.wordSet.add(wordId);
            node = child;
        }
        // The last node ends the word: bump its occurrence count.
        node.freq++;
    }

    /**
     * Deletes one occurrence of {@code word} that was registered under {@code wordId}.
     * Nothing happens if no such occurrence exists.
     *
     * @param word   the word to delete
     * @param wordId the id the word was added with
     */
    public void deleteWord(String word, int wordId) {
        deleteWord(trieRoot, word, wordId);
    }

    /** Outcome of {@link #deleteWord(TrieNode, String, int)}. */
    enum DELETERES {
        FAIL_EMPTYWORLD, FAIL_WORLD_NOT_EXIST, SUCCESS
    }

    /**
     * Deletes one occurrence of {@code word} below {@code root}.
     *
     * <p>The deletion only happens when the path exists, the last node ends at least one
     * word, and that node records {@code wordId}. This check cannot catch an id that
     * reaches the last node through a longer word with the same prefix; callers must pass
     * the id the word was added with. Nodes whose id set becomes empty are released.
     *
     * @param root   the node to start from
     * @param word   the word to delete
     * @param wordId the id the word was added with
     * @return {@code FAIL_EMPTYWORLD} for a {@code null} or empty word,
     *         {@code FAIL_WORLD_NOT_EXIST} when no matching occurrence is stored,
     *         {@code SUCCESS} otherwise
     */
    public DELETERES deleteWord(TrieNode root, String word, int wordId) {
        if (null == word || word.length() == 0) {
            return DELETERES.FAIL_EMPTYWORLD;
        }
        if (!isSupported(word)) {
            // Such a word can never have been stored.
            return DELETERES.FAIL_WORLD_NOT_EXIST;
        }

        // path[i] is the node reached after consuming i characters; path[0] is the start node.
        int length = word.length();
        TrieNode[] path = new TrieNode[length + 1];
        path[0] = root;
        for (int i = 0; i < length; i++) {
            TrieNode child = path[i].childNodes[TrieNode.getCharPosition(word.charAt(i))];
            if (child == null) {
                return DELETERES.FAIL_WORLD_NOT_EXIST;
            }
            path[i + 1] = child;
        }

        TrieNode last = path[length];
        // Refuse to touch anything unless this exact word is stored and carries the id;
        // otherwise a wrong id would corrupt the counts and id sets of other words.
        if (last.freq <= 0 || !last.wordSet.contains(wordId)) {
            return DELETERES.FAIL_WORLD_NOT_EXIST;
        }
        last.freq--;

        // Walk back up: drop the id from every node on the path, releasing emptied nodes.
        for (int i = length; i >= 1; i--) {
            TrieNode node = path[i];
            node.wordSet.remove(wordId);
            if (node.wordSet.size() == 0) {
                path[i - 1].childNodes[TrieNode.getCharPosition(word.charAt(i - 1))] = null;
                treeSize--;
            }
        }
        return DELETERES.SUCCESS;
    }

    /**
     * Replaces {@code oldWord} by {@code newWord}, keeping the same id. Note the parameter
     * order: the new word comes first.
     *
     * @param newWord the word to store instead; {@code null} or empty just deletes the old word
     * @param oldWord the word to remove
     * @param wordId  the id the old word was added with
     * @throws IllegalArgumentException if {@code newWord} contains a character outside 'a'..'z'
     */
    public void updateWord(String newWord, String oldWord, int wordId) {
        updateWord(trieRoot, newWord, oldWord, wordId);
    }

    /**
     * Replaces {@code oldWord} by {@code newWord} below {@code root}. The new word is only
     * added when the old one was actually deleted.
     *
     * @param root    the node to start from
     * @param newWord the word to store instead; {@code null} or empty just deletes the old word
     * @param oldWord the word to remove
     * @param wordId  the id the old word was added with
     * @throws IllegalArgumentException if {@code newWord} contains a character outside
     *                                  'a'..'z'; the old word is kept in that case
     */
    public void updateWord(TrieNode root, String newWord, String oldWord, int wordId) {
        // Reject an unusable replacement before deleting, so the old word is never lost.
        if (newWord != null && newWord.length() > 0) {
            requireSupported(newWord);
        }
        if (deleteWord(root, oldWord, wordId) == DELETERES.SUCCESS) {
            addWord(root, newWord, wordId);
        }
    }

    /**
     * Finds the ids of all stored words that start with {@code word}.
     *
     * @param word the prefix to look up
     * @return a fresh set of ids; empty for a {@code null}, empty or unknown prefix
     */
    public HashSet<Integer> searchPrefixWord(String word) {
        return searchPrefixWord(trieRoot, word);
    }

    /**
     * Finds the ids of all words below {@code root} that start with {@code word}.
     *
     * @param root the node to start from
     * @param word the prefix to look up
     * @return a fresh set of ids (modifying it does not affect the trie); empty for a
     *         {@code null}, empty or unknown prefix
     */
    public HashSet<Integer> searchPrefixWord(TrieNode root, String word) {
        TrieNode node = findNode(root, word);
        if (node == null) {
            return new HashSet<Integer>();
        }
        // Hand out a copy: the node's own set is internal bookkeeping.
        return new HashSet<Integer>(node.wordSet);
    }

    /**
     * Counts how many times {@code word} is stored.
     *
     * @param word the exact word to look up
     * @return the number of stored occurrences; 0 for a {@code null}, empty or unknown word
     */
    public int wordCount(String word) {
        return wordCount(trieRoot, word);
    }

    /**
     * Counts how many times {@code word} is stored below {@code root}.
     *
     * @param root the node to start from
     * @param word the exact word to look up
     * @return the number of stored occurrences; 0 for a {@code null}, empty or unknown word
     */
    public int wordCount(TrieNode root, String word) {
        TrieNode node = findNode(root, word);
        return node == null ? 0 : node.freq;
    }

    /** Returns the node reached by following {@code word} from {@code root}, or null if there is none. */
    private static TrieNode findNode(TrieNode root, String word) {
        if (null == word || word.length() == 0 || !isSupported(word)) {
            return null;
        }
        TrieNode node = root;
        for (int i = 0; i < word.length() && node != null; i++) {
            node = node.childNodes[TrieNode.getCharPosition(word.charAt(i))];
        }
        return node;
    }

    /** Tells whether every character of {@code word} has a child slot in a node. */
    private static boolean isSupported(String word) {
        for (int i = 0; i < word.length(); i++) {
            if (!TrieNode.isSupportedChar(word.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /** Throws {@link IllegalArgumentException} unless every character of {@code word} is supported. */
    private static void requireSupported(String word) {
        if (!isSupported(word)) {
            throw new IllegalArgumentException("word must contain only 'a'..'z': " + word);
        }
    }
}

/**
 * Node of the {@link Trie}.<br>
 * Words are assumed to consist of the 26 lowercase English letters; the root node of the
 * trie stores no character.
 */
class TrieNode {
    // Child nodes, indexed by getCharPosition of the child's character.
    public TrieNode[] childNodes;
    // Character held by this node.
    public char nodeChar;

    // Number of stored occurrences of the word that ends at this node.
    public int freq;
    // Ids of the words whose path goes through this node.
    public HashSet<Integer> wordSet;

    /** Creates a node with no children, a zero count and an empty id set. */
    public TrieNode() {
        childNodes = new TrieNode[CHILD_NUM];
        freq = 0;
        wordSet = new HashSet<Integer>();
    }

    private static final int CHILD_NUM = 26;

    /**
     * Maps a character to its offset from {@code 'a'}.
     *
     * @param ch the character to map
     * @return {@code ch - 'a'}; only a valid child index when {@link #isSupportedChar(char)} holds
     */
    public static int getCharPosition(char ch) {
        return (ch - 'a');
    }

    /**
     * Tells whether {@code ch} maps to a valid index into {@code childNodes}.
     *
     * @param ch the character to check
     * @return {@code true} for 'a'..'z', {@code false} otherwise
     */
    static boolean isSupportedChar(char ch) {
        int k = getCharPosition(ch);
        return k >= 0 && k < CHILD_NUM;
    }
}
测试:
1 package cn.edu.buaa.trie; 2 3 /** 4 * @author zsm 5 * @date 2016年10月25日 下午3:12:02 6 * @version 1.0 7 * @parameter 8 * @return 9 */ 10 public class Main_Trie { 11 12 public static void main(String[] args) { 13 // TODO Auto-generated method stub 14 Trie trie = new Trie(); 15 String wd1 = "ab"; 16 String wd2 = "ac"; 17 String wd3 = "acd"; 18 19 String wd4 = "add"; 20 21 trie.addWord(wd1, 1); 22 trie.addWord(wd2, 2); 23 trie.addWord(wd2, 3); 24 trie.addWord(wd3, 4); 25 26 // wd1,wd2,wd2,wd3 27 System.out.println(trie.wordCount(wd2));// 2 28 System.out.println(trie.wordCount(wd3));// 1 29 System.out.println(trie.getTreeSize());// 4 30 System.out.println(); 31 32 trie.deleteWord(wd3, 4); 33 // wd1,wd2,wd2 34 System.out.println(trie.wordCount(wd2));// 2 35 System.out.println(trie.wordCount(wd3));// 0 36 System.out.println(trie.getTreeSize());// 3 37 System.out.println(); 38 39 trie.addWord(wd3, 4); 40 // wd1,wd2,wd2,wd3 41 System.out.println(trie.wordCount(wd2));// 2 42 System.out.println(trie.wordCount(wd3));// 1 43 System.out.println(trie.getTreeSize());// 4 44 System.out.println(); 45 46 trie.deleteWord(wd2, 2); 47 trie.deleteWord(wd2, 3); 48 // wd1,wd3 49 System.out.println(trie.wordCount(wd2));// 0 50 System.out.println(trie.wordCount(wd3));// 1 51 System.out.println(trie.getTreeSize());// 4 52 System.out.println(trie.searchPrefixWord("a"));// [1,4] 53 System.out.println(); 54 55 trie.updateWord(wd3, wd4, 4); 56 // wd1,wd3 57 System.out.println(trie.searchPrefixWord("a"));// [1,4] 58 System.out.println(trie.wordCount(wd2));// 0 59 System.out.println(trie.wordCount(wd3));// 1 60 System.out.println(trie.wordCount(wd4));// 0 61 System.out.println(trie.getTreeSize());// 4 62 System.out.println(); 63 64 trie.updateWord(wd4, wd3, 4); 65 // wd1,wd4 66 System.out.println(trie.searchPrefixWord("a"));// [1,4] 67 System.out.println(trie.wordCount(wd2));// 0 68 System.out.println(trie.wordCount(wd3));// 0 69 System.out.println(trie.wordCount(wd4));// 1 70 
System.out.println(trie.getTreeSize());// 4 71 System.out.println(); 72 } 73 }
3、参考资料
http://www.cnblogs.com/huangxincheng/archive/2012/11/25/2788268.html