Java手写字典树(Trie树),实现敏感词过滤

1.简介

字典树:也叫做前缀树,是一种高效的存储、匹配字符串的数据结构,存储过程如下:

假设我们有单词:app、apple、api、cat。如果存放在列表中:

["app"、"apple"、"api"、"cat"],要保存14个字符,使用字典树之后就变成了9个字符

2.代码实现

话不多说,代码很简洁,直接看注释就能看懂,如下: 

package 算法;

import sun.reflect.generics.tree.Tree;

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Trie (prefix tree) keyed by {@code char}, used here for sensitive-word filtering.
 *
 * <p>Each word is stored one character per node below a shared root, so words with a
 * common prefix share the nodes of that prefix.
 *
 * @author stukk
 */
public class TrieTree {

    /** Root node; it represents the empty prefix and carries no character itself. */
    private final TreeNode root;

    /** Creates an empty trie containing only the root node. */
    public TrieTree() {
        this.root = new TreeNode();
    }

    /**
     * Inserts a word, creating any missing nodes along its character path and
     * marking the final node as the end of a word.
     *
     * @param word the word to store
     */
    public void addWord(String word) {
        TreeNode treeNode = root;
        for (char ch : word.toCharArray()) {
            if (!treeNode.contains(ch)) {
                treeNode.addChildren(ch, new TreeNode());
            }
            treeNode = treeNode.getChildren(ch);
        }
        treeNode.setIsWord(true);
    }

    /**
     * Walks the trie along the characters of {@code prefix}.
     *
     * @param prefix the character sequence to follow from the root
     * @return the node reached after consuming the whole prefix, or {@code null}
     *         if some character has no matching child
     */
    public TreeNode searchPrefix(String prefix) {
        TreeNode treeNode = root;
        for (char ch : prefix.toCharArray()) {
            if (!treeNode.contains(ch)) {
                return null;
            }
            treeNode = treeNode.getChildren(ch);
        }
        return treeNode;
    }

    /**
     * Checks whether exactly this word was added to the trie.
     *
     * @param word the word to look up
     * @return {@code true} if the path for {@code word} exists and ends on a node
     *         marked as a word
     */
    public boolean searchWord(String word) {
        TreeNode node = searchPrefix(word);
        return node != null && node.getWord();
    }

    /**
     * Sensitive-word check: scans the sentence for any stored word occurring as a
     * contiguous substring.
     *
     * @param sentence the text to check; {@code null} is treated as containing nothing
     * @return {@code true} if the sentence passes (no stored word found),
     *         {@code false} as soon as a stored word is found
     */
    public boolean filter(String sentence) {
        if (sentence == null) {
            return true;
        }
        int length = sentence.length();
        // Try every position as the start of a potential match.
        for (int startIndex = 0; startIndex < length; startIndex++) {
            TreeNode treeNode = root;
            // The scan is bounded by the sentence length, so a partial match that
            // runs into the end of the sentence stops instead of reading past it.
            for (int nowIndex = startIndex; nowIndex < length; nowIndex++) {
                char ch = sentence.charAt(nowIndex);
                if (!treeNode.contains(ch)) {
                    // Mismatch: no stored word continues here; move to the next start.
                    break;
                }
                treeNode = treeNode.getChildren(ch);
                if (treeNode.getWord()) {
                    // A stored (forbidden) word ends at this character.
                    return false;
                }
            }
        }
        return true;
    }

    /**
     * Adds every word of the list to the trie.
     *
     * @param words the words to store
     */
    public void setWords(List<String> words) {
        for (String word : words) {
            addWord(word);
        }
    }

    /** Small demo: registers one forbidden word and checks a sentence that contains it. */
    public static void main(String[] args) {
        TrieTree trieTree = new TrieTree();
        trieTree.setWords(Arrays.asList("你大爷"));
        boolean filter = trieTree.filter("我*你大爷的");
        if (filter) {
            System.out.println("不违规");
        } else {
            System.out.println("违规");
        }
    }

}

// A single trie node: child links keyed by character, plus an end-of-word flag.
class TreeNode {
    // Child nodes, one entry per outgoing character.
    private final Map<Character, TreeNode> children = new HashMap<>();
    // True when the path from the root to this node spells a stored word.
    private boolean isWord;

    // Creates a node with no children that does not end a word.
    public TreeNode() {
    }

    // Reports whether this node has a child entry for the given character.
    public boolean contains(char ch) {
        return children.containsKey(ch);
    }

    // Registers (or replaces) the child reached through the given character.
    public void addChildren(char ch, TreeNode treeNode) {
        children.put(ch, treeNode);
    }

    // Returns the child for the given character, or null when there is none.
    public TreeNode getChildren(char ch) {
        return children.get(ch);
    }

    // Marks or unmarks this node as the end of a word.
    public void setIsWord(boolean ok) {
        this.isWord = ok;
    }

    // Returns whether this node ends a word.
    public Boolean getWord() {
        return isWord;
    }
}

posted on 2024-04-10 21:22  zyp_java_net  阅读(41)  评论(0)  编辑  收藏  举报

导航