敏感词过滤

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace C2buy.Framework.Utility.StopWords
{
/// <summary>
/// A character trie used to find sensitive words in text and mask them.
/// Each node maps a character to the child node reached by that character;
/// a node with <see cref="End"/> set marks the last character of a keyword.
/// </summary>
public class TrieTree
{
    // Maximum number of consecutive non-matching characters tolerated between
    // two characters of a keyword (so "s-b" still matches the keyword "sb").
    private const int MaxSkippedChars = 1;

    // Character written over every position of a matched span.
    private const char MaskChar = '*';

    private readonly Dictionary<char, TrieTree> Children;

    /// <summary>True when the path from the root to this node spells a complete keyword.</summary>
    public bool End { get; set; }

    /// <summary>Creates an empty node with no children.</summary>
    public TrieTree()
    {
        Children = new Dictionary<char, TrieTree>();
    }

    /// <summary>
    /// Adds a keyword to the trie. Null or empty keywords are ignored.
    /// </summary>
    /// <param name="keyword">The sensitive word to register.</param>
    public void AddKey(string keyword)
    {
        if (String.IsNullOrEmpty(keyword))
        {
            return;
        }

        var cNode = this;

        foreach (var key in keyword)
        {
            TrieTree next;
            // Single lookup: reuse the existing child or create it on demand.
            if (!cNode.Children.TryGetValue(key, out next))
            {
                next = new TrieTree();
                cNode.Children.Add(key, next);
            }
            cNode = next;
        }
        cNode.End = true;
    }

    /// <summary>
    /// Returns a copy of <paramref name="text"/> in which every matched keyword span
    /// (including any tolerated skipped character inside it) is overwritten with '*'.
    /// The result always has the same length as the input.
    /// </summary>
    /// <param name="text">The text to filter. Null or empty input is returned unchanged.</param>
    /// <returns>The masked text, or the original instance when nothing matched.</returns>
    public string Replace(string text)
    {
        if (String.IsNullOrEmpty(text))
        {
            return text;
        }

        // Allocated lazily so text without any match is returned without copying.
        char[] masked = null;
        var i = 0;

        while (i < text.Length)
        {
            var len = MatchLength(text, i);
            if (len == 0)
            {
                i++;
                continue;
            }

            if (masked == null)
            {
                masked = text.ToCharArray();
            }

            // Mask only the matched span; other positions are left untouched.
            for (var k = i; k < i + len; k++)
            {
                masked[k] = MaskChar;
            }

            // Resume at the character immediately after the match so an
            // adjacent keyword is not skipped.
            i += len;
        }

        return masked == null ? text : new string(masked);
    }

    /// <summary>
    /// Finds the shortest keyword match that begins exactly at <paramref name="start"/>.
    /// </summary>
    /// <returns>The length of the matched span, or 0 when no keyword starts here.</returns>
    private int MatchLength(string text, int start)
    {
        TrieTree node;
        // The first character must match exactly; skipping is only allowed inside a keyword.
        if (!Children.TryGetValue(text[start], out node))
        {
            return 0;
        }

        // A single-character keyword is complete as soon as its only character matches.
        if (node.End)
        {
            return 1;
        }

        var skipped = 0;
        for (var j = start + 1; j < text.Length; j++)
        {
            TrieTree next;
            if (node.Children.TryGetValue(text[j], out next))
            {
                node = next;
                skipped = 0;
                if (node.End)
                {
                    return j + 1 - start;
                }
            }
            else
            {
                // Tolerate a limited run of non-matching characters between keyword characters.
                skipped++;
                if (skipped > MaxSkippedChars)
                {
                    break;
                }
            }
        }

        return 0;
    }
}
}

 

 

 

 

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace minganci
{
/// <summary>
/// Console demo for the sensitive-word filter: registers a few keywords,
/// then prints a sample sentence before and after filtering.
/// </summary>
class Program
{
    /// <summary>Entry point of the demo; <paramref name="args"/> is not used.</summary>
    static void Main(string[] args)
    {
        // NOTE(review): in this listing TrieTree is declared in the namespace
        // C2buy.Framework.Utility.StopWords; confirm it is reachable from here.
        var trie = new TrieTree();
        var keywords = "傻逼,sb,草尼玛".Split(',');
        foreach (var key in keywords)
        {
            trie.AddKey(key);
        }

        var text = @"我草泥马,你哥傻逼";
        Console.WriteLine(text);
        // Previously the trie was built but never applied; print the filtered text as well.
        Console.WriteLine(trie.Replace(text));
        // Wait for input so the console window stays open.
        Console.Read();
    }
}
}

posted @ 2012-11-28 17:24  net_miao  阅读(245)  评论(0)  编辑  收藏  举报