.NET 下的脏话过滤算法

遍历字典、对句子逐个调用 Replace 来反向过滤的做法是不可取的。
正确的做法是逐字扫描句子，并到字典中查找比对，思路如下：


/// <summary>
/// Replaces dictionary words found inside a text with their configured substitutes.
/// Words are stored in a character trie; the text is scanned once from left to right
/// and, at every position, the longest dictionary word starting there is replaced.
/// Matching ignores case (invariant culture); text that is not part of a match is
/// copied to the result unchanged.
/// </summary>
public class Filtrate
{
    /// <summary>
    /// First level of the trie: lower-cased first character of every word mapped to its node.
    /// </summary>
    public Dictionary<char, CharNode> dict = new Dictionary<char, CharNode>();

    /// <summary>Creates an empty filter.</summary>
    public Filtrate()
    {
    }

    /// <summary>Creates a filter pre-loaded with the given word/replacement pairs.</summary>
    /// <param name="ict">Words to filter (keys) and their replacements (values).</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="ict"/> is null.</exception>
    public Filtrate(IDictionary<string, string> ict)
    {
        Add(ict);
    }

    /// <summary>Registers every word/replacement pair of <paramref name="ict"/>.</summary>
    /// <param name="ict">Words to filter (keys) and their replacements (values).</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="ict"/> is null.</exception>
    public void Add(IDictionary<string, string> ict)
    {
        if (ict == null)
        {
            throw new System.ArgumentNullException("ict");
        }

        foreach (KeyValuePair<string, string> item in ict)
        {
            Add(item.Key, item.Value);
        }
    }

    /// <summary>
    /// Registers one word. Adding a word that already exists overwrites its replacement.
    /// </summary>
    /// <param name="oldStr">Word to filter. A null or empty word is ignored.</param>
    /// <param name="newStr">Replacement text; null is treated as an empty string.</param>
    public void Add(string oldStr, string newStr)
    {
        // An empty word can never match anything; indexing oldStr[0] would throw.
        if (string.IsNullOrEmpty(oldStr))
        {
            return;
        }

        // Keys are stored lower-cased so that lookups in Replace are case-insensitive.
        char first = char.ToLowerInvariant(oldStr[0]);
        CharNode node;
        if (!dict.TryGetValue(first, out node))
        {
            // A null value marks a node that does not (yet) end a word.
            node = new CharNode(null, 1);
            dict.Add(first, node);
        }

        if (oldStr.Length == 1)
        {
            node.value = newStr ?? string.Empty;
        }
        else
        {
            node.Add(oldStr.Substring(1), newStr);
        }
    }

    /// <summary>
    /// Returns a copy of <paramref name="s"/> in which every registered word is replaced.
    /// </summary>
    /// <param name="s">Text to filter.</param>
    /// <returns>
    /// The filtered text. A null or empty input is returned as it is.
    /// </returns>
    public string Replace(string s)
    {
        if (string.IsNullOrEmpty(s))
        {
            return s;
        }

        StringBuilder sb = new StringBuilder(s.Length);
        int i = 0;
        while (i < s.Length)
        {
            CharNode node;
            if (dict.TryGetValue(char.ToLowerInvariant(s[i]), out node))
            {
                // The node appends either the replacement of the longest word found
                // at i, or the single original character s[i] when no word matches,
                // and reports how many input characters that accounts for (>= 1).
                i += node.replace(s, i, ref sb);
            }
            else
            {
                sb.Append(s[i]);
                i++;
            }
        }

        return sb.ToString();
    }
}

/// <summary>
/// One node of the word trie used by <see cref="Filtrate"/>.
/// </summary>
public class CharNode
{
    /// <summary>
    /// Replacement text when a word ends at this node; null when the node is only
    /// a step on the way to longer words.
    /// </summary>
    public string value;

    /// <summary>Depth of the node in the trie (1 for a first-level node).</summary>
    public int index = 0;

    /// <summary>Child nodes keyed by the lower-cased next character.</summary>
    public Dictionary<char, CharNode> dict = new Dictionary<char, CharNode>();

    /// <summary>Creates a node.</summary>
    /// <param name="_value">Replacement text, or null for a node that ends no word.</param>
    /// <param name="_index">Depth of the node in the trie.</param>
    public CharNode(string _value, int _index)
    {
        value = _value;
        index = _index;
    }

    /// <summary>
    /// Matches the input against the sub-trie rooted at this node, which is assumed
    /// to correspond to the character at position <paramref name="i"/>.
    /// </summary>
    /// <param name="intpuntstring">Text being filtered.</param>
    /// <param name="i">Position of the character this node corresponds to.</param>
    /// <param name="sb">Receives the output for the consumed characters.</param>
    /// <returns>
    /// Number of input characters consumed starting at <paramref name="i"/>:
    /// the length of the longest word found (its replacement is appended), or 1
    /// when no word starts here (the original character is appended).
    /// </returns>
    public int replace(string intpuntstring, int i, ref StringBuilder sb)
    {
        CharNode current = this;
        int pos = i;
        int matchEnd = -1;
        string replacement = null;

        while (true)
        {
            // Remember the deepest word ending seen so far; a longer candidate that
            // later fails must fall back to it.
            if (current.value != null)
            {
                matchEnd = pos;
                replacement = current.value;
            }

            int nextPos = pos + 1;
            CharNode next;
            if (nextPos >= intpuntstring.Length
                || !current.dict.TryGetValue(char.ToLowerInvariant(intpuntstring[nextPos]), out next))
            {
                break;
            }

            current = next;
            pos = nextPos;
        }

        if (replacement == null)
        {
            // No word starts at i: emit the original character only, so the caller
            // rescans from i + 1 and does not miss words that begin inside the
            // failed prefix.
            sb.Append(intpuntstring[i]);
            return 1;
        }

        sb.Append(replacement);
        return matchEnd - i + 1;
    }

    /// <summary>
    /// Registers the remainder of a word below this node.
    /// </summary>
    /// <param name="oldStr">
    /// Characters of the word that follow this node. A null or empty string is ignored.
    /// </param>
    /// <param name="newStr">Replacement text; null is treated as an empty string.</param>
    public void Add(string oldStr, string newStr)
    {
        if (string.IsNullOrEmpty(oldStr))
        {
            return;
        }

        CharNode current = this;
        foreach (char raw in oldStr)
        {
            char key = char.ToLowerInvariant(raw);
            CharNode child;
            if (!current.dict.TryGetValue(key, out child))
            {
                child = new CharNode(null, current.index + 1);
                current.dict.Add(key, child);
            }

            current = child;
        }

        // Only the last node ends the word; intermediate nodes keep whatever
        // replacement they already had from shorter words.
        current.value = newStr ?? string.Empty;
    }
}

定义好之后，实例化 Filtrate 并进行过滤：

// Build the filter: every entry read from db.TN_Filtrate contributes four words.
Filtrate f = new Filtrate();
var filete = db.TN_Filtrate.ToList();
foreach (var row in filete)
{
    // Each of the four columns is registered as its own word with a fixed mask.
    f.Add(row.FiltrateWord.ToString(), "**");
    f.Add(row.FiltrateWords.ToString(), "***");
    f.Add(row.FiltrateIdiom.ToString(), "***");
    f.Add(row.FiltrateString.ToString(), "***");
}

// In the lines shown here only the title is passed through the filter;
// the editor content is read as-is.
string content = fc["editorContent"];
string title = f.Replace(fc["txtTitle"]);

……呵呵，园子里面也设置了脏话过滤，我只能截图，要不然就写不下去了。

posted @ 2011-09-05 16:49  指间缘  阅读(169)  评论(0)  编辑  收藏  举报