小荷才露尖尖角

导航

关键字过滤算法

 

using System;
using System.Collections.Generic;
using System.Text;
using System.Data;
using System.Collections;

namespace BLL.Common
{
#region 操作类
/// <summary>
/// Convenience entry point that runs a piece of text through
/// <see cref="KeywordsFilterClass"/>.
/// </summary>
public class KeywordsFilter
{
    #region Keyword filtering
    /// <summary>
    /// Filters the given text using a freshly created <see cref="KeywordsFilterClass"/>.
    /// </summary>
    /// <param name="keywords">
    /// Text to filter. A null or empty value is returned unchanged instead of
    /// being handed to the filter.
    /// </param>
    /// <returns>
    /// The text returned by <see cref="KeywordsFilterClass.BadwordInKeywords"/>.
    /// </returns>
    public static string Filter(string keywords)
    {
        // Nothing to scan; this also keeps a null string away from the filter,
        // which reads the text's length.
        if (string.IsNullOrEmpty(keywords))
        {
            return keywords;
        }

        // Collection of words that must be filtered out.
        // NOTE(review): nothing in this method populates the list, so as written
        // no word is registered with the filter. Load the real word list here.
        List<string> badwords = new List<string>();

        KeywordsFilterClass kf = new KeywordsFilterClass();
        return kf.BadwordInKeywords(keywords, badwords);
    }
    #endregion
}
#endregion

#region 关键字过滤类
/// <summary>
/// Keyword filter: replaces every registered "bad word" found in a text with "**".
/// Registered words accumulate on the instance across calls to
/// <see cref="BadwordInKeywords"/>.
/// </summary>
public class KeywordsFilterClass
{
    // Number of distinct char values (0 .. char.MaxValue inclusive). The bit arrays
    // are indexed by char, so they need one slot per value, including '\uFFFF'.
    private const int CharCount = char.MaxValue + 1;

    // Text written in place of each matched bad word.
    private const string Mask = "**";

    // Registered bad words. Used as a set: only the keys matter, values are always null.
    private Dictionary<string, object> hash = new Dictionary<string, object>();
    // Bit set per char that starts at least one registered bad word.
    private BitArray firstCharCheck = new BitArray(CharCount);
    // Bit set per char that occurs anywhere in a registered bad word.
    private BitArray allCharCheck = new BitArray(CharCount);
    // Length of the longest registered bad word; bounds the candidate length at each position.
    private int maxLength = 0;

    /// <summary>
    /// Registers the given bad words: adds each new word to the dictionary,
    /// updates the maximum word length and marks its characters in the lookup tables.
    /// </summary>
    /// <param name="badwords">
    /// Words to register. A null list, and null or empty entries, are ignored.
    /// </param>
    private void InitHash(List<string> badwords)
    {
        if (badwords == null)
        {
            return;
        }

        foreach (string word in badwords)
        {
            // A null key would make the dictionary throw and an empty word has no
            // first character, so neither can be registered.
            if (string.IsNullOrEmpty(word))
            {
                continue;
            }

            // Only store words that are not registered yet.
            if (hash.ContainsKey(word))
            {
                continue;
            }

            hash.Add(word, null);
            this.maxLength = Math.Max(this.maxLength, word.Length);
            firstCharCheck[word[0]] = true;
            foreach (char c in word)
            {
                allCharCheck[c] = true;
            }
        }
    }

    /// <summary>
    /// Replaces each bad word found in <paramref name="text"/> with "**".
    /// The text is scanned once from left to right; at each position the shortest
    /// registered word starting there is masked and scanning resumes right after it.
    /// Inserted masks are not scanned again.
    /// </summary>
    /// <param name="text">Text to filter. A null or empty value is returned unchanged.</param>
    /// <param name="badwords">
    /// Words to register on this instance before scanning (in addition to any
    /// registered by earlier calls). May be null.
    /// </param>
    /// <returns>The text with every matched bad word replaced by "**".</returns>
    public string BadwordInKeywords(string text, List<string> badwords)
    {
        // Register the words first so they are kept even if there is nothing to scan.
        this.InitHash(badwords);

        if (string.IsNullOrEmpty(text) || maxLength == 0)
        {
            return text;
        }

        // Build the output separately instead of rewriting the text being scanned,
        // so the scan position always refers to the original characters.
        StringBuilder result = new StringBuilder(text.Length);
        int index = 0;
        while (index < text.Length)
        {
            int matched = MatchLengthAt(text, index);
            if (matched > 0)
            {
                result.Append(Mask);
                // Resume at the character immediately after the match.
                index += matched;
            }
            else
            {
                result.Append(text[index]);
                index++;
            }
        }

        return result.ToString();
    }

    /// <summary>
    /// Returns the length of the shortest registered bad word that starts at
    /// <paramref name="start"/> in <paramref name="text"/>, or 0 when none does.
    /// </summary>
    private int MatchLengthAt(string text, int start)
    {
        // Cheap rejection: no registered word begins with this character.
        if (!firstCharCheck[text[start]])
        {
            return 0;
        }

        int limit = Math.Min(maxLength, text.Length - start);
        for (int length = 1; length <= limit; length++)
        {
            // A character that appears in no registered word ends every candidate here.
            if (!allCharCheck[text[start + length - 1]])
            {
                return 0;
            }

            if (hash.ContainsKey(text.Substring(start, length)))
            {
                return length;
            }
        }

        return 0;
    }
}
#endregion
}

 

 

posted on 2010-09-19 16:31  小.荷  阅读(600)  评论(0)  编辑  收藏  举报