【转】关键字过滤算法
using System;
using System.Collections.Generic;
using System.Text;
using System.Data;
using System.Collections;

namespace BLL.Common
{
    #region Filter entry point

    /// <summary>
    /// Static convenience wrapper around <see cref="KeywordsFilterClass"/>.
    /// </summary>
    public class KeywordsFilter
    {
        /// <summary>
        /// Runs the keyword filter over <paramref name="keywords"/>.
        /// </summary>
        /// <param name="keywords">Text to filter.</param>
        /// <returns>
        /// The text returned by <see cref="KeywordsFilterClass.BadwordInKeywords"/>.
        /// The bad-word list built here is empty, so the input comes back unchanged
        /// until that list is populated.
        /// </returns>
        public static string Filter(string keywords)
        {
            // Collection of words that must be filtered out (left empty in this file).
            List<string> badwords = new List<string>();

            KeywordsFilterClass kf = new KeywordsFilterClass();
            return kf.BadwordInKeywords(keywords, badwords);
        }
    }

    #endregion

    #region Keyword filter implementation

    /// <summary>
    /// Replaces registered bad words in a text with a fixed mask.
    /// Words registered through one call stay registered on the instance for later calls.
    /// </summary>
    public class KeywordsFilterClass
    {
        /// <summary>Text written in place of every matched bad word.</summary>
        private const string Mask = "**";

        /// <summary>Every registered bad word (ordinal comparison).</summary>
        private readonly HashSet<string> hash = new HashSet<string>(StringComparer.Ordinal);

        // One flag per possible char value; char.MaxValue itself is a valid index,
        // hence the "+ 1".
        /// <summary>Flags the characters that start at least one bad word.</summary>
        private readonly BitArray firstCharCheck = new BitArray(char.MaxValue + 1);

        /// <summary>Flags the characters that occur anywhere in a bad word.</summary>
        private readonly BitArray allCharCheck = new BitArray(char.MaxValue + 1);

        /// <summary>Length of the longest registered bad word.</summary>
        private int maxLength = 0;

        /// <summary>
        /// Registers the given bad words and updates the lookup tables.
        /// </summary>
        /// <param name="badwords">Words to register; null, empty and already-known entries are skipped.</param>
        private void InitHash(List<string> badwords)
        {
            if (badwords == null)
            {
                return;
            }

            foreach (string word in badwords)
            {
                // An empty word has no first character and can never match anything.
                if (string.IsNullOrEmpty(word))
                {
                    continue;
                }

                // Add returns false when the word is already registered.
                if (!hash.Add(word))
                {
                    continue;
                }

                maxLength = Math.Max(maxLength, word.Length);
                firstCharCheck[word[0]] = true;
                foreach (char c in word)
                {
                    allCharCheck[c] = true;
                }
            }
        }

        /// <summary>
        /// Replaces every occurrence of a bad word in <paramref name="text"/> with "**".
        /// The text is scanned left to right; at each position the shortest registered
        /// word starting there is masked and the scan resumes right after it.
        /// </summary>
        /// <param name="text">Text to filter. Null or empty text is returned unchanged.</param>
        /// <param name="badwords">Bad words to register on this instance before filtering.</param>
        /// <returns>The filtered text.</returns>
        public string BadwordInKeywords(string text, List<string> badwords)
        {
            // Register the words first so they are remembered even if there is nothing to scan.
            this.InitHash(badwords);

            if (string.IsNullOrEmpty(text) || maxLength == 0)
            {
                return text;
            }

            // The output is built separately so that positions in the scanned text never
            // shift and inserted masks are never re-examined.
            StringBuilder result = new StringBuilder(text.Length);
            int index = 0;
            while (index < text.Length)
            {
                int matched = firstCharCheck[text[index]] ? this.MatchLength(text, index) : 0;
                if (matched > 0)
                {
                    result.Append(Mask);
                    index += matched;
                }
                else
                {
                    result.Append(text[index]);
                    index++;
                }
            }

            return result.ToString();
        }

        /// <summary>
        /// Returns the length of the shortest registered bad word that starts at
        /// <paramref name="start"/>, or 0 when none does.
        /// </summary>
        private int MatchLength(string text, int start)
        {
            int limit = Math.Min(maxLength, text.Length - start);
            for (int length = 1; length <= limit; length++)
            {
                // A character that appears in no bad word rules out every longer candidate.
                if (!allCharCheck[text[start + length - 1]])
                {
                    return 0;
                }

                if (hash.Contains(text.Substring(start, length)))
                {
                    return length;
                }
            }

            return 0;
        }
    }

    #endregion
}