BadWordParse 类:

using System;
using System.Collections.Generic;
using System.Text;
using System.Collections;
using System.IO;
namespace charCheck
{
/// <summary>
/// Bad-word filter backed by a delimiter-separated dictionary file.
/// One-character words are stored in a per-character bit table, longer words
/// in a hash set. A per-character bit mask records at which word positions a
/// character occurs, so most candidates are rejected without a hash lookup.
/// </summary>
public class BadWordParse
{
    /// <summary>
    /// Number of distinct <see cref="char"/> values (0x0000 through 0xFFFF).
    /// The lookup tables need one slot per value, i.e. char.MaxValue + 1.
    /// </summary>
    private const int CharTableSize = char.MaxValue + 1;

    // Dictionary words that are two or more characters long.
    private HashSet<string> hash = new HashSet<string>();
    // For each character: bit i (0..6) is set when some word has the character
    // at position i; bit 7 is set when some word has it at position 7 or later.
    private byte[] fastCheck = new byte[CharTableSize];
    // Marks characters that are, on their own, a dictionary word.
    private BitArray charCheck = new BitArray(CharTableSize);
    private int maxWordLength = 0;
    private int minWordLength = int.MaxValue;
    private bool _isHave = false;
    private string _replaceString = "*";
    private char _splitString = '|';
    private string _newWord;
    private string _badWordFilePath;

    /// <summary>
    /// Whether the most recent <see cref="ReplaceBadWord"/> call found a bad word.
    /// </summary>
    public bool IsHave
    {
        get { return _isHave; }
    }

    /// <summary>
    /// Mask text. A one-character match is replaced by the first character of
    /// this string; a longer match is replaced by this string right-padded
    /// with its first character up to the length of the match.
    /// A null or empty value falls back to "*".
    /// </summary>
    public string ReplaceString
    {
        set { _replaceString = value; }
    }

    /// <summary>
    /// Dictionary delimiter. The dictionary is parsed in the constructor, so
    /// assigning this property afterwards does not re-parse the file; use the
    /// two-argument constructor to load with a different delimiter.
    /// </summary>
    public char SplitString
    {
        set { _splitString = value; }
    }

    /// <summary>
    /// Result of the most recent <see cref="ReplaceBadWord"/> call.
    /// </summary>
    public string NewWord
    {
        get { return _newWord; }
    }

    /// <summary>
    /// Path of the dictionary file. Assigning it does not reload the dictionary.
    /// </summary>
    public string BadWordFilePath
    {
        get { return _badWordFilePath; }
        set { _badWordFilePath = value; }
    }

    /// <summary>
    /// Loads a dictionary whose words are separated by '|'.
    /// </summary>
    /// <param name="filePath">Dictionary file, read as GB2312. A missing file yields an empty dictionary.</param>
    public BadWordParse(string filePath)
        : this(filePath, '|')
    {
    }

    /// <summary>
    /// Loads a dictionary whose words are separated by <paramref name="splitChar"/>.
    /// </summary>
    /// <param name="filePath">Dictionary file, read as GB2312. A missing file yields an empty dictionary.</param>
    /// <param name="splitChar">Character that separates words in the file.</param>
    public BadWordParse(string filePath, char splitChar)
    {
        _badWordFilePath = filePath;
        _splitString = splitChar;

        string srList = string.Empty;
        if (File.Exists(_badWordFilePath))
        {
            // NOTE(review): on .NET Core / .NET 5+ the "gb2312" encoding is presumably
            // only available after registering the code-pages provider - confirm for the target runtime.
            using (StreamReader sr = new StreamReader(_badWordFilePath, Encoding.GetEncoding("gb2312")))
            {
                srList = sr.ReadToEnd();
            }
        }

        foreach (string word in srList.Split(_splitString))
        {
            // An empty entry (empty file, trailing or doubled delimiter) is not a word.
            // Counting it would drive minWordLength to 0 and register "" as a bad word.
            if (word.Length == 0)
            {
                continue;
            }

            maxWordLength = Math.Max(maxWordLength, word.Length);
            minWordLength = Math.Min(minWordLength, word.Length);

            for (int i = 0; i < 7 && i < word.Length; i++)
            {
                fastCheck[word[i]] |= (byte)(1 << i);
            }
            for (int i = 7; i < word.Length; i++)
            {
                fastCheck[word[i]] |= 0x80;
            }

            if (word.Length == 1)
            {
                charCheck[word[0]] = true;
            }
            else
            {
                hash.Add(word);
            }
        }
    }

    /// <summary>
    /// Reports whether <paramref name="text"/> contains any dictionary word.
    /// </summary>
    /// <param name="text">Text to scan; null or empty text contains no bad word.</param>
    /// <returns>true when at least one dictionary word occurs in the text.</returns>
    public bool HasBadWord(string text)
    {
        if (string.IsNullOrEmpty(text))
        {
            return false;
        }

        for (int index = 0; index < text.Length; index++)
        {
            if (MatchLengthAt(text, index) > 0)
            {
                return true;
            }
        }
        return false;
    }

    /// <summary>
    /// Masks every dictionary word found in <paramref name="text"/>, scanning left to
    /// right and masking each match in place. Updates <see cref="IsHave"/> and
    /// <see cref="NewWord"/> for this call.
    /// </summary>
    /// <param name="text">Text to filter; null or empty text is returned unchanged.</param>
    /// <returns>The text with every match replaced by the mask.</returns>
    public string ReplaceBadWord(string text)
    {
        // Reset per call so IsHave never reports a hit from an earlier input.
        _isHave = false;

        if (string.IsNullOrEmpty(text))
        {
            _newWord = text;
            return text;
        }

        string mask = string.IsNullOrEmpty(_replaceString) ? "*" : _replaceString;
        StringBuilder result = new StringBuilder(text.Length);

        int index = 0;
        while (index < text.Length)
        {
            int matched = MatchLengthAt(text, index);
            if (matched == 0)
            {
                result.Append(text[index]);
                index++;
                continue;
            }

            _isHave = true;
            if (matched == 1)
            {
                result.Append(mask[0]);
            }
            else
            {
                result.Append(mask.PadRight(matched, mask[0]));
            }
            // Resume after the match; the scan always runs over the original text,
            // never over mask characters that were just written.
            index += matched;
        }

        _newWord = result.ToString();
        return _newWord;
    }

    /// <summary>
    /// Returns the length of the shortest dictionary word that starts at
    /// <paramref name="index"/>, or 0 when no word starts there.
    /// </summary>
    private int MatchLengthAt(string text, int index)
    {
        char first = text[index];

        // Bit 0 is set only for characters that begin some word.
        if ((fastCheck[first] & 1) == 0)
        {
            return 0;
        }
        if (charCheck[first])
        {
            return 1;
        }

        // j is the offset of the candidate's last character, so the candidate length is j + 1.
        int maxOffset = Math.Min(maxWordLength, text.Length - index) - 1;
        for (int j = 1; j <= maxOffset; j++)
        {
            // No word has this character at position j: no longer candidate can match either.
            if ((fastCheck[text[index + j]] & (1 << Math.Min(j, 7))) == 0)
            {
                return 0;
            }
            if (j + 1 >= minWordLength && hash.Contains(text.Substring(index, j + 1)))
            {
                return j + 1;
            }
        }
        return 0;
    }
}
}
using System.Collections.Generic;
using System.Text;
using System.Collections;
using System.IO;
namespace charCheck
{
/// <summary>
/// Bad-word filter backed by a delimiter-separated dictionary file.
/// One-character words are stored in a per-character bit table, longer words
/// in a hash set. A per-character bit mask records at which word positions a
/// character occurs, so most candidates are rejected without a hash lookup.
/// </summary>
public class BadWordParse
{
    /// <summary>
    /// Number of distinct <see cref="char"/> values (0x0000 through 0xFFFF).
    /// The lookup tables need one slot per value, i.e. char.MaxValue + 1.
    /// </summary>
    private const int CharTableSize = char.MaxValue + 1;

    // Dictionary words that are two or more characters long.
    private HashSet<string> hash = new HashSet<string>();
    // For each character: bit i (0..6) is set when some word has the character
    // at position i; bit 7 is set when some word has it at position 7 or later.
    private byte[] fastCheck = new byte[CharTableSize];
    // Marks characters that are, on their own, a dictionary word.
    private BitArray charCheck = new BitArray(CharTableSize);
    private int maxWordLength = 0;
    private int minWordLength = int.MaxValue;
    private bool _isHave = false;
    private string _replaceString = "*";
    private char _splitString = '|';
    private string _newWord;
    private string _badWordFilePath;

    /// <summary>
    /// Whether the most recent <see cref="ReplaceBadWord"/> call found a bad word.
    /// </summary>
    public bool IsHave
    {
        get { return _isHave; }
    }

    /// <summary>
    /// Mask text. A one-character match is replaced by the first character of
    /// this string; a longer match is replaced by this string right-padded
    /// with its first character up to the length of the match.
    /// A null or empty value falls back to "*".
    /// </summary>
    public string ReplaceString
    {
        set { _replaceString = value; }
    }

    /// <summary>
    /// Dictionary delimiter. The dictionary is parsed in the constructor, so
    /// assigning this property afterwards does not re-parse the file; use the
    /// two-argument constructor to load with a different delimiter.
    /// </summary>
    public char SplitString
    {
        set { _splitString = value; }
    }

    /// <summary>
    /// Result of the most recent <see cref="ReplaceBadWord"/> call.
    /// </summary>
    public string NewWord
    {
        get { return _newWord; }
    }

    /// <summary>
    /// Path of the dictionary file. Assigning it does not reload the dictionary.
    /// </summary>
    public string BadWordFilePath
    {
        get { return _badWordFilePath; }
        set { _badWordFilePath = value; }
    }

    /// <summary>
    /// Loads a dictionary whose words are separated by '|'.
    /// </summary>
    /// <param name="filePath">Dictionary file, read as GB2312. A missing file yields an empty dictionary.</param>
    public BadWordParse(string filePath)
        : this(filePath, '|')
    {
    }

    /// <summary>
    /// Loads a dictionary whose words are separated by <paramref name="splitChar"/>.
    /// </summary>
    /// <param name="filePath">Dictionary file, read as GB2312. A missing file yields an empty dictionary.</param>
    /// <param name="splitChar">Character that separates words in the file.</param>
    public BadWordParse(string filePath, char splitChar)
    {
        _badWordFilePath = filePath;
        _splitString = splitChar;

        string srList = string.Empty;
        if (File.Exists(_badWordFilePath))
        {
            // NOTE(review): on .NET Core / .NET 5+ the "gb2312" encoding is presumably
            // only available after registering the code-pages provider - confirm for the target runtime.
            using (StreamReader sr = new StreamReader(_badWordFilePath, Encoding.GetEncoding("gb2312")))
            {
                srList = sr.ReadToEnd();
            }
        }

        foreach (string word in srList.Split(_splitString))
        {
            // An empty entry (empty file, trailing or doubled delimiter) is not a word.
            // Counting it would drive minWordLength to 0 and register "" as a bad word.
            if (word.Length == 0)
            {
                continue;
            }

            maxWordLength = Math.Max(maxWordLength, word.Length);
            minWordLength = Math.Min(minWordLength, word.Length);

            for (int i = 0; i < 7 && i < word.Length; i++)
            {
                fastCheck[word[i]] |= (byte)(1 << i);
            }
            for (int i = 7; i < word.Length; i++)
            {
                fastCheck[word[i]] |= 0x80;
            }

            if (word.Length == 1)
            {
                charCheck[word[0]] = true;
            }
            else
            {
                hash.Add(word);
            }
        }
    }

    /// <summary>
    /// Reports whether <paramref name="text"/> contains any dictionary word.
    /// </summary>
    /// <param name="text">Text to scan; null or empty text contains no bad word.</param>
    /// <returns>true when at least one dictionary word occurs in the text.</returns>
    public bool HasBadWord(string text)
    {
        if (string.IsNullOrEmpty(text))
        {
            return false;
        }

        for (int index = 0; index < text.Length; index++)
        {
            if (MatchLengthAt(text, index) > 0)
            {
                return true;
            }
        }
        return false;
    }

    /// <summary>
    /// Masks every dictionary word found in <paramref name="text"/>, scanning left to
    /// right and masking each match in place. Updates <see cref="IsHave"/> and
    /// <see cref="NewWord"/> for this call.
    /// </summary>
    /// <param name="text">Text to filter; null or empty text is returned unchanged.</param>
    /// <returns>The text with every match replaced by the mask.</returns>
    public string ReplaceBadWord(string text)
    {
        // Reset per call so IsHave never reports a hit from an earlier input.
        _isHave = false;

        if (string.IsNullOrEmpty(text))
        {
            _newWord = text;
            return text;
        }

        string mask = string.IsNullOrEmpty(_replaceString) ? "*" : _replaceString;
        StringBuilder result = new StringBuilder(text.Length);

        int index = 0;
        while (index < text.Length)
        {
            int matched = MatchLengthAt(text, index);
            if (matched == 0)
            {
                result.Append(text[index]);
                index++;
                continue;
            }

            _isHave = true;
            if (matched == 1)
            {
                result.Append(mask[0]);
            }
            else
            {
                result.Append(mask.PadRight(matched, mask[0]));
            }
            // Resume after the match; the scan always runs over the original text,
            // never over mask characters that were just written.
            index += matched;
        }

        _newWord = result.ToString();
        return _newWord;
    }

    /// <summary>
    /// Returns the length of the shortest dictionary word that starts at
    /// <paramref name="index"/>, or 0 when no word starts there.
    /// </summary>
    private int MatchLengthAt(string text, int index)
    {
        char first = text[index];

        // Bit 0 is set only for characters that begin some word.
        if ((fastCheck[first] & 1) == 0)
        {
            return 0;
        }
        if (charCheck[first])
        {
            return 1;
        }

        // j is the offset of the candidate's last character, so the candidate length is j + 1.
        int maxOffset = Math.Min(maxWordLength, text.Length - index) - 1;
        for (int j = 1; j <= maxOffset; j++)
        {
            // No word has this character at position j: no longer candidate can match either.
            if ((fastCheck[text[index + j]] & (1 << Math.Min(j, 7))) == 0)
            {
                return 0;
            }
            if (j + 1 >= minWordLength && hash.Contains(text.Substring(index, j + 1)))
            {
                return j + 1;
            }
        }
        return 0;
    }
}
}
测试代码:

// Usage example: load the dictionary and mask the bad words in testString.
string filePath = "F://charCheck/charCheck/badword.txt";
// Text to filter; left empty unless the ReadToEnd line below is enabled.
string testString = "";
// The dictionary file is opened with the same GB2312 encoding the parser uses.
// Enabling the ReadToEnd line feeds the dictionary's own content through the filter.
// The using block closes the reader even if reading throws.
using (System.IO.StreamReader sr = new System.IO.StreamReader(filePath, System.Text.Encoding.GetEncoding("gb2312")))
{
    //testString = sr.ReadToEnd();
}
// Disabled timing code: GetTickCount is not declared in this snippet.
//uint t = GetTickCount();
BadWordParse bwp = new BadWordParse(filePath);
string parsedString = bwp.ReplaceBadWord(testString);
//uint time = GetTickCount() - t;
// Disabled output: elapsed time, the original string, then the replaced string.
//Console.Write("使用时间:" + time.ToString());
//Console.Write("\r\n");
//Console.Write("原始字符串" + testString);
//Console.Write("\r\n");
//Console.Write("替换后字符串" + parsedString);
// Text to filter; left empty unless the ReadToEnd line below is enabled.
string testString = "";
// The dictionary file is opened with the same GB2312 encoding the parser uses.
// Enabling the ReadToEnd line feeds the dictionary's own content through the filter.
// The using block closes the reader even if reading throws.
using (System.IO.StreamReader sr = new System.IO.StreamReader(filePath, System.Text.Encoding.GetEncoding("gb2312")))
{
    //testString = sr.ReadToEnd();
}
// Disabled timing code: GetTickCount is not declared in this snippet.
//uint t = GetTickCount();
BadWordParse bwp = new BadWordParse(filePath);
string parsedString = bwp.ReplaceBadWord(testString);
//uint time = GetTickCount() - t;
// Disabled output: elapsed time, the original string, then the replaced string.
//Console.Write("使用时间:" + time.ToString());
//Console.Write("\r\n");
//Console.Write("原始字符串" + testString);
//Console.Write("\r\n");
//Console.Write("替换后字符串" + parsedString);
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· AI与.NET技术实操系列:基于图像分类模型对图像进行分类
· go语言实现终端里的倒计时
· 如何编写易于单元测试的代码
· 10年+ .NET Coder 心语,封装的思维:从隐藏、稳定开始理解其本质意义
· .NET Core 中如何实现缓存的预热?
· 25岁的心里话
· 闲置电脑爆改个人服务器(超详细) #公网映射 #Vmware虚拟网络编辑器
· 零经验选手,Compose 一天开发一款小游戏!
· 因为Apifox不支持离线,我果断选择了Apipost!
· 通过 API 将Deepseek响应流式内容输出到前端
2008-12-23 httpanalyzer 结合 HttpWebRequest Post的运用