DFA算法C#实现
/// <summary>
/// Forbidden-word filter implemented as a DFA: the word list is compiled into a
/// character trie, and input text is scanned by walking that trie from each
/// candidate start position.
/// </summary>
public class ForbiddentWordLibrary
{
    /// <summary>
    /// One state of the DFA. <see cref="Children"/> maps the next character to the
    /// next state; <see cref="IsEnd"/> marks states at which a complete forbidden
    /// word has been read.
    /// </summary>
    private sealed class TrieNode
    {
        public readonly Dictionary<char, TrieNode> Children = new Dictionary<char, TrieNode>();
        public bool IsEnd;
    }

    /// <summary>
    /// Raw forbidden words as loaded from the file (trimmed, de-duplicated, non-empty).
    /// </summary>
    private readonly HashSet<string> words;

    /// <summary>
    /// Root state of the trie built from <see cref="words"/>.
    /// </summary>
    private readonly TrieNode lib = new TrieNode();

    /// <summary>
    /// Builds the filter from a UTF-8 text file that holds one forbidden word per line.
    /// Each line is trimmed; lines that are empty after trimming are ignored.
    /// </summary>
    /// <param name="path">Path of the word-list file.</param>
    /// <remarks>
    /// Any exception raised while opening or reading the file propagates to the
    /// caller unchanged, with its original stack trace.
    /// </remarks>
    public ForbiddentWordLibrary(string path)
    {
        words = new HashSet<string>();

        using (var reader = new StreamReader(path, Encoding.UTF8))
        {
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                string word = line.Trim();
                if (word.Length > 0)
                {
                    words.Add(word);
                }
            }
        }

        InitLibrary();
    }

    /// <summary>
    /// Finds every forbidden word occurring in <paramref name="input"/>.
    /// </summary>
    /// <param name="input">Text to scan. May be null or empty.</param>
    /// <returns>
    /// The matched words in order of appearance. At each position the longest
    /// forbidden word starting there is taken, and scanning resumes right after it,
    /// so reported matches never overlap. Returns an empty list when nothing matches
    /// or when <paramref name="input"/> is null or empty.
    /// </returns>
    public List<string> GetAllForbiddenWords(string input)
    {
        List<string> result = new List<string>();
        if (string.IsNullOrEmpty(input))
        {
            return result;
        }

        for (int i = 0; i < input.Length; i++)
        {
            int length = SearchFW(input, i);
            if (length > 0)
            {
                result.Add(input.Substring(i, length));
                // Skip past the match; the loop's own i++ supplies the final step.
                i += length - 1;
            }
        }

        return result;
    }

    /// <summary>
    /// Walks the trie along <paramref name="input"/> starting at
    /// <paramref name="beginIndex"/> and reports the longest forbidden word that
    /// starts exactly there.
    /// </summary>
    /// <param name="input">Text being scanned.</param>
    /// <param name="beginIndex">Index in <paramref name="input"/> at which the match must start.</param>
    /// <returns>Length of the longest forbidden word found, or 0 if there is none.</returns>
    private int SearchFW(string input, int beginIndex)
    {
        int matchedLength = 0;
        TrieNode node = lib;

        for (int i = beginIndex; i < input.Length; i++)
        {
            TrieNode next;
            if (!node.Children.TryGetValue(input[i], out next))
            {
                break;
            }

            node = next;
            if (node.IsEnd)
            {
                // Record the length only at word-end states. Characters consumed
                // after the last word end (a prefix of some longer word that never
                // completes) must not be counted as part of the match.
                matchedLength = i - beginIndex + 1;
            }
        }

        return matchedLength;
    }

    /// <summary>
    /// Inserts every word from <see cref="words"/> into the trie rooted at
    /// <see cref="lib"/>, marking the state reached by each word's last character
    /// as a word end.
    /// </summary>
    private void InitLibrary()
    {
        foreach (string word in words)
        {
            TrieNode node = lib;
            foreach (char c in word)
            {
                TrieNode child;
                if (!node.Children.TryGetValue(c, out child))
                {
                    child = new TrieNode();
                    node.Children.Add(c, child);
                }

                node = child;
            }

            node.IsEnd = true;
        }
    }
}