敏感词过滤

分享一个敏感词过滤方法

  /// <summary>
  /// Sensitive-word filter. Assign the word list to <see cref="s_filters"/>, then call
  /// <see cref="Filter"/> to detect or mask those words in a piece of text.
  /// </summary>
  public class WordFilterController : BaseController
  {
      /// <summary>
      /// Sensitive words to look for. While this is null, <see cref="Filter"/> reports no match.
      /// </summary>
      public static string[] s_filters = null;

      /// <summary>
      /// Detects (and, unless <paramref name="check_only"/> is set, masks) every occurrence of the
      /// words in <see cref="s_filters"/> inside <paramref name="content"/>. Matching ignores case
      /// (ordinal) and tolerates a few foreign characters inserted between the letters of a word.
      /// </summary>
      /// <param name="content">Text to inspect. A null or empty value never matches.</param>
      /// <param name="result_str">
      /// Receives the (optionally trimmed) text with every matched span masked. When nothing matched,
      /// or when <paramref name="check_only"/> is true, this is the (optionally trimmed) input.
      /// </param>
      /// <param name="filter_deep">
      /// Maximum number of arbitrary characters allowed between two consecutive characters of a word.
      /// With a depth of 2 and the word "ab", the spans "ab", "a?b" and "a??b" are all matched.
      /// Negative values are treated as 0.
      /// </param>
      /// <param name="check_only">When true, return as soon as one word is found and mask nothing.</param>
      /// <param name="bTrim">When true, leading and trailing whitespace is removed before filtering.</param>
      /// <param name="replace_str">
      /// Text written once for every character of a matched span. Occurrences of it are stripped from
      /// each filter word before matching. Null is treated as empty, which deletes matched spans.
      /// </param>
      /// <returns>True when at least one sensitive word was found; otherwise false.</returns>
      public static bool Filter(string content, out string result_str, int filter_deep = 1, bool check_only = false, bool bTrim = false, string replace_str = "*")
      {
          string result = content;
          if (bTrim && result != null)
          {
              result = result.Trim();
          }
          result_str = result;

          // Read the static field once so the null check and the loop below see the same array.
          string[] filters = s_filters;
          if (filters == null || string.IsNullOrEmpty(result))
          {
              return false;
          }

          string mask = replace_str ?? string.Empty;
          int maxGap = Math.Max(filter_deep, 0);

          bool detected = false;
          foreach (string raw in filters)
          {
              if (string.IsNullOrEmpty(raw))
              {
                  continue;
              }

              // Strip the mask text from the word; string.Replace rejects an empty search string,
              // so skip the call when there is nothing to strip.
              string word = mask.Length == 0 ? raw : raw.Replace(mask, "");
              if (word.Length == 0)
              {
                  continue;
              }

              bool found;
              result = MaskWord(result, word, maxGap, mask, check_only, out found);
              if (!found)
              {
                  continue;
              }
              if (check_only)
              {
                  // result_str still holds the unmasked (trimmed) input assigned above.
                  return true;
              }
              detected = true;
          }

          result_str = result;
          return detected;
      }

      /// <summary>
      /// Masks every non-overlapping occurrence of <paramref name="word"/> in <paramref name="text"/>.
      /// The scan moves strictly forward and never re-reads text it has already masked, so it
      /// terminates even when the mask itself would match the word.
      /// </summary>
      private static string MaskWord(string text, string word, int maxGap, string mask, bool checkOnly, out bool found)
      {
          found = false;
          string first = word.Substring(0, 1);
          System.Text.StringBuilder output = null;
          int copiedUpTo = 0;
          int searchFrom = 0;

          while (searchFrom < text.Length)
          {
              int start = text.IndexOf(first, searchFrom, StringComparison.OrdinalIgnoreCase);
              if (start < 0)
              {
                  break;
              }

              int end = FindMatchEnd(text, word, start, maxGap);
              if (end < 0)
              {
                  // This anchor does not lead to a match; try the next occurrence of the first character.
                  searchFrom = start + 1;
                  continue;
              }

              found = true;
              if (checkOnly)
              {
                  return text;
              }

              if (output == null)
              {
                  output = new System.Text.StringBuilder(text.Length);
              }
              output.Append(text, copiedUpTo, start - copiedUpTo);
              for (int i = start; i <= end; i++)
              {
                  output.Append(mask);
              }
              copiedUpTo = end + 1;
              searchFrom = end + 1;
          }

          if (output == null)
          {
              return text;
          }
          output.Append(text, copiedUpTo, text.Length - copiedUpTo);
          return output.ToString();
      }

      /// <summary>
      /// Tries to match <paramref name="word"/> with its first character anchored at
      /// <paramref name="start"/>. Each following character must be found within
      /// <paramref name="maxGap"/> + 1 positions after the previous one; the nearest occurrence is
      /// taken (greedy, no backtracking). Returns the index of the last matched character, or -1.
      /// </summary>
      private static int FindMatchEnd(string text, string word, int start, int maxGap)
      {
          int end = start;
          for (int i = 1; i < word.Length; i++)
          {
              int from = end + 1;
              int remaining = text.Length - from;
              if (remaining <= 0)
              {
                  return -1;
              }

              // Widen to long so a huge depth cannot overflow when 1 is added.
              int window = (int)Math.Min((long)maxGap + 1, remaining);
              int next = text.IndexOf(word.Substring(i, 1), from, window, StringComparison.OrdinalIgnoreCase);
              if (next < 0)
              {
                  return -1;
              }
              end = next;
          }
          return end;
      }
  }

我用的是MVC框架,直接重写DefaultModelBinder里面的BindModel方法

 /// <summary>
 /// Default model binder that passes every bound string model through
 /// <c>WordFilterController.Filter</c> and returns the masked text when a sensitive word is found.
 /// </summary>
 public class FilterModelBinder : DefaultModelBinder
 {
     /// <summary>
     /// Binds the model with the base binder, then masks sensitive words when the model is a string.
     /// </summary>
     /// <param name="controllerContext">Context of the controller handling the request.</param>
     /// <param name="bindingContext">Binding context describing the model being bound.</param>
     /// <returns>The bound value, with sensitive words masked for string models.</returns>
     public override object BindModel(ControllerContext controllerContext, ModelBindingContext bindingContext)
     {
         var value = base.BindModel(controllerContext, bindingContext);

         // Nothing to filter for non-string models or when no value was posted (null must not
         // reach the filter).
         var text = value as string;
         if (bindingContext.ModelType != typeof(string) || text == null)
         {
             return value;
         }

         // Reload the word list and release the context right away instead of leaking one per bind.
         // NOTE(review): assumes AppDbContext is an IDisposable Entity Framework context — confirm.
         using (var db = new AppDbContext())
         {
             WordFilterController.s_filters = db.WordFilters.Select(x => x.Word).ToArray();
         }

         string filtered;
         if (WordFilterController.Filter(text, out filtered))
         {
             return filtered;
         }
         return value;
     }
 }

在网上还查到另一种做法:自己实现IModelBinder接口也可以达到同样的效果,这边就不记录了

最后在Global.asax的Application_Start()方法中加上:

// Make FilterModelBinder the application-wide default model binder.
ModelBinders.Binders.DefaultBinder = new FilterModelBinder();

搞定~

 

【2018.04.09】更新

在后续项目测试中发现,这样给全部的model都绑上敏感词过滤有些蠢。主要有两个问题:1. 开发敏感词管理模块时,提交的敏感词本身也会被过滤(自己把自己和谐了);2. 一些敏感词是数字或字母,可能会和GUID中的字符冲突,导致ID等值在传递时被改写而出现BUG。

所以稍微改了下,只针对部分entity进行过滤,并且排除ID

这是新的ModelBinder,这次只重写SetProperty方法(注意:下面的代码调用的是WordFilterService.filter,与前文的WordFilterController.Filter名称不同,照搬时请换成自己项目中实际的类名和方法名)

 /// <summary>
 /// Model binder that masks sensitive words in the string properties of the model it binds,
 /// leaving the property named "id" (any casing) untouched.
 /// </summary>
 public class WordFilterModelBinder : DefaultModelBinder
 {
     /// <summary>
     /// Filters the incoming value of a string property before handing it to the base binder.
     /// </summary>
     /// <param name="controllerContext">Context of the controller handling the request.</param>
     /// <param name="bindingContext">Binding context of the model being bound.</param>
     /// <param name="propertyDescriptor">Descriptor of the property about to be set.</param>
     /// <param name="value">Value bound for the property; replaced by the masked text on a hit.</param>
     protected override void SetProperty(ControllerContext controllerContext, ModelBindingContext bindingContext, System.ComponentModel.PropertyDescriptor propertyDescriptor, object value)
     {
         // A null (or non-string) value has nothing to filter and must not reach the filter.
         var text = value as string;
         bool isIdProperty = string.Equals(propertyDescriptor.Name, "id", System.StringComparison.OrdinalIgnoreCase);

         if (text != null && propertyDescriptor.PropertyType == typeof(string) && !isIdProperty)
         {
             // Reload the word list and release the context right away instead of leaking one per property.
             // NOTE(review): assumes LibraryContext is an IDisposable Entity Framework context — confirm.
             using (var db = new LibraryContext())
             {
                 WordFilterService.s_filters = (from x in db.WordFilters select x.Word).ToArray<string>();
             }

             string filtered;
             if (WordFilterService.filter(text, out filtered, 1, false, false, "*"))
             {
                 value = filtered;
             }
         }

         base.SetProperty(controllerContext, bindingContext, propertyDescriptor, value);
     }
 }

然后同样修改Global.asax的Application_Start()方法

// The global DefaultBinder override is no longer used; the filtering binder is registered
// only for the entity types that should be filtered, each with its own binder instance.
var filteredEntityTypes = new[]
{
    typeof(DAL.Models.Activity),
    typeof(DAL.Models.Comment)
};
foreach (var entityType in filteredEntityTypes)
{
    ModelBinders.Binders.Add(entityType, new WordFilterModelBinder());
}

这样就可以有针对性地进行过滤了,但是过滤层级只到Entity,不能精确到只针对某个字段

可能有更好的办法,现在想是用Attribute应该可以实现针对字段过滤,等后续再更新……

 

 

另外,敏感词库我也找了个比较全【2017版】的,直接发在这边了(直接复制链接下载):

https://files.cnblogs.com/files/cn2018/mgck2017.rar

posted @ 2018-04-02 14:32  AdolphChen  阅读(609)  评论(0)  编辑  收藏  举报