分享一个敏感词过滤方法
public class WordFilterController : BaseController
{
    /// <summary>
    /// Sensitive-word list consulted by <see cref="Filter"/>. Must be assigned before
    /// calling <see cref="Filter"/>; while it is null nothing is filtered.
    /// </summary>
    public static string[] s_filters = null;

    /// <summary>
    /// Masks every occurrence of the words in <see cref="s_filters"/> found in <paramref name="content"/>.
    /// Matching is case-insensitive (ordinal) and tolerates characters inserted between the letters of a word.
    /// </summary>
    /// <param name="content">Text to filter. Null or empty text is returned unchanged.</param>
    /// <param name="result_str">The (optionally trimmed) text after masking. When <paramref name="check_only"/> is true, or nothing matched, this is the unmasked text.</param>
    /// <param name="filter_deep">Detection depth: the maximum number of arbitrary characters allowed between two consecutive characters of a word. Example: with depth 2 and the word "中国", the texts "中国", "中*国" and "中**国" are all masked (* stands for any character).</param>
    /// <param name="check_only">When true, only detect: return true on the first hit without masking anything.</param>
    /// <param name="bTrim">Whether to trim leading and trailing white space before filtering.</param>
    /// <param name="replace_str">String written once for every character of a matched span. Occurrences of it are stripped from each filter word before matching. Null or empty removes the matched span instead of masking it.</param>
    /// <returns>True when at least one sensitive word was found; otherwise false.</returns>
    public static bool Filter(string content, out string result_str, int filter_deep = 1, bool check_only = false, bool bTrim = false, string replace_str = "*")
    {
        string result = content;
        if (bTrim && result != null)
        {
            result = result.Trim();
        }
        result_str = result;

        // Read the static field once so the null check and the loop below use the same array.
        string[] filters = s_filters;
        if (filters == null || string.IsNullOrEmpty(result))
        {
            return false;
        }

        bool hasMask = !string.IsNullOrEmpty(replace_str);
        bool check = false;
        foreach (string str in filters)
        {
            if (string.IsNullOrEmpty(str))
            {
                continue;
            }

            // Mask characters inside a filter word carry no meaning; drop them before matching.
            string s = hasMask ? str.Replace(replace_str, "") : str;
            if (s.Length == 0)
            {
                continue;
            }

            string firstChar = s.Substring(0, 1);
            int searchFrom = 0;
            while (searchFrom < result.Length)
            {
                int start = result.IndexOf(firstChar, searchFrom, StringComparison.OrdinalIgnoreCase);
                if (start == -1)
                {
                    break;
                }

                int end = FindMatchEnd(result, s, start, filter_deep);
                if (end == -1)
                {
                    // This occurrence of the first character does not begin a match; try the next one
                    // instead of giving up on the whole word.
                    searchFrom = start + 1;
                    continue;
                }

                if (check_only)
                {
                    return true;
                }
                check = true;

                var masked = new System.Text.StringBuilder(result.Length);
                masked.Append(result, 0, start);
                if (hasMask)
                {
                    for (int i = start; i <= end; i++)
                    {
                        masked.Append(replace_str);
                    }
                }

                // Resume after the masked span so the mask itself is never rescanned; this guarantees
                // termination even when replace_str contains characters of the word.
                searchFrom = masked.Length;
                masked.Append(result, end + 1, result.Length - end - 1);
                result = masked.ToString();
            }
        }

        result_str = result;
        return check;
    }

    /// <summary>
    /// Finds the smallest end index of a match of <paramref name="word"/> that begins at
    /// <paramref name="start"/>, allowing at most <paramref name="maxGap"/> foreign characters
    /// between consecutive word characters. Returns -1 when no such match exists.
    /// </summary>
    private static int FindMatchEnd(string text, string word, int start, int maxGap)
    {
        // A gap larger than the text is pointless; clamping also keeps "index + gap + 1" from overflowing.
        int gap = Math.Min(maxGap, text.Length);

        // Text indices (ascending) at which the current word character can sit.
        var reachable = new System.Collections.Generic.List<int> { start };
        for (int i = 1; i < word.Length; i++)
        {
            var next = new System.Collections.Generic.List<int>();
            int scanned = -1; // highest text index already examined for word[i]
            foreach (int prev in reachable)
            {
                int last = Math.Min(text.Length - 1, prev + gap + 1);
                for (int q = Math.Max(prev + 1, scanned + 1); q <= last; q++)
                {
                    if (string.Compare(text, q, word, i, 1, StringComparison.OrdinalIgnoreCase) == 0)
                    {
                        next.Add(q);
                    }
                }
                if (last > scanned)
                {
                    scanned = last;
                }
            }

            if (next.Count == 0)
            {
                return -1;
            }
            reachable = next;
        }

        return reachable[0];
    }
}
我用的是MVC框架,直接重写DefaultModelBinder里面的BindModel方法:
/// <summary>
/// Model binder that passes every bound string value through
/// <c>WordFilterController.Filter</c> and returns the masked text when a sensitive word is found.
/// </summary>
public class FilterModelBinder : DefaultModelBinder
{
    /// <summary>
    /// Binds the model with the default logic, then filters the result when the model type is string.
    /// </summary>
    /// <param name="controllerContext">Context of the current controller.</param>
    /// <param name="bindingContext">Context describing the model being bound.</param>
    /// <returns>The bound value; for strings containing a sensitive word, the masked string.</returns>
    public override object BindModel(ControllerContext controllerContext, ModelBindingContext bindingContext)
    {
        var value = base.BindModel(controllerContext, bindingContext);
        if (bindingContext.ModelType != typeof(string))
        {
            return value;
        }

        // A missing or empty value has nothing to filter; skipping it also avoids
        // passing null to Filter and saves the database round trip.
        var text = value as string;
        if (string.IsNullOrEmpty(text))
        {
            return value;
        }

        // The word list is reloaded on every bind so edits to it take effect immediately.
        // NOTE(review): assumes AppDbContext is an Entity Framework DbContext (IDisposable) - confirm.
        using (var db = new AppDbContext())
        {
            WordFilterController.s_filters = db.WordFilters.Select(x => x.Word).ToArray();
        }

        string filtered;
        if (WordFilterController.Filter(text, out filtered))
        {
            return filtered;
        }
        return value;
    }
}
在网上还查到另外一种通过实现IModelBinder接口的方法也能够做到,这边就不记录了。
最后在Global.asax的Application_Start()方法中加上:
// Make FilterModelBinder the default model binder.
ModelBinders.Binders.DefaultBinder = new FilterModelBinder();
搞定~
【2018.04.09】更新
在后续项目测试中发现,这样给所有的model都绑上敏感词过滤有些蠢。主要是:1. 开发敏感词管理模块时,会自己把自己和谐了;2. 一些敏感词是数字或字符,可能会和GUID冲突,导致值传递出现BUG。
因此稍微改了下,只针对部分entity进行过滤,而且排除ID。
这是新的ModelBinder,这次只重写SetProperty方法:
/// <summary>
/// Model binder that filters sensitive words out of the string properties of the
/// model it is registered for, leaving the property named "id" untouched.
/// </summary>
public class WordFilterModelBinder : DefaultModelBinder
{
    /// <summary>
    /// Filters the incoming value of a non-id string property, then assigns it with the default logic.
    /// </summary>
    /// <param name="controllerContext">Context of the current controller.</param>
    /// <param name="bindingContext">Context describing the model being bound.</param>
    /// <param name="propertyDescriptor">Descriptor of the property about to be set.</param>
    /// <param name="value">Value to assign to the property.</param>
    protected override void SetProperty(ControllerContext controllerContext, ModelBindingContext bindingContext, System.ComponentModel.PropertyDescriptor propertyDescriptor, object value)
    {
        // Null and empty values have nothing to filter; "as" also tolerates a value that is not a string.
        var text = value as string;
        bool isFilterable = propertyDescriptor.PropertyType == typeof(string)
            && !string.Equals(propertyDescriptor.Name, "id", System.StringComparison.OrdinalIgnoreCase)
            && !string.IsNullOrEmpty(text);

        if (isFilterable)
        {
            // The word list is reloaded on every property so edits to it take effect immediately.
            // NOTE(review): assumes LibraryContext is an Entity Framework DbContext (IDisposable) - confirm.
            using (var db = new LibraryContext())
            {
                WordFilterService.s_filters = (from x in db.WordFilters select x.Word).ToArray<string>();
            }

            // NOTE(review): WordFilterService.filter is defined outside this snippet; presumably it has
            // the same contract as the Filter method shown earlier in the article - verify.
            string filtered;
            if (WordFilterService.filter(text, out filtered, 1, false, false, "*"))
            {
                value = filtered;
            }
        }

        base.SetProperty(controllerContext, bindingContext, propertyDescriptor, value);
    }
}
然后同样修改Global.asax的Application_Start()方法:
// The global default binder is no longer replaced:
//   ModelBinders.Binders.DefaultBinder = new SSOCT.Library.CustomModelBinder();
// Instead, a word-filtering binder is registered per entity type.
var binders = ModelBinders.Binders;
binders.Add(typeof(DAL.Models.Activity), new WordFilterModelBinder());
binders.Add(typeof(DAL.Models.Comment), new WordFilterModelBinder());
这样就能够有针对性地进行过滤了,可是过滤层级只到Entity,不能精确到只针对某个字段。
可能有更好的办法,现在想来,用Attribute应该能够实现针对字段的过滤,等后续再更新……
另外,敏感词库我也找了个比较全的【2017版】,直接发在这边了(直接复制链接下载):
https://files.cnblogs.com/files/cn2018/mgck2017.rar