// Source: QQ group "MSDN 微软开发者网络" (MSDN Microsoft Developer Network); author: DEVIN


  1 using System;
  2 using System.Text;
  3 using System.Text.RegularExpressions;
  5 namespace Onfly.Common.Utility
  6 {
  7     /// <summary>
  8     /// 过滤类
  9     /// </summary>
 10     public class Filter
 11     {
 12         /// <summary>
 13         /// 需要过滤的字符(多个以|相隔)
 14         /// </summary>
 15         public static String keyWord = "";
 16         /// <summary>
 17         /// 需要过滤的字符(多个以|相隔)
 18         /// </summary>
 19         public static String KeyWord
 20         {
 21             get { return keyWord; }
 22             set { keyWord = value; }
 23         }
 24         /// <summary>
 25         /// 过滤 javascript
 26         /// </summary>
 27         /// <param name="content">需过滤文本内容</param>
 28         /// <returns></returns>
 29         public static String FilterScript(String content)
 30         {
 31             String commentPattern = @"(?'comment'<!--.*?--[ \n\r]*>)";
 32             String embeddedScriptComments = @"(\/\*.*?\*\/|\/\/.*?[\n\r])";
 33             String scriptPattern = String.Format(@"(?'script'<[ \n\r]*script[^>]*>(.*?{0}?)*<[ \n\r]*/script[^>]*>)", embeddedScriptComments);
 34             String pattern = String.Format(@"(?s)({0}|{1})", commentPattern, scriptPattern);
 35             return StripScriptAttributesFromTags(Regex.Replace(content, pattern, String.Empty, RegexOptions.IgnoreCase));
 36         }
 37         /// <summary>
 38         /// 过滤javascript属性值(如onclick等)
 39         /// </summary>
 40         /// <param name="content">需过滤文本内容</param>
 41         /// <returns></returns>
 42         private static String StripScriptAttributesFromTags(String content)
 43         {
 44             String eventAttribs = @"on(blur|c(hange|lick)|dblclick|focus|keypress|(key|mouse)(down|up)|(un)?load
 45                     |mouse(move|o(ut|ver))|reset|s(elect|ubmit))";
 47             String pattern = String.Format(@"(?inx)
 48                 \<(\w+)\s+
 49                     (
 50                         (?'attribute'
 51                         (?'attributeName'{0})\s*=\s*
 52                         (?'delim'['""]?)
 53                         (?'attributeValue'[^'"">]+)
 54                         (\3)
 55                     )
 56                     |
 57                     (?'attribute'
 58                         (?'attributeName'href)\s*=\s*
 59                         (?'delim'['""]?)
 60                         (?'attributeValue'javascript[^'"">]+)
 61                         (\3)
 62                     )
 63                     |
 64                     [^>]
 65                 )*
 66             \>", eventAttribs);
 67             Regex re = new Regex(pattern);
 68             // 使用MatchEvaluator的委托
 69             return re.Replace(content, new MatchEvaluator(StripAttributesHandler));
 70         }
 71         /// <summary>
 72         /// 取得属性值
 73         /// </summary>
 74         /// <param name="m"></param>
 75         /// <returns></returns>
 76         private static String StripAttributesHandler(Match m)
 77         {
 78             if (m.Groups["attribute"].Success)
 79             {
 80                 return m.Value.Replace(m.Groups["attribute"].Value, "");
 81             }
 82             else
 83             {
 84                 return m.Value;
 85             }
 86         }
 87         /// <summary>
 88         /// 去掉javascript(scr链接方式)
 89         /// </summary>
 90         /// <param name="content">需过滤文本内容</param>
 91         /// <returns></returns>
 92         public static String FilterAHrefScript(String content)
 93         {
 94             String newstr = FilterScript(content);
 95             String regexstr = @" href[ ^=]*= *[\s\S]*script *:";
 96             return Regex.Replace(newstr, regexstr, String.Empty, RegexOptions.IgnoreCase);
 97         }
 98         /// <summary>
 99         /// 去掉链接文件
100         /// </summary>
101         /// <param name="content">需过滤文本内容</param>
102         /// <returns></returns>
103         public static String FilterSrc(String content)
104         {
105             String newstr = FilterScript(content);
106             String regexstr = @" src *= *['""]?[^\.]+\.(js|vbs|asp|aspx|php|jsp)['""]";
107             return Regex.Replace(newstr, regexstr, @"", RegexOptions.IgnoreCase);
108         }
109         /// <summary>
110         /// 过滤HTML
111         /// </summary>
112         /// <param name="content">需过滤文本内容</param>
113         /// <returns></returns>
114         public static String FilterHtml(String content)
115         {
116             String newstr = FilterScript(content);
117             String regexstr = @"<[^>]*>";
118             return Regex.Replace(newstr, regexstr, String.Empty, RegexOptions.IgnoreCase);
119         }
120         /// <summary>
121         /// 过滤 OBJECT
122         /// </summary>
123         /// <param name="content">需过滤文本内容</param>
124         /// <returns></returns>
125         public static String FilterObject(String content)
126         {
127             String regexstr = @"(?i)<Object([^>])*>(\w|\W)*</Object([^>])*>";
128             return Regex.Replace(content, regexstr, String.Empty, RegexOptions.IgnoreCase);
129         }
130         /// <summary>
131         /// 过滤iframe
132         /// </summary>
133         /// <param name="content">需过滤文本内容</param>
134         /// <returns></returns>
135         public static String FilterIframe(String content)
136         {
137             String regexstr = @"(?i)<Iframe([^>])*>(\w|\W)*</Iframe([^>])*>";
138             return Regex.Replace(content, regexstr, String.Empty, RegexOptions.IgnoreCase);
139         }
140         /// <summary>
141         /// 过滤frameset
142         /// </summary>
143         /// <param name="content">需过滤文本内容</param>
144         /// <returns></returns>
145         public static String FilterFrameset(String content)
146         {
147             String regexstr = @"(?i)<Frameset([^>])*>(\w|\W)*</Frameset([^>])*>";
148             return Regex.Replace(content, regexstr, String.Empty, RegexOptions.IgnoreCase);
149         }
150         /// <summary>
151         /// 移除非法或不友好字符
152         /// </summary>
153         /// <param name="content">关键字列表,多个以 | 分隔</param>
154         /// <returns></returns>
155         public static String FilterBadWords(String content)
156         {
157             //这里的非法和不友好字符由你任意加,用“|”分隔,支持正则表达式,由于本Blog禁止贴非法和不友好字符,所以这里无法加上。
158             if (content == "")
159                 return "";
160             String[] bwords = keyWord.Split('|');
161             if (bwords.Length < 1return content;
162             int i, j;
163             String str;
164             StringBuilder sb = new StringBuilder();
165             for (i = 0; i < bwords.Length; i++)
166             {
167                 str = bwords[i].ToString().Trim();
168                 String regStr, toStr;
169                 regStr = str;
170                 Regex r = new Regex(regStr, RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Multiline);
171                 Match m = r.Match(content);
172                 if (m.Success)
173                 {
174                     j = m.Value.Length;
175                     sb.Insert(0"*", j);
176                     toStr = sb.ToString();
177                     content = Regex.Replace(content, regStr, toStr, RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Multiline);
178                 }
179                 sb.Remove(0, sb.Length);
180             }
181             return content;
182         }
183         /// <summary>
184         /// 过滤以上所有
185         /// </summary>
186         /// <param name="content">需过滤文本内容</param>
187         /// <returns></returns>
188         public static String FilterAll(String content)
189         {
190             content = FilterHtml(content);
191             content = FilterScript(content);
192             content = FilterAHrefScript(content);
193             content = FilterObject(content);
194             content = FilterIframe(content);
195             content = FilterFrameset(content);
196             content = FilterSrc(content);
197             content = FilterBadWords(content);
198             return content;
199         }
200     }
201 }
