过滤HTML标签

View Code
 // The patterns below are built once and shared by every call; Regex instances
 // are immutable, so caching them avoids re-parsing the patterns on each call.

 // Matches an HTML comment: <!-- ... --> (lazy, so separate comments stay separate).
 private static readonly System.Text.RegularExpressions.Regex HtmlCommentPattern =
     new System.Text.RegularExpressions.Regex(
         @"<!--[\s\S]*?-->",
         System.Text.RegularExpressions.RegexOptions.Compiled);

 // Matches a script/style/iframe/frameset element together with its content.
 // The match is lazy and the closing tag must repeat the opening name (\1), so
 // text lying between two separate elements is never swallowed.
 private static readonly System.Text.RegularExpressions.Regex HtmlContainerPattern =
     new System.Text.RegularExpressions.Regex(
         @"<\s*(script|style|iframe|frameset)\b[^>]*>[\s\S]*?<\s*/\s*\1\s*>",
         System.Text.RegularExpressions.RegexOptions.IgnoreCase
             | System.Text.RegularExpressions.RegexOptions.Compiled);

 // Matches any remaining tag: '<' up to the next '>'.
 private static readonly System.Text.RegularExpressions.Regex HtmlTagPattern =
     new System.Text.RegularExpressions.Regex(
         @"<[^>]*>",
         System.Text.RegularExpressions.RegexOptions.Compiled);

 // Literal fragments removed after tag stripping, applied in this exact order.
 // Order matters: spaces are removed first, so later entries must be written
 // without spaces, and "&amp;" is removed before "nbsp;" so that a double-encoded
 // "&amp;nbsp;" disappears completely.
 private static readonly string[] ResidualFragments =
 {
     " ",
     "&lt;br&gt;",
     "\r",
     "\n",
     "&lt;p&gt;",
     "&lt;/p&gt;",
     "&lt;BR&gt;",
     "&nbsp;",
     "&amp;",
     "&lt;P&gt;",
     "&lt;/P&gt;",
     "/Div;",
     "Div;",
     "/div;",
     "div;",
     "nbsp;",
     "style=&quot;LINE-HEIGHT:22px;",
     "class=p1",
 };

 /// <summary>
 /// Reduces an HTML fragment to compact plain text: removes comments, removes
 /// script/style/iframe/frameset elements including their content, strips all
 /// remaining tags, and finally deletes spaces, line breaks and a fixed list of
 /// escaped-markup leftovers (for example <c>&amp;lt;p&amp;gt;</c> or <c>&amp;nbsp;</c>).
 /// </summary>
 /// <param name="html">The HTML text to clean. May be null or empty.</param>
 /// <returns>
 /// The cleaned text with every space and line break removed, or an empty
 /// string when <paramref name="html"/> is null or empty.
 /// </returns>
 public string checkStr(string html)
 {
     if (string.IsNullOrEmpty(html))
     {
         return string.Empty;
     }

     // Comments and content-bearing elements must go before the generic tag
     // stripper; otherwise only their delimiters are removed and the script,
     // CSS or comment body is left behind in the output.
     string text = HtmlCommentPattern.Replace(html, "");
     text = HtmlContainerPattern.Replace(text, "");

     // Every tag is dropped here, which also discards href="javascript:" and
     // on* event attributes, so no separate attribute filters are required.
     text = HtmlTagPattern.Replace(text, "");

     foreach (string fragment in ResidualFragments)
     {
         text = text.Replace(fragment, "");
     }

     return text;
 }

 

posted @ 2013-03-20 11:36  Hello Bug  阅读(265)  评论(0)  编辑  收藏  举报