过滤HTML标签
View Code
/// <summary>
/// Reduces an HTML fragment to compact plain text.
/// Comments and script, style, iframe and frameset elements are removed
/// together with their content; every remaining tag is stripped; finally
/// spaces, line breaks, ampersands and a few known entity/attribute
/// remnants are deleted.
/// </summary>
/// <remarks>
/// The result contains no complete tag, but it is not HTML-encoded: a lone
/// angle bracket from the input can survive. Encode the value before writing
/// it back into a page.
/// </remarks>
/// <param name="html">Markup to clean. May be null or empty.</param>
/// <returns>
/// The cleaned text, or an empty string when <paramref name="html"/> is
/// null or empty.
/// </returns>
public string checkStr(string html)
{
    // Regex.Replace throws on null input; treat "nothing in" as "nothing out".
    if (string.IsNullOrEmpty(html))
    {
        return string.Empty;
    }

    // Comments go first: a '>' inside a comment would otherwise end the
    // generic tag match early and leak the rest of the comment as text.
    html = s_checkStrComment.Replace(html, "");

    // Elements whose *content* must disappear as well. This has to run before
    // the generic tag strip, otherwise only the tags are removed and the
    // JavaScript/CSS between them ends up in the output.
    html = s_checkStrContentElement.Replace(html, "");

    // Every remaining tag. After this pass no '<' in the text is followed by a
    // '>', so no further tag-specific clean-up is needed.
    html = s_checkStrAnyTag.Replace(html, "");

    // This literal contains a space, so it must be removed before the
    // space-stripping pass below or it could never match.
    html = html.Replace("style=\"LINE-HEIGHT: 22px;", "");

    // Order matters: "&" is removed before "nbsp;" (so "&nbsp;" vanishes
    // completely) and "/Div;" before "Div;".
    foreach (string residue in s_checkStrResidue)
    {
        html = html.Replace(residue, "");
    }

    return html;
}

// The patterns are built once and shared across calls instead of being
// reconstructed on every invocation.

// An HTML comment, matched lazily so that two comments are removed separately.
private static readonly System.Text.RegularExpressions.Regex s_checkStrComment =
    new System.Text.RegularExpressions.Regex(
        @"<!--[\s\S]*?-->",
        System.Text.RegularExpressions.RegexOptions.Compiled);

// A script/style/iframe/frameset element including its content. The lazy
// quantifier plus the \1 back-reference stop at the first matching close tag,
// so text between two such elements is preserved.
private static readonly System.Text.RegularExpressions.Regex s_checkStrContentElement =
    new System.Text.RegularExpressions.Regex(
        @"<\s*(script|style|iframe|frameset)\b[^>]*>[\s\S]*?<\s*/\s*\1\s*>",
        System.Text.RegularExpressions.RegexOptions.IgnoreCase
            | System.Text.RegularExpressions.RegexOptions.Compiled);

// Any single tag: '<' up to the next '>'.
private static readonly System.Text.RegularExpressions.Regex s_checkStrAnyTag =
    new System.Text.RegularExpressions.Regex(
        @"<[^>]*>",
        System.Text.RegularExpressions.RegexOptions.Compiled);

// Literal fragments deleted from the tag-free text, applied in this order.
private static readonly string[] s_checkStrResidue =
{
    " ",
    "\r",
    "\n",
    "&",
    "/Div;",
    "Div;",
    "/div;",
    "div;",
    "nbsp;",
    "class=p1",
};