String文本去除HTML格式

View Code
1 /// <summary>
2 /// String文本去除HTML格式
3 /// </summary>
4 /// <param name="Htmlstring"></param>
5 /// <returns></returns>
6 public static string NoHTML(string Htmlstring)
7 {
8
9 //删除脚本
10 Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase);
11 //删除HTML
12 Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
13 Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
14
15 Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
16
17 Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);
18
19 Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
20
21 Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
22
23 Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
24
25 Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
26
27 Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
28
29 Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\xa1", RegexOptions.IgnoreCase);
30
31 Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\xa2", RegexOptions.IgnoreCase);
32
33 Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\xa3", RegexOptions.IgnoreCase);
34
35 Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\xa9", RegexOptions.IgnoreCase);
36
37 Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", RegexOptions.IgnoreCase);
38
39 Htmlstring.Replace("<", "");
40
41 Htmlstring.Replace(">", "");
42
43 Htmlstring.Replace("\r\n", "");
44
45 Htmlstring = System.Web.HttpContext.Current.Server.HtmlEncode(Htmlstring).Trim();
46
47
48 return Htmlstring;
49
50 }
posted @ 2011-03-24 13:00  Barret  阅读(431)  评论(0编辑  收藏  举报