C# 删除String中的HTML标记的类(转载)
1using System;
2using System.Data;
3using System.Configuration;
4using System.Web;
5using System.Text.RegularExpressions;
6
namespace ConsoleApplication1
{
    class Class1
    {
        /// <summary>
        /// Ordered list of patterns applied by <see cref="StripHTML"/>. The order is
        /// significant: markup is removed first, then entities are decoded, and the
        /// comment handling at the end relies on "-->" having been turned into a
        /// line break so that "&lt;!--.*\n" can find the end of the comment.
        /// The instances are built once and reused for every call.
        /// </summary>
        private static readonly Regex[] Patterns =
        {
            // Whole <script> elements. Singleline lets '.' cross line breaks,
            // because script bodies normally span several lines.
            new Regex(@"<script[^>]*?>.*?</script>", RegexOptions.IgnoreCase | RegexOptions.Singleline),

            // Any opening, closing or declaration tag: '<', optional '/', optional '!',
            // an optionally namespaced name, then quoted strings or any other
            // non-bracket characters up to '>'. The alternatives are mutually
            // exclusive by first character, which avoids exponential backtracking
            // on malformed input.
            new Regex(@"<(?:/\s*)?!?(?:\w+:)?\w+\b(?:""[^""]*""|'[^']*'|[^'""<>])*>", RegexOptions.IgnoreCase),

            // A line-break character followed by further whitespace.
            new Regex(@"([\r\n])[\s]+", RegexOptions.IgnoreCase),

            // Character entities.
            new Regex(@"&(quot|#34);", RegexOptions.IgnoreCase),
            new Regex(@"&(amp|#38);", RegexOptions.IgnoreCase),
            new Regex(@"&(lt|#60);", RegexOptions.IgnoreCase),
            new Regex(@"&(gt|#62);", RegexOptions.IgnoreCase),
            new Regex(@"&(nbsp|#160);", RegexOptions.IgnoreCase),
            new Regex(@"&(iexcl|#161);", RegexOptions.IgnoreCase),
            new Regex(@"&(cent|#162);", RegexOptions.IgnoreCase),
            new Regex(@"&(pound|#163);", RegexOptions.IgnoreCase),
            new Regex(@"&(copy|#169);", RegexOptions.IgnoreCase),

            // Any remaining numeric entity is dropped.
            new Regex(@"&#(\d+);", RegexOptions.IgnoreCase),

            // End-of-comment marker becomes a line break ...
            new Regex(@"-->", RegexOptions.IgnoreCase),

            // ... so that a comment can be removed from its start to that line break.
            new Regex(@"<!--.*\n", RegexOptions.IgnoreCase)
        };

        /// <summary>
        /// Replacement text for the entry at the same index in <see cref="Patterns"/>.
        /// </summary>
        private static readonly string[] Replacements =
        {
            "",
            "",
            "",
            "\"",
            "&",
            "<",
            ">",
            " ",
            "\u00A1", // inverted exclamation mark, chr(161)
            "\u00A2", // cent sign, chr(162)
            "\u00A3", // pound sign, chr(163)
            "\u00A9", // copyright sign, chr(169)
            "",
            "\r\n",
            ""
        };

        /// <summary>
        /// Removes HTML markup from a string and decodes a small set of character entities.
        /// </summary>
        /// <param name="strHtml">Source text that may contain HTML.</param>
        /// <returns>
        /// The text with script elements, tags and comments removed, the supported
        /// entities decoded, and any remaining '&lt;', '&gt;' and CR/LF pairs stripped.
        /// A null or empty input is returned unchanged.
        /// </returns>
        public static string StripHTML(string strHtml)
        {
            if (string.IsNullOrEmpty(strHtml))
            {
                return strHtml;
            }

            string strOutput = strHtml;
            for (int i = 0; i < Patterns.Length; i++)
            {
                strOutput = Patterns[i].Replace(strOutput, Replacements[i]);
            }

            // string.Replace returns a new string; the result has to be assigned,
            // otherwise this final cleanup has no effect.
            strOutput = strOutput.Replace("<", "");
            strOutput = strOutput.Replace(">", "");
            strOutput = strOutput.Replace("\r\n", "");

            return strOutput;
        }
    }
}
74
75
76
2using System.Data;
3using System.Configuration;
4using System.Web;
5using System.Text.RegularExpressions;
6
namespace ConsoleApplication1
{
    class Class1
    {
        /// <summary>
        /// Ordered list of patterns applied by <see cref="StripHTML"/>. The order is
        /// significant: markup is removed first, then entities are decoded, and the
        /// comment handling at the end relies on "-->" having been turned into a
        /// line break so that "&lt;!--.*\n" can find the end of the comment.
        /// The instances are built once and reused for every call.
        /// </summary>
        private static readonly Regex[] Patterns =
        {
            // Whole <script> elements. Singleline lets '.' cross line breaks,
            // because script bodies normally span several lines.
            new Regex(@"<script[^>]*?>.*?</script>", RegexOptions.IgnoreCase | RegexOptions.Singleline),

            // Any opening, closing or declaration tag: '<', optional '/', optional '!',
            // an optionally namespaced name, then quoted strings or any other
            // non-bracket characters up to '>'. The alternatives are mutually
            // exclusive by first character, which avoids exponential backtracking
            // on malformed input.
            new Regex(@"<(?:/\s*)?!?(?:\w+:)?\w+\b(?:""[^""]*""|'[^']*'|[^'""<>])*>", RegexOptions.IgnoreCase),

            // A line-break character followed by further whitespace.
            new Regex(@"([\r\n])[\s]+", RegexOptions.IgnoreCase),

            // Character entities.
            new Regex(@"&(quot|#34);", RegexOptions.IgnoreCase),
            new Regex(@"&(amp|#38);", RegexOptions.IgnoreCase),
            new Regex(@"&(lt|#60);", RegexOptions.IgnoreCase),
            new Regex(@"&(gt|#62);", RegexOptions.IgnoreCase),
            new Regex(@"&(nbsp|#160);", RegexOptions.IgnoreCase),
            new Regex(@"&(iexcl|#161);", RegexOptions.IgnoreCase),
            new Regex(@"&(cent|#162);", RegexOptions.IgnoreCase),
            new Regex(@"&(pound|#163);", RegexOptions.IgnoreCase),
            new Regex(@"&(copy|#169);", RegexOptions.IgnoreCase),

            // Any remaining numeric entity is dropped.
            new Regex(@"&#(\d+);", RegexOptions.IgnoreCase),

            // End-of-comment marker becomes a line break ...
            new Regex(@"-->", RegexOptions.IgnoreCase),

            // ... so that a comment can be removed from its start to that line break.
            new Regex(@"<!--.*\n", RegexOptions.IgnoreCase)
        };

        /// <summary>
        /// Replacement text for the entry at the same index in <see cref="Patterns"/>.
        /// </summary>
        private static readonly string[] Replacements =
        {
            "",
            "",
            "",
            "\"",
            "&",
            "<",
            ">",
            " ",
            "\u00A1", // inverted exclamation mark, chr(161)
            "\u00A2", // cent sign, chr(162)
            "\u00A3", // pound sign, chr(163)
            "\u00A9", // copyright sign, chr(169)
            "",
            "\r\n",
            ""
        };

        /// <summary>
        /// Removes HTML markup from a string and decodes a small set of character entities.
        /// </summary>
        /// <param name="strHtml">Source text that may contain HTML.</param>
        /// <returns>
        /// The text with script elements, tags and comments removed, the supported
        /// entities decoded, and any remaining '&lt;', '&gt;' and CR/LF pairs stripped.
        /// A null or empty input is returned unchanged.
        /// </returns>
        public static string StripHTML(string strHtml)
        {
            if (string.IsNullOrEmpty(strHtml))
            {
                return strHtml;
            }

            string strOutput = strHtml;
            for (int i = 0; i < Patterns.Length; i++)
            {
                strOutput = Patterns[i].Replace(strOutput, Replacements[i]);
            }

            // string.Replace returns a new string; the result has to be assigned,
            // otherwise this final cleanup has no effect.
            strOutput = strOutput.Replace("<", "");
            strOutput = strOutput.Replace(">", "");
            strOutput = strOutput.Replace("\r\n", "");

            return strOutput;
        }
    }
}
74
75
76