[通用类库]去除HTML的类
2007-08-23 10:03 水随风 阅读(296) 评论(0) 收藏 举报 1
using System;
2
using System.Data;
3
using System.Configuration;
4
using System.Web;
5
using System.Text.RegularExpressions;
6
using System.Web.Security;
7
using System.Web.UI;
8
using System.Web.UI.WebControls;
9
using System.Web.UI.WebControls.WebParts;
10
using System.Web.UI.HtmlControls;
11
12
/// <summary>
/// Helpers that reduce an HTML fragment to plain text by removing markup
/// and decoding a small set of character entities.
/// </summary>
public class CutHtml
{
    /// <summary>
    /// One clean-up step: a pattern and the text substituted for each match.
    /// </summary>
    private sealed class Rule
    {
        public readonly Regex Pattern;
        public readonly string Replacement;

        public Rule(string pattern, RegexOptions options, string replacement)
        {
            Pattern = new Regex(pattern, options);
            Replacement = replacement;
        }
    }

    // Every rule is matched case-insensitively.
    private const RegexOptions DefaultOptions = RegexOptions.IgnoreCase;

    // Rules are applied in this order by ScriptHtml. The order matters:
    // markup is removed before entities are decoded, and "-->" is rewritten
    // to a line break so the following "<!--.*\n" rule can consume the comment.
    // Entities are decoded one rule after another, so text such as
    // "&amp;lt;" ends up decoded twice.
    private static readonly Rule[] ScriptHtmlRules = new Rule[]
    {
        // Singleline lets ".*?" cross line breaks, so script bodies that
        // span several lines are removed together with their tags.
        new Rule(@"<script[^>]*?>.*?</script>", DefaultOptions | RegexOptions.Singleline, ""),
        // Opening, closing and self-closing tags.
        // NOTE(review): this pattern nests quantifiers; confirm it performs
        // acceptably on malformed or hostile input.
        new Rule(@"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>", DefaultOptions, ""),
        // A line-break character followed by further whitespace.
        new Rule(@"([\r\n])[\s]+", DefaultOptions, ""),
        new Rule(@"&(quot|#34);", DefaultOptions, "\""),
        new Rule(@"&(amp|#38);", DefaultOptions, "&"),
        new Rule(@"&(lt|#60);", DefaultOptions, "<"),
        new Rule(@"&(gt|#62);", DefaultOptions, ">"),
        new Rule(@"&(nbsp|#160);", DefaultOptions, " "),
        new Rule(@"&(iexcl|#161);", DefaultOptions, "\u00A1"),
        new Rule(@"&(cent|#162);", DefaultOptions, "\u00A2"),
        new Rule(@"&(pound|#163);", DefaultOptions, "\u00A3"),
        new Rule(@"&(copy|#169);", DefaultOptions, "\u00A9"),
        // Any remaining numeric entity is dropped, not decoded.
        new Rule(@"&#(\d+);", DefaultOptions, ""),
        new Rule(@"-->", DefaultOptions, "\r\n"),
        new Rule(@"<!--.*\n", DefaultOptions, "")
    };

    // Matches anything between a '<' and the next '>'.
    private static readonly Regex AnyTag = new Regex("<[^>]*>");

    /// <summary>
    /// Creates a new instance. The class holds no per-instance state.
    /// </summary>
    public CutHtml()
    {
    }

    /// <summary>
    /// Removes script blocks, tags and comments from <paramref name="HtmlStr"/>,
    /// decodes a fixed set of entities, and finally strips every remaining
    /// '&lt;', '&gt;' and CRLF sequence.
    /// </summary>
    /// <param name="HtmlStr">The HTML text to clean. May be null.</param>
    /// <returns>
    /// The cleaned text, or an empty string when <paramref name="HtmlStr"/>
    /// is null or empty.
    /// </returns>
    public string ScriptHtml(string HtmlStr)
    {
        if (string.IsNullOrEmpty(HtmlStr))
        {
            return string.Empty;
        }

        string text = HtmlStr;
        foreach (Rule rule in ScriptHtmlRules)
        {
            text = rule.Pattern.Replace(text, rule.Replacement);
        }

        // string.Replace returns a new string; the result must be assigned
        // or the call has no effect.
        text = text.Replace("<", "");
        text = text.Replace(">", "");
        text = text.Replace("\r\n", "");

        return text;
    }

    /// <summary>
    /// Removes every <c>&lt;...&gt;</c> span from <paramref name="HTMLStr"/>.
    /// Entities and text between tags are left as they are.
    /// </summary>
    /// <param name="HTMLStr">The HTML text to strip. May be null.</param>
    /// <returns>
    /// The text without tags, or an empty string when
    /// <paramref name="HTMLStr"/> is null or empty.
    /// </returns>
    public static string ParseTags(string HTMLStr)
    {
        if (string.IsNullOrEmpty(HTMLStr))
        {
            return string.Empty;
        }

        return AnyTag.Replace(HTMLStr, "");
    }

    //// Remove image tags (disabled)
    //public static string GetImgUrl(string HTMLStr)
    //{
    //    string str = string.Empty;
    //    string sPattern = @"^<img\s+[^>]*>";
    //    Regex r = new Regex(@"<img\s+[^>]*\s*src\s*=\s*([']?)(?<url>\S+)'?[^>]*>",
    //        RegexOptions.Compiled);
    //    Match m = r.Match(HTMLStr.ToLower());
    //    if (m.Success)
    //        str = m.Result("${url}");
    //    return str;
    //}
}
111

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111
