C#:一个应用于网页抓取的方法——有效截取字符串
/// <summary>
/// Downloads the document at <paramref name="strUrl"/> and returns the text found
/// between a begin marker and an end marker, optionally applying string
/// replacements to the extracted text.
/// </summary>
/// <param name="strUrl">Address handed to <c>WebClient.DownloadString</c>.</param>
/// <param name="strBegin">Marker that precedes the wanted text. Expected to be non-empty.</param>
/// <param name="strEnd">Marker that follows the wanted text. Expected to be non-empty.</param>
/// <param name="iBegin">
/// 1-based occurrence of <paramref name="strBegin"/> to start after. A value below 1
/// or beyond the number of occurrences selects the last occurrence.
/// </param>
/// <param name="iEnd">
/// 1-based occurrence of <paramref name="strEnd"/> to stop at, counted from the first
/// end marker located at or after the start position. A value below 1, or one that
/// cannot be satisfied, selects the last occurrence in the document.
/// </param>
/// <param name="arrOld">Substrings to replace in the result; may be null (no replacement).</param>
/// <param name="arrNew">
/// Replacements matched to <paramref name="arrOld"/> by index. When null, or shorter
/// than <paramref name="arrOld"/>, the unmatched substrings are removed.
/// </param>
/// <returns>
/// The extracted (and replaced) text. An empty string when the begin marker is absent
/// or no end marker lies after the start position.
/// </returns>
/// <remarks>
/// Exceptions raised by the download propagate to the caller unchanged, with their
/// original stack trace. Marker searches use ordinal comparison throughout.
/// </remarks>
public static string GetWebPageContent(string strUrl, string strBegin, string strEnd, int iBegin, int iEnd, string[] arrOld, string[] arrNew)
{
    string strOriginal;

    // WebClient is IDisposable; release it as soon as the page text is in memory.
    using (WebClient client = new WebClient())
    {
        client.Encoding = System.Text.Encoding.Default;
        strOriginal = client.DownloadString(strUrl);
    }

    int iStart = FindContentStart(strOriginal, strBegin, iBegin);
    int iTerminal = FindContentEnd(strOriginal, strEnd, iEnd, iStart);

    string strDestination = strOriginal.Substring(iStart, iTerminal - iStart);
    return ApplyReplacements(strDestination, arrOld, arrNew);
}

/// <summary>
/// Returns the index just past the selected begin marker, or the length of
/// <paramref name="text"/> when the marker does not occur at all.
/// </summary>
private static int FindContentStart(string text, string strBegin, int iBegin)
{
    string[] arrBegin = text.Split(new string[] { strBegin }, StringSplitOptions.None);

    // No begin marker: park the start at the end so the extraction comes out empty.
    if (arrBegin.Length <= 1)
    {
        return text.Length;
    }

    // Requested occurrence is out of range: fall back to the last occurrence.
    if (iBegin < 1 || iBegin >= arrBegin.Length)
    {
        return text.LastIndexOf(strBegin, StringComparison.Ordinal) + strBegin.Length;
    }

    // Sum the segments and markers that precede the requested occurrence's end.
    int iStart = 0;
    for (int i = 0; i < iBegin; i++)
    {
        iStart += arrBegin[i].Length + strBegin.Length;
    }

    return iStart;
}

/// <summary>
/// Returns the index at which the selected end marker begins, or
/// <paramref name="iStart"/> when no end marker lies after the start position.
/// </summary>
private static int FindContentEnd(string text, string strEnd, int iEnd, int iStart)
{
    int lastEnd = text.LastIndexOf(strEnd, StringComparison.Ordinal);

    // No end marker after the start position: the extraction is empty.
    if (lastEnd <= iStart)
    {
        return iStart;
    }

    // Empty segments must be kept: the segment lengths are summed as character
    // offsets, so dropping any segment would shift every later position.
    string[] arrEnd = text.Split(new string[] { strEnd }, StringSplitOptions.None);

    // Requested occurrence is out of range: stop at the last end marker.
    if (iEnd < 1 || iEnd >= arrEnd.Length)
    {
        return lastEnd;
    }

    int markerEnd = 0; // index just past the end marker that follows segment i
    for (int i = 0; i < arrEnd.Length; i++)
    {
        markerEnd += arrEnd[i].Length + strEnd.Length;

        // Skip markers that begin before the start position, including one that
        // straddles it; using such a marker would give a negative length.
        if (markerEnd - strEnd.Length < iStart)
        {
            continue;
        }

        // Not enough end markers remain to honour iEnd: stop at the last one.
        if (arrEnd.Length <= iEnd + i)
        {
            return lastEnd;
        }

        // Advance over the next (iEnd - 1) end markers.
        for (int m = 1; m < iEnd; m++)
        {
            markerEnd += arrEnd[i + m].Length + strEnd.Length;
        }

        return markerEnd - strEnd.Length;
    }

    return lastEnd;
}

/// <summary>
/// Replaces each entry of <paramref name="arrOld"/> with the entry of
/// <paramref name="arrNew"/> at the same index, or removes it when there is none.
/// </summary>
private static string ApplyReplacements(string text, string[] arrOld, string[] arrNew)
{
    if (arrOld == null)
    {
        return text;
    }

    for (int i = 0; i < arrOld.Length; i++)
    {
        // string.Replace rejects a null or empty search value; nothing to replace anyway.
        if (string.IsNullOrEmpty(arrOld[i]))
        {
            continue;
        }

        string replacement = (arrNew != null && i < arrNew.Length) ? arrNew[i] : "";
        text = text.Replace(arrOld[i], replacement);
    }

    return text;
}
2 {
3 string strOriginal = "";
4 string strDestination = "";
5 try
6 {
7 WebClient WClient = new WebClient();
8 WClient.Encoding = System.Text.Encoding.Default;
9 strOriginal = WClient.DownloadString(strUrl);
10 int iii = strOriginal.Length;
11 }
12 catch (Exception e)
13 {
14 throw e;
15 }
16
17 int iStart = 0, iTerminal = 0, iLength = 0;
18 string[] strBeginSeparators = new string[] { strBegin };
19 string[] strEndSeparators = new string[] { strEnd };
20 string[] arrBegin = strOriginal.Split(strBeginSeparators, StringSplitOptions.None);
21 string[] arrEnd = strOriginal.Split(strEndSeparators, StringSplitOptions.RemoveEmptyEntries);
22 if (iBegin < 1 || iBegin >= arrBegin.Length) //判断截取索引开始位置以及需要截取的长度
23 if (arrBegin.Length > 1)
24 iStart = strOriginal.LastIndexOf(strBegin) + strBegin.Length;
25 else //如果不存在分割字符,则把起始截取位置置于末尾
26 iStart = strOriginal.Length;
27 else
28 {
29 int offset;
30 for (int i = 0; i < iBegin; i++)
31 {
32 iStart += arrBegin[i].Length;
33 }
34 iStart += (iBegin - 1) * strBegin.Length + strBegin.Length;
35 if (arrBegin.Length <= 1) //如果不存在分割字符,则把起始截取位置置于末尾
36 iStart=strOriginal.Length;
37 }
38
39 if (iEnd < 1 || iEnd >= arrEnd.Length)
40 {
41 if (strOriginal.LastIndexOf(strEnd) > iStart)
42 {
43 iTerminal = strOriginal.LastIndexOf(strEnd) ;
44 }
45 else //如果后面已经没有结束字符了,则取一个空值
46 iTerminal = iStart ;
47 }
48
49 else
50 {
51 for (int i = 0; i < arrEnd.Length; i++)
52 {
53 if (strOriginal.LastIndexOf(strEnd) <= iStart ) //如果后面已经没有结束字符了,则取一个空值,推出循环
54 {
55 iTerminal = iStart;
56 break;
57 }
58 // 继续取值
59 iTerminal += arrEnd[i].Length + strEnd.Length;
60 if(iTerminal>iStart)
61 {
62 if(arrEnd.Length>iEnd+i)
63 {
64 for (int m = 1; m < iEnd; m++ )
65 {
66 iTerminal += arrEnd[i + m].Length + strEnd.Length;
67 }
68 iTerminal = iTerminal - strEnd.Length;
69 }
70 else
71 {
72 iTerminal = strOriginal.LastIndexOf(strEnd);
73 }
74 break ;
75 }
76 }
77 //
78 }
79
80 iLength = iTerminal - iStart;// +strEnd.Length; 判断如何截取结束
81 try
82 {
83 strDestination = strOriginal.Substring(iStart, iLength);
84 }
85 catch (Exception ex)
86 {
87 throw ex;
88 }
89
90 if (arrOld != null && arrNew != null)
91 {
92 for (int i = 0; i < arrOld.Length; i++)
93 {
94 strDestination = strDestination.Replace(arrOld[i], arrNew[i]);
95 }
96 }
97 if (arrOld != null && arrNew == null)
98 {
99 for (int i = 0; i < arrOld.Length; i++)
100 {
101 strDestination = strDestination.Replace(arrOld[i], "");
102 }
103 }
104 return strDestination;
105 }
在一个朋友写的代码的基础上改进了一下,刚研究出来,分享一下!