C#:一个应用于网页抓取的方法——有效截取字符串

  1 public static string GetWebPageContent(string strUrl, string strBegin, string strEnd, int iBegin, int iEnd, string[] arrOld, string[] arrNew)
  2        {
  3            string strOriginal = "";
  4            string strDestination = "";
  5            try
  6            {
  7                WebClient WClient = new WebClient();
  8                WClient.Encoding = System.Text.Encoding.Default;
  9                strOriginal = WClient.DownloadString(strUrl);
 10                int iii = strOriginal.Length;
 11            }

 12            catch (Exception e)
 13            {
 14                throw e;
 15            }

 16
 17            int iStart = 0, iTerminal = 0, iLength = 0;
 18            string[] strBeginSeparators = new string[] { strBegin };
 19            string[] strEndSeparators = new string[] { strEnd };
 20            string[] arrBegin = strOriginal.Split(strBeginSeparators, StringSplitOptions.None);
 21            string[] arrEnd = strOriginal.Split(strEndSeparators, StringSplitOptions.RemoveEmptyEntries);
 22            if (iBegin < 1 || iBegin >= arrBegin.Length)      //判断截取索引开始位置以及需要截取的长度
 23                if (arrBegin.Length > 1)
 24                    iStart = strOriginal.LastIndexOf(strBegin) + strBegin.Length;
 25                else      //如果不存在分割字符,则把起始截取位置置于末尾
 26                    iStart = strOriginal.Length;  
 27            else
 28            {
 29                int offset;
 30                for (int i = 0; i < iBegin; i++)
 31                {
 32                    iStart += arrBegin[i].Length;
 33                }

 34                iStart += (iBegin - 1* strBegin.Length + strBegin.Length;
 35                if (arrBegin.Length <= 1)   //如果不存在分割字符,则把起始截取位置置于末尾
 36                    iStart=strOriginal.Length;
 37            }

 38
 39            if (iEnd < 1 || iEnd >= arrEnd.Length)
 40            {
 41                if (strOriginal.LastIndexOf(strEnd) > iStart)
 42                {
 43                    iTerminal = strOriginal.LastIndexOf(strEnd) ;
 44                }

 45                else                  //如果后面已经没有结束字符了,则取一个空值
 46                    iTerminal = iStart ;  
 47            }

 48                
 49            else
 50            {
 51                for (int i = 0; i < arrEnd.Length; i++)
 52                {
 53                    if (strOriginal.LastIndexOf(strEnd) <=  iStart )   //如果后面已经没有结束字符了,则取一个空值,推出循环
 54                    {
 55                        iTerminal = iStart;
 56                        break
 57                    }

 58                    // 继续取值
 59                    iTerminal += arrEnd[i].Length + strEnd.Length;
 60                    if(iTerminal>iStart)
 61                    {
 62                        if(arrEnd.Length>iEnd+i)
 63                        {
 64                            for (int m = 1; m < iEnd; m++ )
 65                            {
 66                                iTerminal += arrEnd[i + m].Length + strEnd.Length;
 67                            }

 68                            iTerminal = iTerminal - strEnd.Length;
 69                        }

 70                        else
 71                        {
 72                            iTerminal = strOriginal.LastIndexOf(strEnd);
 73                        }

 74                        break ;
 75                    }

 76                }

 77                //
 78            }

 79
 80            iLength = iTerminal - iStart;// +strEnd.Length;    判断如何截取结束
 81            try
 82            {
 83                strDestination = strOriginal.Substring(iStart, iLength);
 84            }

 85            catch (Exception ex)
 86            {
 87                throw ex;
 88            }

 89
 90            if (arrOld != null && arrNew != null)
 91            {
 92                for (int i = 0; i < arrOld.Length; i++)
 93                {
 94                    strDestination = strDestination.Replace(arrOld[i], arrNew[i]);
 95                }

 96            }

 97            if (arrOld != null && arrNew == null)
 98            {
 99                for (int i = 0; i < arrOld.Length; i++)
100                {
101                    strDestination = strDestination.Replace(arrOld[i], "");
102                }

103            }

104            return strDestination;
105        }

在一个朋友写的代码的基础上改进了一下,刚研究出来,分享一下!
posted @ 2007-11-11 16:04  zhangsir  阅读(667)  评论(0)  编辑  收藏  举报