调用远程页面并解析

    
        一、获取远程页面数据请求方法

        /// <summary>
        /// 获取远程服务器页面文件
        /// </summary>
        /// <param name="Url">访问地址</param>
        /// <param name="encoding">编码格式</param>
        /// <returns>string</returns>
        public  string GetStringByUrl(string Url, System.Text.Encoding encoding)
        {
            if (Url.Equals("about:blank")) return null; ;
            if (!Url.StartsWith("http://") && !Url.StartsWith("https://")) { Url = "http://" + Url; }
            StreamReader sreader = null;
            string result = string.Empty;
            try
            {
                HttpWebRequest httpWebRequest = (HttpWebRequest)WebRequest.Create(Url);

                //httpWebRequest.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727)";
                httpWebRequest.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; SV1; EmbeddedWB 14.52 from: http://www.baidu.com/ EmbeddedWB 14.52; .NET CLR 1.1.4322; .NET CLR 2.0.50727)";
                httpWebRequest.Accept = "*/*";
                httpWebRequest.KeepAlive = true;
                httpWebRequest.Headers.Add("Accept-Language", "zh-cn,en-us;q=0.5");

                HttpWebResponse httpWebResponse = (HttpWebResponse)httpWebRequest.GetResponse();
                if (httpWebResponse.StatusCode == HttpStatusCode.OK)
                {
                    sreader = new StreamReader(httpWebResponse.GetResponseStream(), encoding);
                    char[] cCont = new char[256];
                    int count = sreader.Read(cCont, 0, 256);
                    while (count > 0)
                    {
                        String str = new String(cCont, 0, count);
                        result += str;
                        count = sreader.Read(cCont, 0, 256);
                    }
                }
                if (null != httpWebResponse) { httpWebResponse.Close(); }
                return result;
            }
            catch (WebException e)
            {
                WriteLogContent(e.ToString(), "Log");
                return "";
            }
            finally
            {
                if (sreader != null)
                {
                    sreader.Close();
                }
            }
        }


        /// <summary>
        /// 发送post请求
        /// </summary>
        /// <param name="url">目标地址</param>
        /// <param name="PostVars">发送参数</param>
        /// <param name="encoding">编码格式</param>
        /// <returns></returns>
        public static string SendPostRequest(string url, System.Collections.Specialized.NameValueCollection PostVars, Encoding encoding)
        {
            try
            {
                System.Net.WebClient WebClientObj = new System.Net.WebClient();
                byte[] byRemoteInfo = WebClientObj.UploadValues(url, "POST", PostVars);
                string sRemoteInfo = encoding.GetString(byRemoteInfo);
                return sRemoteInfo;
            }
            catch (Exception ex)
            {
                new Common().WriteLogContent(ex.ToString(), "Log");
                return "";
            }
        }


      二、解析获取后的页面数据
          例1
  data = new string[2];
                        PostVars = new System.Collections.Specialized.NameValueCollection();
                        PostVars.Add("pageindex", "1");
                        PostVars.Add("lottory", "TC22X5Data");
                        PostVars.Add("pl3", "");
                        PostVars.Add("name", "22选5");
                        PostVars.Add("isgp", "0");

                        string content = Common.SendPostRequest(requestUrl, PostVars, Encoding.UTF8);
                        Regex reg = new Regex(@"&gt;(\d{7})  &lt;/td&gt;&lt;td align='center' style='width: 60%;'&gt; &lt;span id='MyGridView_ctl02_lblHao'&gt;(\d{2} \d{2} \d{2} \d{2} \d{2})&lt;");
                        MatchCollection matchs = reg.Matches(content);
                        if (matchs.Count > 0)
                        {
                            data[0] = matchs[0].Groups[1].Value;
                            data[1] = matchs[0].Groups[2].Value.Replace(' ', ',');
                        }

          例2
   data = new string[2];
                        requestUrl += "tjssc/";
                        string content = new Common().GetStringByUrl(requestUrl, Encoding.GetEncoding("gb2312"));
                        Regex reg1 = new Regex("<td class=\"qihao\">(\\d{11})期</td>");
                        Regex reg2 = new Regex("<input type=\"button\" value=\"(\\d{1})\" class=\"q_orange\" />");
                        MatchCollection matchs1 = reg1.Matches(content);
                        if (matchs1.Count > 0)
                        {
                            data[0] = matchs1[0].Groups[1].Value;
                        }
                        MatchCollection matchs2 = reg2.Matches(content);
                        if (matchs2.Count > 0)
                        {
                            data[1] = ""+matchs2[0].Groups[1].Value + matchs2[1].Groups[1].Value + matchs2[2].Groups[1].Value + matchs2[3].Groups[1].Value + matchs2[4].Groups[1].Value;
                        }

posted @ 2012-10-08 15:15  狄大人  阅读(517)  评论(0)  编辑  收藏  举报