调用远程页面并解析
一、获取远程页面数据的请求方法
/// <summary>
/// Downloads the page at <paramref name="Url"/> with an HTTP GET request and
/// returns the response body decoded as text.
/// </summary>
/// <param name="Url">
/// Address to fetch. When it does not start with "http://" or "https://"
/// (compared case-insensitively), "http://" is prepended.
/// </param>
/// <param name="encoding">Encoding used to decode the response body.</param>
/// <returns>
/// The decoded body when the server answers 200 OK; an empty string for any
/// other status code or when a <see cref="WebException"/> is raised (the
/// exception is logged); <c>null</c> when <paramref name="Url"/> is null,
/// empty or "about:blank".
/// </returns>
public string GetStringByUrl(string Url, System.Text.Encoding encoding)
{
    // Nothing to fetch for a missing address or the browser's blank page.
    if (string.IsNullOrEmpty(Url) || Url.Equals("about:blank"))
    {
        return null;
    }

    // Ordinal, case-insensitive scheme check so that "HTTP://host" is not
    // turned into "http://HTTP://host".
    bool hasScheme = Url.StartsWith("http://", System.StringComparison.OrdinalIgnoreCase)
        || Url.StartsWith("https://", System.StringComparison.OrdinalIgnoreCase);
    if (!hasScheme)
    {
        Url = "http://" + Url;
    }

    try
    {
        HttpWebRequest httpWebRequest = (HttpWebRequest)WebRequest.Create(Url);
        httpWebRequest.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; SV1; EmbeddedWB 14.52 from: http://www.baidu.com/ EmbeddedWB 14.52; .NET CLR 1.1.4322; .NET CLR 2.0.50727)";
        httpWebRequest.Accept = "*/*";
        httpWebRequest.KeepAlive = true;
        httpWebRequest.Headers.Add("Accept-Language", "zh-cn,en-us;q=0.5");

        // "using" releases the response (and its connection) on every exit
        // path, including exceptions thrown while reading the body.
        using (HttpWebResponse httpWebResponse = (HttpWebResponse)httpWebRequest.GetResponse())
        {
            if (httpWebResponse.StatusCode != HttpStatusCode.OK)
            {
                return string.Empty;
            }

            using (StreamReader sreader = new StreamReader(httpWebResponse.GetResponseStream(), encoding))
            {
                // Reads the whole body in one call instead of concatenating
                // 256-character chunks onto a string.
                return sreader.ReadToEnd();
            }
        }
    }
    catch (WebException e)
    {
        WriteLogContent(e.ToString(), "Log");

        // A failed request may still carry a response object; release it.
        if (e.Response != null)
        {
            e.Response.Close();
        }
        return "";
    }
}
/// <summary>
/// Sends <paramref name="PostVars"/> to <paramref name="url"/> with an HTTP
/// POST request and returns the response body decoded as text.
/// </summary>
/// <param name="url">Target address.</param>
/// <param name="PostVars">Name/value pairs to upload.</param>
/// <param name="encoding">Encoding used to decode the response bytes.</param>
/// <returns>
/// The decoded response body, or an empty string when any exception occurs
/// (the exception is logged through <c>Common.WriteLogContent</c>).
/// </returns>
public static string SendPostRequest(string url, System.Collections.Specialized.NameValueCollection PostVars, Encoding encoding)
{
    try
    {
        // WebClient is disposable; "using" releases it even if the upload throws.
        using (System.Net.WebClient WebClientObj = new System.Net.WebClient())
        {
            byte[] byRemoteInfo = WebClientObj.UploadValues(url, "POST", PostVars);
            return encoding.GetString(byRemoteInfo);
        }
    }
    catch (Exception ex)
    {
        // Best-effort contract: log the failure and report "no content".
        new Common().WriteLogContent(ex.ToString(), "Log");
        return "";
    }
}
二、解析获取后的页面数据
例1
// Example 1: POST a form to requestUrl and pull two values out of the returned HTML.
data = new string[2];

// Form fields sent with the request.
PostVars = new System.Collections.Specialized.NameValueCollection();
PostVars.Add("pageindex", "1");
PostVars.Add("lottory", "TC22X5Data");
PostVars.Add("pl3", "");
PostVars.Add("name", "22选5");
PostVars.Add("isgp", "0");

string content = Common.SendPostRequest(requestUrl, PostVars, Encoding.UTF8);

// Group 1: a 7-digit id (presumably the draw/issue number - confirm against the page).
// Group 2: five two-digit numbers separated by single spaces.
string rowPattern = @">(\d{7}) </td><td align='center' style='width: 60%;'> <span id='MyGridView_ctl02_lblHao'>(\d{2} \d{2} \d{2} \d{2} \d{2})<";
Match firstRow = new Regex(rowPattern).Match(content);

// Only the first occurrence is used; data stays unset when nothing matches.
if (firstRow.Success)
{
    data[0] = firstRow.Groups[1].Value;
    // Turn the space-separated numbers into a comma-separated list.
    data[1] = firstRow.Groups[2].Value.Replace(' ', ',');
}
例2
// Example 2: GET a gb2312-encoded page and pull two values out of the HTML.
data = new string[2];
requestUrl += "tjssc/";
string content = new Common().GetStringByUrl(requestUrl, Encoding.GetEncoding("gb2312"));

// reg1 captures an 11-digit id from the "qihao" cell; reg2 captures one digit
// from each "q_orange" button.
Regex reg1 = new Regex("<td class=\"qihao\">(\\d{11})期</td>");
Regex reg2 = new Regex("<input type=\"button\" value=\"(\\d{1})\" class=\"q_orange\" />");

MatchCollection matchs1 = reg1.Matches(content);
if (matchs1.Count > 0)
{
    data[0] = matchs1[0].Groups[1].Value;
}

MatchCollection matchs2 = reg2.Matches(content);
// The first five matches are concatenated below, so require at least five;
// checking only "Count > 0" would throw ArgumentOutOfRangeException on a
// page that yields fewer than five digits.
if (matchs2.Count >= 5)
{
    data[1] = matchs2[0].Groups[1].Value
        + matchs2[1].Groups[1].Value
        + matchs2[2].Groups[1].Value
        + matchs2[3].Groups[1].Value
        + matchs2[4].Groups[1].Value;
}