网页内容抓取

#region Build the POST parameters
// Request body for the CircleBean.getNewCircleUsers call, assembled as
// '&'-separated key=value pairs. Constant pieces use Append; AppendFormat is
// reserved for the pieces that actually substitute a value, so a literal
// brace in a constant can never be misread as a format placeholder.
// NOTE(review): the parameter names look like a DWR remote call — confirm.
StringBuilder sb = new StringBuilder();
sb.Append("callCount=1");
sb.AppendFormat("&page=/{0}/members/", urlName);
sb.Append("&httpSessionId=");
sb.Append("&scriptSessionId=D4DAC4AD9C3BF9B71C82802BDDBA0C25369");
sb.Append("&c0-scriptName=CircleBean");
sb.Append("&c0-methodName=getNewCircleUsers");
sb.Append("&c0-id=0"); // reserved value
sb.AppendFormat("&c0-param0=number:{0}", circleId); // circle id
sb.AppendFormat("&c0-param1=number:{0}", pageSize); // number of items requested
sb.AppendFormat("&c0-param2=number:{0}", pageSize * i); // start offset: 0, pageSize, 2 * pageSize, ...
sb.Append("&c0-param3=boolean:true");
sb.AppendFormat("&batchId={0}", i);

// Advance the page counter before the request is sent, so the next pass
// asks for the following page.
i++;

// Sample of the parameters for the first page (i = 0, pageSize = 10):
//   callCount=1
//   page=/dnkxin/members/
//   httpSessionId=
//   scriptSessionId=D4DAC4AD9C3BF9B71C82802BDDBA0C25369
//   c0-scriptName=CircleBean
//   c0-methodName=getNewCircleUsers
//   c0-id=0
//   c0-param0=number:15057111   (<body onload="MembersPage.init('15057111', '/style/pinkstar/','http://blog.163.com','')">)
//   c0-param1=number:10
//   c0-param2=number:0
//   c0-param3=boolean:true
//   batchId=0
#endregion

string content = "";
try
{
    httpHelper.ContentType = "text/plain";

    // Submit the parameters with POST and keep the returned page content.
    content = httpHelper.GetHtml(url, cookie, sb.ToString(), true, refUrl);

    // NOTE(review): nothing in this fragment changes circleReg, so the Regex
    // could probably be built once outside the loop — confirm before hoisting.
    re = new Regex(circleReg.ToString(), RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace);
    mc = re.Match(content);
}
catch (Exception ex)
{
    // Log the failure and leave the enclosing loop.
    LogTextHelper.WriteLine(ex.ToString());
    break;
}
posted @ 2012-03-28 15:49  世之云枭  阅读(228)  评论(0)  编辑  收藏  举报