/// <summary>
/// Base class for fetching HTML pages over HTTP. Subclasses override
/// <see cref="CheckLogin"/> to perform whatever site-specific login is needed
/// to obtain the cookies that identify the user.
/// </summary>
public abstract class WebControler
{
    #region ConstString
    /// <summary>User-Agent header sent with every request.</summary>
    protected string sUserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727)";

    /// <summary>Form content type; not used by this class itself, available to subclasses.</summary>
    protected string sContentType = "application/x-www-form-urlencoded";

    /// <summary>Request encoding name; not used by this class itself, available to subclasses.</summary>
    protected string sRequestEncoding = "utf-8";

    /// <summary>Name of the encoding used to decode the response body.</summary>
    protected string sResponseEncoding = "utf-8";

    /// <summary>Cookie container attached to every request issued by <see cref="GetHtmlFromUrl"/>.</summary>
    protected CookieContainer _cookieContainer = null;
    #endregion

    /// <summary>
    /// Prepares the cookie container used for subsequent requests. Override in a
    /// subclass to implement the login flow of a particular site so that the
    /// container holds the cookie identifying the user.
    /// </summary>
    /// <remarks>
    /// This base implementation only creates a fresh, empty container. It is
    /// invoked at the start of every <see cref="GetHtmlFromUrl"/> call, so the
    /// container is replaced on each request unless an override decides otherwise.
    /// </remarks>
    protected virtual void CheckLogin()
    {
        _cookieContainer = new CookieContainer();
    }

    /// <summary>
    /// Issues a GET request for <paramref name="url"/> and returns the response body as a string.
    /// </summary>
    /// <param name="url">Absolute HTTP or HTTPS URL to fetch.</param>
    /// <returns>
    /// The decoded response body when the status code is 200 (OK); otherwise an empty string.
    /// </returns>
    /// <exception cref="ApplicationException">
    /// <paramref name="url"/> does not produce an <see cref="HttpWebRequest"/> (for example a non-HTTP scheme).
    /// </exception>
    /// <remarks>
    /// This is an instance method because it relies on the overridable
    /// <see cref="CheckLogin"/> and on per-instance settings; a static method
    /// cannot access those members.
    /// </remarks>
    public string GetHtmlFromUrl(string url)
    {
        CheckLogin();

        HttpWebRequest httpRequest = WebRequest.Create(url) as HttpWebRequest;
        if (httpRequest == null)
        {
            throw new ApplicationException(
                string.Format("Invalid url string: {0}", url)
            );
        }

        // Some sites only serve pages to clients that first visited the home or
        // login page and record that fact in cookies, so the container prepared
        // by CheckLogin is handed to the request.
        httpRequest.CookieContainer = _cookieContainer;
        httpRequest.UserAgent = sUserAgent;
        httpRequest.Accept = "*/*";
        httpRequest.Headers.Add("Accept-Language", "zh-cn");
        httpRequest.KeepAlive = true;
        httpRequest.Timeout = 10000; // milliseconds
        httpRequest.Method = "GET";

        // Dispose the response (and its stream) even when reading fails, so the
        // underlying connection is always released.
        using (HttpWebResponse response = (HttpWebResponse)httpRequest.GetResponse())
        {
            if (response.StatusCode != HttpStatusCode.OK)
            {
                return string.Empty;
            }

            using (Stream body = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(body, Encoding.GetEncoding(sResponseEncoding)))
            {
                return reader.ReadToEnd();
            }
        }
    }
}