C# WebClient: Fetching Web Page Content via GET
1. GET:
WebClient web = new WebClient();
var html = web.DownloadString(url);
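If the target page is not in the runtime's default encoding, set WebClient.Encoding before downloading, otherwise Chinese text comes back garbled. A minimal GET sketch (the URL is a placeholder):

using System;
using System.Net;
using System.Text;

class GetDemo
{
    static void Main()
    {
        using (var web = new WebClient())
        {
            web.Encoding = Encoding.UTF8; // match the page's charset so non-ASCII text decodes correctly
            string html = web.DownloadString("http://example.com/"); // placeholder URL; issues a GET and returns the body
            Console.WriteLine(html.Length);
        }
    }
}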
2. POST
/// <summary>
/// Sends a POST request and returns the response body as a string.
/// </summary>
/// <param name="web"></param>
/// <param name="url"></param>
/// <param name="queryString">Format: paramname=value&amp;name2=value2</param>
/// <param name="clearHeads"></param>
/// <returns></returns>
public static string Post(this MyWebClient web, string url, string queryString, bool clearHeads = false) // extension method; must live in a static class
{
    string postString = queryString; // WebUtility.UrlEncode(queryString); // the POST parameters: capture them with a packet-sniffing tool or read the form yourself; every form field name must be included
    byte[] postData = Encoding.UTF8.GetBytes(postString); // encode the body; check the target page's encoding first, especially for Chinese characters
    web.RequestConentLength = postData.Length;
    if (clearHeads)
    {
        web.Headers.Clear();
        web.Headers.Add("Content-Type", "application/x-www-form-urlencoded"); // required header for POST; remove it if you switch to GET
    }

    byte[] responseData = web.UploadData(url, "POST", postData); // returns the raw response bytes
    string srcString = Encoding.UTF8.GetString(responseData); // decode
    return srcString;
}
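A usage sketch for the method above, assuming it lives in a static extension class and that MyWebClient from section 4 is available (the URL and form fields are placeholders):

var web = new MyWebClient(new CookieContainer());
// build the form body: name=value pairs joined with '&', values URL-encoded where needed
string body = "keyword=" + WebUtility.UrlEncode("测试") + "&page=1"; // placeholder field names
string html = web.Post("http://example.com/search", body, clearHeads: true); // placeholder URL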
3. Setting headers
web.Headers.Add(HttpRequestHeader.Accept, "*/*");
web.Headers.Add(HttpRequestHeader.AcceptEncoding, "gzip, deflate");
web.Headers.Add(HttpRequestHeader.AcceptLanguage, "zh-CN,zh;q=0.9");
//web.Headers.Add(HttpRequestHeader.Connection, "keep-alive");
web.Headers.Add("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8");
web.Headers.Add(HttpRequestHeader.Host, "wenshu.court.gov.cn");
web.Headers.Add("Origin", "http://wenshu.court.gov.cn");
//web.Headers.Add("Proxy-Connection", "keep-alive");
web.Headers.Add(HttpRequestHeader.UserAgent, userAgent);
web.Headers.Add("X-Requested-With", "XMLHttpRequest");
web.Headers.Add(HttpRequestHeader.Referer, WebUtility.UrlEncode(Referer1));
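One caveat about the Accept-Encoding line above: WebClient does not decompress gzip/deflate responses by itself, so if the server honors that header you get compressed bytes back. One way around it, sketched here as an assumption rather than part of the original class, is to enable automatic decompression inside the GetWebRequest override:

protected override WebRequest GetWebRequest(Uri address)
{
    var request = base.GetWebRequest(address) as HttpWebRequest;
    if (request != null)
    {
        // HttpWebRequest then transparently inflates gzip/deflate bodies
        request.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;
    }
    return request;
}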
4. A reusable base class: cookies, timeout, and more
public class MyWebClient : WebClient
{
    public CookieContainer Cookies;

    public MyWebClient(CookieContainer cookieContainer)
    {
        this.Cookies = cookieContainer;
    }

    public int TimeoutSeconds { get; set; } = 60;

    public WebRequest Request { get; set; }

    public int RequestConentLength;

    protected override WebRequest GetWebRequest(Uri address)
    {
        HttpWebRequest request = base.GetWebRequest(address) as HttpWebRequest;

        if (request != null)
        {
            request.Method = "POST"; // this subclass is intended for POST requests
            request.CookieContainer = Cookies;
            request.Timeout = 1000 * TimeoutSeconds;
            request.ContentLength = RequestConentLength;
        }

        Request = request;
        return request;
    }

    public WebResponse Response { get; set; }

    protected override WebResponse GetWebResponse(WebRequest request)
    {
        this.Response = base.GetWebResponse(request);
        return this.Response;
    }

    public string GetCookieValue(string cookieName)
    {
        var cookies = this.Cookies.GetCookies(this.Request.RequestUri);
        var ck = cookies[cookieName];
        return ck?.Value;
    }
}
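A short usage sketch of the class (the URL, form fields, and cookie name are placeholders):

var cookies = new CookieContainer();
var web = new MyWebClient(cookies) { TimeoutSeconds = 30 };
web.Headers.Add(HttpRequestHeader.UserAgent, "Mozilla/5.0"); // placeholder user agent
string html = web.Post("http://example.com/login", "user=a&pwd=b"); // placeholder URL and fields
string sessionId = web.GetCookieValue("ASP.NET_SessionId"); // assumes the site issues a cookie with this name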
Note in particular: to simulate a single browser making multiple requests to a site, create a new WebClient object per request, but have them all share one CookieContainer. Conversely, when a crawler simulates several browser sessions at once, the sessions should not all share the same CookieContainer object, otherwise they will conflict with one another (see the sketch below).
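A minimal sketch of both cases, assuming the MyWebClient class above (URLs and fields are placeholders):

// one simulated browser: a new client per request, all sharing one CookieContainer
var sessionCookies = new CookieContainer();
var loginClient = new MyWebClient(sessionCookies);
loginClient.Post("http://example.com/login", "user=a&pwd=b"); // placeholder
var queryClient = new MyWebClient(sessionCookies); // fresh client, same cookie jar
queryClient.Post("http://example.com/query", "keyword=test"); // placeholder; reuses the login cookies

// several simulated browsers: give each session its own CookieContainer
var sessionA = new MyWebClient(new CookieContainer());
var sessionB = new MyWebClient(new CookieContainer());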
Bonus: C# code that calls a JavaScript function:
public string CallJs(string jsCall, string jsFunctions)
{
    // Uses the Windows Script Control COM component (msscript.ocx); it must be registered and typically requires a 32-bit process
    Type obj = Type.GetTypeFromProgID("ScriptControl");
    if (obj == null) return null;
    object ScriptControl = Activator.CreateInstance(obj);
    obj.InvokeMember("Language", BindingFlags.SetProperty, null, ScriptControl, new object[] { "JavaScript" });
    //string js = "function time(a, b, msg){ var sum = a + b; return new Date().getTime() + ': ' + msg + ' = ' + sum }";
    obj.InvokeMember("AddCode", BindingFlags.InvokeMethod, null, ScriptControl, new object[] { jsFunctions }); // load the JS function source

    //return obj.InvokeMember("Eval", BindingFlags.InvokeMethod, null, ScriptControl, new object[] { "time(3, 5, '3 + 5')" }).ToString();
    return obj.InvokeMember("Eval", BindingFlags.InvokeMethod, null, ScriptControl, new object[] { jsCall }).ToString(); // evaluate the call expression
}
Usage example:
string js = "function jsfunction(parm){ return parm + \"abc\"; }";
string val = CallJs($"jsfunction('{csvar}')", js); // csvar is whatever C# value you want to pass to the script