C# WebClient Get获取网页内容

//不知道怎么删除,只好留着

 1. Get方式:

// Download the page at `url` as text with an HTTP GET.
string html;
// WebClient is IDisposable; the using block releases it as soon as the download finishes.
using (WebClient web = new WebClient())
{
    // DownloadString decodes the body with web.Encoding, which otherwise falls back to the
    // machine's default encoding; UTF-8 matches the decoding used by the POST helper in this article.
    web.Encoding = System.Text.Encoding.UTF8;
    html = web.DownloadString(url);
}

2. Post方式

 /// <summary>
 /// Sends <paramref name="queryString"/> as the body of an HTTP POST and returns the response decoded as UTF-8.
 /// </summary>
 /// <param name="web">Client that performs the request; its current headers are sent along.</param>
 /// <param name="url">Address to post to.</param>
 /// <param name="queryString">Form body in the shape <c>name1=value1&amp;name2=value2</c>. It is sent as-is (no URL-encoding is applied here).</param>
 /// <param name="clearHeads">When true, every header currently set on <paramref name="web"/> is removed before sending.</param>
 /// <returns>The response body decoded as UTF-8 text.</returns>
 /// <exception cref="ArgumentNullException">Any of <paramref name="web"/>, <paramref name="url"/> or <paramref name="queryString"/> is null.</exception>
 public static string Post(this MyWebClient web, string url, string queryString, bool clearHeads=false)
 {
     if (web == null)
     {
         throw new ArgumentNullException(nameof(web));
     }
     if (url == null)
     {
         throw new ArgumentNullException(nameof(url));
     }
     if (queryString == null)
     {
         throw new ArgumentNullException(nameof(queryString));
     }

     // Encode the body as UTF-8; check the target page's charset first if it contains non-ASCII text.
     byte[] postData = Encoding.UTF8.GetBytes(queryString);
     web.RequestConentLength = postData.Length;

     if (clearHeads)
     {
         web.Headers.Clear();
     }

     // A form POST needs a Content-Type. Supply the default whenever none is present
     // (not only after clearing), but leave a value the caller already chose untouched.
     if (string.IsNullOrEmpty(web.Headers["Content-Type"]))
     {
         web.Headers["Content-Type"] = "application/x-www-form-urlencoded";
     }

     byte[] responseData = web.UploadData(url, "POST", postData);
     return Encoding.UTF8.GetString(responseData);
 }

3. Headers设置

 // Request headers imitating a browser AJAX form post to wenshu.court.gov.cn.
 // The indexer replaces an existing value; Headers.Add would append a second value
 // if this code runs again on a client that is being reused.
 web.Headers[HttpRequestHeader.Accept] = "*/*";
 // NOTE(review): advertising gzip/deflate allows the server to send a compressed body; make sure
 // the request has AutomaticDecompression enabled before decoding the response as text.
 web.Headers[HttpRequestHeader.AcceptEncoding] = "gzip, deflate";
 web.Headers[HttpRequestHeader.AcceptLanguage] = "zh-CN,zh;q=0.9";
 //web.Headers.Add(HttpRequestHeader.Connection, "keep-alive");
 web.Headers["Content-Type"] = "application/x-www-form-urlencoded; charset=UTF-8";
 web.Headers[HttpRequestHeader.Host] = "wenshu.court.gov.cn";
 web.Headers["Origin"] = "http://wenshu.court.gov.cn";
 //web.Headers.Add("Proxy-Connection", "keep-alive");
 web.Headers[HttpRequestHeader.UserAgent] = userAgent;
 web.Headers["X-Requested-With"] = "XMLHttpRequest";
 // Referer must remain a URI: AbsoluteUri percent-encodes non-ASCII characters but keeps "://", "/"
 // and "?" intact, whereas WebUtility.UrlEncode on the whole URL would escape those as well.
 web.Headers[HttpRequestHeader.Referer] = new Uri(Referer1).AbsoluteUri;

4.Cookie、超时等高可用基类

 /// <summary>
 /// <see cref="WebClient"/> that applies a cookie container and a timeout to every HTTP request
 /// and remembers the most recent request and response.
 /// </summary>
 public class MyWebClient : WebClient
 {
     /// <summary>Cookie container assigned to each HTTP request created by this client.</summary>
     public CookieContainer Cookies ;

     /// <summary>Creates a client whose HTTP requests use <paramref name="cookieContainer"/>.</summary>
     /// <param name="cookieContainer">Container to share between requests; stored as-is.</param>
     public MyWebClient(CookieContainer cookieContainer)
     {
         this.Cookies = cookieContainer;
     }

     /// <summary>Request timeout in seconds; converted to milliseconds when a request is created.</summary>
     public int TimeoutSeconds { get; set; } = 60;

     /// <summary>The request most recently created by <see cref="GetWebRequest"/>.</summary>
     public WebRequest Request { get; set; }

     /// <summary>
     /// Body length in bytes. Applied to a request only when it can carry a body and
     /// no content length has been set on it yet.
     /// </summary>
     public int RequestConentLength;

     /// <summary>
     /// Creates the request for <paramref name="address"/> and, for HTTP requests, applies
     /// cookies, timeout and automatic gzip/deflate decompression.
     /// </summary>
     /// <param name="address">Resource being requested.</param>
     /// <returns>The request produced by the base class (never replaced by null for non-HTTP schemes).</returns>
     protected override WebRequest GetWebRequest(Uri address)
     {
         WebRequest created = base.GetWebRequest(address);
         HttpWebRequest http = created as HttpWebRequest;

         if (http != null)
         {
             // The HTTP verb is left as chosen by the base class, so DownloadString stays a GET
             // and UploadData/UploadString use the verb the caller asked for.
             http.CookieContainer = Cookies;
             http.Timeout = 1000 * TimeoutSeconds;

             // Transparently decode compressed bodies so callers that send
             // "Accept-Encoding: gzip, deflate" still receive plain bytes.
             http.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;

             bool bodyless = string.Equals(http.Method, "GET", StringComparison.OrdinalIgnoreCase)
                          || string.Equals(http.Method, "HEAD", StringComparison.OrdinalIgnoreCase);

             // Do not overwrite a length that is already set (ContentLength is -1 while unset),
             // and never attach a body length to a GET/HEAD request.
             if (!bodyless && http.ContentLength < 0 && RequestConentLength > 0)
             {
                 http.ContentLength = RequestConentLength;
             }
         }

         // Return the base request even when it is not HTTP (e.g. file:// or ftp://);
         // returning the failed cast would hand WebClient a null request.
         Request = created;
         return created;
     }

     /// <summary>The response most recently received by this client.</summary>
     public WebResponse Response { get; set; }

     /// <summary>Records the response of a synchronous request before returning it.</summary>
     protected override WebResponse GetWebResponse(WebRequest request)
     {
         this.Response = base.GetWebResponse(request);
         return this.Response;
     }

     /// <summary>Records the response of an asynchronous request before returning it.</summary>
     protected override WebResponse GetWebResponse(WebRequest request, IAsyncResult result)
     {
         this.Response = base.GetWebResponse(request, result);
         return this.Response;
     }

     /// <summary>
     /// Looks up a cookie that applies to the URI of the most recent request.
     /// </summary>
     /// <param name="cookieName">Name of the cookie.</param>
     /// <returns>The cookie value, or null when there is no such cookie, no cookie container, or no request has been created yet.</returns>
     public string GetCookieValue(string cookieName)
     {
         if (this.Cookies == null || this.Request == null)
         {
             return null;
         }

         Cookie match = this.Cookies.GetCookies(this.Request.RequestUri)[cookieName];
         return match?.Value;
     }
 }

特别注意:模拟同一个浏览器向网站发起多次请求时,可以创建多个WebClient对象,但它们应当共用同一个CookieContainer;而在编写爬虫、模拟多个浏览器会话时,则不应让所有会话共用同一个CookieContainer对象,而应为每个会话单独创建一个,以避免会话冲突。

 

附赠一个C#调用JS脚本的代码:

 /// <summary>
 /// Evaluates a JavaScript expression through the "ScriptControl" COM component.
 /// </summary>
 /// <param name="jsCall">Expression to evaluate, typically a call to a function defined in <paramref name="jsFunctions"/>.</param>
 /// <param name="jsFunctions">JavaScript source that is loaded before <paramref name="jsCall"/> is evaluated.</param>
 /// <returns>
 /// The result converted to a string, or null when the ScriptControl ProgID is not registered
 /// or the evaluation produces no value.
 /// </returns>
 /// <remarks>
 /// Both arguments are executed as script, so never pass untrusted text.
 /// NOTE(review): ScriptControl is usually registered for 32-bit processes only; confirm the target platform.
 /// </remarks>
 public string CallJs(string jsCall , string jsFunctions)
 {
     Type scriptType = Type.GetTypeFromProgID("ScriptControl");
     if (scriptType == null)
     {
         return null;
     }

     object scriptControl = Activator.CreateInstance(scriptType);
     try
     {
         scriptType.InvokeMember("Language", BindingFlags.SetProperty, null, scriptControl, new object[] { "JavaScript" });
         //string js = "function time(a, b, msg){ var sum = a + b; return new Date().getTime() + ': ' + msg + ' = ' + sum }";
         scriptType.InvokeMember("AddCode", BindingFlags.InvokeMethod, null, scriptControl, new object[] { jsFunctions });

         //return obj.InvokeMember("Eval", BindingFlags.InvokeMethod, null, ScriptControl, new object[] { "time(3, 5, '3 + 5')" }).ToString();
         object result = scriptType.InvokeMember("Eval", BindingFlags.InvokeMethod, null, scriptControl, new object[] { jsCall });

         // Eval can come back without a value; return null instead of throwing on ToString().
         return result?.ToString();
     }
     finally
     {
         // Release the COM instance deterministically instead of waiting for finalization.
         if (scriptControl != null && System.Runtime.InteropServices.Marshal.IsComObject(scriptControl))
         {
             System.Runtime.InteropServices.Marshal.ReleaseComObject(scriptControl);
         }
     }
 }

使用示例:

// Define a JavaScript function, then call it with a C# value.
// The inner double quotes must be escaped, otherwise the C# string literal ends early.
string js = "function jsfunction(parm){ return parm + \"abc\"; }";
// Escape backslashes and single quotes so the value cannot break out of the JS string literal.
string jsArg = (csvar ?? string.Empty).Replace("\\", "\\\\").Replace("'", "\\'");
string val = CallJs($"jsfunction('{jsArg}')", js);

 

posted @ 2018-05-31 11:57  乁卬杨  阅读(1568)  评论(0)  编辑  收藏  举报