C#网页抓取 HttpWebRequest
之前觉得很简单,真做起来,才发现绕了不少弯路唉!
代码
/// <summary>
/// HTTP handler that fetches the resource named by the posted "url" form field
/// and saves its body to a new GUID-named ".css" file in the directory that
/// <c>Server.MapPath(".")</c> resolves to.
/// </summary>
public class DownLoadDBHandler : IHttpHandler
{
    /// <summary>
    /// Reads the "url" form field, issues a GET request to it and streams the
    /// response body to disk, then reports the outcome as a small HTML page.
    /// </summary>
    /// <param name="context">The HTTP context of the current request.</param>
    public void ProcessRequest(HttpContext context)
    {
        HttpRequest Request = context.Request;
        HttpResponse Response = context.Response;
        HttpServerUtility Server = context.Server;
        Response.ContentType = "text/html;charset=utf-8";

        string url = Request.Form["url"];
        if (string.IsNullOrEmpty(url))
        {
            Response.Write("<h1>url required!</h1>");
            Response.End();
            return;
        }

        // The URL comes straight from the client. Only absolute http/https addresses
        // are accepted: any other scheme would make WebRequest.Create return a
        // non-HTTP request type and the cast below would fail.
        // NOTE(review): this still lets a caller make the server fetch arbitrary
        // HTTP hosts (SSRF); restrict the allowed hosts if this is exposed publicly.
        System.Uri target;
        if (!System.Uri.TryCreate(url, System.UriKind.Absolute, out target)
            || (target.Scheme != System.Uri.UriSchemeHttp && target.Scheme != System.Uri.UriSchemeHttps))
        {
            Response.Write("<h1>url must be an absolute http(s) address!</h1>");
            Response.End();
            return;
        }

        string savePath = Path.Combine(Server.MapPath("."), System.Guid.NewGuid().ToString() + ".css");

        // Read the data from the URL. GetRequestStream() is NOT used here: that
        // stream is for writing POST parameters, it is not the response stream.
        HttpWebRequest myReq = (HttpWebRequest)WebRequest.Create(target);
        myReq.Method = "GET";

        // The file is opened only after the response has arrived, so a failed
        // request does not leave an empty file behind. Copying stream-to-stream
        // avoids depending on ContentLength, which is -1 when the server does not
        // send a length.
        using (WebResponse myResponse = myReq.GetResponse())
        using (Stream body = myResponse.GetResponseStream())
        using (FileStream fs = new FileStream(savePath, FileMode.Create, FileAccess.Write))
        {
            body.CopyTo(fs);
        }

        Response.Write("<h1>done</h1>");
        Response.End();
    }

    /// <summary>
    /// Always false: a new handler instance is requested for every request.
    /// </summary>
    public bool IsReusable
    {
        get
        {
            return false;
        }
    }
}
以下为备忘,用作以后参考
代码
/// <summary>
/// Click handler that runs the page "TestFormView.aspx" through the ASP.NET
/// runtime and captures its output as UTF-8 text in "~/tmp1.htm".
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">Event data (unused).</param>
protected void Button1_Click(object sender, EventArgs e)
{
    // FileMode.Create truncates an existing file, so a shorter response cannot
    // leave stale bytes from a previous run at the end of tmp1.htm.
    using (FileStream fs = new FileStream(Server.MapPath("~/tmp1.htm"), FileMode.Create))
    // Alternative encoding: System.Text.Encoding.GetEncoding("gb2312").
    using (TextWriter tw = new StreamWriter(fs, System.Text.Encoding.UTF8))
    {
        // The second argument ("info=...") plays the role of the query string
        // that would follow the requested URL.
        HttpRuntime.ProcessRequest(new MyRequest("TestFormView.aspx", "info=hello中", tw));
        // NOTE(review): sh() is not defined in the visible source; kept as in the
        // original - confirm what it refers to (possibly a mangled tw.Flush()).
        sh();
    }
    Response.Write("ok");
}
/// <summary>
/// A <see cref="SimpleWorkerRequest"/> that decodes the response bytes as UTF-8
/// before writing them to the supplied <see cref="TextWriter"/>.
/// </summary>
public class MyRequest : SimpleWorkerRequest
{
    private readonly TextWriter _output;

    // A stateful decoder keeps the trailing bytes of an incomplete UTF-8 sequence
    // between calls, so a character split across two chunks is decoded correctly
    // instead of becoming replacement characters.
    private readonly System.Text.Decoder _utf8Decoder = System.Text.Encoding.UTF8.GetDecoder();

    /// <summary>
    /// Creates the request; all three arguments are passed unchanged to the base constructor.
    /// </summary>
    /// <param name="a1">First base-constructor argument (the page to request).</param>
    /// <param name="a2">Second base-constructor argument (the query string).</param>
    /// <param name="a3">Writer that receives the decoded response text.</param>
    public MyRequest(string a1, string a2, TextWriter a3) : base(a1, a2, a3)
    {
        _output = a3;
    }

    /// <summary>
    /// Decodes the first <paramref name="length"/> bytes of <paramref name="data"/>
    /// as UTF-8 and writes the resulting characters to the output writer.
    /// </summary>
    /// <param name="data">Buffer holding the response bytes.</param>
    /// <param name="length">Number of valid bytes in <paramref name="data"/>.</param>
    public override void SendResponseFromMemory(byte[] data, int length)
    {
        char[] chars = new char[_utf8Decoder.GetCharCount(data, 0, length)];
        int decoded = _utf8Decoder.GetChars(data, 0, length, chars, 0);
        _output.Write(chars, 0, decoded);
    }
}
// Posts one form field ("firstone") read from the console to myHttpWebRequest.
// The method must be set to POST before the request stream is obtained.
myHttpWebRequest.Method = "POST";
Console.WriteLine ("\nPlease enter the data to be posted to the (http://www.contoso.com/codesnippets/next.asp) Uri :");
// Read the value to post. Console.ReadLine returns null at end of input, which is
// treated as an empty value.
string inputData = Console.ReadLine ();
// Escape the value so characters such as '&', '=' or non-ASCII text cannot break
// or corrupt the form-urlencoded body; the escaped text is pure ASCII.
string postData = "firstone=" + Uri.EscapeDataString (inputData ?? string.Empty);
ASCIIEncoding encoding = new ASCIIEncoding ();
byte[] byte1 = encoding.GetBytes (postData);
// Set the content type of the data being posted.
myHttpWebRequest.ContentType = "application/x-www-form-urlencoded";
// Set the content length of the data being posted.
myHttpWebRequest.ContentLength = byte1.Length;
// The using block closes the request stream even if the write throws.
using (Stream newStream = myHttpWebRequest.GetRequestStream ())
{
    newStream.Write (byte1, 0, byte1.Length);
    Console.WriteLine ("The value of 'ContentLength' property after sending the data is {0}", myHttpWebRequest.ContentLength);
}
如果要向指定的页面提交参数,WebRequest 提供了一个请求流,把参数写进这个流里就可以了:
public virtual Stream GetRequestStream()
这里有两个地方需要注意。第一,如果是以GET方式提交,参数直接写在传给 WebRequest.Create 的URL里;如果是以POST方式提交,那就获取这个流,把参数写进流里,注意在写之前必须先把 Method 指定为 POST。第二,写入之后要关闭这个流。
/// <summary>
/// Console sample that issues an HTTP request to the URL given as the first
/// command-line argument and prints the response's length, type and body.
/// </summary>
public class Test
{
    /// <summary>
    /// Entry point. Prints a usage message and returns when no URL is supplied.
    /// </summary>
    /// <param name="args">Command-line arguments; <c>args[0]</c> is the URL to request.</param>
    public static void Main (string[] args)
    {
        // Guard against a missing argument instead of failing on args[0].
        if (args == null || args.Length == 0 || string.IsNullOrEmpty (args[0]))
        {
            Console.Error.WriteLine ("Usage: Test <url>");
            return;
        }

        HttpWebRequest request = (HttpWebRequest)WebRequest.Create (args[0]);
        // Set some reasonable limits on resources used by this request.
        request.MaximumAutomaticRedirections = 4;
        request.MaximumResponseHeadersLength = 4;
        // Set credentials to use for this request.
        request.Credentials = CredentialCache.DefaultCredentials;

        // The using blocks release the response, its stream and the reader even
        // when reading fails part-way through.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse ())
        {
            Console.WriteLine ("Content length is {0}", response.ContentLength);
            Console.WriteLine ("Content type is {0}", response.ContentType);
            // Get the stream associated with the response and wrap it in a reader
            // that decodes the body as UTF-8.
            using (Stream receiveStream = response.GetResponseStream ())
            using (StreamReader readStream = new StreamReader (receiveStream, Encoding.UTF8))
            {
                Console.WriteLine ("Response stream received.");
                Console.WriteLine (readStream.ReadToEnd ());
            }
        }
    }
}