用socket来代替HttpWebRequest和HttpWebResponse
通常爬虫并不知道目标服务器的 IP 地址,只知道它的 URL,因此需要先从 URL 中取出主机名并通过 DNS 解析出 IP,再用 Socket 建立连接。客户端代码修改如下:
/// <summary>
/// Sends a plain HTTP/1.0 GET request over a raw TCP socket (port 80) and returns the
/// raw response text (status line, headers and body) as received from the server.
/// </summary>
/// <param name="url">Target URL. The "http://" prefix is optional; a missing path is replaced by "/".</param>
/// <param name="encoding">Optional. The first element is used to decode the response; UTF-8 is used when none is given.</param>
/// <returns>
/// The decoded response. On failure the error is traced and whatever was received so far
/// (possibly an empty string) is returned; no exception is propagated to the caller.
/// </returns>
public string Get(string url, params Encoding[] encoding)
{
    StringBuilder responseText = new StringBuilder();
    // NOTE(review): the status is set to Busy here and never changed again in this method;
    // presumably the caller resets it - confirm.
    _statu = HttpRequestStatus.Busy;
    try
    {
        Regex reg = new Regex("(http://)?(?<name>[^/?]+)");
        Match m = reg.Match(url);
        string hostName = m.Groups["name"].Value;

        // Add the scheme without discarding the path/query the caller supplied.
        if (!url.StartsWith("http://", StringComparison.Ordinal))
        {
            url = "http://" + url;
        }

        // Only supply "/" when the URL has no path at all; never alter an existing path.
        int pathStart = url.IndexOfAny(new[] { '/', '?' }, "http://".Length);
        if (pathStart < 0)
        {
            url += "/";
        }
        else if (url[pathStart] == '?')
        {
            url = url.Insert(pathStart, "/");
        }

        // The socket below is IPv4-only, so pick the first IPv4 address of the host.
        IPAddress address = null;
        foreach (IPAddress candidate in Dns.GetHostAddresses(hostName))
        {
            if (candidate.AddressFamily == AddressFamily.InterNetwork)
            {
                address = candidate;
                break;
            }
        }
        if (address == null)
        {
            System.Diagnostics.Trace.WriteLine(string.Format("Get: no IPv4 address found for host '{0}'", hostName));
            return responseText.ToString();
        }

        Encoding coder = Encoding.UTF8;
        if (encoding != null && encoding.Length > 0 && encoding[0] != null)
        {
            coder = encoding[0];
        }

        // 'using' guarantees the socket is closed even when Connect/Send/Receive throws.
        using (Socket socket = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp))
        {
            socket.Connect(new IPEndPoint(address, 80));

            // Send the request to the server.
            StringBuilder bufRequest = new StringBuilder();
            bufRequest.Append("GET ").Append(url).Append(" HTTP/1.0\r\n");
            bufRequest.Append("Content-Type: application/x-www-form-urlencoded\r\n");
            bufRequest.Append("\r\n");
            byte[] bs = Encoding.ASCII.GetBytes(bufRequest.ToString());
            socket.Send(bs);

            // Receive until the server closes the connection. A stateful Decoder is used so
            // that a multi-byte character split across two reads is still decoded correctly.
            byte[] recvBytes = new byte[1024];
            Decoder decoder = coder.GetDecoder();
            char[] chars = new char[coder.GetMaxCharCount(recvBytes.Length)];
            int bytes;
            while ((bytes = socket.Receive(recvBytes, recvBytes.Length, SocketFlags.None)) > 0)
            {
                int charCount = decoder.GetChars(recvBytes, 0, bytes, chars, 0, false);
                responseText.Append(chars, 0, charCount);
            }

            // Flush any bytes the decoder is still holding back.
            int tail = decoder.GetChars(recvBytes, 0, 0, chars, 0, true);
            responseText.Append(chars, 0, tail);
        }
    }
    catch (Exception ex)
    {
        // Best effort: keep returning what was received, but do not hide the failure.
        System.Diagnostics.Trace.WriteLine(string.Format("Get failed for '{0}': {1}", url, ex));
    }
    return responseText.ToString();
}
以上是用 Socket 以同步方式实现的;下面是异步方式的实现:
/// <summary>
/// Per-request state handed through the asynchronous socket callbacks: the socket,
/// the receive buffer, the accumulated response bytes and the completion handler.
/// </summary>
public class StateObject : IDisposable
{
    public Socket workSocket = null;
    public const int BufferSize = 256;
    public byte[] buffer = new byte[BufferSize];
    // Raw response bytes are accumulated here; decoding is left to the handler.
    public MemoryStream Stream = new MemoryStream();
    public string header = null;
    public static Encoding Encoding = Encoding.UTF8;
    public Action<Stream> HandAction;

    /// <summary>
    /// Closes the socket if it is still held. Safe to call more than once, and safe when the
    /// socket was never connected or has already been closed. The response stream is left
    /// open so a caller can still read it after disposing.
    /// </summary>
    public void Dispose()
    {
        Socket socket = workSocket;
        workSocket = null;
        if (socket == null)
        {
            return;
        }

        try
        {
            // Shutdown throws on a socket that is not connected, so only attempt it when connected.
            if (socket.Connected)
            {
                socket.Shutdown(SocketShutdown.Both);
            }
        }
        catch (SocketException)
        {
            // The peer already dropped the connection; closing below is all that is left to do.
        }
        catch (ObjectDisposedException)
        {
            // Already closed elsewhere.
        }
        finally
        {
            socket.Close();
        }
    }
}

/// <summary>
/// Minimal asynchronous TCP client: connects, sends one request, reads until the server
/// closes the connection, then passes the collected bytes to a handler.
/// </summary>
public class AsynchronousClient : IDisposable
{
    public StateObject State { set; get; }

    /// <summary>
    /// Starts connecting to <paramref name="ipPoint"/>; once connected, <paramref name="data"/>
    /// is sent and the response is collected.
    /// </summary>
    /// <param name="ipPoint">Remote endpoint to connect to.</param>
    /// <param name="data">Request text to send, encoded with <see cref="StateObject.Encoding"/>.</param>
    /// <param name="actionHandle">
    /// Invoked with the response stream after the server closes the connection. It is not
    /// invoked when the connection, send or receive fails; the failure is traced instead.
    /// </param>
    public void StartClient(IPEndPoint ipPoint, string data, Action<Stream> actionHandle)
    {
        Socket client = null;
        try
        {
            client = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp);
            StateObject state = new StateObject();
            state.workSocket = client;
            state.header = data;
            state.HandAction = actionHandle;
            State = state;
            client.BeginConnect(ipPoint, new AsyncCallback(ConnectCallback), state);
        }
        catch (Exception ex)
        {
            // Still does not throw to the caller, but the failure is reported and the socket released.
            Trace.WriteLine(string.Format("StartClient failed: {0}", ex));
            if (client != null)
            {
                client.Close();
            }
        }
    }

    // Completes the connect and starts sending the request.
    void ConnectCallback(IAsyncResult ar)
    {
        StateObject state = (StateObject)ar.AsyncState;
        Socket client = state.workSocket;
        if (client == null)
        {
            return; // disposed before the connect completed
        }

        try
        {
            client.EndConnect(ar);
            Trace.WriteLine(string.Format("Socket connected to {0}", client.RemoteEndPoint.ToString()));
            byte[] byteData = StateObject.Encoding.GetBytes(state.header);
            // Start sending the data to the remote device.
            client.BeginSend(byteData, 0, byteData.Length, 0, new AsyncCallback(SendCallback), state);
        }
        catch (Exception ex)
        {
            Abort(state, "connect", ex);
        }
    }

    // Completes the send and starts the first receive.
    void SendCallback(IAsyncResult ar)
    {
        StateObject state = (StateObject)ar.AsyncState;
        Socket client = state.workSocket;
        if (client == null)
        {
            return; // disposed before the send completed
        }

        try
        {
            // Finish sending the data.
            int bytesSent = client.EndSend(ar);
            Trace.WriteLine(string.Format("Sent {0} bytes to server.", bytesSent.ToString()));
            client.BeginReceive(state.buffer, 0, StateObject.BufferSize, 0, new AsyncCallback(ReceiveCallback), state);
        }
        catch (Exception ex)
        {
            Abort(state, "send", ex);
        }
    }

    // Stores each received chunk; when the server closes the connection, closes the socket
    // and hands the collected bytes to the handler.
    void ReceiveCallback(IAsyncResult ar)
    {
        StateObject state = (StateObject)ar.AsyncState;
        Socket client = state.workSocket;
        if (client == null)
        {
            return; // disposed before the receive completed
        }

        try
        {
            // Read data from the remote device.
            int bytesRead = client.EndReceive(ar);
            if (bytesRead > 0)
            {
                // Data arrived: store it and keep reading.
                state.Stream.Write(state.buffer, 0, bytesRead);
                client.BeginReceive(state.buffer, 0, StateObject.BufferSize, 0, new AsyncCallback(ReceiveCallback), state);
                return;
            }
        }
        catch (Exception ex)
        {
            Abort(state, "receive", ex);
            return;
        }

        // Zero bytes read: the server closed the connection, so the response is complete.
        state.Dispose();

        // Invoked outside the try block so that exceptions thrown by the handler are not
        // mistaken for (and swallowed as) socket errors.
        Action<Stream> handler = state.HandAction;
        if (handler != null)
        {
            handler(state.Stream);
        }
    }

    // Reports a failed stage and releases the socket.
    static void Abort(StateObject state, string stage, Exception ex)
    {
        Trace.WriteLine(string.Format("Socket {0} failed: {1}", stage, ex));
        state.Dispose();
    }

    /// <summary>Releases the socket of the current request, if any. Safe to call repeatedly.</summary>
    public void Dispose()
    {
        if (State != null)
        {
            State.Dispose();
        }
    }
}
调用方式:
// Example call: build an HTTP/1.0 GET request, send it asynchronously and decode the response.
// NOTE(review): 'hosts', 'url' and 'header' come from the surrounding code, which is not shown here.
IPEndPoint ipPoint = new IPEndPoint(hosts.AddressList[0], 80);
AsynchronousClient client = new AsynchronousClient();
string responseText = string.Empty;

StringBuilder bufRequest = new StringBuilder();
bufRequest.Append("GET ").Append(url).Append(" HTTP/1.0\r\n");
bufRequest.Append("Content-Type: application/x-www-form-urlencoded\r\n");
if (!string.IsNullOrEmpty(header))
{
    // Extra headers must come before the blank line that ends the header section;
    // anything after that blank line is not read as a header by the server.
    string extraHeaders = header.TrimEnd('\r', '\n');
    if (extraHeaders.Length > 0)
    {
        bufRequest.Append(extraHeaders).Append("\r\n");
    }
}
bufRequest.Append("\r\n");
string requestText = bufRequest.ToString();

client.StartClient(ipPoint, requestText, new Action<Stream>(x =>
{
    try
    {
        byte[] recvBytes = new byte[1024];
        // A stateful decoder keeps multi-byte UTF-8 characters intact when they
        // straddle two reads of the buffer.
        Decoder decoder = Encoding.UTF8.GetDecoder();
        char[] chars = new char[Encoding.UTF8.GetMaxCharCount(recvBytes.Length)];
        StringBuilder text = new StringBuilder();

        x.Seek(0, SeekOrigin.Begin);
        int bytes;
        while ((bytes = x.Read(recvBytes, 0, recvBytes.Length)) > 0)
        {
            int charCount = decoder.GetChars(recvBytes, 0, bytes, chars, 0, false);
            text.Append(chars, 0, charCount);
        }

        // Flush any bytes the decoder is still holding back.
        int tail = decoder.GetChars(recvBytes, 0, 0, chars, 0, true);
        text.Append(chars, 0, tail);

        responseText = text.ToString();
    }
    finally
    {
        // Release the client even if decoding fails.
        client.Dispose();
    }
}));
windows技术爱好者