使用socket获取网页html
Imports System.Text.RegularExpressions
Imports System.Text
Imports System.Net.Sockets

''' <summary>
''' Console demo that downloads a web page by speaking plain HTTP/1.1 over a raw TCP socket.
''' </summary>
Module Module1

    ''' <summary>
    ''' Entry point: requests www.baidu.com, prints the raw response (status line, headers and
    ''' body) decoded as UTF-8, then waits for a key press.
    ''' </summary>
    Sub Main()
        Console.WriteLine(HtmlHelp.Get("www.baidu.com", System.Text.Encoding.UTF8))
        Console.ReadKey()
    End Sub

    ''' <summary>
    ''' Minimal HTTP client built directly on <see cref="Socket"/>. Only unencrypted
    ''' "http://" URLs are understood; the returned text is the complete raw response,
    ''' headers included.
    ''' </summary>
    Public Class HtmlHelp

        ''' <summary>Port used when the URL does not name a valid one.</summary>
        Private Const DefaultPort As Integer = 80

        ''' <summary>Receive timeout, in milliseconds.</summary>
        Private Const ReceiveTimeoutMs As Integer = 5000

        ''' <summary>Size of the receive buffer, in bytes.</summary>
        Private Const ReceiveBufferSize As Integer = 1024

        ''' <summary>The only URL scheme prefix that is recognised and stripped.</summary>
        Private Const SchemePrefix As String = "http://"

        ''' <summary>
        ''' Sends a request and collects the response.
        ''' </summary>
        ''' <param name="host">Host name or IPv4 address to connect to.</param>
        ''' <param name="port">TCP port to connect to.</param>
        ''' <param name="body">Complete request text (request line, headers and optional payload); sent as ASCII.</param>
        ''' <param name="encode">Encoding used to decode the received bytes.</param>
        ''' <returns>
        ''' Everything received before the peer closed the connection or an error occurred.
        ''' Errors are written to the console and whatever was received so far is returned.
        ''' </returns>
        Private Shared Function GetResponse(ByVal host As String, ByVal port As Integer, ByVal body As String, ByVal encode As Encoding) As String
            Dim response As New StringBuilder()
            Dim requestBytes As Byte() = Encoding.ASCII.GetBytes(body)
            Dim buffer As Byte() = New Byte(ReceiveBufferSize - 1) {}

            ' A stateful decoder carries the bytes of a multi-byte character that is split
            ' across two receive calls; decoding each chunk independently would corrupt it.
            Dim chunkDecoder As Decoder = encode.GetDecoder()
            Dim chars As Char() = New Char(encode.GetMaxCharCount(buffer.Length) - 1) {}

            Using client As New Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp)
                Try
                    client.ReceiveTimeout = ReceiveTimeoutMs
                    client.Connect(host, port)

                    If client.Connected Then
                        client.Send(requestBytes, requestBytes.Length, SocketFlags.None)

                        ' Receive returns 0 once the peer has closed the connection.
                        Dim received As Integer = client.Receive(buffer, buffer.Length, SocketFlags.None)
                        While received > 0
                            Dim charCount As Integer = chunkDecoder.GetChars(buffer, 0, received, chars, 0, False)
                            response.Append(chars, 0, charCount)
                            received = client.Receive(buffer, buffer.Length, SocketFlags.None)
                        End While

                        ' Flush so that a truncated trailing sequence is still reported.
                        Dim tailCount As Integer = chunkDecoder.GetChars(buffer, 0, 0, chars, 0, True)
                        response.Append(chars, 0, tailCount)
                    End If
                Catch ex As Exception
                    ' Best effort: report the problem and fall through so the caller still
                    ' gets whatever part of the response was already received.
                    Console.WriteLine(ex.Message)
                End Try
            End Using ' Disposing the socket also closes it.

            Return response.ToString()
        End Function

        ''' <summary>
        ''' Parses a URL into host, port, path and query string.
        ''' </summary>
        ''' <param name="url">
        ''' URL of the form [http://]host[:port][/path][?query]. The scheme prefix is optional.
        ''' </param>
        ''' <returns>
        ''' The parsed parts. Missing parts keep their defaults: empty host, port 80,
        ''' path "/" and empty query. A missing, malformed or out-of-range port also
        ''' leaves the port at 80.
        ''' </returns>
        Private Shared Function ParseURL(ByVal url As String) As UrlInfo
            Dim info As New UrlInfo()
            info.Host = ""
            info.Port = DefaultPort
            info.File = "/"
            info.Body = ""

            If String.IsNullOrEmpty(url) Then
                Return info
            End If

            ' Strip the scheme only when it really is a prefix of the URL.
            Dim remainder As String = url
            If remainder.StartsWith(SchemePrefix, StringComparison.OrdinalIgnoreCase) Then
                remainder = remainder.Substring(SchemePrefix.Length)
            End If

            ' The authority (host[:port]) ends at the first "/" or "?".
            Dim authority As String = remainder
            Dim pathAndQuery As String = ""
            Dim authorityEnd As Integer = remainder.IndexOfAny(New Char() {"/"c, "?"c})
            If authorityEnd <> -1 Then
                authority = remainder.Substring(0, authorityEnd)
                pathAndQuery = remainder.Substring(authorityEnd)
            End If

            Dim portSeparator As Integer = authority.IndexOf(":"c)
            If portSeparator = -1 Then
                info.Host = authority
            Else
                info.Host = authority.Substring(0, portSeparator)

                ' Parse into a temporary: TryParse zeroes its target on failure, which
                ' would otherwise wipe out the default port.
                Dim parsedPort As Integer
                If Integer.TryParse(authority.Substring(portSeparator + 1), System.Globalization.NumberStyles.None, System.Globalization.CultureInfo.InvariantCulture, parsedPort) AndAlso parsedPort > 0 AndAlso parsedPort <= 65535 Then
                    info.Port = parsedPort
                End If
            End If

            ' Everything after the first "?" is the query, even if it contains further "?".
            Dim path As String = pathAndQuery
            Dim queryStart As Integer = pathAndQuery.IndexOf("?"c)
            If queryStart <> -1 Then
                path = pathAndQuery.Substring(0, queryStart)
                info.Body = pathAndQuery.Substring(queryStart + 1)
            End If
            If path.Length > 0 Then
                info.File = path
            End If

            Return info
        End Function

        ''' <summary>
        ''' Performs an HTTP GET request.
        ''' </summary>
        ''' <param name="url">URL to fetch; the "http://" prefix is optional.</param>
        ''' <param name="encode">Encoding used to decode the response.</param>
        ''' <returns>The raw response text (headers and body), or what was received before an error.</returns>
        Public Shared Function [Get](ByVal url As String, ByVal encode As Encoding) As String
            Dim target As UrlInfo = ParseURL(url)

            ' Only append "?" when there is a query, to avoid a dangling "?" in the request line.
            Dim requestTarget As String = target.File
            If target.Body.Length > 0 Then
                requestTarget &= "?" & target.Body
            End If

            Dim request As String = String.Format("GET {0} HTTP/1.1" & vbCrLf & "Host:{1}:{2}" & vbCrLf & "Connection:Close" & vbCrLf & vbCrLf, requestTarget, target.Host, target.Port.ToString())
            Return GetResponse(target.Host, target.Port, request, encode)
        End Function

        ''' <summary>
        ''' Performs an HTTP POST request; the URL's query string is sent as the
        ''' application/x-www-form-urlencoded payload.
        ''' </summary>
        ''' <param name="url">URL to post to; the "http://" prefix is optional.</param>
        ''' <param name="encode">Encoding used to decode the response.</param>
        ''' <returns>The raw response text (headers and body), or what was received before an error.</returns>
        Public Shared Function Post(ByVal url As String, ByVal encode As Encoding) As String
            Dim target As UrlInfo = ParseURL(url)

            ' Content-Length counts bytes on the wire; the request is sent as ASCII.
            Dim contentLength As Integer = Encoding.ASCII.GetByteCount(target.Body)

            Dim request As String = String.Format("POST {0} HTTP/1.1" & vbCrLf & "Host:{1}:{2}" & vbCrLf & "Content-Length:{3}" & vbCrLf & "Content-Type:application/x-www-form-urlencoded" & vbCrLf & "Connection:Close" & vbCrLf & vbCrLf & "{4}", target.File, target.Host, target.Port.ToString(), contentLength, target.Body)
            Return GetResponse(target.Host, target.Port, request, encode)
        End Function

        ''' <summary>
        ''' Parts of a parsed URL.
        ''' </summary>
        Private Structure UrlInfo
            ''' <summary>Host name or address, without the port.</summary>
            Public Host As String
            ''' <summary>TCP port.</summary>
            Public Port As Integer
            ''' <summary>Path part of the URL, starting with "/".</summary>
            Public File As String
            ''' <summary>Query string without the leading "?"; used as the POST payload.</summary>
            Public Body As String
        End Structure
    End Class
End Module