使用 Socket 获取网页 HTML

Imports System.Text.RegularExpressions
Imports System.Text
Imports System.Net.Sockets
Module Module1

    ''' <summary>
    ''' Demo entry point: fetches a page over a raw socket, prints the server's
    ''' response to the console and waits for a key press before exiting.
    ''' </summary>
    Sub Main()
        ' The URL must carry the "http://" scheme: the URL parser only extracts a
        ' host name from URLs of that form, so a bare "www.baidu.com" would leave
        ' the host empty and the request would never reach the intended server.
        Console.WriteLine(HtmlHelp.Get("http://www.baidu.com", System.Text.Encoding.UTF8))
        Console.ReadKey()
    End Sub

    ''' <summary>
    ''' Minimal HTTP/1.1 client that talks to the server over a raw TCP socket.
    ''' Only plain <c>http://</c> URLs are handled (no TLS). The strings returned by
    ''' <c>Get</c> and <c>Post</c> contain everything the server sent - status line,
    ''' headers and body - exactly as received; nothing is stripped or post-processed.
    ''' </summary>
    Public Class HtmlHelp
        ''' <summary>Port used when the URL does not name one.</summary>
        Private Const DefaultPort As Integer = 80

        ''' <summary>Receive timeout in milliseconds.</summary>
        Private Const ReceiveTimeoutMs As Integer = 5000

        ''' <summary>Size in bytes of the buffer used for each socket read.</summary>
        Private Const ReceiveBufferSize As Integer = 1024

        ''' <summary>The only URL scheme this client understands.</summary>
        Private Const HttpScheme As String = "http://"

        ''' <summary>
        ''' Sends a raw request and collects the response until the server closes
        ''' the connection.
        ''' </summary>
        ''' <param name="host">Host name or IP address to connect to.</param>
        ''' <param name="port">TCP port to connect to.</param>
        ''' <param name="body">Complete request text (request line, headers, optional payload). It is sent as ASCII.</param>
        ''' <param name="encode">Encoding used to decode the response bytes.</param>
        ''' <returns>
        ''' The decoded response. If an error occurs, its message is written to the
        ''' console and whatever was received up to that point (possibly an empty
        ''' string) is returned; no exception is propagated to the caller.
        ''' </returns>
        Private Shared Function GetResponse(ByVal host As String, ByVal port As Integer, ByVal body As String, ByVal encode As Encoding) As String
            ' An empty host must never reach Connect: name resolution may map an
            ' empty name to the local machine, so a malformed URL could silently
            ' end up talking to localhost instead of failing.
            If String.IsNullOrEmpty(host) Then
                Console.WriteLine("Invalid URL: no host name could be determined.")
                Return String.Empty
            End If

            Dim response As New StringBuilder()

            Using client As New Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp)
                Try
                    Dim requestBytes As Byte() = Encoding.ASCII.GetBytes(body)
                    Dim buffer As Byte() = New Byte(ReceiveBufferSize - 1) {}

                    ' A stateful decoder keeps the trailing bytes of an incomplete
                    ' multi-byte character until the next read delivers the rest.
                    ' Decoding every chunk independently would corrupt characters
                    ' that straddle two reads.
                    Dim chunkDecoder As Decoder = encode.GetDecoder()
                    Dim chars As Char() = New Char(encode.GetMaxCharCount(buffer.Length) - 1) {}

                    client.ReceiveTimeout = ReceiveTimeoutMs
                    client.Connect(host, port) ' throws when the connection cannot be established
                    client.Send(requestBytes, requestBytes.Length, SocketFlags.None)

                    ' The request asks for "Connection: Close", so a read of zero
                    ' bytes (server closed the connection) marks the end of the response.
                    Dim byteCount As Integer = client.Receive(buffer, buffer.Length, SocketFlags.None)
                    While byteCount > 0
                        Dim charCount As Integer = chunkDecoder.GetChars(buffer, 0, byteCount, chars, 0)
                        response.Append(chars, 0, charCount)
                        byteCount = client.Receive(buffer, buffer.Length, SocketFlags.None)
                    End While
                Catch ex As Exception
                    ' Deliberately best-effort: report the problem and fall through
                    ' so the caller still gets the data received so far.
                    Console.WriteLine(ex.Message)
                End Try
            End Using

            Return response.ToString()
        End Function

        ''' <summary>
        ''' Splits a URL into host, port, path and query string.
        ''' </summary>
        ''' <param name="url">
        ''' URL of the form <c>[http://]host[:port][/path][?query][#fragment]</c>.
        ''' The scheme is optional and matched case-insensitively; a fragment is discarded.
        ''' </param>
        ''' <returns>
        ''' The parsed parts. Defaults are port 80, path "/" and an empty query.
        ''' <c>Host</c> is left empty when <paramref name="url"/> is Nothing or uses
        ''' a scheme other than http.
        ''' </returns>
        Private Shared Function ParseURL(ByVal url As String) As UrlInfo
            Dim info As New UrlInfo()
            info.Host = ""
            info.Port = DefaultPort
            info.File = "/"
            info.Body = ""

            If url Is Nothing Then
                Return info
            End If

            Dim rest As String = url.Trim()

            If rest.StartsWith(HttpScheme, StringComparison.OrdinalIgnoreCase) Then
                rest = rest.Substring(HttpScheme.Length)
            Else
                ' "scheme://" at the very front (the first "/" or "?" of the text is
                ' the one inside "://") means some other protocol, which a plain
                ' socket cannot speak. A "://" further along is just part of the
                ' path or query of a scheme-less URL.
                Dim schemeEnd As Integer = rest.IndexOf("://", StringComparison.Ordinal)
                If schemeEnd <> -1 AndAlso rest.IndexOfAny(New Char() {"/"c, "?"c}) = schemeEnd + 1 Then
                    Return info
                End If
            End If

            ' The fragment is a client-side concept and is never sent to the server.
            Dim fragmentStart As Integer = rest.IndexOf("#"c)
            If fragmentStart <> -1 Then
                rest = rest.Substring(0, fragmentStart)
            End If

            ' Everything before the first "/" or "?" is "host[:port]".
            Dim authority As String = rest
            Dim target As String = ""
            Dim targetStart As Integer = rest.IndexOfAny(New Char() {"/"c, "?"c})
            If targetStart <> -1 Then
                authority = rest.Substring(0, targetStart)
                target = rest.Substring(targetStart)
            End If

            Dim portSeparator As Integer = authority.IndexOf(":"c)
            If portSeparator = -1 Then
                info.Host = authority
            Else
                info.Host = authority.Substring(0, portSeparator)

                ' Parse into a temporary: TryParse zeroes its output on failure,
                ' which would otherwise wipe out the default port.
                Dim parsedPort As Integer
                If Integer.TryParse(authority.Substring(portSeparator + 1), _
                                    System.Globalization.NumberStyles.None, _
                                    System.Globalization.CultureInfo.InvariantCulture, _
                                    parsedPort) AndAlso parsedPort > 0 AndAlso parsedPort <= 65535 Then
                    info.Port = parsedPort
                End If
            End If

            ' Split at the FIRST "?" only, so a query containing further "?"
            ' characters is kept intact.
            Dim queryStart As Integer = target.IndexOf("?"c)
            If queryStart = -1 Then
                If target.Length > 0 Then
                    info.File = target
                End If
            Else
                If queryStart > 0 Then
                    info.File = target.Substring(0, queryStart)
                End If
                info.Body = target.Substring(queryStart + 1)
            End If

            Return info
        End Function

        ''' <summary>
        ''' Builds the value of the Host header: the host name, followed by
        ''' ":port" only when the port differs from the default.
        ''' </summary>
        Private Shared Function FormatHostHeader(ByVal info As UrlInfo) As String
            If info.Port = DefaultPort Then
                Return info.Host
            End If
            Return info.Host & ":" & info.Port.ToString(System.Globalization.CultureInfo.InvariantCulture)
        End Function

        ''' <summary>
        ''' Issues a GET request for <paramref name="url"/>.
        ''' </summary>
        ''' <param name="url">URL to request (plain http only).</param>
        ''' <param name="encode">Encoding used to decode the response.</param>
        ''' <returns>The raw response text (status line, headers and body), or whatever was received before an error.</returns>
        Public Shared Function [Get](ByVal url As String, ByVal encode As Encoding) As String
            Dim info As UrlInfo = ParseURL(url)

            ' Append "?query" only when there is a query, so the request line
            ' never ends in a dangling "?".
            Dim target As String = info.File
            If info.Body.Length > 0 Then
                target = target & "?" & info.Body
            End If

            Dim request As String = _
                "GET " & target & " HTTP/1.1" & vbCrLf & _
                "Host: " & FormatHostHeader(info) & vbCrLf & _
                "Connection: Close" & vbCrLf & _
                vbCrLf

            Return GetResponse(info.Host, info.Port, request, encode)
        End Function

        ''' <summary>
        ''' Issues a POST request to the path of <paramref name="url"/>, sending the
        ''' URL's query string as an <c>application/x-www-form-urlencoded</c> payload.
        ''' </summary>
        ''' <param name="url">URL to post to; the part after "?" becomes the request payload.</param>
        ''' <param name="encode">Encoding used to decode the response.</param>
        ''' <returns>The raw response text (status line, headers and body), or whatever was received before an error.</returns>
        Public Shared Function Post(ByVal url As String, ByVal encode As Encoding) As String
            Dim info As UrlInfo = ParseURL(url)

            ' Content-Length is a byte count; measure it with the same encoding
            ' that is used to put the request on the wire.
            Dim contentLength As Integer = Encoding.ASCII.GetByteCount(info.Body)

            Dim request As String = _
                "POST " & info.File & " HTTP/1.1" & vbCrLf & _
                "Host: " & FormatHostHeader(info) & vbCrLf & _
                "Content-Length: " & contentLength.ToString(System.Globalization.CultureInfo.InvariantCulture) & vbCrLf & _
                "Content-Type: application/x-www-form-urlencoded" & vbCrLf & _
                "Connection: Close" & vbCrLf & _
                vbCrLf & _
                info.Body

            Return GetResponse(info.Host, info.Port, request, encode)
        End Function

        ''' <summary>
        ''' Parts of a parsed URL.
        ''' </summary>
        Private Structure UrlInfo
            ''' <summary>Host name or IP address; empty when the URL could not be parsed.</summary>
            Public Host As String
            ''' <summary>TCP port.</summary>
            Public Port As Integer
            ''' <summary>Path component, always starting with "/".</summary>
            Public File As String
            ''' <summary>Query string without the leading "?"; used as the payload for POST.</summary>
            Public Body As String
        End Structure

    End Class

End Module

  

posted on 2017-07-21 16:57  小白兔与小灰兔  阅读(298)  评论(0)  编辑  收藏  举报