GetContent

Sub GetContent(ByVal URL As String, ByVal SheetName As String)
    Dim strText As String
    Dim i As Long
    Dim OneSpan
    Dim IsContent As Boolean

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", URL, False
        .Send
        strText = .responsetext
    End With

    Dim arr() As String
    ReDim arr(1 To 1) As String

    With CreateObject("htmlfile")
        .write strText
        i = 0
        For Each OneSpan In .getElementsByTagName("span")
            s = RegReplace(OneSpan.innerhtml, "<.*?>")
            s = Replace(s, " ", "")

            If s = " 排行榜" Then IsContent = False
            If IsContent Then
                i = i + 1
                ReDim Preserve arr(1 To i)
                arr(i) = s
                'Debug.Print s
                'If i = 100 Then Exit For
            End If
            If s = "分类:" Then IsContent = True

        Next
    End With

    Dim brr() As String
    ReDim brr(1 To 1)

    brr(1) = arr(1)
    M = 1
    For n = 2 To i
        If RegTest(arr(n - 1), "[A-D].") Or RegTest(arr(n - 1), "^\d*?.??$") Then
            brr(M) = brr(M) & arr(n)
        Else
            M = M + 1
            ReDim Preserve brr(1 To M)
            brr(M) = arr(n)
        End If
    Next n

    For i = 1 To M
        'Debug.Print brr(i)
        If i = 150 Then Exit For
    Next i


    Set sht = AddWorksheet(ThisWorkbook, SheetName)

    With sht
        .Cells.ClearContents
        .Range("A1:A1").Value = Array("内容")
        .Range("A2").Resize(M, 1).Value = _
        Application.WorksheetFunction.Transpose(brr)
    End With




End Sub

Sub TestRegReplace()
    s = "215MY"
    s = RegReplace(s, "[A-Z]")
    Debug.Print s
End Sub
Public Function RegReplace(ByVal OrgText As String, ByVal Pattern As String, Optional RepStr As String = "") As String
'传递参数 :原字符串, 匹配模式 ,替换字符
    Dim Regex As Object
    Dim newText As String
    Set Regex = CreateObject("VBScript.RegExp")
    With Regex
        .Global = True
        .Pattern = Pattern
    End With
    newText = Regex.Replace(OrgText, RepStr)
    RegReplace = newText
    Set Regex = Nothing
End Function

Public Function RegTest(ByVal OrgText As String, ByVal Pattern As String) As Boolean
'传递参数 :原字符串, 匹配模式
    Dim Regex As Object
    Set Regex = CreateObject("VBScript.RegExp")
    With Regex
        .Global = True
        .Pattern = Pattern
    End With
    RegTest = Regex.TEST(OrgText)
    Set Regex = Nothing
End Function

Sub dd()
    Debug.Print RegTest("13.", "^\d+?.$")
End Sub

Function AddWorksheet(ByVal Wb As Workbook, ByVal ShtName As String, Optional ReplaceSymbol As Boolean = True) As Worksheet
    Dim sht As Worksheet
    If Len(ShtName) = 0 Or Len(ShtName) > 31 Then
        Set AddWorksheet = Nothing
        MsgBox "Worksheet名称长度不符!", vbInformation, "AddWorksheet"
        Exit Function
    Else
        On Error Resume Next
        Set sht = Wb.Worksheets(ShtName)
        If Err.Number = 9 Then
            Set sht = Wb.Worksheets.Add(After:=Wb.Worksheets(Wb.Worksheets.Count))
            Err.Clear
            On Error GoTo 0
            On Error Resume Next
            sht.Name = ShtName
            If Err.Number = 1004 Then
                Err.Clear
                On Error GoTo 0
                If ReplaceSymbol Then
                    arr = Array("/", "\", "?", "*", "[", "]")
                    For i = LBound(arr) To UBound(arr)
                        ShtName = Replace(ShtName, arr(i), "")
                    Next i
                    Set AddWorksheet = AddWorksheet(Wb, ShtName)    '再次调用
                Else
                    Set AddWorksheet = Nothing
                    MsgBox "Worksheet名称含有特殊符号!", vbInformation, "AddWorksheet"
                End If
            Else
                Set AddWorksheet = sht
            End If
        ElseIf Err.Number = 0 Then
            Set AddWorksheet = sht
        End If
    End If
End Function

  

posted @ 2017-08-05 22:27  wangway  阅读(1058)  评论(0编辑  收藏  举报