GetTitleAndUrl

Sub GetTitleAndUrl()
    Dim strText As String
    Dim i As Long
    Dim OneA
    Dim IsContent As Boolean
    Dim PageIndex As Long
    Dim URL As String
    For PageIndex = 1 To 10
        URL = "http://blog.sina.com.cn/s/articlelist_1511572751_0_" & PageIndex & ".html"
        
        With CreateObject("MSXML2.XMLHTTP")
            .Open "GET", URL, False
            .Send
            strText = .responsetext
        End With
        
        Dim arr() As String
        ReDim arr(1 To 2, 1 To 1) As String
        
        With CreateObject("htmlfile")
            .write strText
            i = 0
            For Each OneA In .getElementsByTagName("a")
                
                s = OneA.href
                
                
                If s Like "*http://blog.sina.com.cn/s/blog_*" Then
                    
                    i = i + 1
                    ReDim Preserve arr(1 To 2, 1 To i)
                    
                    arr(1, i) = OneA.innerhtml
                    arr(2, i) = s
                    
                End If
            Next
        End With
        
        With Sheets("标题")
            endrow = .Cells(.Cells.Rows.Count, 1).End(xlUp).Row + 1
            Set Rng = .Cells(endrow, 1)
            Set Rng = Rng.Resize(UBound(arr, 2), UBound(arr))
            Rng.Value = Application.WorksheetFunction.Transpose(arr)
        End With
        
        
    Next PageIndex
    
End Sub

Sub TestRegReplace()
    s = "215MY"
    s = RegReplace(s, "[A-Z]")
    Debug.Print s
End Sub
Public Function RegReplace(ByVal OrgText As String, ByVal Pattern As String, Optional RepStr As String = "") As String
'传递参数 :原字符串, 匹配模式 ,替换字符
    Dim Regex As Object
    Dim newText As String
    Set Regex = CreateObject("VBScript.RegExp")
    With Regex
        .Global = True
        .Pattern = Pattern
    End With
    newText = Regex.Replace(OrgText, RepStr)
    RegReplace = newText
    Set Regex = Nothing
End Function

Public Function RegTest(ByVal OrgText As String, ByVal Pattern As String) As Boolean
'传递参数 :原字符串, 匹配模式
    Dim Regex As Object
    Set Regex = CreateObject("VBScript.RegExp")
    With Regex
        .Global = True
        .Pattern = Pattern
    End With
    RegTest = Regex.TEST(OrgText)
    Set Regex = Nothing
End Function

Sub dd()
      Debug.Print RegTest("13.", "^\d+?.$")
End Sub

  

posted @ 2017-08-05 22:25  wangway  阅读(306)  评论(0编辑  收藏  举报