下载优化

'提取试卷优化
Private Declare Function URLDownloadToFile Lib "urlmon" Alias "URLDownloadToFileA" (ByVal pCaller As Long, ByVal szURL As String, ByVal szFileName As String, ByVal dwReserved As Long, ByVal lpfnCB As Long) As Long
Private Declare Function DeleteUrlCacheEntry Lib "wininet" Alias "DeleteUrlCacheEntryA" (ByVal lpszUrlName As String) As Long
Sub DownloadImageName(ByVal ImageURL As String, ByVal ImagePath As String)
    Dim lngRetVal As Long
    lngRetVal = URLDownloadToFile(0, ImageURL, ImagePath, 0, 0)
    If lngRetVal = 0 Then
        DeleteUrlCacheEntry ImageURL  '清除缓存
        'MsgBox "成功"
    Else
        'MsgBox "失败"
    End If
End Sub

Sub LoopDownloadExam()
    Dim Wb As Workbook
    Dim Sht As Worksheet
    Set Wb = Application.ThisWorkbook
    Set Sht = Wb.ActiveSheet
    With Sht
        EndRow = .Cells(.Cells.Rows.Count, 2).End(xlUp).Row
        For i = 2 To EndRow
            If .Cells(i, 2).Text Like "http*" Then
                NewGetEaxmContent .Cells(i, 2).Text
            End If
        Next i
    End With
    Set Wb = Nothing
    Set Sht = Nothing
    
End Sub

Sub DownloadExam()
    Dim Rng As Range
    Set Rng = Application.ActiveCell
    If Rng.Text Like "http*" Then
        NewGetEaxmContent Rng.Text
    End If
    Set Rng = Nothing
End Sub




Sub NewGetEaxmContent(ByVal Url As String)
    Dim ContentCode As String
    Dim dPos As Object
    Set dPos = CreateObject("Scripting.Dictionary")
  
    'send request
    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", Url, False
        .Send
        WebText = .responsetext
        'Debug.Print WebText
        ' Stop
    End With
    
    
    With CreateObject("htmlfile")
        .write WebText
        Set examdiv = .getElementById("sina_keyword_ad_area2")
        
        Title = Replace(.getElementsByTagName("title")(0).innerText, "新浪博客", "")
        docPath = ThisWorkbook.Path & "\" & Title & ".doc"
        If Dir(docPath) <> "" Then
            MsgBox "该份试卷已经存在!"
            GoTo ErrorExit
        End If
        'Debug.Print Title
        ContentCode = Split(WebText, "sina_keyword_ad_area2")(1)
        ContentCode = Split(ContentCode, "正文结束")(0)
        ContentCode = Replace(ContentCode, Title, "")
        ContentCode = Replace(ContentCode, "宋体", "")
        ContentCode = Replace(ContentCode, "楷体", "")
        'Debug.Print ContentCode
        'http://s15.sinaimg.cn/mw690/001Eip7Fzy7iGylGjfg3e&690
        'http://s15.sinaimg.cn/mw690/001Eip7Fzy7iGylGjfg3e&690
        Open ThisWorkbook.Path & "\html.txt" For Output As #1     '生成CSV文件
        Print #1, ContentCode   '写入CSV的内容
        Close #1    '关闭文件句柄
        
        
        '获取试卷文本内容
        ExamText = examdiv.innerText
        
        'For Each oneP In examdiv.getElementsByTagName("p")
        'Debug.Print oneP.innerText
        'Next oneP
        
        imgIndex = 0
        For Each oneimg In examdiv.getElementsByTagName("img")
            imgIndex = imgIndex + 1
            imgUrl = oneimg.real_src
            imgPath = ThisWorkbook.Path & "\" & imgIndex & ".jpg"
            DownloadImageName imgUrl, imgPath
            sp = Split(imgUrl, "&")(0)
            Debug.Print sp
            Debug.Print InStr(ContentCode, sp)
            
            cnt = Split(ContentCode, sp)(1)
            spos = RegGet(cnt, "([\u4e00-\u9fa5]{5,})")
            dPos(spos) = imgPath
            Debug.Print spos
        Next oneimg
        
        
        
        
        '输出题目内容到Word文档
        Dim wdApp As Object
        Dim Doc As Object
        
        On Error Resume Next
        Set wdApp = GetObject(, "Word.Application")
        On Error GoTo 0
        If Not wdApp Is Nothing Then
            wdApp.Visible = True
            On Error Resume Next
            Set Doc = wdApp.Documents(docName)
            On Error GoTo 0
            If Doc Is Nothing Then
                Set Doc = wdApp.Documents.Add()
                Doc.SaveAs docPath
            End If
        Else
            Set wdApp = CreateObject("Word.Application")
            wdApp.Visible = True
            If Dir(docPath) <> "" Then
                Set Doc = wdApp.Documents.Open(docPath)
            Else
                Set Doc = wdApp.Documents.Add()
                Doc.SaveAs docPath
            End If
        End If
        
        Doc.Activate
        
        wdApp.Selection.homekey 6
        For Each oneP In examdiv.getElementsByTagName("p")
            pText = oneP.innerText
            
            For Each oneimg In dPos.keys
                If InStr(pText, oneimg) > 0 Then
                    ImagePath = dPos(oneimg)
                    '插入图片
                    wdApp.Selection.InlineShapes.AddPicture Filename:=ImagePath, LinkToFile:=False, SaveWithDocument:=True
                    wdApp.Selection.TypeParagraph
                    On Error Resume Next
                    Kill ImagePath
                    On Error GoTo 0
                    
                    
                    
                    Exit For
                End If
            Next oneimg
            
            wdApp.Selection.Typetext pText
            wdApp.Selection.TypeParagraph
            'Debug.Print oneP.innerText
        Next oneP
        Doc.Save
        Doc.Close True
        wdApp.Quit
    End With
    
ErrorExit:
    
    Set dPos = Nothing
    Set wdApp = Nothing
    Set Doc = Nothing
End Sub
Private Function RegTest(ByVal OrgText As String, ByVal Pattern As String) As Boolean
'传递参数 :原字符串, 匹配模式
    Dim Regex As Object
    Set Regex = CreateObject("VBScript.RegExp")
    With Regex
        .Global = True
        .Pattern = Pattern
    End With
    RegTest = Regex.test(OrgText)
    Set Regex = Nothing
End Function
Private Function RegGet(ByVal OrgText As String, ByVal Pattern As String) As String
'传递参数 :原字符串, 匹配模式
    Dim Regex As Object
    Dim Mh As Object
    Set Regex = CreateObject("VBScript.RegExp")
    With Regex
        .Global = True
        .Pattern = Pattern
    End With
    If Regex.test(OrgText) Then
        Set Mh = Regex.Execute(OrgText)
        RegGet = Mh.Item(0).submatches(0)
    Else
        RegGet = ""
    End If
    Set Regex = Nothing
End Function
Private Function RegReplace(ByVal OrgText As String, ByVal Pattern As String, Optional RepStr As String = "") As String
'传递参数 :原字符串, 匹配模式 ,替换字符
    Dim Regex As Object
    Dim newText As String
    Set Regex = CreateObject("VBScript.RegExp")
    With Regex
        .Global = True
        .Pattern = Pattern
    End With
    newText = Regex.Replace(OrgText, RepStr)
    RegReplace = newText
    Set Regex = Nothing
End Function
Private Function RegGetArray(ByVal OrgText As String, ByVal Pattern As String) As String()
    Dim Reg As Object, Mh As Object, OneMh As Object
    Dim Arr() As String, Index As Long
    Dim Elm As String
    Set Reg = CreateObject("Vbscript.Regexp")
    With Reg
        .MultiLine = True
        .Global = True
        .Ignorecase = False
        .Pattern = Pattern
        Set Mh = .Execute(OrgText)
        
        Index = 0
        ReDim Arr(1 To 1)
        For Each OneMh In Mh
            Index = Index + 1
            ReDim Preserve Arr(1 To Index)
            'If OneMh.submatches(0) <> "" Then Elm = OneMh.submatches(0)
            Arr(Index) = OneMh.submatches(0)
            'Debug.Print OneMh.submatches(0)
        Next OneMh
    End With
    RegGetArray = Arr
    Set Reg = Nothing
    Set Mh = Nothing
End Function

Private Function RegGetLast(ByVal OrgText As String, ByVal Pattern As String) As String
'传递参数 :原字符串, 匹配模式
    Dim Regex As Object
    Dim Mh As Object
    Set Regex = CreateObject("VBScript.RegExp")
    With Regex
        .Global = True
        .Pattern = Pattern
    End With
    If Regex.test(OrgText) Then
        Set Mh = Regex.Execute(OrgText)
        'RegGetLast = Mh.Item(0).submatches(0)
        For Each OneMh In Mh
            RegGetLast = OneMh.submatches(0)
        Next OneMh
    Else
        RegGetLast = ""
    End If
    Set Regex = Nothing
End Function

  

posted @ 2018-03-12 07:35  wangway  阅读(237)  评论(0编辑  收藏  举报