下载优化
'提取试卷优化 Private Declare Function URLDownloadToFile Lib "urlmon" Alias "URLDownloadToFileA" (ByVal pCaller As Long, ByVal szURL As String, ByVal szFileName As String, ByVal dwReserved As Long, ByVal lpfnCB As Long) As Long Private Declare Function DeleteUrlCacheEntry Lib "wininet" Alias "DeleteUrlCacheEntryA" (ByVal lpszUrlName As String) As Long Sub DownloadImageName(ByVal ImageURL As String, ByVal ImagePath As String) Dim lngRetVal As Long lngRetVal = URLDownloadToFile(0, ImageURL, ImagePath, 0, 0) If lngRetVal = 0 Then DeleteUrlCacheEntry ImageURL '清除缓存 'MsgBox "成功" Else 'MsgBox "失败" End If End Sub Sub LoopDownloadExam() Dim Wb As Workbook Dim Sht As Worksheet Set Wb = Application.ThisWorkbook Set Sht = Wb.ActiveSheet With Sht EndRow = .Cells(.Cells.Rows.Count, 2).End(xlUp).Row For i = 2 To EndRow If .Cells(i, 2).Text Like "http*" Then NewGetEaxmContent .Cells(i, 2).Text End If Next i End With Set Wb = Nothing Set Sht = Nothing End Sub Sub DownloadExam() Dim Rng As Range Set Rng = Application.ActiveCell If Rng.Text Like "http*" Then NewGetEaxmContent Rng.Text End If Set Rng = Nothing End Sub Sub NewGetEaxmContent(ByVal Url As String) Dim ContentCode As String Dim dPos As Object Set dPos = CreateObject("Scripting.Dictionary") 'send request With CreateObject("MSXML2.XMLHTTP") .Open "GET", Url, False .Send WebText = .responsetext 'Debug.Print WebText ' Stop End With With CreateObject("htmlfile") .write WebText Set examdiv = .getElementById("sina_keyword_ad_area2") Title = Replace(.getElementsByTagName("title")(0).innerText, "新浪博客", "") docPath = ThisWorkbook.Path & "\" & Title & ".doc" If Dir(docPath) <> "" Then MsgBox "该份试卷已经存在!" GoTo ErrorExit End If 'Debug.Print Title ContentCode = Split(WebText, "sina_keyword_ad_area2")(1) ContentCode = Split(ContentCode, "正文结束")(0) ContentCode = Replace(ContentCode, Title, "") ContentCode = Replace(ContentCode, "宋体", "") ContentCode = Replace(ContentCode, "楷体", "") 'Debug.Print ContentCode 'http://s15.sinaimg.cn/mw690/001Eip7Fzy7iGylGjfg3e&690 'http://s15.sinaimg.cn/mw690/001Eip7Fzy7iGylGjfg3e&690 Open ThisWorkbook.Path & "\html.txt" For Output As #1 '生成CSV文件 Print #1, ContentCode '写入CSV的内容 Close #1 '关闭文件句柄 '获取试卷文本内容 ExamText = examdiv.innerText 'For Each oneP In examdiv.getElementsByTagName("p") 'Debug.Print oneP.innerText 'Next oneP imgIndex = 0 For Each oneimg In examdiv.getElementsByTagName("img") imgIndex = imgIndex + 1 imgUrl = oneimg.real_src imgPath = ThisWorkbook.Path & "\" & imgIndex & ".jpg" DownloadImageName imgUrl, imgPath sp = Split(imgUrl, "&")(0) Debug.Print sp Debug.Print InStr(ContentCode, sp) cnt = Split(ContentCode, sp)(1) spos = RegGet(cnt, "([\u4e00-\u9fa5]{5,})") dPos(spos) = imgPath Debug.Print spos Next oneimg '输出题目内容到Word文档 Dim wdApp As Object Dim Doc As Object On Error Resume Next Set wdApp = GetObject(, "Word.Application") On Error GoTo 0 If Not wdApp Is Nothing Then wdApp.Visible = True On Error Resume Next Set Doc = wdApp.Documents(docName) On Error GoTo 0 If Doc Is Nothing Then Set Doc = wdApp.Documents.Add() Doc.SaveAs docPath End If Else Set wdApp = CreateObject("Word.Application") wdApp.Visible = True If Dir(docPath) <> "" Then Set Doc = wdApp.Documents.Open(docPath) Else Set Doc = wdApp.Documents.Add() Doc.SaveAs docPath End If End If Doc.Activate wdApp.Selection.homekey 6 For Each oneP In examdiv.getElementsByTagName("p") pText = oneP.innerText For Each oneimg In dPos.keys If InStr(pText, oneimg) > 0 Then ImagePath = dPos(oneimg) '插入图片 wdApp.Selection.InlineShapes.AddPicture Filename:=ImagePath, LinkToFile:=False, SaveWithDocument:=True wdApp.Selection.TypeParagraph On Error Resume Next Kill ImagePath On Error GoTo 0 Exit For End If Next oneimg wdApp.Selection.Typetext pText wdApp.Selection.TypeParagraph 'Debug.Print oneP.innerText Next oneP Doc.Save Doc.Close True wdApp.Quit End With ErrorExit: Set dPos = Nothing Set wdApp = Nothing Set Doc = Nothing End Sub Private Function RegTest(ByVal OrgText As String, ByVal Pattern As String) As Boolean '传递参数 :原字符串, 匹配模式 Dim Regex As Object Set Regex = CreateObject("VBScript.RegExp") With Regex .Global = True .Pattern = Pattern End With RegTest = Regex.test(OrgText) Set Regex = Nothing End Function Private Function RegGet(ByVal OrgText As String, ByVal Pattern As String) As String '传递参数 :原字符串, 匹配模式 Dim Regex As Object Dim Mh As Object Set Regex = CreateObject("VBScript.RegExp") With Regex .Global = True .Pattern = Pattern End With If Regex.test(OrgText) Then Set Mh = Regex.Execute(OrgText) RegGet = Mh.Item(0).submatches(0) Else RegGet = "" End If Set Regex = Nothing End Function Private Function RegReplace(ByVal OrgText As String, ByVal Pattern As String, Optional RepStr As String = "") As String '传递参数 :原字符串, 匹配模式 ,替换字符 Dim Regex As Object Dim newText As String Set Regex = CreateObject("VBScript.RegExp") With Regex .Global = True .Pattern = Pattern End With newText = Regex.Replace(OrgText, RepStr) RegReplace = newText Set Regex = Nothing End Function Private Function RegGetArray(ByVal OrgText As String, ByVal Pattern As String) As String() Dim Reg As Object, Mh As Object, OneMh As Object Dim Arr() As String, Index As Long Dim Elm As String Set Reg = CreateObject("Vbscript.Regexp") With Reg .MultiLine = True .Global = True .Ignorecase = False .Pattern = Pattern Set Mh = .Execute(OrgText) Index = 0 ReDim Arr(1 To 1) For Each OneMh In Mh Index = Index + 1 ReDim Preserve Arr(1 To Index) 'If OneMh.submatches(0) <> "" Then Elm = OneMh.submatches(0) Arr(Index) = OneMh.submatches(0) 'Debug.Print OneMh.submatches(0) Next OneMh End With RegGetArray = Arr Set Reg = Nothing Set Mh = Nothing End Function Private Function RegGetLast(ByVal OrgText As String, ByVal Pattern As String) As String '传递参数 :原字符串, 匹配模式 Dim Regex As Object Dim Mh As Object Set Regex = CreateObject("VBScript.RegExp") With Regex .Global = True .Pattern = Pattern End With If Regex.test(OrgText) Then Set Mh = Regex.Execute(OrgText) 'RegGetLast = Mh.Item(0).submatches(0) For Each OneMh In Mh RegGetLast = OneMh.submatches(0) Next OneMh Else RegGetLast = "" End If Set Regex = Nothing End Function