GetTextAndImageCreateExamPaper
Private Declare Function URLDownloadToFile Lib "urlmon" Alias "URLDownloadToFileA" (ByVal pCaller As Long, ByVal szURL As String, ByVal szFileName As String, ByVal dwReserved As Long, ByVal lpfnCB As Long) As Long Private Declare Function DeleteUrlCacheEntry Lib "wininet" Alias "DeleteUrlCacheEntryA" (ByVal lpszUrlName As String) As Long '下载网络图片 Sub DownloadImageName(ByVal ImageURL As String, ByVal ImagePath As String) Dim lngRetVal As Long lngRetVal = URLDownloadToFile(0, ImageURL, ImagePath, 0, 0) If lngRetVal = 0 Then DeleteUrlCacheEntry ImageURL '清除缓存 'MsgBox "成功" Else 'MsgBox "失败" End If End Sub Sub OneKeyCreateExam() Dim ImgNames As Variant Dim strText As String Dim i As Long, n As Long, m As Long Dim OneTagP As Object Dim OneTagA As Object Dim TagP As Object Dim PosText As String Dim Arr() As String ReDim Arr(1 To 1) As String Dim Brr() As String ReDim Brr(1 To 1) Dim ImageURL As String Dim FilePath As String Dim FileName As String Dim dContent As Object Set dContent = CreateObject("Scripting.Dictionary") Dim dImageName As Object Set dImageName = CreateObject("Scripting.Dictionary") Dim StartTime As Variant '开始时间 Dim UsedTime As Variant '使用时间 StartTime = VBA.Timer '记录开始时间 AppSettings On Error GoTo ErrHandler '设置URL,访问网页获取网页源码 URL = ActiveSheet.Range("A2").Text With CreateObject("MSXML2.XMLHTTP") .Open "GET", URL, False .Send strText = .responsetext End With '创建网页文件 With CreateObject("htmlfile") .write strText '获取标题 FileName = .getElementsByTagName("h2")(0).innerhtml Debug.Print FileName Application.StatusBar = ">>>>>>正在下载图片>>>>>>" i = 0 '初始化序号 For Each OneTagA In .getElementsByTagName("a") '循环所有A标签 If OneTagA.HasChildNodes Then If OneTagA.href Like "http://photo.blog.sina.com.cn/showpic.html*" Then '获取之前的一个段落 Set TagP = OneTagA.PreviousSibling Do While TagP.tagName <> "P" Set TagP = TagP.PreviousSibling Loop i = i + 1 '文字内容提取 PosText = TagP.innerhtml PosText = RegReplace(PosText, "<.*?>") PosText = Replace(PosText, " ", "") '获取图片URL ImageURL = OneTagA.FirstChild.getAttribute("real_src") ImageName = "Image" & i & ".jpg" ImagePath = ThisWorkbook.Path & Application.PathSeparator & ImageName DownloadImageName ImageURL, ImagePath '下载图片 '获取图片 If dImageName.Exists(PosText) = False Then dImageName(PosText) = ImageName Else dImageName(PosText) = dImageName(PosText) & "|" & ImageName End If End If End If Next Application.StatusBar = ">>>>>>正在获取文本>>>>>>" i = 0 '初始化序号 n = 0 '初始化序号 For Each OneTagP In .getElementsByTagName("p") '文字内容提取 PosText = OneTagP.innerhtml PosText = RegReplace(PosText, "<.*?>") PosText = Replace(PosText, " ", "") i = i + 1 If PosText = "喜欢" Then Exit For '提前结束循环 If i > 20 Then '开始记录试卷内容 If Len(PosText) > 0 Then '保留非空数组 n = n + 1 ReDim Preserve Arr(1 To n) Arr(n) = PosText '存入数组 'Debug.Print n; " "; PosText 'dContent(PosText) = n End If End If Next End With Application.StatusBar = ">>>>>>正在创建Word文档>>>>>>" FilePath = ThisWorkbook.Path & "\" & FileName & ".doc" On Error Resume Next Kill FilePath On Error GoTo 0 Dim wdApp As Object Dim Doc As Object Set wdApp = CreateObject("Word.Application") Set Doc = wdApp.documents.Add() Doc.Activate For i = 1 To UBound(Arr) PosText = Arr(i) wdApp.Selection.TypeText Text:=PosText wdApp.Selection.TypeParagraph If dImageName.Exists(PosText) Then '如果含有图片 If InStr(dImageName(PosText), "|") = 0 Then '如果只含有一张图片 ImageName = dImageName(PosText) ImagePath = ThisWorkbook.Path & Application.PathSeparator & ImageName wdApp.Selection.InlineShapes.AddPicture FileName:=ImagePath, LinkToFile:=False, SaveWithDocument:=True wdApp.Selection.TypeParagraph Else ImgNames = Split(dImageName(PosText), "|") For n = LBound(ImgNames) To UBound(ImgNames) Step 1 ImageName = ImgNames(n) ImagePath = ThisWorkbook.Path & Application.PathSeparator & ImageName wdApp.Selection.InlineShapes.AddPicture FileName:=ImagePath, LinkToFile:=False, SaveWithDocument:=True wdApp.Selection.TypeParagraph Next n End If End If Next i Doc.SaveAs FilePath Doc.Close wdApp.Quit Application.StatusBar = ">>>>>>正在删除Image图片>>>>>>" For Each Key In dImageName.keys If InStr(dImageName(Key), "|") = 0 Then ImageName = dImageName(Key) ImagePath = ThisWorkbook.Path & Application.PathSeparator & ImageName Kill ImagePath Else ImgNames = Split(dImageName(Key), "|") For n = LBound(ImgNames) To UBound(ImgNames) Step 1 ImageName = ImgNames(n) ImagePath = ThisWorkbook.Path & Application.PathSeparator & ImageName Kill ImagePath Next n End If Next Key UsedTime = VBA.Timer - StartTime MsgBox "本次运行耗时:" & Format(UsedTime, "#0.0000秒") ErrorExit: Set wdApp = Nothing Set Doc = Nothing AppSettings False Exit Sub ErrHandler: If Err.Number <> 0 Then MsgBox Err.Description & "!", vbCritical, "QQ 84857038" Debug.Print Err.Description Err.Clear Resume ErrorExit End If End Sub Public Function RegReplace(ByVal OrgText As String, ByVal Pattern As String, Optional RepStr As String = "") As String '传递参数 :原字符串, 匹配模式 ,替换字符 Dim Regex As Object Dim newText As String Set Regex = CreateObject("VBScript.RegExp") With Regex .Global = True .Pattern = Pattern End With newText = Regex.Replace(OrgText, RepStr) RegReplace = newText Set Regex = Nothing End Function Public Sub AppSettings(Optional IsStart As Boolean = True) If IsStart Then Application.ScreenUpdating = False Application.DisplayAlerts = False Application.Calculation = xlCalculationManual Application.StatusBar = ">>>>>>>>Macro Is Running>>>>>>>>" Else Application.ScreenUpdating = True Application.DisplayAlerts = True Application.Calculation = xlCalculationAutomatic Application.StatusBar = False End If End Sub