利用 COM 调用 IE(MSHTML)进行 HTML 解析

别的就不多说了,直接上代码。代码很简单,有不懂的地方请留言。

1using System;
 2using System.Collections.Generic;
 3using System.Linq;
 4using System.Text;
 5using System.Runtime.InteropServices;
 6using mshtml;
 7using System.Threading;
 8using System.Runtime.InteropServices.ComTypes;
 9using System.IO;
10
11namespace Eric.Utilities.Html
12{
13    public enum HRESULT
14    {
15        E_FAIL = -2147467259,
16        E_INVALIDARG = -2147024809,
17        E_NOINTERFACE = -2147467262,
18        E_NOTIMPL = -2147467263,
19        E_UNEXPECTED = -2147418113,
20        S_FALSE = 1,
21        S_OK = 0
22    }
23
24    [ComImport, Guid("0000010c-0000-0000-C000-000000000046"), InterfaceType(ComInterfaceType.InterfaceIsIUnknown), ComVisible(true)]
25    public interface IPersist
26    {
27        void GetClassID(ref Guid pClassID);
28    }
29
30    [ComImport, InterfaceType(ComInterfaceType.InterfaceIsIUnknown), ComVisible(true), Guid("7FD52380-4E07-101B-AE2D-08002B2EC713")]
31    public interface IPersistStreamInit : IPersist
32    {
33        new void GetClassID(ref Guid pClassID);
34        [PreserveSig]
35        int IsDirty();
36        [PreserveSig]
37        HRESULT Load(IStream pstm);
38        [PreserveSig]
39        HRESULT Save(IStream pstm, [MarshalAs(UnmanagedType.Bool)] bool fClearDirty);
40        [PreserveSig]
41        HRESULT GetSizeMax([In, Out, MarshalAs(UnmanagedType.U8)] ref long pcbSize);
42        [PreserveSig]
43        HRESULT InitNew();
44    }
45
46    public class HtmlParser
47    {
48        public IHTMLDocument3 Parse(string url)
49        {
50            HTMLDocument objMSHTML = new HTMLDocument();
51            IHTMLDocument2 objMSHTML2;
52            IHTMLDocument3 objMSHTML3;
53
54            IPersistStreamInit objIPS;
55            objIPS = objMSHTML as IPersistStreamInit;
56            objIPS.InitNew();
57            objIPS = null;
58
59            objMSHTML2 = objMSHTML.createDocumentFromUrl(url, "null");
60            while (objMSHTML2.readyState != "complete")
61            {
62                Thread.Sleep(1000);
63            }
64            objMSHTML3 = objMSHTML2 as IHTMLDocument3;
65            return objMSHTML3;
66        }
67
68        public IHTMLDocument3 ParseHtml(string html, Encoding encoding)
69        {
70            string tmpFile = Path.GetTempFileName();
71            File.WriteAllText(tmpFile, html, encoding);
72            return Parse(tmpFile);
73        }
74    }
75}

posted on   小橋流水  阅读(438)  评论(0)  编辑  收藏  举报

编辑推荐:
· 如何编写易于单元测试的代码
· 10年+ .NET Coder 心语,封装的思维:从隐藏、稳定开始理解其本质意义
· .NET Core 中如何实现缓存的预热?
· 从 HTTP 原因短语缺失研究 HTTP/2 和 HTTP/3 的设计差异
· AI与.NET技术实操系列:向量存储与相似性搜索在 .NET 中的实现
阅读排行:
· 周边上新:园子的第一款马克杯温暖上架
· Open-Sora 2.0 重磅开源!
· 分享 3 个 .NET 开源的文件压缩处理库,助力快速实现文件压缩解压功能!
· Ollama——大语言模型本地部署的极速利器
· [AI/GPT/综述] AI Agent的设计模式综述

导航

统计

点击右上角即可分享
微信分享提示