利用com调用ie进行html解析
利用 COM 调用 IE(MSHTML)进行 HTML 解析,部分代码参考了互联网上的资料。
别的就不多说了,直接上代码。代码很简单,有不懂的地方请留言。
Code
1using System;
2using System.Collections.Generic;
3using System.Linq;
4using System.Text;
5using System.Runtime.InteropServices;
6using mshtml;
7using System.Threading;
8using System.Runtime.InteropServices.ComTypes;
9using System.IO;
10
11namespace Eric.Utilities.Html
12{
/// <summary>
/// Subset of the COM HRESULT status codes used by the interop declarations in this file.
/// Negative values are failures; zero and positive values are success codes.
/// Member names deliberately mirror the Win32 constant names.
/// </summary>
public enum HRESULT
{
    /// <summary>Unspecified failure (0x80004005).</summary>
    E_FAIL = -2147467259,

    /// <summary>One or more arguments are invalid (0x80070057).</summary>
    E_INVALIDARG = -2147024809,

    /// <summary>The requested interface is not supported (0x80004002).</summary>
    E_NOINTERFACE = -2147467262,

    /// <summary>The method is not implemented (0x80004001).</summary>
    E_NOTIMPL = -2147467263,

    /// <summary>Unexpected failure (0x8000FFFF).</summary>
    E_UNEXPECTED = -2147418113,

    /// <summary>The call succeeded but reports a logical "false" result.</summary>
    S_FALSE = 1,

    /// <summary>The call succeeded.</summary>
    S_OK = 0
}
23
/// <summary>
/// Managed declaration of the COM <c>IPersist</c> interface
/// (IID 0000010c-0000-0000-C000-000000000046).
/// </summary>
[ComImport, Guid("0000010c-0000-0000-C000-000000000046"), InterfaceType(ComInterfaceType.InterfaceIsIUnknown), ComVisible(true)]
public interface IPersist
{
    /// <summary>Retrieves the class identifier (CLSID) of the object.</summary>
    /// <param name="pClassID">Receives the CLSID.</param>
    /// <remarks>
    /// No [PreserveSig] here, so a failure HRESULT surfaces as an exception.
    /// NOTE(review): the native parameter is output-only; it is kept as <c>ref</c>
    /// so existing call sites continue to compile.
    /// </remarks>
    void GetClassID(ref Guid pClassID);
}
29
/// <summary>
/// Managed declaration of the COM <c>IPersistStreamInit</c> interface
/// (IID 7FD52380-4E07-101B-AE2D-08002B2EC713).
/// </summary>
/// <remarks>
/// The members are declared in native vtable order and must not be reordered,
/// removed, or interleaved with new members.
/// </remarks>
[ComImport, InterfaceType(ComInterfaceType.InterfaceIsIUnknown), ComVisible(true), Guid("7FD52380-4E07-101B-AE2D-08002B2EC713")]
public interface IPersistStreamInit : IPersist
{
    /// <summary>
    /// Re-declares <see cref="IPersist.GetClassID"/>: COM interop requires the base
    /// interface's methods to be repeated so that the vtable slots line up.
    /// </summary>
    /// <param name="pClassID">Receives the CLSID.</param>
    new void GetClassID(ref Guid pClassID);

    /// <summary>Reports whether the object has changed since it was last saved.</summary>
    /// <returns>The raw HRESULT as an integer (S_OK = 0 means dirty, S_FALSE = 1 means clean).</returns>
    [PreserveSig]
    int IsDirty();

    /// <summary>Initializes the object from a previously saved stream.</summary>
    /// <param name="pstm">Stream to load from.</param>
    /// <returns>The raw HRESULT; it is not translated into an exception.</returns>
    [PreserveSig]
    HRESULT Load(IStream pstm);

    /// <summary>Saves the object to a stream.</summary>
    /// <param name="pstm">Stream to write to.</param>
    /// <param name="fClearDirty">Whether to clear the dirty flag after saving.</param>
    /// <returns>The raw HRESULT; it is not translated into an exception.</returns>
    [PreserveSig]
    HRESULT Save(IStream pstm, [MarshalAs(UnmanagedType.Bool)] bool fClearDirty);

    /// <summary>Retrieves the maximum stream size needed to save the object.</summary>
    /// <param name="pcbSize">Receives the size in bytes.</param>
    /// <returns>The raw HRESULT; it is not translated into an exception.</returns>
    [PreserveSig]
    HRESULT GetSizeMax([In, Out, MarshalAs(UnmanagedType.U8)] ref long pcbSize);

    /// <summary>Initializes the object to its default (empty) state.</summary>
    /// <returns>The raw HRESULT; callers must check it themselves.</returns>
    [PreserveSig]
    HRESULT InitNew();
}
45
/// <summary>
/// Parses HTML into an MSHTML DOM by driving the Internet Explorer engine through COM.
/// </summary>
/// <remarks>
/// NOTE(review): MSHTML is normally hosted on an STA thread that pumps messages. The wait
/// loop below only sleeps, so a document may never reach "complete" on some hosts - confirm
/// the threading model of the caller and use the timeout overload where a hang is unacceptable.
/// </remarks>
public class HtmlParser
{
    /// <summary>Delay between successive readyState polls, in milliseconds.</summary>
    private const int PollIntervalMilliseconds = 1000;

    /// <summary>
    /// Loads the document at <paramref name="url"/> and blocks until MSHTML reports it complete.
    /// Waits indefinitely; use <see cref="Parse(string, int)"/> to bound the wait.
    /// </summary>
    /// <param name="url">URL or local file path of the document to load.</param>
    /// <returns>The loaded document, or null if it does not expose IHTMLDocument3.</returns>
    public IHTMLDocument3 Parse(string url)
    {
        return Parse(url, Timeout.Infinite);
    }

    /// <summary>
    /// Loads the document at <paramref name="url"/> and blocks until MSHTML reports it complete
    /// or the timeout elapses.
    /// </summary>
    /// <param name="url">URL or local file path of the document to load.</param>
    /// <param name="timeoutMilliseconds">
    /// Maximum time to wait, in milliseconds, or <see cref="Timeout.Infinite"/> to wait forever.
    /// </param>
    /// <returns>The loaded document, or null if it does not expose IHTMLDocument3.</returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="timeoutMilliseconds"/> is negative and not <see cref="Timeout.Infinite"/>.
    /// </exception>
    /// <exception cref="InvalidOperationException">The MSHTML document does not implement IPersistStreamInit.</exception>
    /// <exception cref="TimeoutException">The document did not finish loading in time.</exception>
    public IHTMLDocument3 Parse(string url, int timeoutMilliseconds)
    {
        if (timeoutMilliseconds < Timeout.Infinite)
        {
            throw new ArgumentOutOfRangeException("timeoutMilliseconds");
        }

        HTMLDocument host = new HTMLDocument();

        // The host document is initialized through IPersistStreamInit before it is
        // asked to create the real document.
        IPersistStreamInit persist = host as IPersistStreamInit;
        if (persist == null)
        {
            throw new InvalidOperationException("The MSHTML document does not implement IPersistStreamInit.");
        }

        // InitNew is declared [PreserveSig], so a failure has to be surfaced by hand.
        HRESULT initResult = persist.InitNew();
        if ((int)initResult < 0)
        {
            Marshal.ThrowExceptionForHR((int)initResult);
        }

        IHTMLDocument2 document = host.createDocumentFromUrl(url, "null");

        System.Diagnostics.Stopwatch elapsed = System.Diagnostics.Stopwatch.StartNew();
        while (!string.Equals(document.readyState, "complete", StringComparison.Ordinal))
        {
            int delay = PollIntervalMilliseconds;
            if (timeoutMilliseconds != Timeout.Infinite)
            {
                long remaining = timeoutMilliseconds - elapsed.ElapsedMilliseconds;
                if (remaining <= 0)
                {
                    throw new TimeoutException("Timed out waiting for the document to finish loading: " + url);
                }

                // Never sleep past the deadline.
                if (remaining < delay)
                {
                    delay = (int)remaining;
                }
            }

            Thread.Sleep(delay);
        }

        return document as IHTMLDocument3;
    }

    /// <summary>
    /// Parses an in-memory HTML string by writing it to a temporary file and loading that file.
    /// The temporary file is removed before this method returns.
    /// </summary>
    /// <param name="html">HTML markup to parse.</param>
    /// <param name="encoding">Encoding used to write the markup to the temporary file.</param>
    /// <returns>The loaded document, or null if it does not expose IHTMLDocument3.</returns>
    public IHTMLDocument3 ParseHtml(string html, Encoding encoding)
    {
        string tmpFile = Path.GetTempFileName();
        try
        {
            File.WriteAllText(tmpFile, html, encoding);
            return Parse(tmpFile);
        }
        finally
        {
            // Path.GetTempFileName creates the file on disk, so it must be cleaned up
            // even when writing or parsing fails.
            TryDelete(tmpFile);
        }
    }

    /// <summary>Best-effort delete: a file that cannot be removed must not mask the parse result.</summary>
    private static void TryDelete(string path)
    {
        try
        {
            File.Delete(path);
        }
        catch (IOException)
        {
            // Still in use; leave it for the OS temp cleanup.
        }
        catch (UnauthorizedAccessException)
        {
            // No permission to delete; nothing more can be done here.
        }
    }
}
75}
76
说一个问题:ParseHtml 目前是先把 HTML 写入一个临时文件,再从该文件加载。我本想不创建临时文件、直接从字符串解析,但一直没找到可行的办法。网上有通过 Markup Services(IMarkupServices)实现的例子,但我试下来并不好用。不知道大家有没有更好的办法。
1using System;
2using System.Collections.Generic;
3using System.Linq;
4using System.Text;
5using System.Runtime.InteropServices;
6using mshtml;
7using System.Threading;
8using System.Runtime.InteropServices.ComTypes;
9using System.IO;
10
11namespace Eric.Utilities.Html
12{
/// <summary>
/// Subset of the COM HRESULT status codes used by the interop declarations in this file.
/// Negative values are failures; zero and positive values are success codes.
/// Member names deliberately mirror the Win32 constant names.
/// </summary>
public enum HRESULT
{
    /// <summary>Unspecified failure (0x80004005).</summary>
    E_FAIL = -2147467259,

    /// <summary>One or more arguments are invalid (0x80070057).</summary>
    E_INVALIDARG = -2147024809,

    /// <summary>The requested interface is not supported (0x80004002).</summary>
    E_NOINTERFACE = -2147467262,

    /// <summary>The method is not implemented (0x80004001).</summary>
    E_NOTIMPL = -2147467263,

    /// <summary>Unexpected failure (0x8000FFFF).</summary>
    E_UNEXPECTED = -2147418113,

    /// <summary>The call succeeded but reports a logical "false" result.</summary>
    S_FALSE = 1,

    /// <summary>The call succeeded.</summary>
    S_OK = 0
}
23
/// <summary>
/// Managed declaration of the COM <c>IPersist</c> interface
/// (IID 0000010c-0000-0000-C000-000000000046).
/// </summary>
[ComImport, Guid("0000010c-0000-0000-C000-000000000046"), InterfaceType(ComInterfaceType.InterfaceIsIUnknown), ComVisible(true)]
public interface IPersist
{
    /// <summary>Retrieves the class identifier (CLSID) of the object.</summary>
    /// <param name="pClassID">Receives the CLSID.</param>
    /// <remarks>
    /// No [PreserveSig] here, so a failure HRESULT surfaces as an exception.
    /// NOTE(review): the native parameter is output-only; it is kept as <c>ref</c>
    /// so existing call sites continue to compile.
    /// </remarks>
    void GetClassID(ref Guid pClassID);
}
29
/// <summary>
/// Managed declaration of the COM <c>IPersistStreamInit</c> interface
/// (IID 7FD52380-4E07-101B-AE2D-08002B2EC713).
/// </summary>
/// <remarks>
/// The members are declared in native vtable order and must not be reordered,
/// removed, or interleaved with new members.
/// </remarks>
[ComImport, InterfaceType(ComInterfaceType.InterfaceIsIUnknown), ComVisible(true), Guid("7FD52380-4E07-101B-AE2D-08002B2EC713")]
public interface IPersistStreamInit : IPersist
{
    /// <summary>
    /// Re-declares <see cref="IPersist.GetClassID"/>: COM interop requires the base
    /// interface's methods to be repeated so that the vtable slots line up.
    /// </summary>
    /// <param name="pClassID">Receives the CLSID.</param>
    new void GetClassID(ref Guid pClassID);

    /// <summary>Reports whether the object has changed since it was last saved.</summary>
    /// <returns>The raw HRESULT as an integer (S_OK = 0 means dirty, S_FALSE = 1 means clean).</returns>
    [PreserveSig]
    int IsDirty();

    /// <summary>Initializes the object from a previously saved stream.</summary>
    /// <param name="pstm">Stream to load from.</param>
    /// <returns>The raw HRESULT; it is not translated into an exception.</returns>
    [PreserveSig]
    HRESULT Load(IStream pstm);

    /// <summary>Saves the object to a stream.</summary>
    /// <param name="pstm">Stream to write to.</param>
    /// <param name="fClearDirty">Whether to clear the dirty flag after saving.</param>
    /// <returns>The raw HRESULT; it is not translated into an exception.</returns>
    [PreserveSig]
    HRESULT Save(IStream pstm, [MarshalAs(UnmanagedType.Bool)] bool fClearDirty);

    /// <summary>Retrieves the maximum stream size needed to save the object.</summary>
    /// <param name="pcbSize">Receives the size in bytes.</param>
    /// <returns>The raw HRESULT; it is not translated into an exception.</returns>
    [PreserveSig]
    HRESULT GetSizeMax([In, Out, MarshalAs(UnmanagedType.U8)] ref long pcbSize);

    /// <summary>Initializes the object to its default (empty) state.</summary>
    /// <returns>The raw HRESULT; callers must check it themselves.</returns>
    [PreserveSig]
    HRESULT InitNew();
}
45
/// <summary>
/// Parses HTML into an MSHTML DOM by driving the Internet Explorer engine through COM.
/// </summary>
/// <remarks>
/// NOTE(review): MSHTML is normally hosted on an STA thread that pumps messages. The wait
/// loop below only sleeps, so a document may never reach "complete" on some hosts - confirm
/// the threading model of the caller and use the timeout overload where a hang is unacceptable.
/// </remarks>
public class HtmlParser
{
    /// <summary>Delay between successive readyState polls, in milliseconds.</summary>
    private const int PollIntervalMilliseconds = 1000;

    /// <summary>
    /// Loads the document at <paramref name="url"/> and blocks until MSHTML reports it complete.
    /// Waits indefinitely; use <see cref="Parse(string, int)"/> to bound the wait.
    /// </summary>
    /// <param name="url">URL or local file path of the document to load.</param>
    /// <returns>The loaded document, or null if it does not expose IHTMLDocument3.</returns>
    public IHTMLDocument3 Parse(string url)
    {
        return Parse(url, Timeout.Infinite);
    }

    /// <summary>
    /// Loads the document at <paramref name="url"/> and blocks until MSHTML reports it complete
    /// or the timeout elapses.
    /// </summary>
    /// <param name="url">URL or local file path of the document to load.</param>
    /// <param name="timeoutMilliseconds">
    /// Maximum time to wait, in milliseconds, or <see cref="Timeout.Infinite"/> to wait forever.
    /// </param>
    /// <returns>The loaded document, or null if it does not expose IHTMLDocument3.</returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="timeoutMilliseconds"/> is negative and not <see cref="Timeout.Infinite"/>.
    /// </exception>
    /// <exception cref="InvalidOperationException">The MSHTML document does not implement IPersistStreamInit.</exception>
    /// <exception cref="TimeoutException">The document did not finish loading in time.</exception>
    public IHTMLDocument3 Parse(string url, int timeoutMilliseconds)
    {
        if (timeoutMilliseconds < Timeout.Infinite)
        {
            throw new ArgumentOutOfRangeException("timeoutMilliseconds");
        }

        HTMLDocument host = new HTMLDocument();

        // The host document is initialized through IPersistStreamInit before it is
        // asked to create the real document.
        IPersistStreamInit persist = host as IPersistStreamInit;
        if (persist == null)
        {
            throw new InvalidOperationException("The MSHTML document does not implement IPersistStreamInit.");
        }

        // InitNew is declared [PreserveSig], so a failure has to be surfaced by hand.
        HRESULT initResult = persist.InitNew();
        if ((int)initResult < 0)
        {
            Marshal.ThrowExceptionForHR((int)initResult);
        }

        IHTMLDocument2 document = host.createDocumentFromUrl(url, "null");

        System.Diagnostics.Stopwatch elapsed = System.Diagnostics.Stopwatch.StartNew();
        while (!string.Equals(document.readyState, "complete", StringComparison.Ordinal))
        {
            int delay = PollIntervalMilliseconds;
            if (timeoutMilliseconds != Timeout.Infinite)
            {
                long remaining = timeoutMilliseconds - elapsed.ElapsedMilliseconds;
                if (remaining <= 0)
                {
                    throw new TimeoutException("Timed out waiting for the document to finish loading: " + url);
                }

                // Never sleep past the deadline.
                if (remaining < delay)
                {
                    delay = (int)remaining;
                }
            }

            Thread.Sleep(delay);
        }

        return document as IHTMLDocument3;
    }

    /// <summary>
    /// Parses an in-memory HTML string by writing it to a temporary file and loading that file.
    /// The temporary file is removed before this method returns.
    /// </summary>
    /// <param name="html">HTML markup to parse.</param>
    /// <param name="encoding">Encoding used to write the markup to the temporary file.</param>
    /// <returns>The loaded document, or null if it does not expose IHTMLDocument3.</returns>
    public IHTMLDocument3 ParseHtml(string html, Encoding encoding)
    {
        string tmpFile = Path.GetTempFileName();
        try
        {
            File.WriteAllText(tmpFile, html, encoding);
            return Parse(tmpFile);
        }
        finally
        {
            // Path.GetTempFileName creates the file on disk, so it must be cleaned up
            // even when writing or parsing fails.
            TryDelete(tmpFile);
        }
    }

    /// <summary>Best-effort delete: a file that cannot be removed must not mask the parse result.</summary>
    private static void TryDelete(string path)
    {
        try
        {
            File.Delete(path);
        }
        catch (IOException)
        {
            // Still in use; leave it for the OS temp cleanup.
        }
        catch (UnauthorizedAccessException)
        {
            // No permission to delete; nothing more can be done here.
        }
    }
}
75}
76
本文基于署名 2.5 中国大陆许可协议发布,欢迎转载,演绎或用于商业目的,但是必须保留本文的署名小橋流水(包含链接)。如您有任何疑问或者授权方面的协商,请给我发邮件。