C#彻底解决Web Browser 跨域读取Iframes内容

用 C# WinForms 的 WebBrowser 控件读取网页内容,分析数据,做一些采集工作。

如果是同一个域名下面还是好办的,基本上用HtmlAgilityPack就完全可以解决问题。 

但是现在遇到跨域问题,比如我需要打开页面上存在的广告联盟的地址,进行保存。 

这就牵扯到跨域了。一般的错误是:拒绝访问。

"Access is denied. (Exception from HRESULT: 0x80070005 (E_ACCESSDENIED))"。 

因为你没有从这个网站访问或修改另一个网站数据的权限。

怎么办?很苦恼吧。现在就告诉大家一个好办法。

直接上代码了。 

工具类,大家保存成一个类文件即可。项目需要引用 mshtml(Microsoft.mshtml 程序集)。

  1 using System;
  2 using System.Runtime.InteropServices;
  3 using System.Windows.Forms;
  4 using mshtml;
  5 
  6 namespace WebBrowserTest
  7 {
  8 
  9     // This is the COM IServiceProvider interface, not System.IServiceProvider .Net interface! 
 10 
 11     [ComImport(), ComVisible(true), Guid("6D5140C1-7436-11CE-8034-00AA006009FA"),
 12 
 13     InterfaceTypeAttribute(ComInterfaceType.InterfaceIsIUnknown)]
 14 
 15     public interface IServiceProvider
 16     {
 17 
 18         [return: MarshalAs(UnmanagedType.I4)]
 19 
 20         [PreserveSig]
 21 
 22         int QueryService(ref Guid guidService, ref Guid riid, [MarshalAs(UnmanagedType.Interface)] out object ppvObject);
 23 
 24     }
 25 
 26     public enum OLECMDF
 27     {
 28 
 29         OLECMDF_DEFHIDEONCTXTMENU = 0x20,
 30 
 31         OLECMDF_ENABLED = 2,
 32 
 33         OLECMDF_INVISIBLE = 0x10,
 34 
 35         OLECMDF_LATCHED = 4,
 36 
 37         OLECMDF_NINCHED = 8,
 38 
 39         OLECMDF_SUPPORTED = 1
 40 
 41     }
 42 
 43     public enum OLECMDID
 44     {
 45 
 46         OLECMDID_PAGESETUP = 8,
 47 
 48         OLECMDID_PRINT = 6,
 49 
 50         OLECMDID_PRINTPREVIEW = 7,
 51 
 52         OLECMDID_PROPERTIES = 10,
 53 
 54         OLECMDID_SAVEAS = 4
 55 
 56     }
 57 
 58     public enum OLECMDEXECOPT
 59     {
 60 
 61         OLECMDEXECOPT_DODEFAULT,
 62 
 63         OLECMDEXECOPT_PROMPTUSER,
 64 
 65         OLECMDEXECOPT_DONTPROMPTUSER,
 66 
 67         OLECMDEXECOPT_SHOWHELP
 68 
 69     }
 70 
 71     [ComImport, Guid("D30C1661-CDAF-11d0-8A3E-00C04FC9E26E"), TypeLibType(TypeLibTypeFlags.FOleAutomation | TypeLibTypeFlags.FDual | TypeLibTypeFlags.FHidden)]
 72 
 73     public interface IWebBrowser2
 74     {
 75 
 76         [DispId(100)]
 77 
 78         void GoBack();
 79 
 80         [DispId(0x65)]
 81 
 82         void GoForward();
 83 
 84         [DispId(0x66)]
 85 
 86         void GoHome();
 87 
 88         [DispId(0x67)]
 89 
 90         void GoSearch();
 91 
 92         [DispId(0x68)]
 93 
 94         void Navigate([In] string Url, [In] ref object flags, [In] ref object targetFrameName, [In] ref object postData, [In] ref object headers);
 95 
 96         [DispId(-550)]
 97 
 98         void Refresh();
 99 
100         [DispId(0x69)]
101 
102         void Refresh2([In] ref object level);
103 
104         [DispId(0x6a)]
105 
106         void Stop();
107 
108         [DispId(200)]
109 
110         object Application { [return: MarshalAs(UnmanagedType.IDispatch)] get; }
111 
112         [DispId(0xc9)]
113 
114         object Parent { [return: MarshalAs(UnmanagedType.IDispatch)] get; }
115 
116         [DispId(0xca)]
117 
118         object Container { [return: MarshalAs(UnmanagedType.IDispatch)] get; }
119 
120         [DispId(0xcb)]
121 
122         object Document { [return: MarshalAs(UnmanagedType.IDispatch)] get; }
123 
124         [DispId(0xcc)]
125 
126         bool TopLevelContainer { get; }
127 
128         [DispId(0xcd)]
129 
130         string Type { get; }
131 
132         [DispId(0xce)]
133 
134         int Left { get; set; }
135 
136         [DispId(0xcf)]
137 
138         int Top { get; set; }
139 
140         [DispId(0xd0)]
141 
142         int Width { get; set; }
143 
144         [DispId(0xd1)]
145 
146         int Height { get; set; }
147 
148         [DispId(210)]
149 
150         string LocationName { get; }
151 
152         [DispId(0xd3)]
153 
154         string LocationURL { get; }
155 
156         [DispId(0xd4)]
157 
158         bool Busy { get; }
159 
160         [DispId(300)]
161 
162         void Quit();
163 
164         [DispId(0x12d)]
165 
166         void ClientToWindow(out int pcx, out int pcy);
167 
168         [DispId(0x12e)]
169 
170         void PutProperty([In] string property, [In] object vtValue);
171 
172         [DispId(0x12f)]
173 
174         object GetProperty([In] string property);
175 
176         [DispId(0)]
177 
178         string Name { get; }
179 
180         [DispId(-515)]
181 
182         int HWND { get; }
183 
184         [DispId(400)]
185 
186         string FullName { get; }
187 
188         [DispId(0x191)]
189 
190         string Path { get; }
191 
192         [DispId(0x192)]
193 
194         bool Visible { get; set; }
195 
196         [DispId(0x193)]
197 
198         bool StatusBar { get; set; }
199 
200         [DispId(0x194)]
201 
202         string StatusText { get; set; }
203 
204         [DispId(0x195)]
205 
206         int ToolBar { get; set; }
207 
208         [DispId(0x196)]
209 
210         bool MenuBar { get; set; }
211 
212         [DispId(0x197)]
213 
214         bool FullScreen { get; set; }
215 
216         [DispId(500)]
217 
218         void Navigate2([In] ref object URL, [In] ref object flags, [In] ref object targetFrameName, [In] ref object postData, [In] ref object headers);
219 
220         [DispId(0x1f5)]
221 
222         OLECMDF QueryStatusWB([In] OLECMDID cmdID);
223 
224         [DispId(0x1f6)]
225 
226         void ExecWB([In] OLECMDID cmdID, [In] OLECMDEXECOPT cmdexecopt, ref object pvaIn, IntPtr pvaOut);
227 
228         [DispId(0x1f7)]
229 
230         void ShowBrowserBar([In] ref object pvaClsid, [In] ref object pvarShow, [In] ref object pvarSize);
231 
232         [DispId(-525)]
233 
234         WebBrowserReadyState ReadyState { get; }
235 
236         [DispId(550)]
237 
238         bool Offline { get; set; }
239 
240         [DispId(0x227)]
241 
242         bool Silent { get; set; }
243 
244         [DispId(0x228)]
245 
246         bool RegisterAsBrowser { get; set; }
247 
248         [DispId(0x229)]
249 
250         bool RegisterAsDropTarget { get; set; }
251 
252         [DispId(0x22a)]
253 
254         bool TheaterMode { get; set; }
255 
256         [DispId(0x22b)]
257 
258         bool AddressBar { get; set; }
259 
260         [DispId(0x22c)]
261 
262         bool Resizable { get; set; }
263 
264     }
265 
266     class CorssDomainHelper
267     {
268 
269         private static Guid IID_IWebBrowserApp = new Guid("0002DF05-0000-0000-C000-000000000046");
270 
271         private static Guid IID_IWebBrowser2 = new Guid("D30C1661-CDAF-11D0-8A3E-00C04FC9E26E");
272 
273         // Utility for IE cross domain access 
274 
275         // Returns null in case of failure. 
276 
277         public static IHTMLDocument3 GetDocumentFromWindow(IHTMLWindow2 htmlWindow)
278         {
279 
280             if (htmlWindow == null)
281             {
282                 return null;
283             }
284 
285             // First try the usual way to get the document. 
286 
287             try
288             {
289 
290                 IHTMLDocument2 doc = htmlWindow.document;
291 
292                 return (IHTMLDocument3)doc;
293 
294             }
295 
296             catch (COMException comEx)
297             {
298 
299                 // I think COMException won't be ever fired but just to be sure ... 
300 
301             }
302 
303             catch (UnauthorizedAccessException)
304             {
305 
306             }
307 
308             catch (Exception ex)
309             {
310                 return null;
311             }
312 
313             // At this point the error was E_ACCESSDENIED because the frame contains a document from another domain. 
314             // IE tries to prevent a cross frame scripting security issue. 
315 
316             try
317             {
318 
319                 // Convert IHTMLWindow2 to IWebBrowser2 using IServiceProvider. 
320                 IServiceProvider sp = (IServiceProvider)htmlWindow;
321                 // Use IServiceProvider.QueryService to get IWebBrowser2 object. 
322                 Object brws = null;
323                 sp.QueryService(ref IID_IWebBrowserApp, ref IID_IWebBrowser2, out brws);
324                 // Get the document from IWebBrowser2. 
325                 IWebBrowser2 browser = (IWebBrowser2)(brws);
326                 return (IHTMLDocument3)browser.Document;
327             }
328 
329             catch (Exception ex)
330             {
331                 Console.WriteLine(ex);
332             }
333             return null;
334         }
335     }
336 }

调用方法:
 1 public void test()
 2         {
 3             WebBrowser browser = new WebBrowser();
 4 
 5             HTMLDocument doc = (HTMLDocument)browser.Document.DomDocument;
 6 
 7             for (int i = 0; i < browser.Document.Window.Frames.Count; i++)
 8             {
 9 
10                 IHTMLDocument3 baiduDoc = CorssDomainHelper.GetDocumentFromWindow(browser.Document.Window.Frames[i].DomWindow
11 
12                     as IHTMLWindow2);
13 
14                 if (baiduDoc != null && baiduDoc.documentElement != null && baiduDoc.documentElement.document != null)
15                 {
16 
17                     IHTMLElementCollection linkss = ((HTMLDocument)(baiduDoc.documentElement.document)).links;
18 
19                     foreach (mshtml.IHTMLElement element in linkss)
20                     {
21 
22                         //加入你的代码就可以了。
23 
24                     }
25                 }
26             }
27         }

原文出自:http://www.cnblogs.com/Leo_wl/p/3181353.html

 

posted @ 2017-08-31 09:35  xueshu  阅读(772)  评论(0)  编辑  收藏  举报