asp.net使用WebBrowser采集加载完毕后的页面
1. 工具类代码(代码可以自行整理,这里的重点在于实现方式):
using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Threading;
using System.Windows.Forms;

/// <summary>
/// Loads a page in a WinForms <see cref="WebBrowser"/> control on a dedicated
/// single-threaded-apartment thread and returns the markup of the document body
/// once the control reports that loading has finished.
/// </summary>
public class CustomBrowser
{
    /// <summary>Upper bound, in milliseconds, on how long one page load may take.</summary>
    private const int DefaultTimeoutMilliseconds = 30000;

    /// <summary>Pause between two message-pump passes while waiting for the page.</summary>
    private const int PollIntervalMilliseconds = 50;

    /// <summary>Address handed from <see cref="GetWebpage"/> to the worker thread.</summary>
    protected string _url;

    /// <summary>Result handed back from the worker thread; empty when loading failed.</summary>
    string html = "";

    /// <summary>Failure recorded by the most recent worker run, or null.</summary>
    private Exception _lastError;

    public CustomBrowser()
    {
    }

    /// <summary>
    /// Gets the exception that made the most recent <see cref="GetWebpage"/> call
    /// return an empty string, or <c>null</c> when that call succeeded.
    /// </summary>
    public Exception LastError
    {
        get { return _lastError; }
    }

    /// <summary>
    /// Navigates to <paramref name="url"/> and blocks until the worker thread has
    /// finished loading the page.
    /// </summary>
    /// <param name="url">Absolute address of the page to load.</param>
    /// <returns>
    /// The outer HTML of the document body, or an empty string when the page could
    /// not be loaded (the reason is then available through <see cref="LastError"/>).
    /// </returns>
    public string GetWebpage(string url)
    {
        _url = url;

        // Reset per-call state so a failed call never returns the previous page.
        html = "";
        _lastError = null;

        // WebBrowser is an ActiveX control that must be run in a
        // single-threaded apartment, so create a thread to host the control.
        Thread thread = new Thread(new ThreadStart(GetWebPageWorker));
        thread.SetApartmentState(ApartmentState.STA);
        thread.Start();
        thread.Join();

        return html;
    }

    /// <summary>
    /// Thread entry point: creates the browser control, loads <see cref="_url"/> and
    /// stores the resulting markup. Failures are recorded instead of being thrown,
    /// because an unhandled exception on this thread would escape the caller's
    /// control entirely.
    /// </summary>
    protected void GetWebPageWorker()
    {
        try
        {
            // Dispose the control so its underlying resources are released per call.
            using (var browser = new WebBrowser { ScrollBarsEnabled = false, ScriptErrorsSuppressed = true })
            {
                // Retained from the original implementation.
                browser.BringToFront();
                html = NavigateAndWaitForLoad(browser, new Uri(_url), DefaultTimeoutMilliseconds) ?? "";
            }
        }
        catch (Exception ex)
        {
            // Keep the best-effort contract (empty string) but make the cause visible.
            _lastError = ex;
            html = "";
        }
    }

    /// <summary>
    /// Starts navigation and pumps window messages until the control reports
    /// <see cref="WebBrowserReadyState.Complete"/> or the time budget is used up.
    /// </summary>
    /// <param name="browser">Control that performs the navigation.</param>
    /// <param name="uri">Address to navigate to.</param>
    /// <param name="waitTime">Maximum time to wait, in milliseconds.</param>
    /// <returns>The outer HTML of the document body.</returns>
    /// <exception cref="TimeoutException">
    /// No document body was available when the time budget ran out.
    /// </exception>
    private string NavigateAndWaitForLoad(WebBrowser browser, Uri uri, int waitTime)
    {
        var elapsed = System.Diagnostics.Stopwatch.StartNew();
        browser.Navigate(uri);

        // Pump messages in short slices: the control only makes progress while the
        // message queue of this thread is being serviced, so long sleeps stall it.
        while (browser.ReadyState != WebBrowserReadyState.Complete
               && elapsed.ElapsedMilliseconds < waitTime)
        {
            Application.DoEvents();
            Thread.Sleep(PollIntervalMilliseconds);
        }

        // The body may not exist yet; keep pumping, but never beyond the budget.
        while ((browser.Document == null || browser.Document.Body == null)
               && elapsed.ElapsedMilliseconds < waitTime)
        {
            Application.DoEvents();
            Thread.Sleep(PollIntervalMilliseconds);
        }

        HtmlDocument document = browser.Document;
        if (document == null || document.Body == null)
        {
            throw new TimeoutException(
                "The page did not produce a document body within " + waitTime + " ms: " + uri);
        }

        // On timeout with a partially loaded page, return what is there (best effort).
        return document.Body.OuterHtml ?? "";
    }
}
2.调用方法:
// Load the page through the helper class; the call blocks until the worker
// thread has finished and yields the markup of the document body.
CustomBrowser pageFetcher = new CustomBrowser();
string pageHtml = pageFetcher.GetWebpage("http://www.baidu.com");
3.写在最后:
在 ASP.NET 中使用多线程时如果处理不当,容易造成 IIS 崩溃。
以上代码是在 ASP.NET 中调用 WinForms 的 WebBrowser 控件,仅仅实现了基本功能;至于能否满足高并发场景的需求,还需要实际测试。
欢迎大家多多交流