- using System;
- using System.Collections.Generic;
- using System.Text;
- using System.ComponentModel;
- using System.Windows.Forms;
- using System.IO;
- using System.Threading;
- using System.Text.RegularExpressions;
- namespace Yyc.Net
- {
-
-
-
/// <summary>
/// Loads a page in a <see cref="WebBrowser"/> control, waits until the rendered
/// HTML stops changing (or a timeout elapses) and reports the result through events.
/// </summary>
/// <remarks>
/// <see cref="Snatching"/> and <see cref="Snatched"/> are raised from the browser's
/// <c>Navigated</c> and <c>DocumentCompleted</c> handlers; <see cref="SnatchCompleted"/>
/// is raised from the <see cref="BackgroundWorker"/> <c>DoWork</c> handler that performs the waiting.
/// </remarks>
public class PageSnatch
{
    #region field
    /// <summary>Browser used by the navigation in progress; null before the first navigation and after clean-up.</summary>
    private WebBrowser browser;

    /// <summary>Maximum time, in milliseconds, spent waiting for the HTML to settle.</summary>
    private int timeout;

    private const int defaultTimeout = 5 * 1000;

    /// <summary>Delay between two polls of the browser state, in milliseconds.</summary>
    private const int pollInterval = 500;

    /// <summary>Number of consecutive polls with an unchanged HTML length required to call the page settled.</summary>
    private const int stablePollCount = 6;

    /// <summary>
    /// URL validation pattern. The original literal had its backslashes turned into
    /// forward slashes ("/w", "/."), which made every ordinary URL fail validation.
    /// </summary>
    private static readonly Regex urlPattern =
        new Regex(@"http(s)?://([\w-]+\.)+[\w-]+(/[\w- ./?%&=]*)?");
    #endregion

    #region ctor
    /// <summary>Creates an idle instance with the default timeout.</summary>
    public PageSnatch()
    {
        this.IsBusy = false;
        this.timeout = defaultTimeout;
    }

    /// <summary>Creates an idle instance targeting <paramref name="url"/>.</summary>
    /// <param name="url">Address to load when <see cref="Navigate()"/> is called.</param>
    public PageSnatch(string url)
        : this()
    {
        this.Url = url;
    }

    /// <summary>Creates an idle instance targeting <paramref name="url"/> with an explicit timeout.</summary>
    /// <param name="url">Address to load when <see cref="Navigate()"/> is called.</param>
    /// <param name="timeout">Timeout in milliseconds; stored as given, without the lower bound applied by <see cref="Timeout"/>.</param>
    public PageSnatch(string url, int timeout)
        : this(url)
    {
        this.timeout = timeout;
    }
    #endregion

    #region event
    /// <summary>Raised from the browser's <c>Navigated</c> event.</summary>
    public event SnatchingEventHandler Snatching;

    /// <summary>Raised from the browser's <c>DocumentCompleted</c> event.</summary>
    public event SnatchedEventHandler Snatched;

    /// <summary>
    /// Raised once the wait has ended (HTML settled or timeout reached),
    /// unless <see cref="Cancel"/> is set.
    /// </summary>
    public event SnatchCompletedEventHandler SnatchCompleted;
    #endregion

    #region property
    /// <summary>
    /// When true, the wait loops exit and <see cref="SnatchCompleted"/> is not raised.
    /// This class never resets the flag; the caller must clear it before navigating again.
    /// </summary>
    public bool Cancel { set; get; }

    /// <summary>True from the start of <see cref="Navigate(object)"/> until its background wait has finished and cleaned up.</summary>
    public bool IsBusy { private set; get; }

    /// <summary>True when <see cref="Url"/> is non-empty and matches the URL pattern.</summary>
    private bool IsValidate
    {
        // Regex.IsMatch throws on null input, so an unset Url is reported as invalid instead.
        get { return !string.IsNullOrEmpty(this.Url) && urlPattern.IsMatch(this.Url); }
    }

    /// <summary>Current outer HTML of the document body, read through <see cref="Execute"/>; empty when unavailable.</summary>
    private string OuterHtml
    {
        get
        {
            string text = string.Empty;
            this.Execute(delegate()
            {
                text = ReadBodyHtml(this.browser);
            });
            return text;
        }
    }

    /// <summary>
    /// Timeout in milliseconds for the settle phase. Assignments that are not
    /// greater than the 5000 ms default are ignored.
    /// </summary>
    public int Timeout
    {
        get { return this.timeout; }
        set
        {
            if (value > defaultTimeout)
            {
                this.timeout = value;
            }
        }
    }

    /// <summary>Address to load.</summary>
    public string Url { set; get; }
    #endregion

    #region methods
    /// <summary>Stops and disposes the current browser, if any, and clears the field.</summary>
    private void Dispose()
    {
        this.Execute(delegate()
        {
            WebBrowser target = this.browser;
            this.browser = null;
            if (target != null)
            {
                target.Stop();
                target.Dispose();
            }
        });
    }

    /// <summary>Runs <paramref name="browserEventHandler"/> through the browser's <c>Invoke</c>; does nothing when there is no browser.</summary>
    /// <param name="browserEventHandler">Callback to run.</param>
    private void Execute(BrowserEventHandler browserEventHandler)
    {
        WebBrowser target = this.browser;
        if (target != null)
        {
            target.Invoke(browserEventHandler);
        }
    }

    /// <summary>Returns the body's outer HTML, or an empty string when the browser, document or body is missing.</summary>
    private static string ReadBodyHtml(WebBrowser target)
    {
        if (target == null || target.Document == null || target.Document.Body == null)
        {
            return string.Empty;
        }
        return target.Document.Body.OuterHtml ?? string.Empty;
    }

    /// <summary>Loads <see cref="Url"/> with <see cref="DBNull.Value"/> as the user argument.</summary>
    public void Navigate()
    {
        this.Navigate(DBNull.Value);
    }

    /// <summary>Sets <see cref="Url"/> and loads it with <see cref="DBNull.Value"/> as the user argument.</summary>
    /// <param name="url">Address to load.</param>
    public void Navigate(string url)
    {
        this.Url = url;
        this.Navigate(DBNull.Value);
    }

    /// <summary>
    /// Starts loading <see cref="Url"/> and returns immediately; progress and the
    /// final result are reported through the events of this class.
    /// </summary>
    /// <param name="argument">Opaque value handed back in the <see cref="Snatching"/> and <see cref="SnatchCompleted"/> event data.</param>
    /// <exception cref="InvalidOperationException">A navigation is already in progress.</exception>
    /// <exception cref="ArgumentException"><see cref="Url"/> is empty or does not look like an http(s) URL.</exception>
    public void Navigate(object argument)
    {
        if (this.IsBusy)
        {
            throw new InvalidOperationException("This document is busy!");
        }
        if (!this.IsValidate)
        {
            throw new ArgumentException("This url is wrong!");
        }

        this.IsBusy = true;
        bool completed = false;
        SnatchCompletedEventArgs scea = new SnatchCompletedEventArgs();
        scea.Argument = argument;
        try
        {
            this.browser = new WebBrowser();
            this.browser.ScriptErrorsSuppressed = false;
            this.browser.Navigated += delegate(object sender, WebBrowserNavigatedEventArgs e)
            {
                SnatchingEventHandler handler = this.Snatching;
                if (handler != null)
                {
                    SnatchingEventArgs sea = new SnatchingEventArgs();
                    sea.Argument = argument;
                    sea.Url = e.Url;
                    handler(this, sea);
                }
            };
            this.browser.DocumentCompleted += delegate(object sender, WebBrowserDocumentCompletedEventArgs e)
            {
                WebBrowser source = this.browser;
                scea.Url = e.Url;
                scea.Text = ReadBodyHtml(source);

                // Only the event whose URL equals the document's own URL marks the load as complete.
                HtmlDocument document = source != null ? source.Document : null;
                completed = document != null
                    && document.Url != null
                    && document.Url.ToString().Equals(e.Url.ToString());

                SnatchedEventHandler handler = this.Snatched;
                if (handler != null)
                {
                    SnatchedEventArgs sea = new SnatchedEventArgs();
                    sea.Url = e.Url;
                    handler(this, sea);
                }
            };
            this.browser.Navigate(this.Url);

            BackgroundWorker worker = new BackgroundWorker();
            worker.DoWork += delegate(object obj, DoWorkEventArgs dow)
            {
                try
                {
                    // Phase 1: wait for DocumentCompleted. No timeout applies here; only Cancel ends it early.
                    // NOTE(review): DoEvents is called from the DoWork handler, which presumably is not the
                    // thread that owns the browser - confirm whether this call is still needed.
                    while (!completed && !this.Cancel)
                    {
                        Application.DoEvents();
                        System.Threading.Thread.Sleep(pollInterval);
                    }

                    // Phase 2: wait for the rendered HTML to stop changing, bounded by Timeout.
                    DateTime startTime = DateTime.Now;
                    this.WaitForStableDocument(scea, startTime);

                    if (!this.Cancel)
                    {
                        this.RaiseSnatchCompleted(scea, startTime);
                    }
                }
                finally
                {
                    try
                    {
                        this.Dispose();
                        // Kept from the original implementation: force a collection after releasing the browser.
                        GC.Collect();
                        GC.WaitForPendingFinalizers();
                    }
                    finally
                    {
                        // Always release the busy flag; previously a timeout or cancel left it set forever.
                        this.IsBusy = false;
                    }
                }
            };
            worker.RunWorkerAsync();
        }
        catch
        {
            // Start-up failed before the worker took over: undo the partial state and
            // rethrow with the original stack trace.
            if (this.browser != null)
            {
                this.browser.Dispose();
                this.browser = null;
            }
            this.IsBusy = false;
            throw;
        }
    }

    /// <summary>
    /// Polls until the browser is idle and the body HTML length has been identical for
    /// <c>stablePollCount</c> consecutive polls, <see cref="Cancel"/> is set, or
    /// <see cref="Timeout"/> elapses (recorded in <c>scea.Error</c>).
    /// </summary>
    private void WaitForStableDocument(SnatchCompletedEventArgs scea, DateTime startTime)
    {
        int stablePolls = 0;
        int lastLength = 0;
        bool settled = false;
        while (!settled && !this.Cancel)
        {
            System.Threading.Thread.Sleep(pollInterval);
            double elapsed = Math.Ceiling((DateTime.Now - startTime).TotalMilliseconds);
            if (elapsed >= this.Timeout)
            {
                scea.Error = new TimeoutException("Visiting about new exception delay, since the setting is timeout");
                break;
            }

            // Treated as busy when the browser cannot be queried.
            bool browserBusy = true;
            this.Execute(delegate()
            {
                WebBrowser target = this.browser;
                if (target != null)
                {
                    browserBusy = target.IsBusy;
                }
            });

            int currentLength = this.OuterHtml.Length;

            // The original negated browser.IsBusy here, so the stability check ran only
            // while the browser was still busy and an idle page always ran into the timeout.
            if (!browserBusy)
            {
                if (currentLength == lastLength)
                {
                    stablePolls++;
                }
                else
                {
                    stablePolls = 0;
                }
                settled = stablePolls == stablePollCount;
            }
            lastLength = currentLength;
        }
    }

    /// <summary>Fills in the final HTML and the elapsed time, then raises <see cref="SnatchCompleted"/> if anyone is subscribed.</summary>
    private void RaiseSnatchCompleted(SnatchCompletedEventArgs scea, DateTime startTime)
    {
        SnatchCompletedEventHandler handler = this.SnatchCompleted;
        if (handler != null)
        {
            scea.TextAsync = this.OuterHtml;
            scea.Timeout = (int)Math.Ceiling((DateTime.Now - startTime).TotalMilliseconds);
            handler(this, scea);
        }
    }

    /// <summary>Sets <see cref="Url"/> and starts loading it.</summary>
    /// <param name="url">Address to load.</param>
    /// <param name="argument">Opaque value handed back in the event data.</param>
    public void Navigate(string url, object argument)
    {
        this.Url = url;
        this.Navigate(argument);
    }

    /// <summary>Sets <see cref="Url"/> and the timeout, then starts loading.</summary>
    /// <param name="url">Address to load.</param>
    /// <param name="argument">Opaque value handed back in the event data.</param>
    /// <param name="timeout">Timeout in milliseconds; stored as given, without the lower bound applied by <see cref="Timeout"/>.</param>
    public void Navigate(string url, object argument, int timeout)
    {
        this.Url = url;
        this.timeout = timeout;
        this.Navigate(argument);
    }
    #endregion
}
- #region delegate
-
-
-
/// <summary>
/// Parameterless callback that <c>PageSnatch.Execute</c> passes to the browser's <c>Invoke</c>.
/// </summary>
internal delegate void BrowserEventHandler();

/// <summary>
/// Handler signature for the <c>PageSnatch.Snatching</c> event.
/// </summary>
/// <param name="sender">Object raising the event.</param>
/// <param name="e">Event data: the URL reported by the browser and the caller's argument.</param>
public delegate void SnatchingEventHandler(object sender, SnatchingEventArgs e);

/// <summary>
/// Handler signature for the <c>PageSnatch.Snatched</c> event.
/// </summary>
/// <param name="sender">Object raising the event.</param>
/// <param name="e">Event data: the URL reported by the browser.</param>
public delegate void SnatchedEventHandler(object sender, SnatchedEventArgs e);

/// <summary>
/// Handler signature for the <c>PageSnatch.SnatchCompleted</c> event.
/// </summary>
/// <param name="sender">Object raising the event.</param>
/// <param name="e">Event data: captured HTML, elapsed time, error and the caller's argument.</param>
public delegate void SnatchCompletedEventHandler(object sender, SnatchCompletedEventArgs e);
- #endregion
- #region model
-
-
-
/// <summary>
/// Event data for <c>PageSnatch.Snatching</c>. Derives from <see cref="EventArgs"/>
/// so it follows the standard .NET event pattern.
/// </summary>
public class SnatchingEventArgs : EventArgs
{
    /// <summary>Caller-supplied value that was passed to <c>Navigate</c>.</summary>
    public object Argument { set; get; }

    /// <summary>URL reported by the browser's <c>Navigated</c> event.</summary>
    public Uri Url { set; get; }
}
-
-
-
/// <summary>
/// Event data for <c>PageSnatch.Snatched</c>. Derives from <see cref="EventArgs"/>
/// so it follows the standard .NET event pattern.
/// </summary>
public class SnatchedEventArgs : EventArgs
{
    /// <summary>URL reported by the browser's <c>DocumentCompleted</c> event.</summary>
    public Uri Url { set; get; }
}
-
-
-
/// <summary>
/// Event data for <c>PageSnatch.SnatchCompleted</c>. Derives from <see cref="EventArgs"/>
/// so it follows the standard .NET event pattern.
/// </summary>
public class SnatchCompletedEventArgs : EventArgs
{
    /// <summary>Body HTML captured when the browser's <c>DocumentCompleted</c> event fired.</summary>
    public string Text { set; get; }

    /// <summary>Body HTML captured after the wait ended, just before <c>SnatchCompleted</c> is raised.</summary>
    public string TextAsync { set; get; }

    /// <summary>Set when the wait ran into the configured timeout; otherwise null.</summary>
    public Exception Error { set; get; }

    /// <summary>Milliseconds elapsed between the start of the post-load wait and the raising of <c>SnatchCompleted</c>.</summary>
    public int Timeout { set; get; }

    /// <summary>Caller-supplied value that was passed to <c>Navigate</c>.</summary>
    public object Argument { set; get; }

    /// <summary>URL reported by the most recent <c>DocumentCompleted</c> event.</summary>
    public Uri Url { set; get; }
}
- #endregion
- }
-
- 调用如下:
// Usage example: configure the snatcher, subscribe to the completion event, then start loading.
var p = new Yyc.Net.PageSnatch
{
    Timeout = 20000,
    Url = "http://www.ignaccount.com/buywowusaccount.html"
};
p.SnatchCompleted += p_SnatchCompleted;
p.Navigate();
posted @
2011-12-22 10:16
Ruiky
阅读(
8490)
评论()
编辑
收藏
举报