关于正则表达式匹配无异常资源耗尽的解决方案

在C#中使用正则表达式进行匹配时,有时会遇到这种情况:CPU使用率达到100%,但正则表达式并没有抛出任何异常,而是一直处于匹配过程中,这将导致系统资源被耗尽,应用程序被卡住。这通常是由于输入只能与正则部分匹配、引发了大量回溯,而且Regex中没有Timeout属性,使正则处理器如同陷入了死循环。
   这种情况尤其可能发生在对不可靠的输入内容进行匹配的过程中,例如在我的个人网站www.eahan.com项目中,对多个网站页面进行自动采集匹配时,就经常发生该问题。为了避免资源耗尽的情况发生,我写了一个AsynchronousRegex类,顾名思义,即异步的Regex。给该类设置一个超时时间(Timeout),将Regex匹配的动作置于单独的线程中,由AsynchronousRegex负责监控:一旦Regex匹配耗时超过Timeout限定,就销毁该线程。
  using System;
  using System.Text.RegularExpressions;
  using System.Threading;
  namespace LZT.Eahan.Common
  {
   /// <summary>
   /// Runs a regular-expression match on a separate thread and abandons it
   /// when it exceeds a time budget, so that a pathological pattern/input
   /// pair cannot pin the CPU indefinitely.
   /// </summary>
   /// <remarks>
   /// The worker is stopped with <see cref="Thread.Abort()"/>, which is only
   /// supported on .NET Framework. An instance may be reused for several
   /// sequential calls; it is not designed for concurrent calls.
   /// </remarks>
   public class AsynchronousRegex
   {
       // Granularity, in milliseconds, at which the time budget is checked.
       private const int SleepInterval = 100;

       // Maximum time to wait for a match, in milliseconds.
       private readonly int _timeout;

       // Whether the most recent Matchs call was abandoned.
       private bool _isTimeout;

       /// <summary>
       /// Gets whether the most recent <see cref="Matchs"/> call was aborted
       /// because it ran out of time.
       /// </summary>
       public bool IsTimeout
       {
           get { return this._isTimeout; }
       }

       /// <summary>
       /// Creates a matcher with the given time budget.
       /// </summary>
       /// <param name="timeout">
       /// Maximum time to wait, in milliseconds. The budget is checked in
       /// 100 ms steps, so it is effectively rounded up to the next multiple
       /// of 100 ms (with a minimum of one step).
       /// </param>
       public AsynchronousRegex(int timeout)
       {
           this._timeout = timeout;
           this._isTimeout = false;
       }

       /// <summary>
       /// Finds all matches of <paramref name="regex"/> in
       /// <paramref name="input"/>, giving up after the configured timeout.
       /// </summary>
       /// <param name="regex">The compiled expression to apply.</param>
       /// <param name="input">The text to search.</param>
       /// <returns>
       /// The matches, or <c>null</c> when there are no matches or the
       /// operation timed out (check <see cref="IsTimeout"/> to tell the two
       /// apart).
       /// </returns>
       /// <exception cref="ArgumentNullException">
       /// <paramref name="regex"/> or <paramref name="input"/> is null.
       /// </exception>
       public MatchCollection Matchs(Regex regex, string input)
       {
           // Fail on the calling thread: an exception escaping the worker
           // thread would be unhandled and terminate the process.
           if (regex == null)
           {
               throw new ArgumentNullException("regex");
           }
           if (input == null)
           {
               throw new ArgumentNullException("input");
           }

           // Start every call from a clean slate so a reused instance never
           // reports the previous call's timeout flag or result.
           this._isTimeout = false;

           // The result lives in a per-call worker object, so a worker left
           // over from an earlier timed-out call cannot overwrite it.
           Reg worker = new Reg(regex, input);
           Thread t = new Thread(new ThreadStart(worker.Matchs));
           // Do not let a worker that is slow to abort keep the process alive.
           t.IsBackground = true;
           t.Start();

           if (!this.WaitForCompletion(t))
           {
               this._isTimeout = true;
               return null;
           }

           return worker.Result;
       }

       /// <summary>
       /// Waits for the worker to finish, aborting it once the time budget
       /// has been used up.
       /// </summary>
       /// <param name="t">The started worker thread.</param>
       /// <returns>
       /// <c>true</c> if the worker finished by itself; <c>false</c> if it
       /// was aborted.
       /// </returns>
       private bool WaitForCompletion(Thread t)
       {
           // long avoids overflow when the timeout is close to int.MaxValue.
           long waited = 0;

           // Join returns as soon as the worker ends, so a fast match is not
           // delayed by a full polling interval.
           while (!t.Join(SleepInterval))
           {
               waited += SleepInterval;
               if (waited >= this._timeout)
               {
                   t.Abort();
                   return false;
               }
           }

           return true;
       }

       /// <summary>
       /// Holds the arguments and the result of one match operation; its
       /// <see cref="Matchs"/> method is the worker thread's entry point.
       /// </summary>
       private class Reg
       {
           private readonly Regex _regex;
           private readonly string _input;
           private MatchCollection _result;

           public Reg(Regex regex, string input)
           {
               this._regex = regex;
               this._input = input;
           }

           /// <summary>
           /// Gets the matches found, or <c>null</c> if there were none or
           /// the worker did not run to completion.
           /// </summary>
           internal MatchCollection Result
           {
               get { return this._result; }
           }

           /// <summary>
           /// Performs the match and records the result when it is non-empty.
           /// </summary>
           internal void Matchs()
           {
               MatchCollection found = this._regex.Matches(this._input);

               // MatchCollection is populated lazily; reading Count forces the
               // whole (potentially very expensive) scan to run here, on the
               // worker thread, where it can be aborted.
               if (found != null && found.Count > 0)
               {
                   this._result = found;
               }
           }
       }
   }
  }

posted on 2010-12-24 11:07  Ron  阅读(747)  评论(0编辑  收藏  举报

导航