这里是我的空间,是我用来记录点滴的沃土...

抓取网页Email地址

 public class GetWebEmail
    {
        //抓取网页源代码
        public static List<string> GetHtmlAndEmail(string url)
        {
            //抓取网页内容
            string ContentHtml = String.Empty;

            HttpWebRequest httpWebRequest = null;
            HttpWebResponse httpWebResponse = null;
            Stream stream = null;
            StreamReader sr = null;

            httpWebRequest = (HttpWebRequest)WebRequest.Create(url);
            httpWebResponse = (HttpWebResponse)httpWebRequest.GetResponse();
            stream = httpWebResponse.GetResponseStream();
            Encoding encoding = Encoding.Default;
            sr = new StreamReader(stream, encoding);
            ContentHtml = sr.ReadToEnd();

            //将读取出来的全部URL写入文本文件 
            string fileName = HttpContext.Current.Server.MapPath(@"~/temp/EmailText.txt");//创建文本文档
            StreamWriter sw = File.AppendText(fileName);//创建写入流,这里是以追加的模式就行的


            //用正则表达式识别Email地址
            Regex EmailRegex = new Regex(@"\w+([-+.']\w+)*@\w+([-.]\w+)*\.\w+([-.]\w+)*", RegexOptions.IgnoreCase | RegexOptions.Compiled);
            MatchCollection matches = EmailRegex.Matches(ContentHtml);
            List<string> list = new List<string>();
            foreach (Match match in matches)
            {
                list.Add(match.Value.ToString());  //将数据添加到list
                sw.WriteLine(match.Value.ToString());//将数据写入文件
            }
           

            sw.Close();
            sr.Close();
            stream.Close();
            httpWebResponse.Close();
            return list;
        }
    }

//今天突然想起来实现抓取网页中的Email地址,这样可以去收集网络中的地址去打些广告,不过这不是我的初衷,我只是突发奇想,想实现这个功能罢了。以上类是我实现的抓取网页Email地址的方法,不过还没有将发送邮件的程序和该程序做联系。以前写过发送邮件的程序,有兴趣可以在我空间的日志中去查找!有不对的地方请高手指出,共同提高……

posted @ 2011-05-06 17:32  wsx2miao  Views(1097)  Comments(0)  Edit  收藏  举报
这里是我的空间,是我用来记录点滴的沃土...