抓取网页并用正则表达式匹配邮箱地址
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;

namespace _07正则_匹配邮箱
{
    /// <summary>
    /// Console sample: downloads a fixed list of web pages and collects the
    /// e-mail addresses found in them with a regular expression.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Matches an address whose separator is written as "@", "#" or "(at)",
        /// e.g. zhangsan@163.com, zhangsan#163.com, zhangsan(at)163.com.
        /// The local part is limited to letters, digits and '_' and the domain
        /// labels to letters and digits, so "a.b@c.com" is captured as "b@c.com".
        /// Built once and reused because <see cref="Regex"/> instances are
        /// immutable and safe to share.
        /// </summary>
        private static readonly Regex MailRegex = new Regex(
            @"(?<local>[a-zA-Z0-9_]+)(?:@|#|\(at\))(?<domain>[a-zA-Z0-9]+(?:\.[a-zA-Z0-9]+)+)");

        /// <summary>
        /// Entry point: scans every configured page, prints the distinct
        /// addresses that were found and waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            List<Uri> listUrl = new List<Uri>()
            {
                new Uri("http://gb.corp.163.com/gb/contactus.html"),
                new Uri("https://passport.csdn.net/help/faq"),
                new Uri("http://www.kuaipan.cn/"),
                new Uri("http://www.ksyun.com/home/joinUs/campus"),
                new Uri("http://www.cnblogs.com/about/ad.aspx"),
                new Uri("http://www.cnblogs.com/about/contactus.aspx"),
                new Uri("http://www.csdn.net/company/statement.html"),
                new Uri("http://hb.qq.com/job/dczp/index.htm")
            };

            List<string> listMail = new List<string>();
            foreach (Uri ur in listUrl)
            {
                GetMails(ur, listMail);
            }

            cw(listMail);
            Console.ReadKey();
        }

        /// <summary>
        /// Downloads the page at <paramref name="uri"/> and appends every address
        /// found in it to <paramref name="list"/>, skipping ones already present.
        /// Any failure (network error, bad argument, ...) is reported on the
        /// console and swallowed so the remaining pages are still processed.
        /// </summary>
        /// <param name="uri">Address of the page to download.</param>
        /// <param name="list">Accumulator that receives the normalised addresses.</param>
        private static void GetMails(Uri uri, List<string> list)
        {
            try
            {
                string input;

                // WebClient, the response stream and the reader all hold
                // unmanaged resources; dispose them deterministically.
                using (WebClient wc = new WebClient())
                {
                    Console.WriteLine("创建WebClient - [{0}]", uri.ToString());
                    using (Stream stream = wc.OpenRead(uri))
                    using (StreamReader reader = new StreamReader(stream, Encoding.Default))
                    {
                        input = reader.ReadToEnd();
                    }
                }

                int matchCount = ExtractMails(input, list);

                // Prints True/False: whether the page contained any address.
                Console.WriteLine(matchCount > 0);
            }
            catch (Exception e)
            {
                // Best effort: one unreachable page must not abort the whole run.
                Console.WriteLine(e.Message);
            }
        }

        /// <summary>
        /// Finds every address in <paramref name="input"/>, rewrites the "#" and
        /// "(at)" separators to "@" and appends each address not yet contained
        /// in <paramref name="list"/> (ordinal, case-sensitive comparison).
        /// </summary>
        /// <param name="input">Text to scan; null or empty yields no matches.</param>
        /// <param name="list">Accumulator that receives the normalised addresses.</param>
        /// <returns>
        /// Number of address occurrences matched in <paramref name="input"/>,
        /// counting duplicates.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="list"/> is null.</exception>
        internal static int ExtractMails(string input, List<string> list)
        {
            if (list == null)
            {
                throw new ArgumentNullException("list");
            }

            if (string.IsNullOrEmpty(input))
            {
                return 0;
            }

            // Seed a set from the existing entries so each duplicate check is
            // O(1) instead of a linear List.Contains scan per match.
            HashSet<string> seen = new HashSet<string>(list);

            MatchCollection matches = MailRegex.Matches(input);
            foreach (Match match in matches)
            {
                // Rebuild with '@' so zhangsan#163.com and zhangsan(at)163.com
                // both become zhangsan@163.com.
                string mail = match.Groups["local"].Value + "@" + match.Groups["domain"].Value;
                if (seen.Add(mail))
                {
                    list.Add(mail);
                }
            }

            return matches.Count;
        }

        /// <summary>
        /// Writes the number of entries, then each entry prefixed with its
        /// 1-based position, then a separator line.
        /// </summary>
        /// <param name="list">Entries to print.</param>
        static void cw(List<string> list)
        {
            Console.WriteLine("长度为{0}", list.Count);

            int i = 0;
            foreach (string str in list)
            {
                i++;
                Console.WriteLine("{0} - [{1}]", i, str);
            }

            Console.WriteLine("______________________");
        }
    }
}
如果你觉得这篇文章对你有帮助或者使你有所启发,请点击右下角的推荐按钮,谢谢,:)