C#收集网页中的EMail实现源码:
//Call
// Entry point for one page: starts e-mail extraction for urlStr on a new thread.
// The rest of the body (link handling) is elided in this listing.
private void GetAllURL(string urlStr)
{
// Run GetEmailAddress on its own thread; urlStr is handed over as the thread's start argument.
new Thread(new ParameterizedThreadStart(GetEmailAddress)).Start(urlStr);
... //process the links found in the page
}
/// <summary>
/// Extracts the e-mail addresses found in a web page and appends each one,
/// followed by a line break, through <c>AppendText</c> via <c>Invoke</c>.
/// </summary>
/// <param name="urlStr">Address of the web page; must be a <see cref="string"/> (it is cast).</param>
private void GetEmailAddress(object urlStr)
{
    // The named group "EmailStr" is what GetWebInfo pulls out of every match.
    const string emailPattern = @"(?<EmailStr>\b[A-Z0-9._%-]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)";

    string pageUrl = (string)urlStr;
    ArrayList addresses = GetWebInfo(pageUrl, emailPattern);

    // One delegate instance is enough; it is invoked once per address found.
    AppendTextDelegate appender = new AppendTextDelegate(AppendText);
    for (int i = 0; i < addresses.Count; i++)
    {
        string line = addresses[i] + "\r\n";
        Invoke(appender, new object[] { line });
    }
}
/// <summary>
/// Downloads the page at <paramref name="URlStr"/> with an HTTP GET and returns the
/// trimmed text captured by the first named group of <paramref name="RegExpress"/>
/// for every match in the page text.
/// </summary>
/// <param name="URlStr">Address of the page to download.</param>
/// <param name="RegExpress">
/// Regular expression, matched case-insensitively. The name of the first
/// <c>(?&lt;name&gt;...)</c> group selects what is extracted; if the pattern has no
/// such group, the whole match is returned instead.
/// </param>
/// <returns>An <see cref="ArrayList"/> of strings, one per match (empty when nothing matches).</returns>
private ArrayList GetWebInfo(string URlStr,string RegExpress)
{
    // Open the requested page.
    HttpWebRequest webRequest1 = (HttpWebRequest)WebRequest.Create(new Uri(URlStr));
    webRequest1.Method = "GET";

    // Dispose the response and the reader deterministically so the underlying
    // connection and stream are released even if reading throws.
    string textData;
    using (HttpWebResponse response = (HttpWebResponse)webRequest1.GetResponse())
    using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.Default))
    {
        textData = reader.ReadToEnd();
    }

    Regex r = new Regex(RegExpress, RegexOptions.IgnoreCase | RegexOptions.Compiled);

    // Find the name of the first named group: the text between "(?<" and the next ">".
    // Ordinal search: these are regex syntax characters, not linguistic text.
    string DestionKey = null;
    int pos1 = RegExpress.IndexOf("(?<", StringComparison.Ordinal);
    if (pos1 >= 0)
    {
        int pos2 = RegExpress.IndexOf(">", pos1, StringComparison.Ordinal);
        if (pos2 >= 0)
        {
            DestionKey = RegExpress.Substring(pos1 + 3, pos2 - pos1 - 3);
        }
    }

    ArrayList Result = new ArrayList();
    for (Match m = r.Match(textData); m.Success; m = m.NextMatch())
    {
        // Without a named group there is nothing to select, so fall back to the whole match.
        string captured = DestionKey == null
            ? m.Value
            : m.Result("${" + DestionKey + "}");
        Result.Add(captured.Trim());
    }
    return Result;
}
上述代码中的关键是书写提取EMail的表达式:
@"(?<EmailStr>\b[A-Z0-9._%-]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)"
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步