邮箱正则匹配、获取网页内容
/// <summary>
/// Pattern used to locate email addresses inside arbitrary text.
/// The pattern is not anchored, so it finds addresses embedded in a larger string.
/// The local part may be a quoted string or a run of letters, digits and the
/// listed punctuation; the domain may be a bracketed dotted-quad literal or
/// dot-separated labels. Matching ignores case.
/// Built once and reused, instead of being re-parsed on every call.
/// </summary>
private static readonly Regex EmailRegex = new Regex(
    @"(?<Email>(?("")(""[^""]+?""@)|(([0-9a-z]((\.(?!\.))|[-!#\$%&'\*\+/=\?\^`\{\}\|~\w])*)(?<=[0-9a-z])@))(?(\[)(\[(\d{1,3}\.){3}\d{1,3}\])|(([0-9a-z][-\w]*[0-9a-z]*\.)+[a-z0-9]{2,17})))",
    RegexOptions.IgnoreCase);

/// <summary>
/// Gets every email address found in the supplied content.
/// </summary>
/// <param name="sHtmlText">Content to search (for example, the text of a web page).</param>
/// <returns>
/// The matched addresses in order of appearance. Duplicates are kept.
/// The array is empty when nothing matches.
/// </returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="sHtmlText"/> is null.
/// </exception>
public static string[] GetContentList(string sHtmlText)
{
    // Fail with the caller's parameter name rather than the regex engine's internal one.
    if (sHtmlText == null)
    {
        throw new System.ArgumentNullException("sHtmlText");
    }

    MatchCollection matches = EmailRegex.Matches(sHtmlText);

    // Copy the named capture of each match into the result array.
    string[] emails = new string[matches.Count];
    for (int i = 0; i < emails.Length; i++)
    {
        emails[i] = matches[i].Groups["Email"].Value;
    }

    return emails;
}
/// <summary>
/// Requests the given URL and returns the full response body as text.
/// </summary>
/// <param name="url">Address of the resource to fetch.</param>
/// <returns>The response body, decoded as UTF-8.</returns>
private string GetWebRequest(string url)
{
    WebRequest request = WebRequest.Create(url);

    // Dispose the response, its stream and the reader even when reading throws;
    // otherwise the underlying connection is leaked.
    using (WebResponse response = request.GetResponse())
    using (Stream body = response.GetResponseStream())
    using (StreamReader reader = new StreamReader(body, System.Text.Encoding.UTF8))
    {
        return reader.ReadToEnd();
    }
}