邮箱正则匹配、获取网页内容

 1         /// <summary>
 2         /// 取得内容中所有匹配的 Email地址。
 3         /// </summary>
 4         /// <param name="sHtmlText">内容</param>
 5         /// <returns>匹配列表</returns>
 6         public static string[] GetContentList(string sHtmlText)
 7         {
 8             //string pattern = @"<div class=[^<>]+>(?<imgUrl>[^<>]+)</div>";
 9             //邮箱匹配正则
10             string pattern = @"(?<Email>(?("")(""[^""]+?""@)|(([0-9a-z]((\.(?!\.))|[-!#\$%&'\*\+/=\?\^`\{\}\|~\w])*)(?<=[0-9a-z])@))(?(\[)(\[(\d{1,3}\.){3}\d{1,3}\])|(([0-9a-z][-\w]*[0-9a-z]*\.)+[a-z0-9]{2,17})))";
11             Regex regImg = new Regex(pattern, RegexOptions.IgnoreCase);
12             // 搜索匹配的字符串            
13             MatchCollection matches = regImg.Matches(sHtmlText);
14             int i = 0;
15             string[] sUrlList = new string[matches.Count];
16             // 取得匹配项列表
17             foreach (Match match in matches)
18                 sUrlList[i++] = match.Groups["Email"].Value;
19             return sUrlList;
20         }
        /// <summary>
        /// Requests the given URL and returns the whole response body as text,
        /// read through a <see cref="StreamReader"/> created with the UTF-8 encoding.
        /// </summary>
        /// <param name="url">Address of the page to fetch.</param>
        /// <returns>The complete response body.</returns>
        private string GetWebRequest(string url)
        {
            WebRequest request = WebRequest.Create(url);

            // Dispose the response, its stream and the reader deterministically so they
            // are released even when reading throws part-way through.
            using (WebResponse response = request.GetResponse())
            using (Stream responseStream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(responseStream, System.Text.Encoding.GetEncoding("UTF-8")))
            {
                return reader.ReadToEnd();
            }
        }

 

posted @ 2013-04-07 10:48  寂小魔  阅读(235)  评论(0)  编辑  收藏  举报