20131127-正则表达式
[1]从网页上下载图片
namespace 下载图片
{
    /// <summary>
    /// Console sample that downloads every image referenced by a web page.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Downloads the gallery page, extracts the <c>src</c> value of each
        /// matching <c>&lt;img&gt;</c> tag and saves every image into the local
        /// target folder, then prints "OK" and waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            string webAddress = "http://localhost:8080/美女图片/美女们.htm";
            // The capture uses [^"]+ rather than .+ so it stops at the closing
            // quote of src; a greedy .+ would swallow everything up to the LAST
            // '" />' on the line when several <img> tags share one line.
            string strRegex = " <img alt=\"\" src=\"([^\"]+)\" />";
            string path = "E:\\test\\";

            // Make sure the target folder exists before the first file is
            // written (no-op when it is already there).
            Directory.CreateDirectory(path);

            // Image links are resolved relative to the page they came from, so
            // the site address is only written down once.
            Uri pageUri = new Uri(webAddress);

            // WebClient is IDisposable; release it once all downloads are done.
            using (WebClient wc = new WebClient())
            {
                wc.Encoding = Encoding.Default;

                MatchCollection mat = GetWebInfo(wc, webAddress, strRegex);

                // A MatchCollection only holds successful matches, so no
                // per-item Success check is required.
                foreach (Match item in mat)
                {
                    string imgSrc = item.Groups[1].Value;

                    // Build the real network address of the image.
                    Uri realImgAddress = new Uri(pageUri, imgSrc);
                    string localpath = Path.Combine(path, Path.GetFileName(imgSrc));

                    wc.DownloadFile(realImgAddress, localpath);
                }
            }

            Console.WriteLine("OK");
            Console.ReadKey();
        }

        /// <summary>
        /// Downloads a page as text and returns all matches of a regular
        /// expression found in it.
        /// </summary>
        /// <param name="wc">Client used to download the page; its
        /// <c>Encoding</c> determines how the response is decoded.</param>
        /// <param name="webAddress">Address of the page to download.</param>
        /// <param name="strRegex">Regular expression pattern to apply to the
        /// downloaded HTML.</param>
        /// <returns>The matches of <paramref name="strRegex"/> in the page's
        /// HTML.</returns>
        public static MatchCollection GetWebInfo(WebClient wc, string webAddress, string strRegex)
        {
            // Fetch the HTML of the page that contains the images.
            string html = wc.DownloadString(webAddress);

            // Match the image link addresses inside the page's tags.
            return Regex.Matches(html, strRegex);
        }
    }
}
[2]从网页提取邮箱
namespace 网页提取邮箱2
{
    /// <summary>
    /// Console sample that extracts e-mail addresses from a web page.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Downloads the page, prints every e-mail address found together with
        /// its user-name part (group 1) and domain part (group 2), prints the
        /// total number of matches and waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            string webAddress = "http://localhost:8080/提取Email.htm";
            // Group 1 = user name, group 2 = domain (one or two dotted suffixes).
            string strRegex = @"([0-9a-zA-Z_.-]+)@([0-9a-zA-Z-]+(\.[a-zA-Z]+){1,2})";

            MatchCollection mat;

            // WebClient is IDisposable; release it as soon as the page has been
            // downloaded and matched.
            using (WebClient wc = new WebClient())
            {
                wc.Encoding = Encoding.Default;
                mat = GetWebInfo(wc, webAddress, strRegex);
            }

            // A MatchCollection only holds successful matches, so no per-item
            // Success check is required.
            foreach (Match item in mat)
            {
                Console.WriteLine(
                    "{0}===用户名为:{1}===域名为:{2}",
                    item.Value,
                    item.Groups[1].Value,
                    item.Groups[2].Value);
            }

            Console.WriteLine("一共有{0}个", mat.Count);
            Console.ReadKey();
        }

        /// <summary>
        /// Downloads a page as text and returns all matches of a regular
        /// expression found in it.
        /// </summary>
        /// <param name="wc">Client used to download the page; its
        /// <c>Encoding</c> determines how the response is decoded.</param>
        /// <param name="webAddress">Address of the page to download.</param>
        /// <param name="strRegex">Regular expression pattern to apply to the
        /// downloaded HTML.</param>
        /// <returns>The matches of <paramref name="strRegex"/> in the page's
        /// HTML.</returns>
        public static MatchCollection GetWebInfo(WebClient wc, string webAddress, string strRegex)
        {
            string html = wc.DownloadString(webAddress);
            return Regex.Matches(html, strRegex);
        }
    }
}