代码改变世界

抓取页面超链接

2012-08-15 18:04  C#与.NET探索者  阅读(259)  评论(0)  编辑  收藏  举报

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Net;
using System.Text.RegularExpressions;

namespace 抓取页面超链接
{
    /// <summary>
    /// Console program that downloads a single HTML page and prints every
    /// hyperlink element (<c>&lt;a ... href="..."&gt;...&lt;/a&gt;</c>) found in it,
    /// each followed by a separator line.
    /// </summary>
    internal class Program
    {
        /// <summary>Address of the page whose hyperlinks are listed.</summary>
        private const string PageUrl = "http://localhost:8080/test1.htm";

        /// <summary>Line printed after each hyperlink to separate the entries.</summary>
        private const string Separator = "========================================";

        /// <summary>
        /// Matches one complete anchor element carrying a double-quoted, non-empty href.
        /// </summary>
        /// <remarks>
        /// <c>\b</c> after the tag name keeps tags such as <c>&lt;abbr&gt;</c> or
        /// <c>&lt;area&gt;</c> from being treated as anchors, and <c>[^&gt;]*</c> keeps the
        /// attribute search inside the opening tag so that an anchor without href cannot
        /// be merged with a later one. <see cref="RegexOptions.Singleline"/> lets the
        /// link content span several lines; <c>.*?</c> allows an empty link body.
        /// </remarks>
        private static readonly Regex AnchorPattern = new Regex(
            @"<a\b[^>]*?\shref\s*=\s*""[^""]+""[^>]*>.*?</a\s*>",
            RegexOptions.IgnoreCase | RegexOptions.Singleline);

        /// <summary>
        /// Downloads <see cref="PageUrl"/> as UTF-8 text, prints every hyperlink found
        /// in it, then waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        private static void Main(string[] args)
        {
            string html;

            // WebClient is IDisposable: release it as soon as the download is done.
            using (WebClient client = new WebClient())
            {
                client.Encoding = Encoding.UTF8;
                html = client.DownloadString(PageUrl);
            }

            // Extract the hyperlinks with a regular expression and print them.
            foreach (string link in ExtractLinks(html))
            {
                Console.WriteLine(link);
                Console.WriteLine(Separator);
            }

            // Keep the console window open until a key is pressed.
            Console.ReadKey();
        }

        /// <summary>
        /// Returns the full text of every anchor element in <paramref name="html"/>
        /// that matches <see cref="AnchorPattern"/>, in document order.
        /// </summary>
        /// <param name="html">HTML markup to scan; may be null or empty.</param>
        /// <returns>The matched anchor elements; empty when there are none.</returns>
        private static List<string> ExtractLinks(string html)
        {
            List<string> links = new List<string>();
            if (string.IsNullOrEmpty(html))
            {
                return links;
            }

            // Every item of a MatchCollection is a successful match, so no
            // per-item Success check is needed.
            foreach (Match match in AnchorPattern.Matches(html))
            {
                links.Add(match.Value);
            }

            return links;
        }
    }
}