C# 爬虫总结

/// <summary>
/// Minimal crawler demo: downloads the lagou.com home page, prints every
/// anchor element whose opening tag mentions "jobs", then prints how many
/// such anchors were found and waits for a key press.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    // --- Earlier experiments, kept for reference -------------------------
    // Approach 1: fetch a page with WebRequest and pull the first URL out of it.
    //WebRequest request = WebRequest.Create("http://www.cnblogs.com/mr-wuxiansheng/p/8353615.html");
    //WebResponse response = (WebResponse)request.GetResponse();
    //Stream dataStream = response.GetResponseStream();
    //StreamReader reader = new StreamReader(dataStream, Encoding.Default);
    //string text = reader.ReadToEnd();
    //Regex reg = new Regex(@"http(s)?://([\w-]+\.)+[\w-]+/?"); // pattern for the hyperlinks on the page
    //string wangzhanyuming = reg.Match(text, 0).Value;
    //string[] strarr = text.Split('w');
    //Random r = new Random();
    //int ss = r.Next(0, strarr.Count());
    //string name = strarr[ss].ToString();
    //Console.WriteLine(text);
    // Approach 2: regex groups/captures on a fixed sample string.
    //string text = "1A 2B 3C 4D 5E 6F 7G 8H 9I 10J 11Q 12J 13K 14L 15M 16N ffee80 #800080";
    //Regex rgx = new Regex(@"((\d+)([a-z]))\s+", RegexOptions.IgnoreCase);
    //MatchCollection mm = rgx.Matches(text);
    //string x = mm[5].Groups[2].Captures[0].Value; // x is group 2 of the sixth match, i.e. "6"
    //Console.ReadKey();
    // ----------------------------------------------------------------------

    string html;

    // WebClient is IDisposable; dispose it as soon as the download finishes.
    using (WebClient wc = new WebClient())
    {
        wc.Encoding = Encoding.UTF8;
        html = wc.DownloadString("http://www.lagou.com/");
    }

    // [^>]* keeps the "jobs" search inside a single opening <a ...> tag, and the
    // lazy (.*?) stops at the first </a>. The fully greedy form would swallow
    // every anchor on a line into one match and under-count the links.
    MatchCollection matches = Regex.Matches(html, @"<a\b[^>]*jobs[^>]*>(.*?)</a>");

    foreach (Match item in matches)
    {
        // Groups[0] is the whole matched anchor element, tag included.
        Console.WriteLine(item.Groups[0].Value);
    }

    Console.WriteLine(matches.Count);
    Console.ReadKey();
}

 

posted @ 2018-02-01 12:50  ProZkb  阅读(314)  评论(0)  编辑  收藏  举报