简单的C#爬虫

Posted on 2017-10-26 14:35  #大囚长#  阅读(195)  评论(0)  编辑  收藏  举报
using System;  
using System.Collections.Generic;  
using System.IO;  
using System.Linq;  
using System.Net;  
using System.Text;  
using System.Text.RegularExpressions;  
using System.Threading.Tasks;  
  
namespace _2015._5._23通过WebClient类发起请求并下载html  
{  
    /// <summary>
    /// Console demo that downloads a web page with <see cref="WebClient"/> and prints
    /// the inner text of every anchor whose opening tag mentions "jobs".
    /// </summary>
    class Program
    {
        /// <summary>
        /// Matches a single anchor element whose opening tag contains "jobs".
        /// Group 1 is the markup between the opening tag and the first following closing tag.
        /// </summary>
        /// <remarks>
        /// <c>[^>]*</c> keeps the "jobs" test inside the opening tag, and the lazy <c>(.*?)</c>
        /// stops at the first <c>&lt;/a&gt;</c>, so several links on one line yield several
        /// matches instead of being merged into one. <c>\b</c> prevents matching other tags
        /// that merely start with "a" (for example <c>&lt;abbr&gt;</c>).
        /// </remarks>
        private static readonly Regex JobLinkPattern = new Regex("<a\\b[^>]*jobs[^>]*>(.*?)</a>");

        /// <summary>
        /// Downloads the page, prints group 1 of every match followed by the match count,
        /// then waits for a key press before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            #region Scrape e-mail addresses from a web page
            //string url = "http://zhidao.baidu.com/link?url=cvF0de2o9gkmk3zW2jY23TLEUs6wX-79E1DQVZG7qaBhEVT_xlh6TO7p0W4qwuAZ_InLymC_-mJBBcpdbzTeq_";
            //WebClient wc = new WebClient();
            //wc.Encoding = Encoding.UTF8;
            //string str = wc.DownloadString(url);
            //MatchCollection matchs=  Regex.Matches(str,@"\w+@([-\w])+([\.\w])+",RegexOptions.ECMAScript);
            //foreach (Match item in matchs)
            //{
            //    Console.WriteLine(item.Value);
            //}
            //Console.WriteLine(matchs.Count);
            #endregion

            #region Scrape images from a web page

            //WebClient wc = new WebClient();
            //wc.Encoding = Encoding.UTF8;
            //// Download the page's HTML source
            //string html = wc.DownloadString("http://dongxi.douban.com/?dcs=top-nav&dcm=douban");
            //MatchCollection matches= Regex.Matches(html,"<img.*src=\"(.+?)\".*>");
            //foreach (Match item in matches)
            //{
            //    // Download the image to the target directory
            //    wc.DownloadFile(item.Groups[1].Value,@"c:\mv\"+Path.GetFileName(item.Groups[1].Value));
            //}
            //Console.WriteLine(matches.Count);

            #endregion

            #region Scrape some information

            string html;

            // WebClient is IDisposable; release it as soon as the page has been fetched.
            using (WebClient wc = new WebClient())
            {
                // Decode the response body as UTF-8.
                wc.Encoding = Encoding.UTF8;
                html = wc.DownloadString("http://www.lagou.com/");
            }

            MatchCollection matches = JobLinkPattern.Matches(html);
            foreach (Match item in matches)
            {
                // Group 1 holds whatever sits between <a ...> and </a>.
                Console.WriteLine(item.Groups[1].Value);
            }
            Console.WriteLine(matches.Count);

            #endregion

            // Keep the console window open until the user presses a key.
            Console.ReadKey();
        }
    }
}