C#简单爬虫爬取图片并保存本地
/// <summary>
/// Entry point. Fetches each listing page, extracts the image URLs that match
/// the pattern below and downloads every one of them via <c>SaveAsWebImg</c>.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Number of listing pages to crawl.
    var page = 1;

    // Pattern for the image URLs. The dots of the host name and the one in
    // front of the extension are escaped so that they only match a literal
    // '.'; otherwise the lazy ".+?" could stop early on text such as "xgif"
    // inside a file name and yield a truncated URL.
    string regstr = @"http://wx[1-4]\.sinaimg\.cn/bmiddle/.+?\.[jg][pi][fg]";

    for (int i = 1; i <= page; i++)
    {
        // Fetch the HTML of the current listing page.
        string str = GetHtmlStr($"https://fabiaoqing.com/biaoqing/lists/page/{i}.html", "UTF8");

        // Every regex match is one image address; download each of them.
        foreach (Match match in Regex.Matches(str, regstr))
        {
            SaveAsWebImg(match.Value);
        }
    }

    // Print the prompt first, then wait for the key press it asks for.
    Console.WriteLine("已执行结束,按任意键退出!");
    Console.ReadKey();
}
/// <summary>
/// Downloads the resource at <paramref name="url"/> and returns its body as text.
/// </summary>
/// <param name="url">Address to request. When null or empty no request is made.</param>
/// <param name="encoding">
/// "UTF8" decodes the response as UTF-8; any other value (including "Default")
/// decodes it with <see cref="Encoding.Default"/>.
/// </param>
/// <returns>
/// The decoded response body, or an empty string when <paramref name="url"/>
/// is null or empty.
/// </returns>
/// <remarks>
/// Nothing is caught here: exceptions thrown while creating the request or
/// reading the response propagate to the caller.
/// </remarks>
public static string GetHtmlStr(string url, string encoding)
{
    if (String.IsNullOrEmpty(url))
    {
        return "";
    }

    Encoding ec = encoding == "UTF8" ? Encoding.UTF8 : Encoding.Default;

    WebRequest request = WebRequest.Create(url);

    // using-statements guarantee that the response, the stream and the reader
    // are released even when reading fails part-way through.
    using (WebResponse response = request.GetResponse())
    using (Stream datastream = response.GetResponseStream())
    using (StreamReader reader = new StreamReader(datastream, ec))
    {
        return reader.ReadToEnd();
    }
}
// Shared generator for the random file-name suffix. Creating a new Random on
// every call can hand out the same "random" number to calls made in quick
// succession when the default seed is time-based, which made images
// overwrite one another.
private static readonly Random s_fileNameRandom = new Random();

/// <summary>
/// Downloads the image at <paramref name="picUrl"/> into the "File" folder
/// below the application base directory.
/// </summary>
/// <param name="picUrl">Address of the image. Null or empty means "nothing to download".</param>
/// <returns>
/// The generated file name (without directory) when the download succeeded;
/// an empty string when <paramref name="picUrl"/> is null or empty or when
/// the download failed (the failure message is written to the console).
/// </returns>
public static string SaveAsWebImg(string picUrl)
{
    string result = "";

    // Target directory; CreateDirectory does nothing when it already exists.
    string path = AppDomain.CurrentDomain.SetupInformation.ApplicationBase + @"/File/";
    Directory.CreateDirectory(path);

    try
    {
        if (!String.IsNullOrEmpty(picUrl))
        {
            DateTime nowTime = DateTime.Now;

            // Keep the extension of the remote file.
            string extension = Path.GetExtension(picUrl);

            // Time-stamp prefix: month, day, hour, minute, second (unpadded).
            string stamp = nowTime.Month.ToString() + nowTime.Day.ToString() + nowTime.Hour.ToString()
                + nowTime.Minute.ToString() + nowTime.Second.ToString();

            // Draw suffixes until the name is not taken yet, so that an
            // already saved image is never overwritten.
            string fileName;
            do
            {
                int suffix;
                lock (s_fileNameRandom) // Random instances are not thread-safe
                {
                    suffix = s_fileNameRandom.Next(1000, 1000000);
                }
                fileName = stamp + suffix + extension;
            }
            while (File.Exists(path + fileName));

            // Dispose the client once the download has finished or failed.
            using (WebClient webClient = new WebClient())
            {
                webClient.DownloadFile(picUrl, path + fileName);
            }

            result = fileName;
        }
    }
    catch (Exception ex)
    {
        // Best effort: report the problem and carry on with an empty result.
        Console.WriteLine(ex.Message);
    }

    return result;
}