C# 爬取图片
网络收集整理 爬取图片
引用AngleSharp NuGet 包
using AngleSharp;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;

namespace CoreConsoleApp
{
    /// <summary>
    /// Console crawler: loads a series of listing pages with AngleSharp, finds the
    /// <c>img</c> inside every <c>.panel-body li</c> element and saves each image
    /// into a per-day, per-page folder on the local disk.
    /// </summary>
    public class Program
    {
        /// <summary>Root folder under which all downloaded images are stored.</summary>
        private const string ImageRoot = @"D:\Image";

        /// <summary>
        /// Remote length that <see cref="Download"/> treats as "remote file does not exist".
        /// NOTE(review): presumably the byte size of the target site's "not found"
        /// placeholder response - confirm against the real site.
        /// </summary>
        private const long MissingFileLength = 745;

        /// <summary>
        /// Entry point: crawls pages 1..49 and downloads every listed image.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        public static void Main(string[] args)
        {
            // Configuration with the default loader so documents can be fetched over HTTP.
            var config = Configuration.Default.WithDefaultLoader();
            int pageIndex = 50;

            // Invariant culture keeps the folder name stable regardless of the OS calendar.
            string dateFolder = DateTime.Now.ToString(
                "yyyyMMdd", System.Globalization.CultureInfo.InvariantCulture);

            // One client for the whole run, disposed deterministically.
            using (WebClient mywebclient = new WebClient())
            {
                for (int i = 1; i < pageIndex; i++)
                {
                    var address = @"https://www.qwe.com?pageIndex=" + i;
                    Uri pageUri = new Uri(address);

                    // Request the page. GetAwaiter().GetResult() surfaces the original
                    // exception instead of an AggregateException.
                    var document = BrowsingContext.New(config)
                        .OpenAsync(address)
                        .GetAwaiter()
                        .GetResult();

                    // Select the list items that hold the images.
                    var cells = document.QuerySelectorAll(".panel-body li");

                    // The page title becomes a folder name, so strip characters that
                    // are not allowed in file-system names.
                    string folderName = SanitizeFileName(i + " - " + document.Title);
                    string localPath = Path.Combine(ImageRoot, dateFolder, folderName);

                    foreach (var item in cells)
                    {
                        var image = item.QuerySelector("img");
                        string imageUrl = image == null ? null : image.GetAttribute("src");
                        if (string.IsNullOrWhiteSpace(imageUrl))
                        {
                            continue; // list item without a usable image
                        }

                        // Resolve relative sources against the page URL and skip anything
                        // that is not plain HTTP(S) (e.g. inline "data:" images).
                        Uri imageUri;
                        if (!Uri.TryCreate(pageUri, imageUrl, out imageUri)
                            || (imageUri.Scheme != Uri.UriSchemeHttp
                                && imageUri.Scheme != Uri.UriSchemeHttps))
                        {
                            continue;
                        }

                        // LocalPath excludes the query string, which would otherwise end
                        // up inside the file name.
                        string imageName = SanitizeFileName(Path.GetFileName(imageUri.LocalPath));
                        if (imageName.Length == 0)
                        {
                            continue; // URL does not end in a file name
                        }

                        // CreateDirectory is a no-op when the directory already exists.
                        Directory.CreateDirectory(localPath);
                        string filepath = Path.Combine(localPath, imageName);

                        try
                        {
                            // Download(imageUri.AbsoluteUri, filepath) is the resumable alternative.
                            mywebclient.DownloadFile(imageUri, filepath);
                        }
                        catch (WebException ex)
                        {
                            // One broken image must not abort the whole crawl.
                            Console.WriteLine("下载失败: " + imageUri + " (" + ex.Message + ")");
                        }
                    }
                }
            }

            Console.ReadLine();
        }

        /// <summary>
        /// Removes every character that is not allowed in a file or directory name
        /// (plus '|', which the crawler has always stripped) and trims trailing
        /// spaces and dots.
        /// </summary>
        /// <param name="name">Raw name, may be null.</param>
        /// <returns>The cleaned name; empty when nothing usable remains.</returns>
        private static string SanitizeFileName(string name)
        {
            if (string.IsNullOrEmpty(name))
            {
                return string.Empty;
            }

            char[] invalid = Path.GetInvalidFileNameChars();
            char[] kept = name
                .Where(c => c != '|' && Array.IndexOf(invalid, c) < 0)
                .ToArray();

            return new string(kept).TrimEnd(' ', '.');
        }

        /// <summary>
        /// Downloads a file over HTTP, resuming a partial local file when one exists.
        /// </summary>
        /// <param name="url">HTTP address of the remote file.</param>
        /// <param name="localfile">Path of the local file to write.</param>
        /// <returns>
        /// <c>true</c> when the download completed; <c>false</c> when the remote file is
        /// reported missing, the local file is already at least as long as the remote
        /// one, or any error occurred while transferring.
        /// </returns>
        public static bool Download(string url, string localfile)
        {
            long startPosition = 0; // offset at which the download resumes
            long remoteFileLength = GetHttpLength(url);
            System.Console.WriteLine("remoteFileLength=" + remoteFileLength);
            if (remoteFileLength == MissingFileLength)
            {
                System.Console.WriteLine("远程文件不存在.");
                return false;
            }

            FileStream writeStream;
            if (File.Exists(localfile))
            {
                // Partial (or complete) file on disk: continue after its last byte.
                writeStream = File.OpenWrite(localfile);
                startPosition = writeStream.Length;
                if (startPosition >= remoteFileLength)
                {
                    System.Console.WriteLine("本地文件长度" + startPosition + "已经大于等于远程文件长度" + remoteFileLength);
                    writeStream.Close();
                    return false;
                }

                writeStream.Seek(startPosition, SeekOrigin.Begin);
            }
            else
            {
                writeStream = new FileStream(localfile, FileMode.Create);
            }

            try
            {
                HttpWebRequest myRequest = (HttpWebRequest)WebRequest.Create(url);
                if (startPosition > 0)
                {
                    // Ask the server for the remaining bytes only; the long overload
                    // avoids truncating offsets above 2 GB.
                    myRequest.AddRange(startPosition);
                }

                using (HttpWebResponse response = (HttpWebResponse)myRequest.GetResponse())
                using (Stream readStream = response.GetResponseStream())
                {
                    if (startPosition > 0 && response.StatusCode != HttpStatusCode.PartialContent)
                    {
                        // The server ignored the Range header and is sending the whole
                        // file; appending would corrupt the local copy, so start over.
                        writeStream.SetLength(0);
                        writeStream.Seek(0, SeekOrigin.Begin);
                        startPosition = 0;
                    }

                    byte[] btArray = new byte[512];
                    long currPostion = startPosition;
                    int lastPercent = -1;
                    int contentSize;
                    while ((contentSize = readStream.Read(btArray, 0, btArray.Length)) > 0)
                    {
                        currPostion += contentSize;

                        // The length is 0 or -1 when the server did not report it;
                        // skip the progress output instead of dividing by zero.
                        if (remoteFileLength > 0)
                        {
                            int percent = (int)(currPostion * 100 / remoteFileLength);
                            if (percent != lastPercent)
                            {
                                System.Console.WriteLine("percent=" + percent + "%");
                                lastPercent = percent;
                            }
                        }

                        writeStream.Write(btArray, 0, contentSize);
                    }
                }

                return true;
            }
            catch (Exception ex)
            {
                // Best effort by contract: report the reason and signal failure.
                System.Console.WriteLine("下载失败: " + ex.Message);
                return false;
            }
            finally
            {
                writeStream.Close();
            }
        }

        /// <summary>
        /// Reads the remote file length from the response headers.
        /// </summary>
        /// <param name="url">HTTP address of the remote file.</param>
        /// <returns>
        /// The reported content length when the server answers 200 OK; 0 when the
        /// request fails or the status is anything else.
        /// </returns>
        private static long GetHttpLength(string url)
        {
            try
            {
                HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url);
                using (HttpWebResponse rsp = (HttpWebResponse)req.GetResponse())
                {
                    return rsp.StatusCode == HttpStatusCode.OK ? rsp.ContentLength : 0;
                }
            }
            catch (Exception e)
            {
                System.Console.WriteLine("获取远程文件长度失败: " + e.Message);
                return 0;
            }
        }
    }
}