C# 爬取图片

网络收集整理  爬取图片 

需要先引用 AngleSharp NuGet 包:

using AngleSharp;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;

namespace CoreConsoleApp
{
    /// <summary>
    /// Console image crawler. Walks the numbered listing pages of a site, finds the
    /// <c>img</c> inside every <c>.panel-body li</c> element and saves each image under
    /// <c>D:\Image\{yyyyMMdd}\{page} - {page title}\</c>.
    /// </summary>
    public class Program
    {
        // Root folder that receives one sub-folder per crawl day.
        private const string ImageRoot = "D:\\Image\\";

        // Listing URL; the page number is appended to it.
        private const string ListingUrlPrefix = @"https://www.qwe.com?pageIndex=";

        // Exclusive upper bound of the page loop: pages 1..49 are crawled.
        private const int PageLimit = 50;

        // Remote length that Download treats as "remote file does not exist".
        // NOTE(review): presumably the size of the target site's placeholder/error
        // response - the value is site specific, confirm before reusing elsewhere.
        private const long MissingFileMarkerLength = 745;

        // Size of the copy buffer used by Download.
        private const int DownloadBufferSize = 8192;

        /// <summary>
        /// Entry point: crawls the listing pages and downloads every image found on them.
        /// A single image that cannot be downloaded is reported and skipped instead of
        /// aborting the whole crawl.
        /// </summary>
        /// <param name="args">Command line arguments (unused).</param>
        public static void Main(string[] args)
        {
            // Configure AngleSharp so that documents can be loaded from the network.
            var config = Configuration.Default.WithDefaultLoader();
            var context = BrowsingContext.New(config);

            // Computed once so that a run crossing midnight keeps writing to one folder;
            // the invariant culture keeps the folder name independent of OS settings.
            string dayFolder = ImageRoot + DateTime.Now.ToString(
                "yyyyMMdd", System.Globalization.CultureInfo.InvariantCulture);

            // One client for the whole crawl, disposed when the crawl ends.
            using (WebClient client = new WebClient())
            {
                for (int i = 1; i < PageLimit; i++)
                {
                    string address = ListingUrlPrefix + i;
                    Uri pageUri = new Uri(address);

                    // Main is synchronous, so block once on the page load.
                    var document = context.OpenAsync(address).GetAwaiter().GetResult();
                    var cells = document.QuerySelectorAll(".panel-body li");

                    // The page title becomes a folder name, so strip everything that
                    // is not allowed in a file name (not only '|').
                    string localPath = dayFolder + "\\" + SanitizeFileName(i + " - " + document.Title);

                    foreach (var item in cells)
                    {
                        // A list item without an <img>, or an <img> without src, is skipped.
                        var image = item.QuerySelector("img");
                        string src = image == null ? null : image.GetAttribute("src");

                        Uri imageUri;
                        string imageName;
                        if (!TryResolveImage(pageUri, src, out imageUri, out imageName))
                        {
                            continue;
                        }

                        // Creates missing parents and is a no-op when the folder exists.
                        // Done lazily so pages without images leave no empty folder.
                        Directory.CreateDirectory(localPath);
                        string filepath = localPath + "\\" + imageName;

                        try
                        {
                            client.DownloadFile(imageUri, filepath);
                        }
                        catch (WebException ex)
                        {
                            Console.Error.WriteLine("Skipped " + imageUri + ": " + ex.Message);
                        }
                    }
                }
            }

            Console.ReadLine();
        }

        /// <summary>
        /// Removes every character that is not allowed in a file name (and always '|'),
        /// then trims surrounding whitespace and trailing dots.
        /// </summary>
        private static string SanitizeFileName(string name)
        {
            if (string.IsNullOrEmpty(name))
            {
                return string.Empty;
            }

            char[] invalid = Path.GetInvalidFileNameChars();
            char[] kept = name.Where(c => c != '|' && Array.IndexOf(invalid, c) < 0).ToArray();
            return new string(kept).Trim().TrimEnd('.', ' ');
        }

        /// <summary>
        /// Turns the raw <c>src</c> of an image into an absolute http(s) URI (resolving it
        /// against the page it was found on) and derives a local file name from the last
        /// path segment, ignoring any query string. Returns false when either is unusable.
        /// </summary>
        private static bool TryResolveImage(Uri pageUri, string src, out Uri imageUri, out string fileName)
        {
            imageUri = null;
            fileName = null;

            if (string.IsNullOrWhiteSpace(src))
            {
                return false;
            }

            Uri resolved;
            if (!Uri.TryCreate(pageUri, src.Trim(), out resolved))
            {
                return false;
            }

            // data:, javascript: and similar sources cannot be downloaded as files.
            if (resolved.Scheme != Uri.UriSchemeHttp && resolved.Scheme != Uri.UriSchemeHttps)
            {
                return false;
            }

            string path = resolved.AbsolutePath;
            string lastSegment = path.Substring(path.LastIndexOf('/') + 1);
            string candidate = SanitizeFileName(Uri.UnescapeDataString(lastSegment));
            if (candidate.Length == 0)
            {
                return false;
            }

            imageUri = resolved;
            fileName = candidate;
            return true;
        }

        /// <summary>
        /// Downloads a file over HTTP, resuming a partially downloaded local file when
        /// the server honours the Range request.
        /// </summary>
        /// <param name="url">HTTP address of the remote file.</param>
        /// <param name="localfile">Path of the local file to create or append to.</param>
        /// <returns>
        /// <c>true</c> when the transfer completed; <c>false</c> when the remote file is
        /// considered missing, when the local file is already at least as long as the
        /// remote one, or when the transfer failed.
        /// </returns>
        public static bool Download(string url, string localfile)
        {
            long remoteFileLength = GetHttpLength(url);
            System.Console.WriteLine("remoteFileLength=" + remoteFileLength);
            if (remoteFileLength == MissingFileMarkerLength)
            {
                System.Console.WriteLine("远程文件不存在.");
                return false;
            }

            long startPosition = 0;   // number of bytes already present locally
            FileStream writeStream;

            if (File.Exists(localfile))
            {
                writeStream = File.OpenWrite(localfile);
                startPosition = writeStream.Length;

                if (startPosition >= remoteFileLength)
                {
                    System.Console.WriteLine("本地文件长度" + startPosition + "已经大于等于远程文件长度" + remoteFileLength);
                    writeStream.Close();
                    return false;
                }

                // Continue writing after the bytes that are already there.
                writeStream.Seek(0, SeekOrigin.End);
            }
            else
            {
                writeStream = new FileStream(localfile, FileMode.Create);
            }

            try
            {
                HttpWebRequest myRequest = (HttpWebRequest)WebRequest.Create(url);
                if (startPosition > 0)
                {
                    // Int64 overload: an int cast would truncate offsets above 2 GB.
                    myRequest.AddRange(startPosition);
                }

                using (HttpWebResponse response = (HttpWebResponse)myRequest.GetResponse())
                using (Stream readStream = response.GetResponseStream())
                {
                    // A server that ignores Range answers 200 with the whole file;
                    // appending that to the partial file would corrupt it, so restart.
                    if (startPosition > 0 && response.StatusCode != HttpStatusCode.PartialContent)
                    {
                        writeStream.SetLength(0);
                        writeStream.Seek(0, SeekOrigin.Begin);
                        startPosition = 0;
                    }

                    byte[] buffer = new byte[DownloadBufferSize];
                    long currPosition = startPosition;
                    int lastPercent = -1;
                    int contentSize;

                    while ((contentSize = readStream.Read(buffer, 0, buffer.Length)) > 0)
                    {
                        writeStream.Write(buffer, 0, contentSize);
                        currPosition += contentSize;

                        // Progress is only meaningful (and division only safe) when the
                        // remote length is known; print it only when the value changes.
                        if (remoteFileLength > 0)
                        {
                            int percent = (int)(currPosition * 100 / remoteFileLength);
                            if (percent != lastPercent)
                            {
                                System.Console.WriteLine("percent=" + percent + "%");
                                lastPercent = percent;
                            }
                        }
                    }
                }

                return true;
            }
            catch (Exception ex)
            {
                // Callers rely on the bool result, so keep the catch-all but report the cause.
                Console.Error.WriteLine("Download of " + url + " failed: " + ex.Message);
                return false;
            }
            finally
            {
                writeStream.Dispose();
            }
        }

        /// <summary>
        /// Reads the length of a remote file from the Content-Length response header.
        /// </summary>
        /// <param name="url">HTTP address of the remote file.</param>
        /// <returns>
        /// The reported content length when the server answers 200 OK (the value comes
        /// straight from <c>HttpWebResponse.ContentLength</c>); 0 when the status is not
        /// OK or the request fails.
        /// </returns>
        private static long GetHttpLength(string url)
        {
            try
            {
                HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url);
                using (HttpWebResponse rsp = (HttpWebResponse)req.GetResponse())
                {
                    return rsp.StatusCode == HttpStatusCode.OK ? rsp.ContentLength : 0;
                }
            }
            catch (Exception ex)
            {
                // This helper never throws; report the cause instead of hiding it.
                Console.Error.WriteLine("Could not read length of " + url + ": " + ex.Message);
                return 0;
            }
        }

    }
}

  

posted @ 2018-11-23 14:56  杜子烟  阅读(1314)  评论(0)  编辑  收藏  举报