教你怎么使用python批量下载图片

教你怎么使用python批量下载图片


前言

通过调用该函数批量下载图片,并下载到指定位置。


一、运行环境

1. win10
2. python==3.7.2

二、需要用到的参数

1. downloadUrl: 下载的图片链接或者列表
2. saveImagePath: 保存路径
3. headers: 自定义头部信息
4. proxies: 自定义代理

三、请求链接并保存

1. 生成headers(需要先创建实例 agent = UserAgent())
headers = {
    'User-Agent': agent.random,
    'Referer': f'{downloadUrlParse.scheme}://{downloadUrlParse.netloc}',
    'Host': downloadUrlParse.netloc,
}
2. 请求链接,并设置headers、proxies、超时时间
response = requests.get(downloadUrl, headers=headers, timeout=20, proxies=proxies).content
3. 保存
with open(newSaveImagePath, 'wb') as f:
    f.write(response)

四、完整代码

import logging
import os
import time
from typing import Union
from urllib.parse import *

import requests
from fake_useragent import UserAgent

# Module-wide logging: INFO level, each message prefixed with a timestamp
# and the name of the function that emitted it.
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(funcName)s -> %(message)s')


def Download_Image(
        downloadUrl: "Union[str, list]", saveImagePath: str, headers: dict = None, proxies: dict = None
) -> "Union[bool, str]":
    """
    Download a single image URL (str) or a batch of image URLs (list) to disk.

    :param downloadUrl: image URL, or a list of image URLs (empty strings are skipped)
    :param saveImagePath: target directory (images are saved as ``<index>.jpg``
        inside it), or a file path whose extension is forced to ``.jpg``
    :param headers: custom HTTP headers; when None, fresh headers with a random
        User-Agent and a Referer/Host matching each URL are generated
    :param proxies: proxies mapping passed straight through to ``requests.get``
    :return: True on success, the failing URL on timeout / single-URL failure,
        or the string '无法下载' when ``downloadUrl`` is neither str nor list
    """
    # Normalize both accepted input shapes to one list of URLs.
    if isinstance(downloadUrl, str):
        urls = [downloadUrl]
        isSingle = True
    elif isinstance(downloadUrl, list):
        urls = downloadUrl
        isSingle = False
    else:
        logging.info('无法下载')
        return '无法下载'

    agent = UserAgent()

    for i, url in enumerate(urls):
        if not url:
            continue

        # Build per-URL headers unless the caller supplied their own.
        # (Rebuilding on every iteration fixes the original bug where the
        # Host/Referer derived from the first URL was reused for every
        # subsequent URL in a batch.)
        if headers is None:
            parsed = urlparse(url)
            requestHeaders = {
                'User-Agent': agent.random,
                'Referer': f'{parsed.scheme}://{parsed.netloc}',
                'Host': parsed.netloc,
            }
        else:
            requestHeaders = headers

        # Download. The original caught the builtin TimeoutError, which
        # requests never raises; requests.exceptions.Timeout is the
        # exception actually thrown on a timeout.
        try:
            content = requests.get(url, headers=requestHeaders, timeout=20, proxies=proxies).content
        except requests.exceptions.Timeout:
            logging.info(f'下载图片超时:{url}')
            return url
        except Exception as e:
            logging.info(f'下载图片失败:{url} -> 原因:{e}')
            if isSingle:
                return url
            continue  # batch mode: keep going after a single failure

        # Directory -> save as <index>.jpg inside it; otherwise force the
        # given file path's extension to .jpg.  os.path.join replaces the
        # original hard-coded backslash concatenation (portable, same
        # result on Windows).
        if os.path.isdir(saveImagePath):
            newSaveImagePath = os.path.join(saveImagePath, f'{i}.jpg')
        else:
            newSaveImagePath = os.path.splitext(saveImagePath)[0] + '.jpg'
        with open(newSaveImagePath, 'wb') as f:
            f.write(content)

    return True


if __name__ == '__main__':
    # Demo: download a small batch of wallpapers to the desktop.
    sampleUrls = [
        'https://lmg.jj20.com/up/allimg/1114/121R0120545/20121Q20545-9-1200.jpg',
        'https://lmg.jj20.com/up/allimg/1114/121R0120545/20121Q20545-10-1200.jpg',
    ]
    Download_Image(downloadUrl=sampleUrls, saveImagePath=r'C:\Users\Adminitrator\Desktop')

posted @ 2022-07-13 14:47  NightSkyKnight  阅读(2676)  评论(0)    收藏  举报