Multi-process download of a specified list in Python 3

This code downloads the files in a specified download list using multiple processes.

Notes:

1. During the download, the script prints the total count, the number of files that already exist, the number downloaded, the number of errors, and the number remaining, so progress can be tracked at any time.

2. The number of retries can be configured (set inside the program).

3. The number of processes and the download list are passed as command-line arguments (an example invocation follows this list).

4. The save location must be set inside the program.
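
As a quick illustration of how it is invoked (the script name downloader.py and the list file name download_list.csv are placeholders, not from the original post), running 8 worker processes against a list file might look like:

python downloader.py 8 download_list.csv

Each row of download_list.csv has two columns, matching what the code below expects: a URL suffix that gets appended to http://www.xxx.com/ and a name used to build the save path under D:/saveFolder/, for example:

page001.html,page001
news/article02.html,article02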

 

# -*- coding: utf-8 -*-
"""
Created on Sat Nov 16 07:52:40 2019

@author: mi
"""
import os
import sys
import csv
from multiprocessing import Pool

import requests

exist_count = 0       # files that already exist locally
downloaded_count = 0  # files downloaded in this run
total_count = 0       # total number of entries in the list
error_count = 0       # entries that failed after all retries

def downloading_over(arg):
    # Callback run in the main process with get_page's return value.
    global downloaded_count
    global total_count
    global exist_count
    global error_count
    print("worker returned:", arg)
    if arg == 'EXISTS':
        exist_count += 1
    elif arg == 'SUCCESS':
        downloaded_count += 1
    elif arg == 'ERROR':
        error_count += 1
    print('total: %s / existing: %s / downloaded: %s / errors: %s / remaining: %s' % (
        str(total_count), str(exist_count), str(downloaded_count), str(error_count),
        str(total_count - exist_count - downloaded_count - error_count)))


def get_page(link):
    url = link[0]
    savePath = link[1]
    print(savePath)
    if os.path.exists(savePath):
        print('already exists')
        return 'EXISTS'
    times = 3  # retry count; adjust here if needed
    while times > 0:
        times = times - 1
        try:
            resp = requests.get(url, timeout=30)
        except requests.RequestException as e:
            print(e)
            continue
        if not os.path.exists(os.path.dirname(savePath)):
            os.makedirs(os.path.dirname(savePath))

        with open(savePath, 'wb') as fw:
            fw.write(resp.content)
        return 'SUCCESS'
    else:
        # The while/else branch runs only when all retries are exhausted.
        return 'ERROR'

if __name__ == "__main__":
    # Workaround for running multiprocessing code from Spyder/IPython on Windows.
    __spec__ = "ModuleSpec(name='builtins', loader=<class '_frozen_importlib.BuiltinImporter'>)"
    process_num = sys.argv[1]
    print('number of processes: ' + process_num)
    download_list = sys.argv[2]
    print('download list: ' + download_list)

    pool = Pool(processes=int(process_num))    # set the maximum number of worker processes
    with open(download_list, 'r', encoding='utf-8') as downlist:
        lines = csv.reader(downlist)
        # Download list: first column is the download link, second column is the save location.
        for line in lines:
            total_count += 1
            link = []
            url = 'http://www.xxx.com/' + line[0]
            link.append(url)
            savePath = 'D:/saveFolder/' + line[1] + '.htm'
            link.append(savePath)
            pool.apply_async(func=get_page, args=(link,), callback=downloading_over)
    pool.close()
    pool.join()
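
One caveat about the script above: if a worker raises an exception that get_page does not catch (for example an OSError while writing the file), the apply_async callback never runs and the counters drift. A minimal sketch of one way to keep the counts accurate, using the error_callback parameter of apply_async (not used in the original script), would be:

def downloading_failed(exc):
    # Runs in the main process when a worker raises an uncaught exception.
    global error_count
    error_count += 1
    print('worker exception:', exc)

# ...and in the submission loop, instead of the plain apply_async call:
pool.apply_async(func=get_page, args=(link,),
                 callback=downloading_over,
                 error_callback=downloading_failed)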

 
