Python线程池下载txt

下次试试用scrapy模块

 

# coding=utf-8
import requests
import threadpool

def download_file(i):
    """Download the txt file with site id ``i`` and save it under ``./txt/``.

    The file name is taken from the response's ``Content-Disposition``
    header. When that header is absent, the response headers and status
    code are printed and nothing is written.

    Args:
        i: Numeric id substituted into the download URL's ``id`` parameter.

    Returns:
        None. The outcome is reported via ``print``.
    """
    import os  # function-scope import: the file's import block does not provide os

    download_url = f"https://www.txt2016.com/e/DownSys/xiazai/?classid=1&pathid=0&id={i}"
    try:
        # Without a timeout a stalled connection would block this worker
        # thread forever and the pool would never drain.
        file = requests.get(download_url, timeout=30)
    except requests.RequestException as exc:
        # Report the failing id and move on instead of losing it in a traceback.
        print(i, ': ', exc)
        return

    try:
        # The fixed offsets strip a prefix and suffix around the file name;
        # NOTE(review): assumes the site's fixed header layout - confirm.
        # The header text is re-encoded as iso8859-1 and decoded as utf-8
        # to recover the non-ASCII file name.
        filename = file.headers['Content-Disposition'][21:][:-19].encode('iso8859-1').decode('utf-8')
    except KeyError:
        # No Content-Disposition header: the id did not yield a download.
        print(file.headers)
        print(file.status_code)
        print("网址不存在")
        return

    # '/' would be interpreted as a directory separator in the output path.
    filename = filename.replace('/', '-') + '.txt'
    # Create the output directory on first use; exist_ok makes this safe
    # when several workers reach this line concurrently.
    os.makedirs('./txt', exist_ok=True)
    with open(f'./txt/{filename}', 'wb') as txt_file:
        txt_file.write(file.content)
    print(i, ': ', filename)


if __name__ == "__main__":
    # Ids of the files to fetch. There are far more tasks than workers;
    # the pool schedules them itself.
    file_ids = range(2275, 113642)
    pool = threadpool.ThreadPool(8)  # at most 8 worker threads run at once
    # makeRequests builds one work request per argument: the first parameter
    # is the worker function, the second the iterable of arguments.
    tasks = threadpool.makeRequests(download_file, file_ids)
    # Plain loop rather than a list comprehension: putRequest is called for
    # its side effect only, so there is no result list worth building.
    for task in tasks:
        pool.putRequest(task)
    pool.wait()  # block until every queued task has finished

代码参考:https://blog.csdn.net/cymy001/article/details/78218024

posted @ 2019-01-29 19:35  海牙2018  阅读(381)  评论(0)  编辑  收藏  举报