Python线程池下载txt
下次试试用scrapy模块
# coding=utf-8
import os

import requests
import threadpool
def download_file(i):
    """Download the file with site id ``i`` and save it under ``./txt``.

    The file name is taken from the response's ``Content-Disposition``
    header. When that header is absent the response headers and status code
    are printed together with a "URL does not exist" message and nothing is
    written.

    Args:
        i: Numeric id substituted into the download URL's ``id`` parameter.

    Returns:
        None. The outcome is reported on stdout.
    """
    download_url = f"https://www.txt2016.com/e/DownSys/xiazai/?classid=1&pathid=0&id={i}"
    try:
        # Without a timeout a single stalled connection would occupy a pool
        # worker forever.
        file = requests.get(download_url, timeout=30)
    except requests.RequestException as exc:
        # A network failure for one id must not take the worker down.
        print(i, ': ', exc)
        return

    try:
        # Drops a fixed 21-character prefix and 19-character suffix from the
        # header value, then re-reads the remaining text as UTF-8.
        # NOTE(review): the offsets presumably match this site's exact header
        # layout, and the ISO-8859-1 round trip presumably undoes the HTTP
        # stack's header decoding -- confirm against a live response.
        filename = file.headers['Content-Disposition'][21:][:-19].encode('iso8859-1').decode('utf-8')
    except KeyError:
        print(file.headers)
        print(file.status_code)
        print("网址不存在")
        return
    except UnicodeError as exc:
        # The header bytes were not valid UTF-8; report and skip this id.
        print(i, ': ', exc)
        return

    # The name comes from the remote server: neutralise both path separators
    # so it cannot escape the output directory.
    filename = filename.replace('/', '-').replace('\\', '-')
    if not filename:
        # A header shorter than the sliced offsets yields an empty name; fall
        # back to the id so such files do not all overwrite "./txt/.txt".
        filename = str(i)
    filename = filename + '.txt'

    out_dir = './txt'
    # exist_ok makes this safe when several workers race to create it.
    os.makedirs(out_dir, exist_ok=True)
    with open(os.path.join(out_dir, filename), 'wb') as txt_file:
        txt_file.write(file.content)
    print(i, ': ', filename)
if __name__ == "__main__":
    # Ids of the files to fetch. There are many of them; the pool decides
    # when each one actually runs.
    file_ids = range(2275, 113642)
    # Thread pool that runs at most 8 downloads at the same time.
    pool = threadpool.ThreadPool(8)
    # makeRequests builds one work request per id: the first argument is the
    # worker function, the second the sequence of arguments.
    tasks = threadpool.makeRequests(download_file, file_ids)
    # Queue every request. A plain loop is used because putRequest is called
    # only for its side effect, so no result list needs to be built.
    for task in tasks:
        pool.putRequest(task)
    # Block until every queued request has finished.
    pool.wait()