Python requests.Session 协程 下载文件
Python requests.Session 协程 下载文件
# coding: utf-8
# gevent must monkey-patch the standard library before `requests` is imported
# so that the blocking socket calls made by requests become cooperative.
from gevent import monkey
monkey.patch_all()

from gevent.pool import Pool
import gevent
import requests
import os
import sys
import time
import urllib3

# Requests below are made with verify=False; silence the per-request
# InsecureRequestWarning that would otherwise be emitted.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)


class Downloader:
    """Download a list of URLs concurrently with a gevent pool and one shared
    ``requests.Session``.

    Each URL is saved under the target directory using the last path segment
    of the URL (query string stripped) as the file name. Files that already
    exist are skipped. URLs that could not be downloaded after all retries
    are left in ``self.failed`` when ``run`` returns.
    """

    def __init__(self, pool_size, retry=3):
        """
        :param pool_size: number of concurrent greenlets; also used as the
            HTTP connection-pool size of the session.
        :param retry: number of attempts per URL inside one pass, number of
            extra passes over failed URLs, and ``max_retries`` of the HTTP
            adapter.
        """
        self.pool = Pool(pool_size)
        self.session = self._get_http_session(pool_size, pool_size, retry)
        self.retry = retry  # retry count
        self.dir = ''
        self.failed = []
        self.url_total = 0
        self.completed_count = 0

    def _get_http_session(self, pool_connections, pool_maxsize, max_retries):
        """Build a ``requests.Session`` whose http/https adapters share the
        given connection-pool sizes and retry setting."""
        session = requests.Session()
        adapter = requests.adapters.HTTPAdapter(
            pool_connections=pool_connections,
            pool_maxsize=pool_maxsize,
            max_retries=max_retries,
        )
        session.mount('http://', adapter)
        session.mount('https://', adapter)
        return session

    def run(self, url_list, dir=''):
        """Download every URL in ``url_list`` into directory ``dir``.

        Creates ``dir`` if it is non-empty and does not exist yet. Blocks
        until every URL has either been stored (or found on disk) or has
        permanently failed; the latter remain in ``self.failed``.
        """
        self.dir = dir
        if self.dir and not os.path.isdir(self.dir):
            os.makedirs(self.dir)
        self.url_total = len(url_list)
        self.completed_count = 0
        print('total ts count:', self.url_total)
        watcher = gevent.spawn(self._check_finish)
        try:
            self._download(url_list)
        except BaseException:
            # Do not leave the watcher polling forever if the download aborts.
            watcher.kill()
            raise
        watcher.join()

    def _download(self, url_list):
        """Run the workers over ``url_list`` and re-run the failed URLs.

        The first pass is followed by at most ``self.retry`` additional
        passes over whatever failed, so a permanently broken URL can no
        longer cause unbounded recursion. URLs that still fail afterwards
        are kept in ``self.failed`` and counted as completed so that
        ``_check_finish`` terminates.
        """
        pending = list(url_list)
        passes_left = 1 + max(self.retry, 0)
        while pending and passes_left > 0:
            self.failed = []
            self.pool.map(self._worker, pending)  # blocks until the pass is done
            pending = self.failed
            passes_left -= 1
        self.failed = pending
        self.completed_count += len(pending)

    def _worker(self, url):
        """Download one URL to ``self.dir``.

        On success (or if the file already exists) ``completed_count`` is
        incremented; after ``self.retry`` failed attempts the URL is appended
        to ``self.failed`` instead.
        """
        file_name = url.split('/')[-1].split('?')[0]
        file_path = os.path.join(self.dir, file_name)
        if os.path.exists(file_path):
            print('exist:', file_name)
            self.completed_count += 1
            return

        tmp_path = file_path + '.part'
        for _ in range(self.retry):
            try:
                # NOTE(review): verify=False disables TLS certificate
                # verification; kept as in the original, confirm it is intended.
                r = self.session.get(url, timeout=20, verify=False)
                if not r.ok:
                    raise RuntimeError('download fail')
                # Write to a temporary name and rename afterwards, so an
                # interrupted write never leaves a truncated file that a later
                # run would treat as already downloaded.
                with open(tmp_path, 'wb') as f:
                    f.write(r.content)
                os.replace(tmp_path, file_path)
            except Exception:
                # Only ordinary errors trigger a retry; KeyboardInterrupt,
                # SystemExit and GreenletExit propagate.
                continue
            print('download:', file_name)
            self.completed_count += 1
            return

        print('[FAIL]%s' % url)
        self.failed.append(url)

    def _check_finish(self):
        """Poll until every URL of the current run has been accounted for."""
        while self.completed_count < self.url_total:
            time.sleep(0.01)


if __name__ == '__main__':
    downloader = Downloader(50)  # number of coroutines
    url_list = [
        'https://pics1.baidu.com/feed/b999a9014c086e0610f3d6bf8bf4d6ff08d1cbf7.jpeg',
        'https://pics7.baidu.com/feed/d53f8794a4c27d1ef06a7b6195290065dfc438ca.jpeg',
    ]
    downloader.run(url_list, './dst_dir')
分类:
Python常用方法
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· Manus爆火,是硬核还是营销?
· 终于写完轮子一部分:tcp代理 了,记录一下
· 别再用vector<bool>了!Google高级工程师:这可能是STL最大的设计失误
· 单元测试从入门到精通
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
2018-02-06 Docker 常用命令