import time
from concurrent.futures import ThreadPoolExecutor, as_completed, ProcessPoolExecutor


 5 class ThreadPoolSpider:
 6     executor = ThreadPoolExecutor(max_workers=8)
 7     # executor = ProcessPoolExecutor(max_workers=8)
 8 
 9     def __init__(self):
10         pass
11 
12     def http_request(self, url, second):
13         time.sleep(second)
14         return url, second
15 
16     def crawl(self):
17         all_tasks = []
18         for index in range(100):
19             task = self.executor.submit(self.http_request, index, 10)
20             all_tasks.append(task)
21 
22         for result in as_completed(all_tasks):
23             data = result.result()
24             print(data)
25 
26 
if __name__ == '__main__':
    # Run the demo crawl only when executed as a script, not on import.
    spider = ThreadPoolSpider()
    spider.crawl()
ThreadPoolExecutor:线程池
as_completed() 函数返回一个迭代器：在没有任务完成时，它会阻塞；一旦某个任务完成，就会 yield 该任务对应的 Future 对象，于是 for 循环体内的语句得以执行；随后继续阻塞等待下一个完成的任务，如此循环，直到所有任务结束。

posted on 2022-01-06 13:12  小和尚不吃素  阅读(33)  评论(0)  编辑  收藏  举报