Python 多线程、多进程、高效爬虫
1.多线程
from concurrent.futures import ThreadPoolExecutor

import requests


def fetch_async(url, timeout=10):
    """Fetch ``url`` with a blocking HTTP GET and return the response.

    Intended to run inside a pool worker so that several downloads can
    overlap while each one waits on the network.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` may block forever on a stalled
            connection and tie up the worker.

    Returns:
        The ``requests.Response`` object for the request.

    Raises:
        requests.exceptions.RequestException: On connection failure or
            timeout.
    """
    return requests.get(url, timeout=timeout)


url_list = ['http://www.github.com', 'http://www.bing.com']

# Thread pool with at most 5 worker threads. Leaving the ``with`` block
# waits for every submitted task, like ``pool.shutdown(wait=True)``, and
# also releases the pool if submitting raises.
with ThreadPoolExecutor(5) as pool:
    for url in url_list:
        # The returned Future is not kept, so the response (or any
        # exception raised by the fetch) is discarded.
        pool.submit(fetch_async, url)
2.多进程
from concurrent.futures import ProcessPoolExecutor
import requests
def fetch_async(url, timeout=10):
    """Fetch ``url`` with a blocking HTTP GET and return the response.

    Defined at module top level so a process pool can pickle it by name
    and run it in a worker process.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` may block forever on a stalled
            connection and tie up the worker.

    Returns:
        The ``requests.Response`` object for the request.

    Raises:
        requests.exceptions.RequestException: On connection failure or
            timeout.
    """
    return requests.get(url, timeout=timeout)
url_list = ['http://www.github.com', 'http://www.bing.com']  # URLs to crawl

# Start worker processes only when this file is run as a script. Under the
# "spawn" start method (Windows, macOS) each worker re-imports the main
# module; without this guard that import would try to create another pool
# and the run would fail.
if __name__ == '__main__':
    # Process pool with at most 5 worker processes. Leaving the ``with``
    # block waits for every submitted task, like
    # ``pool.shutdown(wait=True)``.
    with ProcessPoolExecutor(5) as pool:
        for url in url_list:
            # The returned Future is not kept, so the response (or any
            # exception raised by the fetch) is discarded.
            pool.submit(fetch_async, url)