线程池/进程池
线程池
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor


def func(name):
    """Print ``name`` next to each integer from 0 through 999, one pair per line."""
    step = 0
    while step < 1000:
        print(name, step)
        step += 1


if __name__ == '__main__':
    # Labels for the 100 tasks that will be handed to the pool.
    task_names = [f"线程{idx}" for idx in range(100)]

    # A pool of 50 worker threads shares the 100 tasks.
    with ThreadPoolExecutor(50) as pool:
        for task_name in task_names:
            pool.submit(func, name=task_name)

    # Leaving the ``with`` block waits for every submitted task to finish,
    # so this line is printed only after all of them are done.
    print("over!!!")
线程池爬取新发地菜价
import csv
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed

import requests

# Output file shared by the header write below and by every worker thread.
CSV_PATH = "vg_price.csv"
# Keys copied from each record of the response, in CSV column order.
FIELD_NAMES = ("prodName", "lowPrice", "highPrice", "avgPrice", "pubDate")
# Seconds to wait for the server before giving up on a page, so a stalled
# connection cannot block a worker thread forever.
REQUEST_TIMEOUT = 10
# Serializes appends to CSV_PATH so pages written by different threads
# do not interleave inside the file.
_csv_lock = threading.Lock()

# Create (or truncate) the CSV file and write its header row.
# newline="" is what the csv module expects; without it every row is followed
# by an empty line on Windows.
with open(CSV_PATH, "w", encoding="utf-8", newline="") as f:
    content = csv.writer(f)
    content.writerow(["品名", "最低价", "最高价", "平均价", "发布日期"])


def download_onepage(url, number):
    """Fetch one page of price data and append its records to the CSV file.

    Args:
        url: Address of the price-data endpoint to request.
        number: Page number, sent as the ``current`` query parameter.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        KeyError: If the JSON body has no ``"list"`` entry or a record lacks
            one of ``FIELD_NAMES``.
    """
    # Query parameters sent with the request; the empty filters are passed
    # through exactly as blank strings.
    params = {
        "limit": 20,
        "current": number,
        "pubDateStartTime": "",
        "pubDateEndTime": "",
        "prodPcatid": "",
        "prodCatid": "",
        "prodName": "",
    }
    resp = requests.get(url, params=params, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
    resp.raise_for_status()
    vegetables_list = resp.json()["list"]

    # Keep only the wanted fields of each record, in column order.
    rows = [[vegetable[key] for key in FIELD_NAMES] for vegetable in vegetables_list]

    # Open the file once per page (not once per row) and write the whole page
    # while holding the lock.
    with _csv_lock:
        with open(CSV_PATH, "a", encoding="utf-8", newline="") as out:
            csv.writer(out).writerows(rows)


if __name__ == '__main__':
    url = "http://www.xinfadi.com.cn/getPriceData.html"
    # A pool of 50 worker threads.
    with ThreadPoolExecutor(50) as pool:
        # One task per page for pages 1..199.
        # NOTE: the original comment said 200 tasks, but range(1, 200) yields
        # 199; the actual behaviour is kept.
        futures = {
            pool.submit(download_onepage, url, page): page
            for page in range(1, 200)
        }
        # An exception raised inside a task is stored on its future; report it
        # here so a failed page is not silently lost.
        for future in as_completed(futures):
            error = future.exception()
            if error is not None:
                print(f"page {futures[future]} failed: {error!r}")
    print("over!!!")