多线程池——爬取新发地200页菜价

#线程池一次性开辟一些线程,用户给线程提交任务,线程任务的调用交给线程池来完成
#
# from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor  #线程池和进程池
#
# def fn(name):
#     for i in range(1000):
#         print(name,i)
#
# if __name__ == '__main__':
#     #创建线程池
#     with ThreadPoolExecutor(50) as t:
#         for i in range(100):
#             t.submit(fn,name=f"线程{i}")
#     #等待线程池中的任务全部执行完毕,才继续执行
#     print("over")

#思路
#1,如何提取单个页面的数据
#2.上线程池,多个页面同时抓取
import csv
import threading
from concurrent.futures import ThreadPoolExecutor

import requests
# Output file shared by every worker thread. newline="" leaves line-ending
# handling to the csv module so no blank rows appear on Windows.
f = open("菜价.csv", "w", encoding="utf-8", newline="")
csvwriter = csv.writer(f)
# Serializes access to the shared CSV writer: down_one_page runs on many
# pool threads at once and they all write to the same file object.
_CSV_WRITE_LOCK = threading.Lock()


def down_one_page(url):
    """Fetch one page of price data and append its rows to the shared CSV.

    The response body is expected to be JSON with a top-level "list" array
    whose items carry the keys 'prodName', 'avgPrice' and 'place'. One CSV
    row ``[prodName, avgPrice, place]`` is printed and written per item.

    Args:
        url: Full URL of the page to download.

    Raises:
        requests.RequestException: on network failure, timeout, or a
            non-success HTTP status.
        KeyError: if the payload lacks "list" or an item lacks a field.
    """
    # A timeout keeps a stalled connection from blocking a pool worker forever.
    resp = requests.get(url, timeout=10)
    resp.raise_for_status()

    # Decode the body once instead of re-parsing it for every field, and
    # iterate over whatever the page actually contains rather than assuming
    # exactly 20 records (short pages previously raised IndexError).
    records = resp.json()["list"]
    rows = [
        [record['prodName'], record['avgPrice'], record['place']]
        for record in records
    ]

    # Hold the lock for the whole page so rows from different threads cannot
    # interleave inside the writer or the underlying file buffer.
    with _CSV_WRITE_LOCK:
        for row in rows:
            print(row)
            csvwriter.writerow(row)




if __name__ == '__main__':
    # Single-page smoke test:
    # down_one_page("http://www.xinfadi.com.cn/getPriceData.html?current=1")
    page_url = "http://www.xinfadi.com.cn/getPriceData.html?current={}"
    try:
        with ThreadPoolExecutor(50) as t:
            # Crawl 200 pages. The sample URL above uses current=1 as the
            # first page, so start at 1 rather than 0 (NOTE(review): confirm
            # the endpoint is 1-based).
            futures = {
                page: t.submit(down_one_page, page_url.format(page))
                for page in range(1, 201)
            }
        # Leaving the with-block waits for every submitted task to finish.
        # Exceptions raised inside a task stay hidden in its Future unless
        # inspected, so report the pages that failed instead of losing them.
        for page, future in futures.items():
            error = future.exception()
            if error is not None:
                print(f"page {page} failed: {error!r}")
    finally:
        # Flush and release the shared output file opened at module level.
        f.close()
    print("下载完毕")

多线程爬取新发地200页菜价,速度得到很大的提升

posted @ 2022-05-06 18:35  凋零_(  阅读(136)  评论(0编辑  收藏  举报