import csv
import threading
import time
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor

import requests
# Endpoint queried for price data and the headers sent with every request.
url = 'http://www.xinfadi.com.cn/getPriceData.html'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.41',
}

# Output file and the CSV writer bound to it.
# NOTE: newline='' is required by the csv module. Without it, newlines
# embedded inside quoted fields are not interpreted correctly, and platforms
# that use \r\n line endings get an extra \r written on every row.
f = open('data.csv', 'w', newline='', encoding='utf-8')
csvwriter = csv.writer(f)
# Record keys copied into each CSV row, in column order.
_PRICE_FIELDS = (
    'prodName',   # product name
    'lowPrice',   # lowest price
    'avgPrice',   # average price
    'highPrice',  # highest price
    'specInfo',   # specification
    'place',      # place of origin
    'unitInfo',   # unit
    'pubDate',    # publication date
)

# download_one_page is submitted to a thread pool, and every call writes to
# the same module-level csv.writer, which wraps a text-mode file object.
# Text-mode file objects are not documented as thread-safe, so writes are
# serialized through this lock.
_csv_write_lock = threading.Lock()


def download_one_page(current):
    """Download one page of price records and append them to the shared CSV.

    Posts to the module-level ``url`` with the module-level ``headers`` and
    writes one row per returned record through the module-level
    ``csvwriter``.

    Args:
        current: Page number to request (sent as the ``current`` parameter).

    Raises:
        requests.RequestException: The request failed, timed out, or the
            server answered with an HTTP error status.
        KeyError: The JSON payload has no ``'list'`` entry, or a record is
            missing one of the expected fields.
    """
    params = {
        'limit': '20',  # records per page
        'current': current,  # page number
        'pubDateStartTime': '',
        'pubDateEndTime': '',
        'prodPcatid': '',
        'prodCatid': '',
        'prodName': '',
    }
    # A timeout keeps a stalled connection from blocking a worker forever.
    response = requests.post(url=url, headers=headers, params=params, timeout=30)
    response.raise_for_status()
    records = response.json()['list']

    # Iterate over whatever the server returned instead of a fixed index
    # range: a hard-coded range(0, 19) drops the 20th record of every page
    # and raises IndexError on a short (e.g. final) page.
    rows = [[record[field] for field in _PRICE_FIELDS] for record in records]

    # Write the whole page under the lock so rows from concurrent pages do
    # not interleave mid-write.
    with _csv_write_lock:
        csvwriter.writerows(rows)
if __name__ == '__main__':
    # Script entry point: download pages 1-19 concurrently, report any page
    # whose download failed, and close the output file before exiting.
    print('start:', time.asctime())
    try:
        # Downloads are I/O-bound, so a thread pool overlaps the network
        # waits; leaving the with-block waits for all submitted tasks.
        with ThreadPoolExecutor(50) as t:
            futures = {
                page: t.submit(download_one_page, page)
                for page in range(1, 20)
            }
        # An exception raised inside a worker is stored on its future and is
        # otherwise never seen; report it so a missing page is noticed.
        for page, future in futures.items():
            error = future.exception()
            if error is not None:
                print('page', page, 'failed:', repr(error))
    finally:
        # Close explicitly so buffered rows are flushed to data.csv even if
        # something above raised.
        f.close()
    print('end:', time.asctime())