# 66 拼dd评论采集 — Pinduoduo (PDD) product review scraper
# 需要更改的地方 cookie 、 accesstoken、 与之 对应的 用户id、
# coding=gbk
# -*- coding:utf-8 -*-
# @Time: 2023/2/4
# @Author: 十架bgm
# @FileName: pd
import datetime
import json
import pandas as pd
import re
import time
import threading
import requests
import os
import sys
import io
# Bypass any configured HTTP(S) proxy for the Pinduoduo mobile host.
# NO_PROXY entries are matched as host-name suffixes, so the value has to be a
# bare host name; a full URL with scheme and path never matches any request.
os.environ['NO_PROXY'] = 'mobile.pinduoduo.com'
# Re-wrap stdout so printed text is encoded as GB18030 (switch the encoding to
# 'utf8' on a UTF-8 terminal). line_buffering=True flushes after every printed
# line, so scraping progress shows up immediately instead of only when the
# wrapper's buffer fills.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='gb18030', line_buffering=True)
# 评论
def _spec_values(specs):
    """Return the ``spec_value`` entries of a review's ``specs`` field as text.

    ``specs`` is expected to be a JSON array of objects such as
    ``[{"spec_key": "型号", "spec_value": "..."}]``, either still serialized as
    a string or already decoded. Multiple values are joined with ``", "``.
    Anything that cannot be interpreted that way is returned as ``str(specs)``
    so a single odd review never aborts the whole scrape.
    """
    parsed = specs
    if isinstance(specs, str):
        try:
            parsed = json.loads(specs) if specs.strip() else []
        except ValueError:
            return specs
    if not isinstance(parsed, list):
        return '' if parsed is None else str(specs)
    return ', '.join(
        str(entry.get('spec_value', ''))
        for entry in parsed
        if isinstance(entry, dict)
    )


def pl(goods_id):
    """Scrape the reviews of one product and optionally save them to Excel.

    Pages through the mobile review endpoint (20 reviews per page, one second
    pause between pages) until an empty page is returned, printing every
    review as it is collected. When at least one review was collected the user
    is asked whether to save; answering ``y`` writes
    ``<YYYY_MM_DD>商品id<goods_id>.xlsx`` to the current directory.

    The ``pdduid`` parameter and the ``cookie`` header are hard-coded session
    credentials; they must be replaced with values from your own logged-in
    session and must belong to the same account.

    Args:
        goods_id: Product id, interpolated into the request URL.

    Returns:
        None. Results are printed and, on request, written to disk.
    """
    url = f'https://mobile.pinduoduo.com/proxy/api/reviews/{goods_id}/list'
    # The headers do not depend on the page number, so build them once.
    headers = {
        'cookie': 'api_uid=CkmYfmPeZz+CAABuDOyCAg==; _nano_fp=XpE8npdalpCol0XyXo_NdxAuhKD77v5NckRC8MOK; webp=1; jrpl=wylxmv1fCBzsXnZ8RSiLfmTZDX037iLG; dilx=qHF8iHPRf6m5_hLzu5j3M; njrpl=wylxmv1fCBzsXnZ8RSiLfmTZDX037iLG; PDDAccessToken=KRWIP56MWHTQVXFSCZBSIY5AZ6U6SW5SZDC3KLVJ7KWEOIW4ZYNQ1116791; pdd_user_id=5735401831; pdd_user_uin=BIVTYNEACNDUJMXJRP6QBXZ7HQ_GEXDA; pdd_vds=gaSUgVJUpXKRzAgRFRgKJgSRHHJRJjJRSgVKHXHAzWXXFVpMMzWKkJWKZzJH',
        'referer': 'https://mobile.pinduoduo.com/goods_comments.html?goods_id=48187200265&_oc_trace_mark=199&_oc_adinfo=eyJzY2VuZV9pZCI6Mn0%3D&_oak_gallery=https%3A%2F%2Fimg.pddpic.com%2Fmms-material-img%2F2020-07-13%2Ff54514a5-03be-4e3c-8e7b-0b0011546dc7.png.a.jpeg&_oc_refer_ad=1&_x_query=%E5%B0%8F%E4%BD%A9solo%E9%A5%AE%E6%B0%B4%E6%9C%BA&refer_page_el_sn=99369&refer_rn=&refer_page_name=goods_detail&refer_page_id=10014_1675563362551_rjp7v0a1g5&refer_page_sn=10014&uin=BIVTYNEACNDUJMXJRP6QBXZ7HQ_GEXDA&mall_id=695078883',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36'
    }
    rows = []
    for page in range(1, 10000):  # review pages are numbered from 1
        params = {
            'pdduid': '5735401831',  # user id; must match the cookie's account
            'page': str(page),
            'size': '20',  # original author's note: 20 is the largest page size that works
            'enable_video': '1',
            'enable_group_review': '1',
            'label_id': '0'
        }
        try:
            # A timeout keeps a stalled connection from hanging the scrape.
            res = requests.get(url, headers=headers, params=params, timeout=10)
            items = res.json()['data']
            if not items:
                break  # an empty page marks the end of the reviews
            print(len(items))
            for item in items:
                name = item["name"]
                model = _spec_values(item["specs"])
                comment = item["comment"]
                print(f'{name}-----{model}------{comment}')
                rows.append({
                    "用户": name,
                    "型号": model,
                    "评论": comment
                })
        except (requests.RequestException, ValueError, KeyError, TypeError) as exc:
            # Report what went wrong and stop paging, but keep the reviews
            # collected so far so they can still be saved below.
            print(f"请求或解析评论失败:{exc}")
            break
        time.sleep(1)  # throttle requests between pages

    print(f'总计爬取{len(rows)}评论')
    if not rows:
        print("输入商品id有误!请重新输入!")
        return

    if input("是否保存到本地? 'y/n':") != 'y':
        return
    stem = f"{datetime.date.today().strftime('%Y_%m_%d')}商品id{goods_id}"
    try:
        # Pass the path straight to to_excel: the ``encoding`` argument and
        # ExcelWriter.save() no longer exist in pandas 2.x.
        pd.DataFrame(rows).to_excel(f'{stem}.xlsx', index=False)
    except (OSError, ImportError, ValueError) as exc:
        print(f"保存失败:{exc}")
        return
    print(f"保存成功,文件名为:{stem}")
# 历史订单
def order_list():
    """Fetch one page of the account's order history and print each order.

    Sends a single POST to the ``order_list_v4`` endpoint and prints every
    entry of the response's ``orders`` list unmodified. The ``accesstoken``,
    ``cookie`` and ``verifyauthtoken`` headers and the ``pdduid`` field are
    hard-coded session credentials; they must be replaced with values from
    your own logged-in session and must belong to the same account.

    Raises:
        requests.RequestException: On connection failure, timeout, or an HTTP
            error status.
        KeyError: If the response JSON has no ``orders`` key.
    """
    url = 'https://mobile.pinduoduo.com/proxy/api/api/aristotle/order_list_v4'
    header = {
        'accept': 'application/json, text/plain, */*',
        'accesstoken': 'RKIMYUG5HIT7P2EDFMQOMULKX4QNUY3CX5EC6JXRAOJGJCZ56ZBA1116791',
        'content-type': 'application/json;charset=UTF-8',
        'cookie': 'api_uid=CkmYfmPeZz+CAABuDOyCAg==; _nano_fp=XpE8npdalpCol0XyXo_NdxAuhKD77v5NckRC8MOK; webp=1; jrpl=wylxmv1fCBzsXnZ8RSiLfmTZDX037iLG; dilx=qHF8iHPRf6m5_hLzu5j3M; njrpl=wylxmv1fCBzsXnZ8RSiLfmTZDX037iLG; PDDAccessToken=RKIMYUG5HIT7P2EDFMQOMULKX4QNUY3CX5EC6JXRAOJGJCZ56ZBA1116791; pdd_user_id=5735401831; pdd_user_uin=BIVTYNEACNDUJMXJRP6QBXZ7HQ_GEXDA; rec_list_personal=rec_list_personal_w4ddbf; pdd_vds=gaLLNOnLQmPmbtPOOLmtoQitninonONoOOnaLEmnyItIbabmNLyOGomQoyNO; JSESSIONID=49619BB5F9A6D0F9BAE45FF96CB4F9EF',
        'origin': 'https://mobile.pinduoduo.com',
        'referer': 'https://mobile.pinduoduo.com/orders.html?type=0&comment_tab=1&combine_orders=1&main_orders=1&refer_page_name=personal&refer_page_id=10001_1675570924330_jic22o28i0&refer_page_sn=10001&order_index=0',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
        'verifyauthtoken': 'ymAWWO1WjmrFFdMV9YA2gw58de46e976b938cd7'
    }
    data = {
        "pdduid": "5735401831",  # user id; must match the tokens above
        "type": "all",
        "page": 1,
        "origin_host_name": "mobile.pinduoduo.com",
        "page_from": 0,
        "size": 10,
        # NOTE(review): looks like a paging cursor (an order serial number) — confirm.
        "offset": "MO-01-221020-252801259082407"
    }
    # A timeout keeps a stalled connection from hanging the program forever.
    res = requests.post(url=url, headers=header, data=json.dumps(data), timeout=10)
    # Surface HTTP-level failures directly instead of as a JSON decoding error.
    res.raise_for_status()
    for order in res.json()["orders"]:
        print(order)
def main():
    """Interactive entry point: choose between review scraping and order history.

    Command ``1`` repeatedly asks for a product link, extracts the numeric
    ``goods_id`` query parameter and scrapes that product's reviews with
    ``pl`` until the user enters ``q``. Command ``2`` prints the order history
    via ``order_list``. Any other command does nothing.
    """
    print("1.评论 2.历史订单(暂未完善)")
    orde = input("输入查询的命令:")
    if orde == '1':
        while True:
            link = input("输入要查询的商品链接(按q退出):")
            if link == 'q':
                break
            # Take only the digits of the goods_id parameter itself. Relying
            # on a following "&_oak" parameter fails when it is absent and
            # swallows other parameters when it is not the next one.
            match = re.search(r'(?:^|[?&])goods_id=(\d+)', link)
            if match is None:
                print("请重新输入链接或去web页面复制商品链接!")
                continue
            goods_id = match.group(1)
            print("商品id为:" + goods_id)
            try:
                pl(goods_id=goods_id)
            except Exception:
                # Top-level boundary: one failed product must not end the
                # interactive session.
                print("请重新输入链接或去web页面复制商品链接!")
    elif orde == '2':
        order_list()


if __name__ == '__main__':
    main()
# 本文来自博客园,作者:__username,转载请注明原文链接:https://www.cnblogs.com/code3/p/17093290.html