携程爬虫

import json

import requests

headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36'
}
url = 'https://m.ctrip.com/restapi/soa2/13444/json/getCommentCollapseList?_fxpcqlniredt=09031119316899837475&x-traceID=09031119316899837475-1652268879017-3695646'

# Seconds to wait for the server before giving up; without a timeout
# requests.post() can block forever on a stalled connection.
REQUEST_TIMEOUT = 10


def build_payload(page):
    """Return the JSON-encoded request body asking for comment page *page*.

    Only ``pageIndex`` varies between requests; every other field is the
    fixed value used by this script.
    """
    return json.dumps(
        {"arg": {"channelType": 2, "collapseType": 0, "commentTagId": 0, "pageIndex": page, "pageSize": 10,
                 "poiId": 10559342, "sourceType": 1, "sortType": 3, "starType": 0},
         "head": {"cid": "09031119316899837475", "ctok": "", "cver": "1.0", "lang": "01", "sid": "8888",
                  "syscode": "09", "auth": "", "xsid": "", "extension": []}})


def parse_comments(resp):
    """Extract ``(userNick, content, publishTime)`` tuples from a decoded response.

    Reads ``resp['result']['items']``. A missing or null ``result``/``items``
    yields an empty list, and a missing field inside a comment yields ``None``
    in that position, so one incomplete record does not abort the whole run.
    """
    items = (resp.get('result') or {}).get('items') or []
    rows = []
    for comment in items:
        user_info = comment.get('userInfo') or {}
        rows.append((user_info.get('userNick'), comment.get('content'), comment.get('publishTime')))
    return rows


def fetch_comments(page):
    """POST the request for *page* and return its parsed comment tuples.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.Timeout: if no response arrives within REQUEST_TIMEOUT.
    """
    resp = requests.post(url, data=build_payload(page), headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail loudly on an HTTP error instead of trying to decode an error page.
    resp.raise_for_status()
    return parse_comments(resp.json())


def main():
    """Print nickname, content and publish time for comment pages 1 and 2."""
    for page in range(1, 3):
        for user_nick, content, publish_time in fetch_comments(page):
            print(user_nick, content, publish_time)


if __name__ == '__main__':
    main()

posted @ 2022-05-11 19:58  biglayman  阅读(299)  评论(0)  编辑  收藏  举报