Python爬取京东商品用户的评价

一、爬取京东手机商品的用户评价,包括评价内容、颜色、手机型号,并存入数据库(MySQL)

二、数据库表结构

 

 三、代码

import requests
import time
import json
from pymysql import *

def mes(productId, page):
    """Fetch one page of JD product comments and return the decoded JSON.

    Args:
        productId: JD product id placed in the ``productId`` query parameter.
        page: Page index placed in the ``page`` query parameter.

    Returns:
        The object produced by ``json.loads`` on the response payload.

    Raises:
        requests.RequestException: On network failure, timeout, or a
            non-success HTTP status.
        ValueError: If the payload is not valid JSON.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36'}
    url = 'https://club.jd.com/comment/productPageComments.action?callback=fetchJSON_comment98&productId={}&score=0&sortType=5&page={}&pageSize=10&isShadowSku=0&fold=1'.format(productId, page)
    # Without a timeout a stalled connection would block the crawl forever.
    resp = requests.get(url, headers=headers, timeout=10)
    resp.raise_for_status()

    payload = resp.text.strip()
    # The endpoint answers with JSONP: fetchJSON_comment98({...});
    # Cut out only the outermost parentheses so that a ");" occurring inside
    # a review's text is left untouched (a global replace would delete it).
    if payload.startswith('fetchJSON_comment98'):
        start = payload.find('(')
        end = payload.rfind(')')
        if start != -1 and end > start:
            payload = payload[start + 1:end]
    return json.loads(payload)

def getPage(productId):
    """Return the ``maxPage`` field from the first (index 0) comment page."""
    first_page = mes(productId, 0)
    page_total = first_page['maxPage']
    return page_total


def insert(db, list):
    """Bulk-insert rows into the ``pingjia`` table.

    Args:
        db: An open DB-API connection providing ``cursor()``, ``commit()``
            and ``rollback()``.
        list: Iterable of 3-item row tuples bound to the three ``%s``
            placeholders of the INSERT statement. (The name shadows the
            builtin but is kept so keyword callers keep working.)

    Returns:
        None. Any failure is printed and the transaction is rolled back;
        the exception is not re-raised (best-effort insert).
    """
    sql = "INSERT INTO pingjia values (0,%s,%s,%s)"
    cursor = db.cursor()
    tuples = tuple(list)
    print(tuples)
    try:
        cursor.executemany(sql, tuples)
        db.commit()
        # Report success only once the commit has actually gone through.
        print("插入成功")
    except Exception as e:
        print(e)
        db.rollback()
    finally:
        # Release the cursor on the failure path as well as on success.
        cursor.close()


def getData(productId):
    """Crawl every comment page of a product and store the rows in the DB.

    For each comment the ``content``, ``productColor`` and ``referenceName``
    fields are collected into a 3-tuple; all tuples are then handed to
    ``insert`` in a single batch.

    Args:
        productId: JD product id forwarded to ``getPage`` and ``mes``.

    Returns:
        None.
    """
    maxpage = getPage(productId)
    print(maxpage)

    rows = []  # avoid naming this `list`, which would shadow the builtin
    # NOTE(review): the upper bound maxpage + 1 is kept from the original;
    # if page indices start at 0 and maxPage is a count, the last request
    # is redundant -- confirm against the API.
    for page in range(0, maxpage + 1):
        comments = mes(productId, page)['comments']  # comment list of this page
        rows.extend(
            (item['content'], item['productColor'], item['referenceName'])
            for item in comments
        )

    db = connectDB()
    if db is None:
        # No connection could be established; there is nowhere to store rows.
        return
    try:
        insert(db, rows)
    finally:
        # Always release the connection, even if the insert step raises.
        db.close()

def connectDB():
    """Open a connection to the local MySQL database ``mmm``.

    Returns:
        The connection object returned by ``connect`` on success, or
        ``None`` when the connection attempt raises (the error is printed).
    """
    try:
        db = connect(host='localhost', port=3306, user='root', password='123456', db='mmm')
        print("数据库连接成功")
        return db
    except Exception as e:
        print(e)
    # Python has no NULL; the original `return NULL` raised NameError here.
    return None

if __name__ == "__main__":
    # Script entry point: crawl all reviews of one hard-coded product id
    # and write them to the database.
    getData(100011924558)
评价.py

四、结果

 

posted @ 2020-08-07 13:09  MoooJL  阅读(848)  评论(0)  编辑  收藏  举报