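"""Taobao product-review scraping: word cloud and pie chart, with sample output.

The matching JSON file is described at:
https://blog.csdn.net/nicholas_K/article/details/86094530

1. Collects the reviews and follow-up reviews of a Taobao phone listing.
2. Builds a word cloud from the Taobao reviews.
3. Draws a pie chart of the purchased phone variants.
"""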
import json
import time
import pygal
import pymysql.cursors
from wordcloud import WordCloud
def get_comments(path='tb_comments_1.json'):
    """Load Taobao reviews from a JSON file and flatten them into dicts.

    Args:
        path: JSON file to read. It must contain a ``rateDetail`` object
            whose ``rateList`` entry is a list of review objects.

    Returns:
        A list with one dict per review, holding the keys ``id``,
        ``content``, ``rateContent``, ``auctionSku`` and ``rateDate``.
        ``content`` is the text of the follow-up review when the review has
        a non-empty ``appendComment``; otherwise it is the falsy value found
        there (``None`` when the key is absent).

    Raises:
        KeyError: if ``rateDetail``/``rateList`` or one of the copied review
            fields (other than ``appendComment``) is missing.
    """
    with open(path, encoding='utf-8') as tb:
        comments_dict = json.load(tb)

    result_list = []
    for comment in comments_dict['rateDetail']['rateList']:
        # Only reviews with a follow-up carry an ``appendComment`` object;
        # read it without mutating the loaded review.
        append = comment.get('appendComment')
        content = append['content'] if append else append
        result_list.append({
            'id': comment['id'],
            'content': content,
            'rateContent': comment['rateContent'],
            'auctionSku': comment['auctionSku'],
            'rateDate': comment['rateDate'],
        })
    return result_list
def save_db(comments):
    """Insert new reviews into MySQL and read back the data for the charts.

    A review is inserted only when no row of ``tb.taobao`` already has its
    ``id`` in the ``taobao_id`` column; reviews already stored are skipped.

    Args:
        comments: iterable of dicts with the keys ``id``, ``rateContent``,
            ``auctionSku``, ``rateDate`` and ``content``.

    Returns:
        A ``(rs_set, rs_sets)`` tuple. ``rs_set`` holds the ``rate`` and
        ``content`` columns of every stored row; ``rs_sets`` holds one row
        per distinct ``auctionint`` value with its row count as ``num``.
        Rows are dicts because the connection uses ``DictCursor``.
    """
    # NOTE(review): credentials are hard-coded; consider loading them from
    # configuration or the environment instead.
    connection = pymysql.connect(host='127.0.0.1',
                                 port=3306,
                                 user='root',
                                 password='zhangkai',
                                 db='tb',
                                 charset='utf8mb4',
                                 cursorclass=pymysql.cursors.DictCursor)
    exists_sql = "select id from tb.taobao where taobao_id=%s"
    insert_sql = """INSERT INTO tb.taobao VALUES (%s, %s ,%s ,%s ,%s, %s)"""
    try:
        # One cursor for the whole run, so the SELECTs below also work when
        # ``comments`` is empty.
        with connection.cursor() as cursor:
            for comment in comments:
                # Let the driver quote the id instead of formatting it into
                # the SQL text.
                cursor.execute(exists_sql, (comment['id'],))
                if cursor.fetchone():  # a row dict if stored, else None
                    print('这条评论已存在在数据库中')
                    continue
                # Insert only the current review; the leading None fills the
                # table's first column (presumably an auto-increment key).
                cursor.execute(insert_sql, (None,
                                            comment['id'],
                                            comment['rateContent'],
                                            comment['auctionSku'],
                                            comment['rateDate'],
                                            comment['content']))
                connection.commit()
                # NOTE(review): pause kept from the original code; its
                # purpose is not evident here - confirm it is still needed.
                time.sleep(1.2)
                print('添加成功')

            cursor.execute("""select rate, content from tb.taobao""")
            rs_set = cursor.fetchall()
            # Count stored rows per phone variant.
            cursor.execute(
                """SELECT COUNT(*)as num,auctionint FROM tb.taobao group by auctionint""")
            rs_sets = cursor.fetchall()
    finally:
        connection.close()
    return rs_set, rs_sets
def jieba_db(comments):
    """Join the ``rate`` text of every row into one big string.

    Despite the name, no word segmentation is performed here; the rows are
    simply concatenated with no separator.

    Args:
        comments: iterable of dicts that each have a ``rate`` key.

    Returns:
        The concatenation of all non-empty ``rate`` values, in order. Rows
        whose ``rate`` is ``None`` or empty are skipped instead of raising
        ``TypeError``.
    """
    return ''.join(row['rate'] for row in comments if row['rate'])
def word_cloud(string, font='msyhl.ttc', output_path='淘宝词云.png'):
    """Render a word cloud of ``string`` and save it as an image file.

    Args:
        string: text to build the word cloud from.
        font: font file handed to ``WordCloud`` as ``font_path``.
            NOTE(review): presumably this must be a font that covers
            Chinese glyphs for the review text to render - confirm.
        output_path: image file the word cloud is written to.

    Returns:
        None.
    """
    wordcloud = WordCloud(font_path=font,
                          background_color="white",
                          width=1000,
                          height=860,
                          max_font_size=30,
                          min_font_size=10,
                          margin=2).generate(string)
    wordcloud.to_file(output_path)
def pygals(comments, output_path='淘宝.svg'):
    """Draw a pie chart of the share of each ``auctionint`` value.

    Args:
        comments: re-iterable sequence (it is traversed twice) of dicts with
            the keys ``num`` (a count) and ``auctionint`` (the slice label).
        output_path: SVG file the chart is written to; open it in a browser.

    Returns:
        None.
    """
    total = sum(row['num'] for row in comments)
    pie_chart = pygal.Pie()
    pie_chart.title = '购买手机颜色比例(in % )'
    for row in comments:
        # Express each slice as a percentage; fall back to 0 when every
        # count is zero so the division cannot fail.
        share = row['num'] / total * 100 if total else 0
        pie_chart.add(row['auctionint'], share)
    pie_chart.render_to_file(output_path)
    print('绘图成功')
if __name__ == '__main__':
    # Pipeline: load reviews -> store them -> word cloud + pie chart.
    comment = get_comments()
    save, banben = save_db(comments=comment)
    taobao_jieba = jieba_db(comments=save)
    word_cloud(string=taobao_jieba)
    # The per-variant counts were fetched but never charted before.
    pygals(comments=banben)
# The resulting word-cloud image is shown below.
# The resulting pie chart is shown below.