天猫淘宝评论数据抓取
import requests import re,json import pandas class base(): def __init__(self,url): self.url = url def all_url(self): return [self.url + "%s" % i for i in range(1,100)] def loads_jsonp(self,_jsonp): try: return json.loads(re.match(".*?({.*}).*",_jsonp,re.S).group(1)) except: raise ValueError('Invalid Input') def url_req(self,url): content = requests.get(url).text aa = self.loads_jsonp(content) return aa def taobao_comment(self,data): for i in data['comments']: data = {} data['昵称']=i['user']['nick'] data['评论']=i['content'] info_list.append(data) def tianmao_comment(self,data): for i in data['rateList']: data = {} data['昵称']=i['displayUserNick'] data['评论']=i['rateContent'] info_list.append(data) def comment(self,url): data = self.url_req(url) self.tianmao_comment(data) if 'tmall' in url else self.taobao_comment(data) def main(url): data = base(url) for i in data.all_url(): data.comment(i) print(len(info_list)) if __name__ == "__main__": url = 'https://rate.tmall.com/list_detail_rate.htm?itemId=39258348512&spuId=250685252&sellerId=2106913388&order=3¤tPage=' info_list = [] main(url) df =pandas.DataFrame(info_list) df.to_excel('comments.xlsx',index=False)