# 淘宝产品抓取实战 (Taobao product scraping in practice)
#!coding=utf-8
# Scrape Taobao mobile search results for a keyword and save them to a CSV file.
import json
import os
import re
import time
from urllib.parse import quote

import pandas as pd
import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning

# Suppress the SSL warning that verify=False requests would otherwise emit.
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)


class tb(object):
    """Fetch Taobao mobile search results for one keyword and write them to CSV.

    Attributes:
        path: Directory in which ``out.csv`` is written.
        seach: Raw (not URL-encoded) search keyword.
        s: ``requests`` session carrying the mobile-browser headers.
    """

    # Search endpoint template; the placeholders are (encoded keyword, page).
    SEARCH_URL = 'https://s.m.taobao.com/search?event_submit_do_new_search_auction=1&_input_charset=utf-8&topSearch=1&atype=b&searchfrom=1&action=home%3Aredirect_app_action&from=1&q={}&sst=1&n=20&buying=buyitnow&m=api4h5&abtest=18&wlsort=18&style=list&closeModues=nav%2Cselecthot%2Conesearch&page={}'

    # Name of the CSV file created inside ``path``.
    OUTPUT_NAME = 'out.csv'

    # (CSV column, key in each result item, optional?) in output column order.
    # Optional keys fall back to '' when missing; the others must be present.
    _COLUMNS = (
        ('title_名称', 'title', False),                  # item name
        ('sold_月销量', 'sold', False),                  # monthly sales
        ('commentCount_评论量', 'commentCount', True),   # number of comments
        ('item_id_商品ID', 'item_id', False),            # item ID
        ('userId_商家ID', 'userId', False),              # seller ID
        ('nick_商家名称', 'nick', False),                # seller name
        ('location_商家地址', 'location', False),        # seller location
        ('pic_path_图片', 'pic_path', False),            # picture
        ('itemNumId_商品NID', 'itemNumId', False),       # item NID
        ('originalPrice_原价', 'originalPrice', False),  # original price
        ('price_售价', 'price', False),                  # sale price
        ('category_类别ID', 'category', True),           # category ID
        ('itemurl_商品链接', 'url', False),              # item link
    )

    def __init__(self, path, seach):
        """Store the output directory and keyword and prepare the HTTP session.

        Args:
            path: Directory where the CSV file is saved.
            seach: Search keyword.
        """
        self.path = path
        self.seach = seach
        self.s = requests.session()
        headers = {
            'Host': 's.m.taobao.com',
            'Accept-Encoding': 'br, gzip, deflate',
            'Connection': 'keep-alive',
            'Accept': 'application/json',
            'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 12_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) FxiOS/10.6b8836 Mobile/16A366 Safari/605.1.15',
            'Accept-Language': 'zh-cn',
            'X-Requested-With': 'XMLHttpRequest',
        }
        self.s.headers.update(headers)

    def _fetch_page(self, page):
        """Request one result page and return its decoded JSON, or None.

        Prints ``err`` and returns None when the response body is not valid
        JSON, so the caller never works with a stale or undefined payload.
        """
        # Encode the keyword so characters such as '&' or '#' cannot alter
        # the query string.
        url = self.SEARCH_URL.format(quote(str(self.seach), safe=''), page)
        # NOTE(review): TLS certificate verification is disabled here, as in
        # the original script; confirm this is still intended.
        body = self.s.get(url=url, verify=False).text
        try:
            payload = json.loads(body)
        except ValueError:
            print('err')
            return None
        print(payload)
        return payload

    def _build_frame(self, items):
        """Turn a list of result items into a DataFrame with the CSV columns.

        Raises:
            KeyError: If an item lacks one of the non-optional keys.
        """
        data = {
            column: [
                item.get(key, '') if optional else item[key]
                for item in items
            ]
            for column, key, optional in self._COLUMNS
        }
        return pd.DataFrame(data)

    def seachdata(self, max_pages=100, delay=1.25):
        """Download search result pages and write them to ``out.csv``.

        Pages are requested in order starting at 0. A page whose body is not
        valid JSON is skipped; the first page without any ``listItem``
        entries ends the crawl. The first page written creates the file with
        a header row, later pages are appended without one.

        Args:
            max_pages: Maximum number of pages to request.
            delay: Seconds to sleep before each request.
        """
        if self.path:
            # Make sure the target directory exists before the first write.
            os.makedirs(self.path, exist_ok=True)
        out_file = os.path.join(self.path, self.OUTPUT_NAME)

        wrote_any = False
        for page in range(max_pages):
            time.sleep(delay)
            print(page)
            payload = self._fetch_page(page)
            if payload is None:
                continue

            items = payload.get('listItem') if isinstance(payload, dict) else None
            if not items:
                break

            self._build_frame(items).to_csv(
                out_file,
                index=False,
                header=not wrote_any,
                mode='a' if wrote_any else 'w',
                encoding="GB18030",
            )
            wrote_any = True


if __name__ == '__main__':
    t = tb(r'E:\taobao', '手机')
    t.seachdata()