Python 爬取淘宝商品信息

代码

import requests     #导入第三方库
from bs4 import BeautifulSoup as bs
import re
import os
import random
import json
import openpyxl
import time


# HTTP headers sent with every search request: a desktop Chrome user-agent
# string plus the session cookie. The cookie value below is a placeholder
# (it reads "put your own cookie here") and must be replaced before running.
headers = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/103.0.0.0 Safari/537.36'
    ),
    'cookie': '这里放上自己的cookie',
}



# Accumulates the raw product ("auction") dicts collected from every page.
auctions = []

# Maximum number of result pages to fetch.
maxPage = 10

for i in range(maxPage):
    params = {
        'q': '鼠标',
        # Offset passed as `s`; presumably 44 results per page -- confirm
        # against the live site if the page size ever changes.
        's': str(i * 44),
    }
    # A timeout keeps a stalled connection from hanging the script forever.
    response = requests.get(
        'https://s.taobao.com/search',
        params=params,
        headers=headers,
        timeout=10,
    )
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()

    # Raw HTML of the result page.
    content = response.text

    # The product data is embedded in the page as a JSON object assigned to
    # `g_page_config`. If it is absent (for example the cookie is missing or
    # expired and a different page was served), stop paging and keep what
    # has been collected so far rather than crashing with an IndexError.
    match = re.search(r'g_page_config = ({.*})', content)
    if match is None:
        print('g_page_config not found on page %d; stopping early '
              '(check the cookie in headers)' % (i + 1))
        break

    # Parse the JSON text into a dict.
    g_page_config = json.loads(match.group(1))

    # Append this page's list of product description dicts.
    auctions.extend(g_page_config['mods']['itemlist']['data']['auctions'])

    # Progress: running total of collected products.
    print(len(auctions))

    # Pause between requests; no need to wait after the final page.
    if i < maxPage - 1:
        time.sleep(4)
    
    
    
    
# Write every collected product to the first sheet of a new workbook and
# save it as 'taobao.xlsx' in the current working directory.
workbook = openpyxl.Workbook()
sheet = workbook.active

# (column header, key in each auction dict), in output column order.
columns = [
    ('商品名称', 'raw_title'),
    ('价格', 'view_price'),
    ('销量', 'view_sales'),
    ('运费', 'view_fee'),
    ('发货地点', 'item_loc'),
    ('店铺名', 'nick'),
    ('图片url', 'pic_url'),
]

# Header row (row 1).
for col, (title, _) in enumerate(columns, start=1):
    sheet.cell(1, col).value = title

# Data rows start at row 2. `.get` leaves a cell empty when a listing lacks
# a field, instead of raising KeyError and aborting before the file is saved.
for i, auction in enumerate(auctions):
    print(i)
    for col, (_, key) in enumerate(columns, start=1):
        sheet.cell(i + 2, col).value = auction.get(key)

workbook.save('taobao.xlsx')
    

结果

posted @ 2022-08-03 21:11  裏表異体  阅读(585)  评论(0)  编辑  收藏  举报