获取淘宝特定商品信息

获取淘宝特定商品信息

import json
import re
from urllib.parse import quote

import bs4
import numpy as np
import requests
from bs4 import BeautifulSoup
def getHTMLText(url):
    """Download ``url`` and return the response body as text.

    The request carries a fixed ``User-Agent`` header and the cookies parsed
    from the ``coo`` string below, which is expected to hold ``name=value``
    pairs separated by ``;`` (the format of a browser ``Cookie`` header).
    The body is decoded with the encoding ``requests`` detects from the
    content (``apparent_encoding``).

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded page text, or ``""`` if the request fails, times out
        (30 s) or the server answers with an error status.
    """
    # NOTE(review): 'Chorme' looks like a typo for 'Chrome'; kept as-is
    # because the value is sent to the server verbatim -- confirm before
    # changing it.
    headers = {
        'User-Agent': 'Chorme'}

    # Replace this placeholder with your own browser cookie string.
    coo = "这里输入你们自己的cookie"

    # Build the cookie dict, skipping fragments that are not ``name=value``.
    # Unpacking ``split('=', 1)`` unconditionally raised ValueError on such
    # fragments (including the placeholder above), and the old bare
    # ``except`` then silently turned that into an empty result.
    cookies = {}
    for fragment in coo.split(';'):
        name, sep, value = fragment.strip().partition('=')
        if sep and name:
            cookies[name] = value

    try:
        r = requests.get(url, cookies=cookies, headers=headers, timeout=30)
        r.raise_for_status()
    except requests.RequestException:
        # Best effort: callers treat an empty string as "no page".
        return ""

    r.encoding = r.apparent_encoding
    return r.text


def _decode_json_string(raw):
    """Decode the inside of a JSON string literal; return it as-is if invalid."""
    try:
        return json.loads('"' + raw + '"')
    except ValueError:
        return raw


def parasePage(ilt, html):
    """Extract price/title pairs from page text and append them to ``ilt``.

    The page text is scanned for ``"view_price":"<digits and dots>"`` and
    ``"raw_title":"<text>"`` fields.  The n-th price is paired with the n-th
    title; if the two counts differ, the surplus entries are ignored.

    Args:
        ilt: List that receives one ``[price, title]`` entry per product.
            Both values are strings.  The list is modified in place.
        html: Page text to scan.  Empty or ``None`` input adds nothing.

    Returns:
        None.
    """
    if not html:
        return

    # Capture groups pull the values out directly.  The previous
    # ``split(':')[1]`` + ``eval`` approach cut titles at their first colon
    # and executed text that came straight from the network.
    prices = re.findall(r'"view_price":"([\d.]*)"', html)
    # The title pattern accepts backslash escapes so that an escaped quote
    # inside a title does not end the match early.
    titles = re.findall(r'"raw_title":"((?:[^"\\]|\\.)*)"', html)

    # zip() stops at the shorter list, so a count mismatch cannot raise.
    for price, title in zip(prices, titles):
        ilt.append([price, _decode_json_string(title)])

def printGoodList(ilt):
    """Print the collected products as a tab-separated table on stdout.

    A header row is printed first, then one row per entry with a running
    index that starts at 1.

    Args:
        ilt: Iterable of ``[price, title]`` entries.

    Returns:
        None.  If an entry cannot be indexed or formatted, the marker ``3``
        is printed and the remaining entries are not printed.
    """
    tplt = "{:4}\t{:8}\t{:16}"
    try:
        print(tplt.format("序号", "价格", "商品名称"))
        for count, g in enumerate(ilt, start=1):
            print(tplt.format(count, g[0], g[1]))
    except (TypeError, ValueError, IndexError, KeyError):
        # Only data-shape problems are handled here; the former bare
        # ``except`` also swallowed KeyboardInterrupt and SystemExit.
        print('3')

def main(depth=2):
    """Ask for a product keyword, fetch Taobao search pages and print results.

    Args:
        depth: Number of result pages to fetch.  Defaults to 2, the value
            that was previously hard-coded.

    Returns:
        None.
    """
    goods = input('请输入你想要查询的商品:\n')

    # Percent-encode the keyword so that spaces, '&', '#', '+' and similar
    # characters stay part of the ``q`` value instead of altering the query.
    start_url = "https://s.taobao.com/search?q=" + quote(goods, safe='')

    infoList = []
    for page in range(depth):
        # ``s`` is the offset of the first item; a result page shows 44 items.
        url = start_url + "&s=" + str(44 * page)
        try:
            html = getHTMLText(url)
            parasePage(infoList, html)
        except Exception as exc:
            # Keep going with the next page, but report the failure instead
            # of hiding it.
            print("第{}页处理失败: {}".format(page + 1, exc))
            continue

    printGoodList(infoList)
    # Wait for Enter so the output stays visible before the program exits.
    input()

# Run the scraper only when executed as a script, so importing this module
# does not prompt for input or send network requests.
if __name__ == "__main__":
    main()




使用方式:

  1. 先将代码中的 cookie 占位符("这里输入你们自己的cookie")替换为你自己的淘宝 cookie,然后运行代码
  2. 输入想要查询的商品信息
  3. 显示出所有相关商品信息
posted @ 2019-11-02 22:52  X_J  阅读(114)  评论(0)  编辑  收藏  举报