# MOOC 淘宝商品比价定向爬虫实例 (MOOC example: a targeted crawler for comparing Taobao product prices)

import json
import re

import requests

def getHTMLText(url):
    """Download ``url`` and return the response body as text.

    The request uses a 30 second timeout. Before decoding, the response
    encoding is replaced by the encoding detected from the content
    (``apparent_encoding``).

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded page text, or an empty string when the request fails
        or the server answers with an error status.
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only request-level failures (connection, timeout, HTTP error
        # status, malformed URL) are treated as "no page"; a bare except
        # would also swallow KeyboardInterrupt and real programming errors.
        return ""

def parsePage(ilt, html):
    """Extract price/title pairs from a search result page.

    Every ``"view_price":"..."`` value is paired, in order of appearance,
    with the ``"raw_title":"..."`` value at the same position, and each
    pair is appended to ``ilt`` as ``[price, title]``. Both items are
    strings. If the page contains different numbers of prices and titles,
    only the leading pairs that have both are appended.

    Args:
        ilt: List that receives the ``[price, title]`` entries; it is
            modified in place.
        html: Page source to scan. An empty value appends nothing.

    Returns:
        None.
    """
    if not html:
        return

    # Capture the value between the quotes directly instead of splitting
    # the whole match on ':' -- titles may themselves contain colons.
    prices = re.findall(r'"view_price":"([\d.]*)"', html)
    # The title pattern steps over backslash escapes so that an escaped
    # quote inside a title does not end the match early.
    raw_titles = re.findall(r'"raw_title":"((?:[^"\\]|\\.)*)"', html)

    for price, raw_title in zip(prices, raw_titles):
        # The captured text is scraped, untrusted data: decode its escape
        # sequences as a JSON string literal rather than with eval().
        try:
            title = json.loads('"' + raw_title + '"')
        except ValueError:
            # Not a valid JSON string body; keep the text as found.
            title = raw_title
        ilt.append([price, title])

def printGoodList(ilt):
    """Print the collected goods as a tab-separated table.

    A header row is printed first, followed by one row per entry holding
    a 1-based sequence number, the entry's first item (price) and its
    second item (title).

    Args:
        ilt: Sequence of entries whose items 0 and 1 are the price and
            the title.
    """
    row_format = "{:4}\t{:8}\t{:16}"
    header = row_format.format('序号', '价格', '商品名称')
    print(header)
    # The sequence number starts at 1 for the first entry.
    for number, item in enumerate(ilt, start=1):
        price, title = item[0], item[1]
        print(row_format.format(number, price, title))

def main(goods='水杯', depth=2):
    """Crawl Taobao search result pages for ``goods`` and print the results.

    For each of the ``depth`` pages the search URL is fetched with
    ``getHTMLText`` and parsed with ``parsePage``; the combined list is
    printed once with ``printGoodList`` after all pages are processed.

    Args:
        goods: Search keyword appended to the query URL.
        depth: Number of result pages to fetch.
    """
    # The ``s`` query parameter advances by 44 for each page
    # (presumably the number of results per page -- confirm).
    page_step = 44
    start_url = 'https://s.taobao.com/search?q=' + goods
    infoList = []
    for i in range(depth):
        url = start_url + '&s=' + str(page_step * i)
        try:
            html = getHTMLText(url)
            parsePage(infoList, html)
        except Exception:
            # Best effort: a page that cannot be processed is skipped so
            # the remaining pages are still crawled.
            continue
    # Print once after the loop; printing inside it would repeat the
    # accumulated entries after every page.
    printGoodList(infoList)

# Start the crawl. There is no ``__main__`` guard, so this call runs
# whenever the file is executed or imported.
main()

 

# posted @ 2017-09-12 20:39  松花酿酒春水煎茶  阅读(163)  评论(0)  编辑  收藏  举报