淘宝比价

#抓取淘宝数据
import json
import os
import re
import sqlite3
import string
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup
class Getdata:
    """Helpers for downloading and parsing Taobao search-result pages.

    All methods are static: they are called on the class itself
    (``Getdata.parsePage(...)``) and keep no instance state.
    """

    # Patterns are compiled once; each captures the value between the quotes.
    _PRICE_RE = re.compile(r'"view_price":"([\d.]*)"')
    # The title pattern steps over backslash escapes so that an escaped
    # quote (\") inside a title does not end the match early.
    _TITLE_RE = re.compile(r'"raw_title":"((?:[^"\\]|\\.)*)"')
    _TOTAL_PAGE_RE = re.compile(r'"totalPage":(\d+)')

    @staticmethod
    def getHTMLText(url, header, timeout=10):
        """Download ``url`` and return the response body as text.

        Args:
            url: Address to fetch.
            header: Mapping of HTTP request headers.
            timeout: Seconds to wait for the server before giving up, so a
                stalled connection cannot hang the program forever.

        Returns:
            The decoded page text, or ``""`` when the request fails or the
            server answers with an error status.
        """
        try:
            r = requests.get(url, headers=header, timeout=timeout)
            r.raise_for_status()
            r.encoding = r.apparent_encoding
            return r.text
        except requests.RequestException:
            return ""

    @staticmethod
    def _decode_json_string(raw):
        """Decode the inside of a JSON string literal (``\\uXXXX``, ``\\"``...).

        Falls back to the raw text when it is not valid JSON string content.
        """
        try:
            return json.loads('"' + raw + '"')
        except ValueError:
            return raw

    @staticmethod
    def parsePage(ilt, html):
        """Append ``[price, title]`` pairs found in ``html`` to ``ilt``.

        Prices and titles are matched independently and paired by position;
        both are stored as strings. Values are extracted with regex groups
        and decoded as JSON strings instead of being passed to ``eval``, so
        scraped text is never executed and titles containing ``:`` are
        handled correctly.

        Args:
            ilt: List that receives the parsed items (mutated in place).
            html: Page text to scan.
        """
        try:
            prices = Getdata._PRICE_RE.findall(html)
            titles = Getdata._TITLE_RE.findall(html)
        except TypeError:
            # html was not text (e.g. None); keep the best-effort behaviour.
            print("爬取失败")
            return
        # zip() stops at the shorter list, so a count mismatch cannot raise.
        for price, raw_title in zip(prices, titles):
            ilt.append([price, Getdata._decode_json_string(raw_title)])

    @staticmethod
    def GetCount(html):
        """Return the page count announced by ``"totalPage":N`` in ``html``.

        When the marker occurs several times the last occurrence wins.

        Returns:
            The page count as an ``int``, or ``0`` when the marker is absent.
        """
        matches = Getdata._TOTAL_PAGE_RE.findall(html)
        return int(matches[-1]) if matches else 0

    @staticmethod
    def printGoodsList(ilt):
        """Print ``ilt`` as a numbered, tab-separated table (numbering from 1).

        Args:
            ilt: Iterable of ``[price, title]`` items.
        """
        tplt = "{:4}\t{:8}\t{:16}"
        print(tplt.format("序号", "商品价格", "商品名称"))
        for count, g in enumerate(ilt, start=1):
            print(tplt.format(count, g[0], g[1]))

class DatabaseMannege:
    """SQLite persistence for scraped goods (table ``GoodMsg``).

    All methods are static and open their own short-lived connection, which
    is always closed before the method returns.
    """

    @staticmethod
    def CreateDataBase(db_path="taobao.db"):
        """Create the database file and the ``GoodMsg`` table.

        Failures are reported on stdout rather than raised, so calling this
        when the table already exists is harmless.

        Args:
            db_path: Path of the SQLite database file.
        """
        try:
            db = sqlite3.connect(db_path)
        except sqlite3.Error:
            print("创建数据库失败")
            return  # without a connection there is nothing more to do
        try:
            db.execute('create table GoodMsg(id varchar(10),price varchar(10),name varchar(40))')
            db.commit()
        except sqlite3.Error:
            print("创建表失败或表已经存在")
        finally:
            db.close()

    @staticmethod
    def InsertDatabase(data, db_path="taobao.db"):
        """Insert every ``[price, name]`` item of ``data`` into ``GoodMsg``.

        Values are bound through ``?`` placeholders, so quotes or SQL
        fragments inside a scraped title cannot break or alter the
        statement. A row that fails is reported and skipped; the remaining
        rows are still inserted. All successful rows are committed together.

        Args:
            data: Iterable of items whose first two elements are the price
                and the name.
            db_path: Path of the SQLite database file.
        """
        db = sqlite3.connect(db_path)
        try:
            cur = db.cursor()
            for item in data:
                try:
                    print("开始插入")
                    cur.execute(
                        "insert into GoodMsg(price,name)values(?,?)",
                        (item[0], item[1]),
                    )
                    print("插入成功")
                except (sqlite3.Error, IndexError, TypeError):
                    # Bad row or database error: report it, keep going.
                    print('插入失败')
            db.commit()
        finally:
            db.close()
class Main:
    """Interactive entry point: search Taobao, store and print the results."""

    @staticmethod
    def main():
        """Prompt for a product name, scrape every result page and save it.

        Each page is parsed into its own list and only that page's items are
        written to the database, so earlier pages are not inserted again on
        every iteration. The complete numbered list is printed once, after
        the last page.
        """
        print("请输入查询商品")
        goods = input()
        infoList = []
        # Percent-encode the search term so characters such as '&', '#' or
        # spaces cannot corrupt the query string.
        start_url = "https://s.taobao.com/search?q=" + quote(goods)
        # NOTE(review): the cookie below is a hard-coded session captured
        # from a browser; it will expire and arguably should not live in
        # source code. Kept unchanged because the request depends on it.
        header = {"cookie":"thw=cn; cna=ktJ/FI8k0gQCAbaLv4XUGVvh; tg=0; enc=%2FDi9xgv2fnznKtXV88N9fUTdV6UcRLyw3G6h3pjdwcpbHwkSTh%2FO1B1zsb29cDTL5N8TU0t4TdkRNxzvKIn4Ig%3D%3D; hng=CN%7Czh-CN%7CCNY%7C156; tracknick=1052071694www; t=0a525deca2dff81647d91643519e7e37; UM_distinctid=16b9bd49a2a5ef-031997ebe67ce2-37c143e-144000-16b9bd49a2b92e; miid=1364685100501550517; _cc_=W5iHLLyFfA%3D%3D; x=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0%26__ll%3D-1%26_ato%3D0; _m_h5_tk=98af7fdaf32be92fe72127eda6e0044e_1571041861081; _m_h5_tk_enc=ca1bdc50118e6ce4e5fd587ccc946e6c; mt=ci%3D-1_0; v=0; cookie2=1aac9317cb43d8f5dfab37bd0222fcf9; _tb_token_=578e3e4e7eedb; JSESSIONID=021AC0B7547DE41EE0944D2ECB89C106; alitrackid=www.taobao.com; lastalitrackid=www.taobao.com; l=dBjS2MZrqT2zAZFsBOCgSZ1_aY79jIRAguWbYNq9i_5BK6L_qNbOkg25WFp6cjWfthYB4NSLztv9-etkiKy06Pt-g3fPNxDc.; isg=BHR0oiuylQB4VAH5skFM2Q9IRTLsTpjNHWdJ-w7VA_-CeRTDNlnkx4w7_fEEgdCP",
                    "user-agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36"}

        html = Getdata.getHTMLText(start_url, header)
        if not html:
            # getHTMLText returns "" on any request failure; there is no
            # page count to read, so stop instead of crashing.
            print("爬取失败")
            return
        depth = Getdata.GetCount(html)
        DatabaseMannege.CreateDataBase()
        for i in range(depth):
            try:
                # Taobao paginates by item offset: 44 results per page.
                url = start_url + "&s=" + str(44 * i)
                html = Getdata.getHTMLText(url, header)
                page_items = []
                Getdata.parsePage(page_items, html)
                DatabaseMannege.InsertDatabase(page_items)
                infoList.extend(page_items)
            except Exception as exc:
                # Best effort per page: report the problem and move on
                # (Ctrl-C / SystemExit are deliberately not swallowed).
                print("第{}页处理失败: {}".format(i + 1, exc))
                continue
        Getdata.printGoodsList(infoList)
# Script entry point: this call sits at module level, so the interactive
# scrape starts as soon as the file is executed (or imported).
Main.main()

 

posted @ 2020-05-18 16:27  王者2  阅读(344)  评论(0)  编辑  收藏  举报