爬取彩票网站数据(图形界面版)

完整代码

import requests
from lxml import etree
import xlwt
from pymongo import MongoClient
from tkinter import *

# Browser-like request headers sent with every page fetch, so the server sees
# an ordinary browser visit (the aim is to avoid the site's anti-scraping checks).
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/73.0.3683.103 Safari/537.36'
    ),
    'Accept-Encoding': 'gzip, deflate',
    'Accept': (
        'text/html,application/xhtml+xml,application/xml;q=0.9,'
        'image/webp,image/apng,*/*;q=0.8,'
        'application/signed-exchange;v=b3'
    ),
    'Connection': 'keep-alive',
}
def show_entry_mongo():
    """Scrape the 3D lottery result pages and store every draw in MongoDB.

    Fetches list pages 1 through 20 from kaijiang.zhcw.com, parses the result
    table on each page and inserts one document per table row into the
    ``webdata`` collection of the ``Chapter6`` database.

    Raises:
        requests.RequestException: if a page cannot be fetched within the
            timeout or the server answers with an HTTP error status.
        IndexError: if a table row lacks one of the expected cells.
    """
    # MongoClient() with no arguments uses the driver's default host and port.
    client = MongoClient()
    try:
        collection = client['Chapter6']['webdata']

        for page in range(1, 21):
            url = "http://kaijiang.zhcw.com/zhcw/html/3d/list_{}.html".format(page)
            # The timeout keeps a stalled connection from blocking the GUI
            # callback forever.
            response = requests.get(url=url, headers=headers, timeout=10)
            # Fail loudly on 4xx/5xx instead of trying to parse an error page.
            response.raise_for_status()

            # Parse the HTML so the table rows can be addressed with XPath.
            tree = etree.HTML(response.text)
            rows = tree.xpath('/html/body/table//tr')

            # Skip the first two rows and the last row (presumably the table
            # headers and the pagination footer -- confirm against the page).
            for row in rows[2:-1]:
                data = {
                    '开奖日期': row.xpath("./td[1]/text()")[0],
                    '期号': row.xpath("./td[2]/text()")[0],
                    '中奖号码1': row.xpath("./td[3]/em[1]/text()")[0],
                    '中奖号码2': row.xpath("./td[3]/em[2]/text()")[0],
                    '中奖号码3': row.xpath("./td[3]/em[3]/text()")[0],
                    '销售额(元)': row.xpath("./td[7]/strong/text()")[0],
                    '返奖比例': row.xpath("./td[8]/text()")[0],
                }
                collection.insert_one(data)
    finally:
        # Release the connection pool even when a fetch or parse step fails;
        # otherwise every button click leaks a client.
        client.close()

def show_entry_excel():
    """Scrape the 3D lottery result pages and export every draw to ``3D.xls``.

    Fetches list pages 1 through 20 from kaijiang.zhcw.com, writes a header
    row followed by one spreadsheet row per table row into a sheet named
    ``3d``, and saves the workbook as ``3D.xls`` (a relative path). The file
    is only written after all pages were processed successfully.

    Raises:
        requests.RequestException: if a page cannot be fetched within the
            timeout or the server answers with an HTTP error status.
        IndexError: if a table row lacks one of the expected cells.
    """
    workbook = xlwt.Workbook()
    sheet3d = workbook.add_sheet("3d", cell_overwrite_ok=True)

    # Column title paired with the XPath (relative to a <tr>) that yields the
    # value for that column; the order here is the column order in the sheet.
    columns = [
        ('开奖日期', "./td[1]/text()"),
        ('期号', "./td[2]/text()"),
        ('中奖号码1', "./td[3]/em[1]/text()"),
        ('中奖号码2', "./td[3]/em[2]/text()"),
        ('中奖号码3', "./td[3]/em[3]/text()"),
        ('销售额(元)', "./td[7]/strong/text()"),
        ('返奖比例', "./td[8]/text()"),
    ]

    # Header row.
    for col, (title, _) in enumerate(columns):
        sheet3d.write(0, col, title)

    # Next free spreadsheet row; row 0 holds the header.
    next_row = 1

    for page in range(1, 21):
        url = "http://kaijiang.zhcw.com/zhcw/html/3d/list_{}.html".format(page)
        # The timeout keeps a stalled connection from blocking the GUI
        # callback forever.
        response = requests.get(url=url, headers=headers, timeout=10)
        # Fail loudly on 4xx/5xx instead of trying to parse an error page.
        response.raise_for_status()

        # Parse the HTML so the table rows can be addressed with XPath.
        tree = etree.HTML(response.text)
        rows = tree.xpath('/html/body/table//tr')

        # Skip the first two rows and the last row (presumably the table
        # headers and the pagination footer -- confirm against the page).
        for row in rows[2:-1]:
            for col, (_, path) in enumerate(columns):
                sheet3d.write(next_row, col, row.xpath(path)[0])
            next_row += 1

    workbook.save('3D.xls')

# Build the main window: a single row of three buttons, one per action.
master = Tk()

# (button label, callback) pairs in left-to-right display order.
_actions = (
    ('写入数据库', show_entry_mongo),
    ('生成Excel文件', show_entry_excel),
    ('退出程序', master.quit),
)
for _column, (_label, _callback) in enumerate(_actions):
    _button = Button(master, text=_label, command=_callback)
    _button.grid(row=0, column=_column, sticky=W, pady=4)

# Hand control to Tk's event loop until the user quits.
master.mainloop()

1.准备工作:

1.1 安装requests: cmd >> pip install requests
1.2 安装lxml: cmd >> pip install lxml
1.3 安装xlwt: cmd >> pip install xlwt
1.4 安装pymongo: cmd >> pip install pymongo
1.5 写入数据库前,请确保本机的MongoDB服务已经启动(程序使用默认连接参数)

运行效果

 

 

注:1. 点击"写入数据库"按钮时,程序会抓取第1至20页的开奖数据,并写入MongoDB数据库(Chapter6库的webdata集合)。

2. 点击"生成Excel文件"按钮时,程序会抓取同样的数据,并在当前工作目录下生成3D.xls文件。

3. 点击"退出程序"按钮时,退出程序。

posted @ 2019-06-13 09:52  venkim  阅读(1052)  评论(0)  编辑  收藏  举报