Fork me on GitHub

xlwt sqlite3基础

将爬取到的数据保存下来,存到Excel表格用xlwt包,存到数据库有多种选择,这里用轻量级的sqlite3,集成到python3包中。

一个文件即可作为数据库连接对象,这里用test.db,直接贴代码。 可以参考廖雪峰大神

import re
import sqlite3
import urllib.request
import urllib.parse
import ssl
import bs4
import xlwt

ssl._create_default_https_context = ssl.SSLContext


def getData():  # 返回25部电影数组
    html = getHtml('https://movie.douban.com/top250')
    bs = bs4.BeautifulSoup(html, 'html.parser')
    datalist = []  # 存储25部电影
    for item in bs.find_all('div', class_='item'):
        data = []
        item = str(item)  # 每部电影信息
        movielink = re.compile(r'<a href="(.*?)"')  # 电影链接正则
        moviename = re.compile(r'<span class="title">(.*)</span>')  # 电影名正则
        link = re.findall(movielink, item)[0]
        name = re.findall(moviename, item)
        data.append(link)
        if len(name) == 2:
            data.append(name[0])
            data.append(name[1].replace('\xa0/\xa0', ''))
        else:
            data.append(name[0])
            data.append(' ')
        datalist.append(data)
    return datalist


def getHtml(baseurl):   # 请求url 并返回html页面
    # HTTP代理
    # proxy_handler = urllib.request.ProxyHandler({
    #     'http': '127.0.0.1:1087',
    #     'https': '127.0.0.1:1087'
    # })
    # opener = urllib.request.build_opener(proxy_handler)
    # urllib.request.install_opener(opener)

    req = urllib.request.Request(baseurl)  # 请求url
    req.add_header('User-Agent',  # 更改请求方从python到browser
                   'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_2_3) AppleWebKit/537.36 '
                   '(KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36')

    # data = bytes(urllib.parse.urlencode({"hello": "world"}), encoding='utf-8')  # post需提交信息
    response = urllib.request.urlopen(req, timeout=3)  # 打开url post需提交data
    html = response.read().decode('utf-8')  # 响应体
    # print('Status:', response.status, response.reason)  # 返回状态码
    # print(html)  # 返回响应体
    # for k, v in response.getheaders():  # 返回响应头
    #     print('%s: %s' % (k, v))
    return html


def saveExcel(datalist, savepath):  # 将数组写入到  .xls
    movieExcel = xlwt.Workbook(encoding='utf-8', style_compression=0)
    sheet = movieExcel.add_sheet('Top1-25sheet', cell_overwrite_ok=True)
    col = ['Rank', 'Link', 'Chinese Name', 'Original Name']
    for i in range(0, 4):  # 首行属性名
        sheet.write(0, i, col[i])
    for i in range(0, 25):
        sheet.write(i + 1, 0, i + 1)  # 排名
        for j in range(0, 3):  # 电影信息
            sheet.write(i + 1, j + 1, datalist[i][j])
    movieExcel.save(savepath)


def saveDB():   # 将数组存到sqlite数据库中
    conn = sqlite3.connect('test.db')
    cursor = conn.cursor()
    # cursor.execute('create table movie (Rank varchar(10), '
    #                'Link varchar(100), '
    #                'ChineseName varchar(50), '
    #                'OriginalName varchar(50));')
    datalist = getData()
    for i in range(0, 25):
        cursor.execute("insert into movie values(?, ?, ?, ?);",
                       (i+1, datalist[i][0],
                        datalist[i][1], datalist[i][2]))
    cursor.execute("select * from movie")
    movie = cursor.fetchall()
    print(movie)    # 测试
    cursor.close()
    conn.commit()
    conn.close()


def main():
    saveDB()


if __name__ == '__main__':
    main()

 

 

posted @ 2021-04-15 09:19  Faded828x  阅读(57)  评论(0编辑  收藏  举报