Top-Conference Hot Word Statistics (1)

Python crawler

main.py

import requests
import re
import pymysql

# fetch the CVF open access listing page for one day of CVPR 2020
url = 'https://openaccess.thecvf.com/CVPR2020?day=2020-06-18'
response = requests.get(url)

# capture each paper's title link, pdf link, and the author/title/booktitle
# fields of its BibTeX block
obj1 = re.compile(r'<dt class="ptitle"><br>.*?.html">(?P<name>.*?)</a></dt>.*?'
                  r'\[<a href="(?P<pdf>.*?)">pdf</a>].*?'
                  r'author = {(?P<author>.*?)},<br>.*?'
                  r'title = {(?P<title>.*?)},<br>.*?'
                  r'booktitle = {(?P<booktitle>.*?)},<br>', re.S)

result = obj1.finditer(response.text)

# connect to the MySQL database
conn = pymysql.connect(host='localhost', user='root', password='15132949306@', database='crewler', charset='utf8', port=3306)
# create a cursor object
cursor = conn.cursor()
sql = 'INSERT INTO db1(`name`, pdf, author, title, booktitle, `date`) values(%s,%s,%s,%s,%s,%s)'

# insert one row per matched paper; print and skip any record that fails
for it in result:
    try:
        data = [it.group('name'), it.group('pdf'), it.group('author'), it.group('title'), it.group('booktitle'), 20200618]
        cursor.execute(sql, data)
        conn.commit()
    except Exception as e:
        print(e)


response.close()

# close the cursor
cursor.close()
# close the connection
conn.close()

print('over!!!')

Database schema
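
A minimal sketch of the db1 table used by main.py is shown below. Only the column names come from the INSERT statement above; the file name, the column types and lengths, and the charset option are assumptions, not the exact original definition.

create_table.py

# sketch of a db1 table consistent with the INSERT in main.py
# (column types/lengths below are assumptions)
import pymysql

conn = pymysql.connect(host='localhost', user='root', password='15132949306@',
                       database='crewler', charset='utf8', port=3306)
cursor = conn.cursor()
cursor.execute("""
    CREATE TABLE IF NOT EXISTS db1 (
        `name`    VARCHAR(500),  -- paper title text from the listing link
        pdf       VARCHAR(500),  -- relative link to the paper pdf
        author    TEXT,          -- author field of the BibTeX block
        title     TEXT,          -- title field of the BibTeX block
        booktitle VARCHAR(255),  -- booktitle field of the BibTeX block
        `date`    INT            -- stored as 20200618 by main.py
    ) DEFAULT CHARSET=utf8
""")
conn.commit()
cursor.close()
conn.close()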

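With the table populated by main.py, the hot-word statistics from the title could be produced by reading the stored titles back out of db1 and counting word frequencies. The script below is only an illustrative sketch of that step; the file name, the stop-word list, and the choice to count English title words are assumptions.

count_words.py

# sketch of a hot-word count over the titles stored in db1
# (the stop-word list and top-20 cutoff are illustrative choices)
import re
from collections import Counter

import pymysql

conn = pymysql.connect(host='localhost', user='root', password='15132949306@',
                       database='crewler', charset='utf8', port=3306)
cursor = conn.cursor()
cursor.execute('SELECT title FROM db1')

stopwords = {'a', 'an', 'the', 'of', 'for', 'and', 'with', 'in', 'on', 'to', 'via', 'by'}
counter = Counter()
for (title,) in cursor.fetchall():
    # split each title into lowercase words and drop common stop words
    words = re.findall(r'[a-z]+', title.lower())
    counter.update(w for w in words if w not in stopwords)

# print the 20 most frequent title words
for word, freq in counter.most_common(20):
    print(word, freq)

cursor.close()
conn.close()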
