"""
爬取博客新闻并入库

爬取新闻并入库操作

"""


import requests
import pymysql
from bs4 import BeautifulSoup

# Hard-coded local MySQL credentials; target table `at` in database `article`.
# NOTE(review): move credentials to config/env in real use.
conn = pymysql.Connect(host='127.0.0.1', user='root', password='123456', database='article')

cursor = conn.cursor()

try:
    # Fetch page 3 of the cnblogs home listing.
    ret = requests.get('https://www.cnblogs.com/sitehome/p/3')
    # Fail fast on HTTP errors instead of silently parsing an error page.
    ret.raise_for_status()
    soup = BeautifulSoup(ret.text, 'lxml')

    # Each article card on the listing page carries class "post_item".
    article_list = soup.find_all(class_='post_item')

    for article in article_list:
        # Hoist the title link lookup — the original called find() twice.
        link = article.find(class_='titlelnk')
        title = link.text
        href = link['href']
        desc = article.find(class_='post_item_summary').text
        # NOTE(review): "lightblue" is assumed to be the author link's class
        # on this page layout — confirm against the live markup.
        author = article.find(class_='lightblue').text
        print('''
    文章标题:%s
    文章地址:%s
    文章摘要:%s
    文章作者:%s
    '''%(title,href,desc,author))

        # Parameterized query: the old %-formatted SQL broke on quotes in
        # titles/summaries and was open to SQL injection.
        sql = "insert into at (title, author, url, `desc`) values (%s, %s, %s, %s)"
        cursor.execute(sql, (title, author, href, desc))
        conn.commit()
finally:
    # Always release the cursor and connection, even if the request,
    # parsing, or an insert raises.
    cursor.close()
    conn.close()
# posted @ 2020-04-08 22:45  alen_zhan  阅读(127)  评论(0) 编辑  收藏  举报
# 返回顶部