爬取博客新闻并入库
爬取新闻并入库操作
"""
import requests
import pymysql
from bs4 import BeautifulSoup
# Scrape one page of cnblogs article listings and persist each entry into
# the `at` table of the local `article` database.
conn = pymysql.Connect(host='127.0.0.1', user='root', password='123456', database='article')
cursor = conn.cursor()
try:
    ret = requests.get('https://www.cnblogs.com/sitehome/p/3')
    ret.raise_for_status()  # fail fast on a non-2xx response instead of parsing an error page
    soup = BeautifulSoup(ret.text, 'lxml')
    article_list = soup.find_all(class_='post_item')
    for article in article_list:
        # Hoist the title anchor: it supplies both the title text and the URL.
        title_link = article.find(class_='titlelnk')
        title = title_link.text
        href = title_link['href']
        desc = article.find(class_='post_item_summary').text
        author = article.find(class_='lightblue').text
        print('''
        文章标题:%s
        文章地址:%s
        文章摘要:%s
        文章作者:%s
        '''%(title,href,desc,author))
        # Parameterized query: the driver escapes the scraped text, preventing
        # SQL injection and breakage when titles/summaries contain quotes.
        sql = "insert into at (title, author, url, `desc`) values (%s, %s, %s, %s)"
        cursor.execute(sql, (title, author, href, desc))
    conn.commit()  # single commit after all rows — one transaction, far fewer round-trips
finally:
    # Always release DB resources, even if the request or an insert fails.
    cursor.close()
    conn.close()
"""
Only you can control your future
You're not alone. You still have family, people who care for you and want to save you.