Scraping the Baidu real-time hot-search list

import re

import requests
from bs4 import BeautifulSoup
import xlwt

def getID():
    # Request the Baidu real-time hot-search board and return the parsed page
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
        'Connection': 'keep-alive'
    }

    url = "https://top.baidu.com/board?tab=realtime"
    r = requests.get(url, headers=headers)
    r.encoding = "utf-8"  # set the response character set
    soup = BeautifulSoup(r.text, "html.parser")
    return soup
def getItem(soup):
    # Extract rank, headline, description and hot index for every entry on the board
    mod = soup.find("div", attrs={"class": "container-bg_lQ801"})
    sy = mod.find_all("div", attrs={"class": "category-wrap_iQLoo horizontal_1eKyQ"})
    dataList = []
    for i in sy:
        EE = i.find_all("div", attrs={"class": "index_1Ew5p"})
        top = re.findall(r'(\d+)', str(EE))[-1]                        # rank
        name = i.find("a", attrs={"class": "title_dIF3B"}).text        # headline
        jj = i.find("div", attrs={"class": "hot-desc_1m_jR"}).text     # description
        rr = i.find("div", attrs={"class": "hot-index_1Bl1a"}).string  # hot index
        dataList.append(dict(排名=top, 新闻头条=name, 简介=jj, 热度=rr))
    return dataList
def saveExce(result):
    # (1) Create a workbook instance
    wb = xlwt.Workbook()
    # (2) Add the first worksheet
    sheet = wb.add_sheet("姬伯")
    list_h = ["排名", "新闻头条", "简介", "热度"]
    # Write the header row
    for col, title in enumerate(list_h):
        sheet.write(0, col, title)
    # Write one row per hot-search entry
    for i in range(0, len(result)):
        for n, inV in enumerate(list_h):
            sheet.write(i + 1, n, result[i][inV])

    wb.save('百度热搜.xls')  # xlwt only writes the legacy .xls format
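To confirm the workbook was written correctly, here is a minimal read-back sketch using xlrd, the companion reader to xlwt (assumes xlrd is installed; checkExcel is a hypothetical helper name, not part of the original script):

import xlrd

def checkExcel(path='百度热搜.xls'):
    # Hypothetical helper: open the .xls written above and print the header plus the first data row
    book = xlrd.open_workbook(path)
    sheet = book.sheet_by_index(0)
    print(sheet.row_values(0))      # header: 排名 / 新闻头条 / 简介 / 热度
    if sheet.nrows > 1:
        print(sheet.row_values(1))  # first hot-search entry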

# Alternative saver: openpyxl writes native .xlsx files, which xlwt cannot produce
from openpyxl import Workbook


def saveExcel(result):
    # Create a workbook and write to its active sheet
    workbook = Workbook()
    sheet = workbook.active
    list_h = ["排名", "新闻头条", "简介", "热度"]
    sheet.append(list_h)                         # header row
    for item in result:
        sheet.append([item[h] for h in list_h])  # one row per hot-search entry
    workbook.save('百度热搜.xlsx')


# Run the scrape and save the results
a = getID()
b = getItem(a)
saveExce(b)
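If you just want to eyeball the data without opening the spreadsheet, a minimal preview sketch reusing the b list built above:

# Optional: print the first few scraped entries to the console for a quick check
for entry in b[:3]:
    print(entry["排名"], entry["新闻头条"], entry["热度"])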