# 爬取 top250


import re

import requests
from bs4 import BeautifulSoup
from openpyxl import Workbook

def getID():
    """Download the Baidu Top homepage and return it as a parsed HTML tree.

    Returns:
        BeautifulSoup: the body of ``https://top.baidu.com`` parsed with the
        built-in ``html.parser``.

    Raises:
        requests.RequestException: if the request fails or times out.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
        "Connection": "keep-alive",
    }

    url = "https://top.baidu.com"
    # Without a timeout a stalled connection would block the script forever.
    r = requests.get(url, headers=headers, timeout=10)
    # Decode the body with the charset detected from its content. This must
    # be assigned to ``encoding`` (not ``elapsed``, which is the request
    # duration) for ``r.text`` to pick it up.
    r.encoding = r.apparent_encoding
    soup = BeautifulSoup(r.text, "html.parser")
    return soup
# Module-level accumulators filled by getId(); the two lists are kept in
# lockstep, so ID[k] and ww[k] always come from the same <a> element.
ID = []
ww = []


def getId(soup):
    """Append each link's ``href`` and label text to ``ID`` and ``ww``.

    Looks up the ``<div>`` whose class attribute is
    ``"c-theme-color tabs-wrap_3Ac9n"`` (presumably the category tab bar),
    then walks every ``<a>`` inside it. For each anchor the ``href``
    attribute is appended to ``ID`` and the text of its first ``<span>`` is
    appended to ``ww``.

    Args:
        soup: a parsed document exposing ``find`` (e.g. the value returned
            by ``getID``).

    Returns:
        None. Results are accumulated in the module-level lists.
    """
    container = soup.find("div", {"class": "c-theme-color tabs-wrap_3Ac9n"})
    if container is None:
        # Layout changed or the page failed to load: nothing to collect.
        return

    for anchor in container.find_all("a"):
        href = anchor.get("href")
        label = anchor.find("span")
        # Skip incomplete anchors instead of crashing, and never append to
        # only one of the two lists.
        if href is None or label is None:
            continue
        ID.append(href)
        ww.append(label.text)
# posted @ 2021-06-29 18:05  /**serenity*/  阅读(21)  评论(0)  编辑  收藏  举报