爬top250


import re

import requests
from bs4 import BeautifulSoup
from  openpyxl import  Workbook

def getID():
    """Fetch https://top.baidu.com and return the page as parsed HTML.

    Sends a GET request with a browser-like ``User-Agent`` header, decodes
    the body with the encoding detected from the response content, and
    parses the text with the built-in ``html.parser`` backend.

    Returns:
        BeautifulSoup: the parsed document.

    Raises:
        requests.exceptions.RequestException: if the request fails or
            does not complete within the timeout.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
        'Connection': 'keep-alive'
    }

    url = "https://top.baidu.com"
    # Bound the wait so a stalled connection cannot block forever.
    r = requests.get(url, headers=headers, timeout=10)
    # Set the charset used by r.text. The previous code assigned this value
    # to r.elapsed, which left the encoding unchanged.
    r.encoding = r.apparent_encoding
    soup = BeautifulSoup(r.text, "html.parser")
    return soup
# Parallel result lists filled by getId(): ID[k] is the href of the k-th
# collected link and ww[k] is the text of that link's <span>.
ID = []
ww = []


def getId(soup):
    """Collect tab links from a parsed page into ``ID`` and ``ww``.

    Looks up the ``<div>`` whose class attribute is
    ``c-theme-color tabs-wrap_3Ac9n`` and, for every ``<a>`` inside it,
    appends the link's ``href`` to ``ID`` and the text of its first
    ``<span>`` to ``ww``.

    If the container is not found, nothing is appended. An anchor that
    lacks either an ``href`` or a ``<span>`` is skipped entirely so the
    two lists stay index-aligned.

    Args:
        soup: parsed document exposing ``find`` (for example the value
            returned by ``getID()``).

    Returns:
        None. Results are appended to the module-level lists.
    """
    container = soup.find("div", {"class": "c-theme-color tabs-wrap_3Ac9n"})
    if container is None:
        # Page layout changed or the fetch returned something unexpected.
        return

    for anchor in container.find_all("a"):
        href = anchor.get("href")
        label = anchor.find("span")
        if href is None or label is None:
            continue
        ID.append(href)
        ww.append(label.text)

posted @ 2021-06-29 18:05 /**serenity*/ 阅读(21) 评论(0) 编辑收藏举报

刷新页面返回顶部

/**serenity*/

爬top250

公告