Python 爬虫实战

基于 Python 的 requests、BeautifulSoup4 第三方库,实时爬取哔哩哔哩(B 站)全站热门视频排行榜的前一百名。

import requests
from bs4 import BeautifulSoup

# Scrape the Bilibili site-wide popular ranking page and save every entry as a
# Markdown link "[title](url)", one per line, in hot100.txt (also echoed to
# stdout).
#
# Raises:
#     Exception: if the HTTP status is not 200, or if the ranking list cannot
#         be located in the returned HTML.
#     requests.exceptions.RequestException: on connection errors or timeout.
url = "https://www.bilibili.com/v/popular/rank/all/"
# Send a desktop-browser User-Agent instead of the requests default.
header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36 Edg/92.0.902.55'}
# Without a timeout requests waits forever on a stalled connection.
res = requests.get(url, headers=header, timeout=10)
if res.status_code != 200:
    print(res.status_code)
    raise Exception("error: unexpected HTTP status %s" % res.status_code)
html = res.text
soup = BeautifulSoup(html, "html.parser")

# find() returns None when a tag is missing, so walk down one level at a time
# and fail with a readable message instead of an AttributeError on None.
app = soup.find("div", id="app")
rank_list = app.find("ul", class_="rank-list") if app is not None else None
if rank_list is None:
    raise Exception("error: ranking list not found in the page")
links = rank_list.find_all("li", class_="rank-item")

# The file is opened only after the page was fetched and parsed, so a failed
# run no longer truncates the results of a previous one. An explicit UTF-8
# encoding keeps titles with non-ASCII characters writable regardless of the
# platform's default locale encoding; the `with` block closes the file even
# if an error occurs mid-loop.
with open("hot100.txt", "w", encoding="utf-8") as file:
    for item in links:
        info = item.find("div", class_="info")
        anchor = info.find("a", class_="title") if info is not None else None
        if anchor is None or not anchor.get("href"):
            # Skip entries that do not carry a title link.
            continue
        name = anchor.get_text()
        href = anchor["href"]
        # Protocol-relative links ("//host/path") need a scheme; anything
        # else is kept as-is rather than blindly prefixed.
        if href.startswith("//"):
            href = "https:" + href
        print("[%s](%s)" % (name, href))
        try:
            file.write("[%s](%s)\n" % (name, href))
        except UnicodeEncodeError:
            # Last-resort fallback kept from the original script: record the
            # link even if the title cannot be encoded.
            file.write("[Error](%s)\n" % href)
posted @ 2023-03-11 14:05  SRIGT  阅读(44)  评论(2)  编辑  收藏  举报