Python 简单的动漫排名爬虫
前两天刚看了两部动漫,想找找动漫排名,却发现网上的排名有点老了,于是自己写了一个简单的爬虫,分别从 MyAnimeList、Bangumi 和 Anikore 抓取排名前 100 的动漫及其评分,并保存为 JSON 文件。代码非常简单,没有用多线程或多进程。
import json
from bs4 import BeautifulSoup
import requests
# HTTP headers sent with the scraping requests: a desktop Chrome User-Agent.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/99.0.4844.82 Safari/537.36"
    ),
}

# Local SOCKS5 proxy endpoints for both schemes.
# NOTE(review): this mapping is defined but not passed to any request in the
# visible code -- presumably meant for `requests.get(..., proxies=proxies)`.
proxies = {
    "http": "socks5://127.0.0.1:10808",
    "https": "socks5://127.0.0.1:10808",
}
def MAL(links: list[str], limit: int = 100) -> dict[str, str]:
    """Scrape anime titles and scores from MyAnimeList ranking pages.

    Pages are fetched in the order given and every element with class
    ``ranking-list`` is read until ``limit`` entries have been collected.

    Args:
        links: URLs of the ranking pages to fetch, in ranking order.
        limit: Maximum number of entries to collect (defaults to 100).

    Returns:
        A dict mapping anime title to its score text, in page order. The
        dict is empty (never ``None``) when nothing could be collected, and
        holds fewer than ``limit`` entries when the pages run out first.

    Raises:
        requests.RequestException: If a page cannot be fetched or the
            server answers with an HTTP error status.
    """
    values: dict[str, str] = {}
    if limit <= 0:
        return values
    num = 0
    for link in links:
        # Send the same browser-like headers as the other scrapers and never
        # wait forever on a stalled connection.
        res = requests.get(link, headers=headers, timeout=30)
        res.raise_for_status()
        soup = BeautifulSoup(res.content, 'lxml')
        for item in soup.find_all(class_='ranking-list'):
            name_tag = item.find(class_="anime_ranking_h3")
            score_tag = item.find(class_="score-label")
            if name_tag is None or score_tag is None:
                # Row without the expected markup; skip it rather than crash.
                continue
            name, score = name_tag.text, score_tag.text
            print(name, score)
            values[name] = score
            num += 1
            if num >= limit:  # only keep the first `limit` entries
                return values
    # The pages ran out before `limit` entries were found: return what we have.
    return values
def BGM(links: list[str], limit: int = 100) -> dict[str, str]:
    """Scrape anime titles and scores from Bangumi ranking pages.

    Pages are fetched in the order given; on each page the ``li`` elements
    inside the element with class ``browserFull`` are read until ``limit``
    entries have been collected.

    Args:
        links: URLs of the ranking pages to fetch, in ranking order.
        limit: Maximum number of entries to collect (defaults to 100).

    Returns:
        A dict mapping anime title to its score text, in page order. The
        dict is empty (never ``None``) when nothing could be collected, and
        holds fewer than ``limit`` entries when the pages run out first.

    Raises:
        requests.RequestException: If a page cannot be fetched or the
            server answers with an HTTP error status.
    """
    values: dict[str, str] = {}
    if limit <= 0:
        return values
    num = 0
    for link in links:
        res = requests.get(link, headers=headers, timeout=30)
        res.raise_for_status()
        soup = BeautifulSoup(res.content, 'lxml')
        listing = soup.find(class_='browserFull')
        if listing is None:
            # Page without a ranking list (e.g. past the last page).
            continue
        for item in listing.find_all('li'):
            name_tag = item.find(class_="l")
            score_tag = item.find(class_="fade")
            if name_tag is None or score_tag is None:
                # Entry without a title link or score; skip it rather than crash.
                continue
            name, score = name_tag.text, score_tag.text
            print(name, score)
            values[name] = score
            num += 1
            if num >= limit:  # only keep the first `limit` entries
                return values
    # The pages ran out before `limit` entries were found: return what we have.
    return values
def ANK(links: list[str], limit: int = 100) -> dict[str, str]:
    """Scrape anime titles and scores from Anikore ranking pages.

    Anikore requires a login to view the ranking, so a session is used to
    log in once and keep the cookies for the page requests that follow.

    Args:
        links: URLs of the ranking pages to fetch, in ranking order.
        limit: Maximum number of entries to collect (defaults to 100).

    Returns:
        A dict mapping anime title to its score text, in page order. The
        dict is empty (never ``None``) when nothing could be collected, and
        holds fewer than ``limit`` entries when the pages run out first.

    Raises:
        requests.RequestException: If the login or a page request fails or
            the server answers with an HTTP error status.
    """
    values: dict[str, str] = {}
    if not links or limit <= 0:
        # Nothing to fetch, so do not log in at all.
        return values
    # The context manager closes the session's connections on every exit path.
    with requests.Session() as session:
        # The keys are the `name` attributes of the e-mail and password inputs
        # on the login page; the values are placeholders that must be replaced
        # with a real account.
        # NOTE(review): credentials are hard-coded here; consider loading them
        # from the environment instead of the source file.
        login = session.post(url='https://www.anikore.jp/users/login/',
                             data={'data[User][email]': "your_username@qq.com",
                                   'data[User][original_password]': 'your_password'},
                             headers=headers,
                             timeout=30)
        login.raise_for_status()
        num = 0
        for link in links:
            res = session.get(link, headers=headers, timeout=30)
            res.raise_for_status()
            soup = BeautifulSoup(res.content, 'lxml')
            for unit in soup.find_all(class_='l-searchPageRanking_unit'):
                heading = unit.find('h2')
                if heading is None:
                    # Unit without a heading; skip it rather than crash.
                    continue
                name_tag = heading.find(class_="l-searchPageRanking_unit_title")
                score_tag = heading.find(class_="l-searchPageRanking_unit_score")
                if name_tag is None or score_tag is None:
                    continue
                name, score = name_tag.text, score_tag.text
                print(name, score)
                values[name] = score
                num += 1
                if num >= limit:  # only keep the first `limit` entries
                    return values
    # The pages ran out before `limit` entries were found: return what we have.
    return values
if __name__ == "__main__":
    # Page URLs per site: MyAnimeList takes an offset (0, 50, ..., 450),
    # Bangumi and Anikore take a page number (1..9).
    mal_links = ["https://myanimelist.net/topanime.php?limit=%s" % offset
                 for offset in range(0, 500, 50)]
    bgm_links = ['http://bangumi.tv/anime/browser?sort=rank&page=%s' % page
                 for page in range(1, 10)]
    ank_links = ['https://www.anikore.jp/pop_ranking/page:%s' % page
                 for page in range(1, 10)]

    # Scrape all three sites first, then write the results.
    mal_ranking = MAL(mal_links)
    bgm_ranking = BGM(bgm_links)
    ank_ranking = ANK(ank_links)

    outputs = (
        ('mal.json', mal_ranking),
        ('bgm.json', bgm_ranking),
        ('ank.json', ank_ranking),
    )
    for filename, ranking in outputs:
        with open(filename, 'w', encoding='utf8') as f:
            # ensure_ascii=False keeps non-ASCII titles (e.g. Japanese or
            # Chinese) readable in the file instead of \uXXXX escapes.
            json.dump(ranking, f, indent=4, ensure_ascii=False)
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· TypeScript + Deepseek 打造卜卦网站:技术与玄学的结合
· 阿里巴巴 QwQ-32B真的超越了 DeepSeek R-1吗?
· 【译】Visual Studio 中新的强大生产力特性
· 10年+ .NET Coder 心语 ── 封装的思维:从隐藏、稳定开始理解其本质意义
· 【设计模式】告别冗长if-else语句:使用策略模式优化代码结构