Python 简单的动漫排名爬虫
前两天刚看了两部动漫,找了找动漫排名,发现网上的排名有点老了,于是自己简单写了一点儿代码。代码非常简单,没有用多线程或多进程。
import json
from bs4 import BeautifulSoup
import requests
# Browser-like request headers sent with the scraping requests.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/99.0.4844.82 Safari/537.36"
    ),
}

# Local SOCKS5 proxy, one identical entry per URL scheme.
# NOTE(review): nothing in the visible code passes this mapping to a request.
proxies = dict.fromkeys(("http", "https"), "socks5://127.0.0.1:10808")
def MAL(links: list[str]):
    """Scrape titles and scores from MyAnimeList ranking pages.

    Pages are fetched in the given order; parsing stops as soon as 100 rows
    have been read.

    Args:
        links: URLs of the ranking pages to fetch, in order.

    Returns:
        A dict mapping each title text to its score text. It is empty when
        ``links`` is empty or no row could be parsed, and it holds fewer keys
        than rows read when several rows share the same title text.

    Raises:
        requests.RequestException: if a page cannot be fetched in time or the
            server answers with an HTTP error status.
    """
    limit = 100  # only the first 100 rows are kept
    values = {}
    num = 0
    for link in links:
        # Send the shared browser-like headers (as the other scrapers do) and
        # bound the wait so a stalled connection cannot hang the script.
        res = requests.get(link, headers=headers, timeout=30)
        res.raise_for_status()
        soup = BeautifulSoup(res.content, 'lxml')
        for item in soup.find_all(class_='ranking-list'):
            name_tag = item.find(class_="anime_ranking_h3")
            score_tag = item.find(class_="score-label")
            if name_tag is None or score_tag is None:
                # Row without the expected markup: skip it instead of
                # crashing on ``None.text``.
                continue
            name = name_tag.text
            score = score_tag.text
            print(name, score)
            values[name] = score
            num += 1
            if num == limit:
                return values
    # Fewer than ``limit`` rows were available: return what was collected
    # rather than falling off the end and returning None.
    return values
def BGM(links: list[str]):
    """Scrape titles and scores from Bangumi ranking pages.

    Pages are fetched in the given order; parsing stops as soon as 100 rows
    have been read.

    Args:
        links: URLs of the ranking pages to fetch, in order.

    Returns:
        A dict mapping each title text to its score text. It is empty when
        ``links`` is empty or no row could be parsed, and it holds fewer keys
        than rows read when several rows share the same title text.

    Raises:
        requests.RequestException: if a page cannot be fetched in time or the
            server answers with an HTTP error status.
    """
    limit = 100  # only the first 100 rows are kept
    values = {}
    num = 0
    for link in links:
        # Bound the wait so a stalled connection cannot hang the script.
        res = requests.get(link, headers=headers, timeout=30)
        res.raise_for_status()
        soup = BeautifulSoup(res.content, 'lxml')
        listing = soup.find(class_='browserFull')
        if listing is None:
            # Page without the expected list container: nothing to parse here.
            continue
        for item in listing.find_all('li'):
            name_tag = item.find(class_="l")
            score_tag = item.find(class_="fade")
            if name_tag is None or score_tag is None:
                # ``li`` without both a title and a score element: skip it
                # instead of crashing on ``None.text``.
                continue
            name = name_tag.text
            score = score_tag.text
            print(name, score)
            values[name] = score
            num += 1
            if num == limit:
                return values
    # Fewer than ``limit`` rows were available: return what was collected
    # rather than falling off the end and returning None.
    return values
def ANK(links: list[str], *, email: str = "your_username@qq.com",
        password: str = "your_password"):
    """Scrape titles and scores from Anikore ranking pages.

    Per the original author's note, Anikore only shows its ranking to
    logged-in users, so a session logs in first and is reused for every page.
    Pages are fetched in the given order; parsing stops as soon as 100 rows
    have been read.

    Args:
        links: URLs of the ranking pages to fetch, in order.
        email: Account e-mail submitted to the login form. The default is a
            placeholder and must be replaced with a real account.
        password: Account password submitted to the login form. The default
            is a placeholder as well.

    Returns:
        A dict mapping each title text to its score text. It is empty when
        ``links`` is empty or no row could be parsed, and it holds fewer keys
        than rows read when several rows share the same title text.

    Raises:
        requests.RequestException: if the login request or a page request
            times out, fails, or is answered with an HTTP error status.
    """
    values = {}
    if not links:
        # Nothing to fetch, so skip the login round-trip as well.
        return values
    limit = 100  # only the first 100 rows are kept
    num = 0
    # The context manager closes the session's connections on every exit path.
    with requests.Session() as session:
        login = session.post(
            url='https://www.anikore.jp/users/login/',
            data={
                # Keys are the ``name`` attributes of the login form's
                # account and password inputs.
                'data[User][email]': email,
                'data[User][original_password]': password,
            },
            headers=headers,
            timeout=30,
        )
        login.raise_for_status()
        for link in links:
            res = session.get(link, headers=headers, timeout=30)
            res.raise_for_status()
            soup = BeautifulSoup(res.content, 'lxml')
            for unit in soup.find_all(class_='l-searchPageRanking_unit'):
                heading = unit.find('h2')
                if heading is None:
                    # Unit without a heading: skip it instead of crashing.
                    continue
                name_tag = heading.find(class_="l-searchPageRanking_unit_title")
                score_tag = heading.find(class_="l-searchPageRanking_unit_score")
                if name_tag is None or score_tag is None:
                    continue
                name = name_tag.text
                score = score_tag.text
                print(name, score)
                values[name] = score
                num += 1
                if num == limit:
                    return values
    # Fewer than ``limit`` rows were available: return what was collected
    # rather than falling off the end and returning None.
    return values
if __name__ == "__main__":
    # Page URLs for each site: MyAnimeList pages by row offset, the other two
    # by page number.
    mal_pages = [
        f"https://myanimelist.net/topanime.php?limit={offset}"
        for offset in range(0, 500, 50)
    ]
    bgm_pages = [
        f"http://bangumi.tv/anime/browser?sort=rank&page={page}"
        for page in range(1, 10)
    ]
    ank_pages = [
        f"https://www.anikore.jp/pop_ranking/page:{page}"
        for page in range(1, 10)
    ]

    # All three sites are scraped before any file is written.
    v1 = MAL(mal_pages)
    v2 = BGM(bgm_pages)
    v3 = ANK(ank_pages)

    # ensure_ascii=False lets json write non-ASCII text (titles in languages
    # other than English) as-is instead of as \u escapes.
    for filename, ranking in (('mal.json', v1), ('bgm.json', v2), ('ank.json', v3)):
        with open(filename, 'w', encoding='utf8') as f:
            json.dump(ranking, f, indent=4, ensure_ascii=False)