Python信息采集 - 喜马拉雅专辑歌曲
Python信息采集 - 喜马拉雅专辑歌曲
setting.py
import pymongo

# Ximalaya album endpoint; the two %s slots are filled with the album id
# and the page number (in that order).
XMLY_URL = "https://www.ximalaya.com/revision/play/album?albumId=%s&pageNum=%s&sort=-1&pageSize=30"

# HTTP headers sent with the album request (a desktop Chrome User-Agent).
HEADER = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/73.0.3683.86 Safari/537.36"
    ),
}

# Database configuration: local MongoDB server, database "db3".
conn = pymongo.MongoClient("127.0.0.1", 27017)
MONGODB = conn["db3"]

# Directory configuration: where downloaded audio and cover files are stored.
MUSIC_PATH = "Music"
COVER_PATH = "Cover"
data.py
import time
from setting import XMLY_URL, HEADER, MONGODB, MUSIC_PATH, COVER_PATH
import requests, os
from uuid import uuid4
# Download every track (audio + cover image) listed on the first page of one
# Ximalaya album, save the files under MUSIC_PATH / COVER_PATH, and record one
# {"music", "cover", "title"} document per track in the MongoDB "content"
# collection.

# Seconds to wait for a server response before giving up; without a timeout
# requests can block forever on a stalled connection.
REQUEST_TIMEOUT = 30

# open(..., "wb") does not create missing parent directories.
os.makedirs(MUSIC_PATH, exist_ok=True)
os.makedirs(COVER_PATH, exist_ok=True)

# Fetch the track list for album 17514344, page 1.
my_url = XMLY_URL % ("17514344", "1")
res = requests.get(my_url, headers=HEADER, timeout=REQUEST_TIMEOUT)
res.raise_for_status()
data = res.json()

# Either level may be absent or null in the response; treat that as "no tracks"
# instead of crashing on None.
tracks = (data.get("data") or {}).get("tracksAudioPlay") or []

content_list = []
for music_info in tracks:
    audio_url = music_info.get("src")
    cover_path_part = music_info.get("trackCoverPath")
    if not audio_url or not cover_path_part:
        # Both URLs are needed to build a complete record; skip the track
        # rather than abort the whole run.
        print("skip (missing url):", music_info.get("trackName"))
        continue

    # Fetch both resources before touching the disk so a failed cover download
    # does not leave an orphaned audio file behind.
    try:
        audio = requests.get(audio_url, timeout=REQUEST_TIMEOUT)
        audio.raise_for_status()
        # "http:" is prepended exactly as before — assumes trackCoverPath is a
        # scheme-relative URL; TODO confirm against the API response.
        cover = requests.get("http:" + cover_path_part, timeout=REQUEST_TIMEOUT)
        cover.raise_for_status()
    except requests.RequestException as exc:
        print("skip (download failed):", music_info.get("trackName"), exc)
        continue
    print(cover, "cover")

    # One random name shared by the audio file and its cover image.
    filename = uuid4()

    audio_path = os.path.join(MUSIC_PATH, f"{filename}.mp3")
    with open(audio_path, "wb") as f:
        f.write(audio.content)

    cover_path = os.path.join(COVER_PATH, f"{filename}.jpg")
    with open(cover_path, "wb") as f:
        f.write(cover.content)

    # Only the bare file names are stored; the directories come from settings.
    content_list.append({
        "music": f"{filename}.mp3",
        "cover": f"{filename}.jpg",
        "title": music_info.get("trackName"),
    })

    # Short pause between tracks to avoid hammering the server.
    time.sleep(0.2)

# insert_many raises on an empty list, so only write when something was saved.
if content_list:
    MONGODB.content.insert_many(content_list)