此程序只适用于歌单列表页面的音乐文件爬取。建议去网易云官网寻找要下载的音乐歌单,将歌单的url地址替换到main()函数中的url,然后运行下载;文件存放位置由main()函数中的folder_url指定(例如py文件同级的oldMusic文件夹)。
直接上代码
import os
import sys
import urllib.request

import requests
from bs4 import BeautifulSoup
# Scrape the name and download link of every song in a playlist.
def getAllMusicList(play_url):
    """Return ``[name, url]`` pairs for every song linked on a playlist page.

    The page at ``play_url`` is fetched and parsed with BeautifulSoup. Every
    ``<a>`` tag inside the ``<ul class="f-hide">`` element is turned into a
    song name (the link text) and a download URL built from the link's
    ``href``.

    Args:
        play_url: URL of the playlist page to scrape.

    Returns:
        A list of two-item lists ``[song_name, download_url]``. The list is
        empty when the page contains no ``<ul class="f-hide">`` element.

    Raises:
        requests.RequestException: If the playlist page cannot be fetched.
    """
    headers = {
        'Referer': 'http://music.163.com/',
        'Host': 'music.163.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.75 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    }
    # Fetch the page content. The session is only needed for this one
    # request, so ``with`` releases its connections right away; the timeout
    # keeps a stalled server from hanging the script forever.
    with requests.session() as session:
        page = session.get(play_url, headers=headers, timeout=30).content

    # Use bs4 to pick out the song names and addresses.
    soup = BeautifulSoup(page, 'lxml')
    song_list = soup.find('ul', {'class': 'f-hide'})
    if song_list is None:
        # Unexpected page layout (or an error page): nothing to download.
        return []

    songs = []
    # ``href=True`` skips anchors that carry no href attribute at all.
    for link in song_list.find_all('a', href=True):
        # The first five characters of the href are dropped and the rest is
        # appended to the outer-URL endpoint.
        # NOTE(review): presumably href looks like "/song?id=<n>", so the
        # remainder is "?id=<n>" - confirm against a live page.
        music_url = 'http://music.163.com/song/media/outer/url' + link['href'][5:] + '.mp3'
        # Each song is stored as a [name, url] pair.
        songs.append([link.text, music_url])
    return songs
# Download every song contained in the playlist's song list.
def downloadMusicList(lists, folder_url):
    """Download each song in ``lists`` into ``folder_url`` as ``<name>.mp3``.

    Songs are downloaded one after another. A song that fails to download or
    to be written is reported on stdout and skipped, so one bad entry does not
    abort the rest of the batch.

    Args:
        lists: Iterable of ``[song_name, download_url]`` pairs.
        folder_url: Destination directory. It is created if it does not
            exist; an empty string means the current working directory.

    Raises:
        OSError: If the destination directory cannot be created.
    """
    header1 = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36"
    }
    # Spaces become underscores, and so do characters that are illegal in
    # file names on common platforms, so a title such as "A/B" cannot be
    # mistaken for a nested path.
    unsafe_chars = str.maketrans({ch: '_' for ch in ' \\/:*?"<>|'})

    if folder_url:
        os.makedirs(folder_url, exist_ok=True)

    for item in lists:
        name = item[0].translate(unsafe_chars)
        url = item[1]
        print('正在下载 ===> %s : %s' % (name, url))
        try:
            res = requests.get(url, headers=header1, timeout=30)
            # Do not save an HTTP error page under an .mp3 name.
            res.raise_for_status()
            # "wb" rather than "ab": re-running the script must replace the
            # file, not append a second copy of the audio to it.
            with open(os.path.join(folder_url, name + '.mp3'), "wb") as f:
                f.write(res.content)
        except (requests.RequestException, OSError):
            # Best effort: report the failure and move on to the next song.
            print('下载%s报错' % name)
    print("下载完成")
def main():
    """Scrape one hard-coded playlist and download all of its songs."""
    # Playlist URL. NOTE: remove the '#/' that precedes 'playlist' in the
    # address shown by the browser.
    url = 'https://music.163.com/playlist?id=7431301214'
    # Destination folder (use an absolute path if a relative one misbehaves).
    # Backslashes are doubled on purpose: in a normal string literal '\x'
    # starts a hex escape, so a single-backslash '\xxx' is a syntax error.
    folder_url = 'E:\\mine\\xxx\\wyyMusic\\'
    # Collect the song names and their download links, then fetch them.
    lists = getAllMusicList(url)
    downloadMusicList(lists, folder_url)
# Run the downloader only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()