# 爬虫下载QQ音乐:获取所有歌手-每个歌手的专辑-每个专辑里的歌曲
# coding=utf-8
# !/usr/bin/env python
'''
author: dangxusheng
desc  : Somewhat tricky: several requests are needed to obtain the key
date  : 2018-08-29
'''

# Import modules
import json
import os
import os.path as osp
import re
import time

import requests
from lxml import etree

# Global variables
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.63 Safari/537.36 Qiyu/2.1.1.1",
    "Referer": "https://y.qq.com/portal/player.html"
}
SAVE_ROOT_PATH = 'H:/spider_download'


def download_rank_list():
    """Download the songs of one QQ Music top list.

    Fetches the chart described by the hard-coded ``url``, asks the vkey
    service for a play URL for each song and streams the audio to
    ``<SAVE_ROOT_PATH>/qqmusic-20210411/<song>-<singers>-<filename>``.
    Songs whose target file already exists and is larger than 100 KiB are
    skipped. The function sleeps two seconds after every download.

    Side effects: overwrites ``headers['Referer']`` in the module-level
    ``headers`` dict, creates the target directory and prints progress.
    """
    # To download a different chart, open
    # https://y.qq.com/n/yqq/toplist/62.html#stat=y_new.toplist.menu.62,
    # find the request to https://u.y.qq.com/cgi-bin/musics.fcg?-=getUCGI****
    # and replace the URL below with it.
    # Hot songs chart:
    # url = "https://u.y.qq.com/cgi-bin/musics.fcg?-=getUCGI4061906502614365&g_tk=5381&sign=zzaqgitaptrt3c68c23599f05a73b8beed6c1e387cb55&loginUin=0&hostUin=0&format=json&inCharset=utf8&outCharset=utf-8&notice=0&platform=yqq.json&needNewCode=0&data=%7B%22detail%22%3A%7B%22module%22%3A%22musicToplist.ToplistInfoServer%22%2C%22method%22%3A%22GetDetail%22%2C%22param%22%3A%7B%22topId%22%3A26%2C%22offset%22%3A0%2C%22num%22%3A20%2C%22period%22%3A%222021_14%22%7D%7D%2C%22comm%22%3A%7B%22ct%22%3A24%2C%22cv%22%3A0%7D%7D"
    url = (
        "https://u.y.qq.com/cgi-bin/musics.fcg?-=getUCGI22105661521063658"
        "&g_tk=5381&sign=zza4sgwo26nxligs809cad6fef7d9240750df6df27bc0296"
        "&loginUin=0&hostUin=0&format=json&inCharset=utf8&outCharset=utf-8"
        "&notice=0&platform=yqq.json&needNewCode=0"
        "&data=%7B%22detail%22%3A%7B%22module%22%3A%22musicToplist.ToplistInfoServer%22"
        "%2C%22method%22%3A%22GetDetail%22%2C%22param%22%3A%7B%22topId%22%3A128"
        "%2C%22offset%22%3A0%2C%22num%22%3A20%2C%22period%22%3A%222021_14%22%7D%7D"
        "%2C%22comm%22%3A%7B%22ct%22%3A24%2C%22cv%22%3A0%7D%7D"
    )
    headers['Referer'] = "https://y.qq.com/n/yqq/toplist/62.html"

    # The session is a context manager, so pooled connections are released
    # even when a request or a JSON lookup raises.
    with requests.session() as ie:
        rep = ie.get(url, headers=headers)
        html = rep.content.decode('utf-8')
        song_list = json.loads(html)['detail']['data']['songInfoList']

        for song in song_list:
            song_mid = song['mid']
            song_name = song['name']
            album_mid = song['album']['mid']
            singer = '&'.join(item['name'] for item in song['singer'])
            media_id = song['file']['media_mid']
            print(song_name)
            print(song_mid)
            print(album_mid)
            print(singer)
            print(media_id)

            # Ask the vkey service for the play URL of this song.
            get_key_url = (
                "https://u.y.qq.com/cgi-bin/musicu.fcg?g_tk=5381"
                "&jsonpCallback=getplaysongvkey0996617262271613"
                "&loginUin=0&hostUin=0&format=jsonp&inCharset=utf8&outCharset=utf-8"
                "&notice=0&platform=yqq&needNewCode=0"
                "&data=%7B%22req_0%22%3A%7B%22module%22%3A%22vkey.GetVkeyServer%22"
                "%2C%22method%22%3A%22CgiGetVkey%22%2C%22param%22%3A%7B%22guid%22%3A%228216405924%22"
                "%2C%22songmid%22%3A%5B%22" + song_mid + "%22%5D"
                "%2C%22songtype%22%3A%5B0%5D%2C%22uin%22%3A%220%22%2C%22loginflag%22%3A1"
                "%2C%22platform%22%3A%2220%22%7D%7D"
                "%2C%22comm%22%3A%7B%22uin%22%3A0%2C%22format%22%3A%22json%22%2C%22ct%22%3A20%2C%22cv%22%3A0%7D%7D"
            )
            rep = ie.get(get_key_url, headers=headers)
            data = json.loads(rep.content.decode('utf-8'))
            if data['code'] != 0 or data['req_0']['code'] != 0:
                continue
            midurlinfo_list = data['req_0']['data']['midurlinfo']
            if not midurlinfo_list:
                continue

            filename = midurlinfo_list[0]['filename']  # e.g. C400002Fc5Be34LLWm.m4a
            purl = midurlinfo_list[0]['purl']
            if not purl:
                # Without a purl the download URL names no file.
                # NOTE(review): presumably the service returns an empty purl
                # for songs it will not serve to this client - confirm.
                print('《%s》 没有可用的下载地址, 已跳过.' % song_name)
                continue

            # Strip the characters that are not allowed in file names.
            fname = re.sub(r'[\*"/:?\\|<>]', '', f"{song_name}-{singer}-{filename}")
            save_filepath = f'{SAVE_ROOT_PATH}/qqmusic-20210411/{fname}'
            os.makedirs(osp.dirname(save_filepath), exist_ok=True)
            # Check for an existing file BEFORE opening the streamed
            # download, so skipped songs cost no request.
            if osp.exists(save_filepath) and osp.getsize(save_filepath) > 100 * 1024:
                continue

            download_url = f'http://183.131.48.150/amobile.music.tc.qq.com/{purl}'
            # A streamed response holds its connection until it is closed.
            with ie.get(download_url, headers=headers, stream=True) as rep:
                with open(save_filepath, 'wb') as file:
                    for byte_data in rep.iter_content(1024):
                        file.write(byte_data)
            print('《%s》 下载成功!' % song_name)
            time.sleep(2)
    print('Done.')
def parse_jsonp(text):
    """Decode a JSON or JSONP response body.

    Accepts either plain JSON or a ``callback(<json>)`` wrapper, with
    optional surrounding whitespace and trailing characters such as ``;``.
    The payload is located by its parentheses, so no callback-name length
    has to be hard-coded.

    Raises:
        ValueError: if the text is neither JSON nor a JSONP wrapper, or the
            wrapped payload is not valid JSON.
    """
    text = text.strip()
    if not text.startswith(('{', '[')):
        start = text.find('(')
        end = text.rfind(')')
        if start == -1 or end <= start:
            raise ValueError('response is neither JSON nor JSONP')
        text = text[start + 1:end]
    return json.loads(text)


# Fetch the singer list
# https://y.qq.com/portal/singer_list.html
def get_singer_list():
    """Return the singers listed on the first page of the singer index.

    The request asks for ``cur_page`` 1 with ``area``, ``sex``, ``genre``
    and ``index`` all set to -100.

    Returns:
        A list of dicts with the keys ``singer_mid``, ``singer_name`` and
        ``singer_pic``.

    Side effects: overwrites ``headers['Referer']`` in the module-level
    ``headers`` dict.
    """
    url = (
        "https://u.y.qq.com/cgi-bin/musicu.fcg?callback=getUCGI25738961582047115"
        "&g_tk=5381&jsonpCallback=getUCGI25738961582047115"
        "&loginUin=0&hostUin=0&format=jsonp&inCharset=utf8&outCharset=utf-8"
        "&notice=0&platform=yqq&needNewCode=0"
        "&data=%7B%22comm%22%3A%7B%22ct%22%3A24%2C%22cv%22%3A10000%7D"
        "%2C%22singerList%22%3A%7B%22module%22%3A%22Music.SingerListServer%22"
        "%2C%22method%22%3A%22get_singer_list%22%2C%22param%22%3A%7B%22area%22%3A-100"
        "%2C%22sex%22%3A-100%2C%22genre%22%3A-100%2C%22index%22%3A-100"
        "%2C%22sin%22%3A0%2C%22cur_page%22%3A1%7D%7D%7D"
    )
    headers['Referer'] = "https://y.qq.com/portal/singer_list.html"
    with requests.session() as ie:
        rep = ie.get(url, headers=headers)
        payload = parse_jsonp(rep.content.decode('utf-8'))
    singer_list = payload['singerList']['data']['singerlist']
    return [
        {
            'singer_mid': singer['singer_mid'],
            'singer_name': singer['singer_name'],
            'singer_pic': singer['singer_pic'],
        }
        for singer in singer_list
    ]


# Fetch the album list
def get_album_list(singer_mid=''):
    """Return albums of one singer.

    The request asks for ``begin`` 0, ``num`` 5 and ``order`` "time", so at
    most the five albums the service lists first are returned.

    Args:
        singer_mid: the singer's ``singer_mid`` as returned by
            ``get_singer_list``.

    Returns:
        A list of dicts with the keys ``album_mid``, ``album_name``,
        ``singer_mid`` and ``singer_name``, taken from the response items.

    Side effects: overwrites ``headers['Referer']`` in the module-level
    ``headers`` dict.
    """
    url = (
        "https://u.y.qq.com/cgi-bin/musicu.fcg?callback=getUCGI2613146679247198"
        "&g_tk=5381&jsonpCallback=getUCGI2613146679247198"
        "&loginUin=0&hostUin=0&format=jsonp&inCharset=utf8&outCharset=utf-8"
        "&notice=0&platform=yqq&needNewCode=0"
        "&data=%7B%22singerAlbum%22%3A%7B%22method%22%3A%22get_singer_album%22"
        "%2C%22param%22%3A%7B%22singermid%22%3A%22" + singer_mid + "%22"
        "%2C%22order%22%3A%22time%22%2C%22begin%22%3A0%2C%22num%22%3A5%2C%22exstatus%22%3A1%7D"
        "%2C%22module%22%3A%22music.web_singer_info_svr%22%7D%7D"
    )
    headers['Referer'] = "https://y.qq.com/n/yqq/singer/" + singer_mid + ".html"
    with requests.session() as ie:
        rep = ie.get(url, headers=headers)
        payload = parse_jsonp(rep.content.decode('utf-8'))
    album_list = payload['singerAlbum']['data']['list']
    return [
        {
            'album_mid': item['album_mid'],
            'album_name': item['album_name'],
            'singer_mid': item['singer_mid'],
            'singer_name': item['singer_name'],
        }
        for item in album_list
    ]


# Download by album ID
def download_music_by_albumid(albummid='', singername=''):
    """Download every song of one album into ``./qqmusic``.

    For each song in the album the vkey service is asked for a play URL,
    and the audio is streamed to
    ``./qqmusic/<song>-<singername>-<filename>``. Songs for which the
    service returns no server or no play path are skipped. The function
    sleeps two seconds after every song.

    Args:
        albummid: the album's ``album_mid``, e.g. "001mTkmb4GJlh4".
        singername: singer name used only to build the file name.

    Side effects: overwrites ``headers['Referer']`` in the module-level
    ``headers`` dict, creates ``./qqmusic`` and prints progress.
    """
    url = (
        "https://c.y.qq.com/v8/fcg-bin/fcg_v8_album_info_cp.fcg?albummid=" + albummid +
        "&g_tk=5381&jsonpCallback=albuminfoCallback"
        "&loginUin=0&hostUin=0&format=jsonp&inCharset=utf8&outCharset=utf-8"
        "&notice=0&platform=yqq&needNewCode=0"
    )
    headers['Referer'] = "https://y.qq.com/portal/player.html"
    # open() does not create missing directories.
    os.makedirs('./qqmusic', exist_ok=True)

    with requests.session() as ie:
        rep = ie.get(url, headers=headers)
        song_list = parse_jsonp(rep.content.decode('utf-8'))['data']['list']

        for song in song_list:
            song_name = song['songname']
            song_mid = song['songmid']
            get_key_url = (
                "https://u.y.qq.com/cgi-bin/musicu.fcg?callback=getplaysongvkey0996617262271613"
                "&g_tk=5381&jsonpCallback=getplaysongvkey0996617262271613"
                "&loginUin=0&hostUin=0&format=jsonp&inCharset=utf8&outCharset=utf-8"
                "&notice=0&platform=yqq&needNewCode=0"
                "&data=%7B%22req_0%22%3A%7B%22module%22%3A%22vkey.GetVkeyServer%22"
                "%2C%22method%22%3A%22CgiGetVkey%22%2C%22param%22%3A%7B%22guid%22%3A%228216405924%22"
                "%2C%22songmid%22%3A%5B%22" + song_mid + "%22%5D"
                "%2C%22songtype%22%3A%5B0%5D%2C%22uin%22%3A%220%22%2C%22loginflag%22%3A1"
                "%2C%22platform%22%3A%2220%22%7D%7D"
                "%2C%22comm%22%3A%7B%22uin%22%3A0%2C%22format%22%3A%22json%22%2C%22ct%22%3A20%2C%22cv%22%3A0%7D%7D"
            )
            rep = ie.get(get_key_url, headers=headers)
            data = parse_jsonp(rep.content.decode('utf-8'))['req_0']['data']
            sip_list = data['sip']
            midurlinfo_list = data['midurlinfo']

            if sip_list and midurlinfo_list and midurlinfo_list[0]['purl']:
                filename = midurlinfo_list[0]['filename']
                download_url = sip_list[0] + midurlinfo_list[0]['purl']
                # Strip the characters that are not allowed in file names,
                # as download_rank_list does.
                fname = re.sub(r'[\*"/:?\\|<>]', '',
                               song_name + "-" + singername + "-" + filename)
                # A streamed response holds its connection until closed.
                with ie.get(download_url, headers=headers, stream=True) as rep:
                    with open('./qqmusic/%s' % fname, 'wb') as file:
                        for byte_data in rep.iter_content(1024):
                            file.write(byte_data)
                print('《%s》 下载成功!' % song_name)
            else:
                # No server or no play path in the answer: there is no URL
                # to download from.
                print('《%s》 没有可用的下载地址, 已跳过.' % song_name)
            time.sleep(2)


def main():
    """Download the albums of every listed singer, one album at a time."""
    # Fetch the singer list first
    for singer in get_singer_list():
        singer_name = singer['singer_name']
        # Fetch the singer's albums and download all songs of each album
        for album in get_album_list(singer['singer_mid']):
            download_music_by_albumid(album['album_mid'], singer_name)
            time.sleep(2)


# Entry point
if __name__ == '__main__':
    main()