Python 爬取 MP3 音乐
一、酷狗音乐
1、代码
# Search Kugou Music for a keyword, list the hits in a table, and download the
# selected tracks. For every downloaded song the MP3, the lyrics (with the
# bracketed tags removed) and the first artist's avatar are written to
# kugou/<singer>/.
import hashlib
import json
import os
import re
import time

# Browser-like headers sent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 '
                  'Safari/537.36 Edg/119.0.0.0',
    'Cookie': 'kg_mid=3a8e2eda6b55afd434ed43d762bae621; kg_dfid=4XSJ8z0tMH343y3JOZ2ZluzO; '
              'kg_dfid_collect=d41d8cd98f00b204e9800998ecf8427e; Hm_lvt_aedee6983d4cfc62f509129360d6bb3d=1701158152,'
              '1701163020,1701180349,1701337969; kg_mid_temp=3a8e2eda6b55afd434ed43d762bae621; '
              'Hm_lpvt_aedee6983d4cfc62f509129360d6bb3d=1701337996',
}

# Fixed string placed before and after the parameter string when signing.
_SIGN_SALT = "NVPh5oo715z5DIWAeQlhMDsWXXQV4hwt"
# Identifiers sent as mid/uuid and dfid; the same values appear in the cookie.
_MID = '3a8e2eda6b55afd434ed43d762bae621'
_DFID = '4XSJ8z0tMH343y3JOZ2ZluzO'

_SEARCH_URL = 'https://complexsearch.kugou.com/v2/search/song?'
_SONG_INFO_URL = 'https://wwwapi.kugou.com/play/songinfo?'

# Seconds to wait on any single HTTP request before giving up.
_REQUEST_TIMEOUT = 15

# Characters removed from file and directory names: the set Windows forbids
# plus ASCII control characters. '/' matters on every platform because it
# would otherwise create an unintended sub-directory.
_UNSAFE_FILENAME_CHARS = {ord(ch): None for ch in '<>:"/\\|?*'}
_UNSAFE_FILENAME_CHARS.update({code: None for code in range(32)})


def _song_params(audio_id, date_time):
    """Return the query parameters (without signature) of a song-info request."""
    return {
        'srcappid': '2919',
        'clientver': '20000',
        'clienttime': date_time,
        'mid': _MID,
        'uuid': _MID,
        'dfid': _DFID,
        'appid': '1014',
        'platid': '4',
        'encode_album_audio_id': audio_id,
        'token': '',
        'userid': '0',
    }


def _search_params(keyword, date_time):
    """Return the query parameters (without signature) of a search request."""
    return {
        'callback': 'callback123',
        'srcappid': '2919',
        'clientver': '1000',
        'clienttime': date_time,
        'mid': _MID,
        'uuid': _MID,
        'dfid': _DFID,
        'keyword': keyword,
        'page': '1',
        'pagesize': '30',
        'bitrate': '0',
        'isfuzzy': '0',
        'inputtype': '0',
        'platform': 'WebFilter',
        'userid': '0',
        'iscorrection': '1',
        'privilege_filter': '0',
        'filter': '10',
        'token': '',
        'appid': '1014',
    }


def _sign(params):
    """Return the MD5 hex digest sent as the ``signature`` parameter.

    The hashed text is the salt, then every ``key=value`` pair in ascending
    key order with no separator, then the salt again. Deriving it from the
    dict that is actually sent keeps the signature and the request in sync.
    """
    pairs = ''.join(f'{name}={value}' for name, value in sorted(params.items()))
    return hashlib.md5(f'{_SIGN_SALT}{pairs}{_SIGN_SALT}'.encode('utf-8')).hexdigest()


def Hash_md5(audio_id, date_time):
    """Return the signature for a song-info request.

    Args:
        audio_id: Value sent as ``encode_album_audio_id``.
        date_time: Client timestamp in milliseconds, sent as ``clienttime``.
    """
    return _sign(_song_params(audio_id, date_time))


def search_MD5(world, date_time):
    """Return the signature for a search request.

    Args:
        world: Search keyword (song title or artist).
        date_time: Client timestamp in milliseconds, sent as ``clienttime``.
    """
    return _sign(_search_params(world, date_time))


def parse_jsonp(text):
    """Decode a JSONP response such as ``callback123({...})``.

    Only the wrapper is removed: everything between the first ``(`` and the
    last ``)`` is parsed, so parentheses inside the JSON strings survive.

    Raises:
        ValueError: If *text* has no parenthesised payload or the payload is
            not valid JSON.
    """
    start = text.find('(')
    end = text.rfind(')')
    if start == -1 or end < start:
        raise ValueError('response is not a JSONP document')
    return json.loads(text[start + 1:end])


def safe_filename(name, fallback='unknown'):
    """Return *name* with characters that are unsafe in file names removed.

    Surrounding whitespace and trailing dots are stripped as well; *fallback*
    is returned when nothing usable is left.
    """
    cleaned = str(name).translate(_UNSAFE_FILENAME_CHARS).strip().rstrip('. ')
    return cleaned or fallback


def _http_get(url, params=None):
    """GET *url* with the shared headers and return the response.

    Raises:
        requests.RequestException: On connection problems, timeouts or a
            4xx/5xx status (a subclass of ``OSError``).
    """
    # Imported here rather than at module level so the signing and parsing
    # helpers above stay importable without the HTTP dependency.
    import requests

    response = requests.get(url=url, params=params, headers=headers, timeout=_REQUEST_TIMEOUT)
    response.raise_for_status()
    return response


def search(keyword, date_time):
    """Search for *keyword* and return the hits as a list of dicts.

    Each dict has the keys '歌名', '歌手', '专辑' and 'id', filled from the
    SongName, SingerName, AlbumName and EMixSongID fields of the response.
    """
    params = _search_params(keyword, date_time)
    params['signature'] = _sign(params)
    payload = parse_jsonp(_http_get(_SEARCH_URL, params=params).text)
    return [
        {
            '歌名': item['SongName'],
            '歌手': item['SingerName'],
            '专辑': item['AlbumName'],
            'id': item['EMixSongID'],
        }
        for item in payload['data']['lists']
    ]


def _print_table(songs):
    """Print the search hits as a numbered table (numbering starts at 1)."""
    # Imported lazily for the same reason as ``requests`` in ``_http_get``.
    import prettytable as pt

    tb = pt.PrettyTable()
    tb.field_names = ['序号', '歌名', '歌手', '专辑', 'id']
    for num, song in enumerate(songs, start=1):
        tb.add_row([num, song['歌名'], song['歌手'], song['专辑'], song['id']])
    print(tb)


def save(audio_id, date_time=None):
    """Download the audio, lyrics and artist avatar of one song.

    Files are written to ``kugou/<singer>/<song>.mp3|.txt|.jpg``.

    Args:
        audio_id: Song id as listed in the search results.
        date_time: Client timestamp in milliseconds used for the request
            signature; defaults to the current time.

    Raises:
        OSError: On network failures (``requests`` errors derive from it) or
            when a file cannot be written.
        ValueError: If the song-info response is not valid JSON.
    """
    if date_time is None:
        date_time = int(time.time() * 1000)
    params = _song_params(audio_id, date_time)
    params['signature'] = _sign(params)
    json_data = _http_get(_SONG_INFO_URL, params=params).json()

    try:
        info = json_data['data']
        if not isinstance(info, dict):
            # Reported through the same path as a missing field.
            raise KeyError('data')
        audio_name = info['audio_name']
        play_url = info['play_url']
        # ``or`` also covers an empty list, which would break ``authors[0]``.
        authors = info.get('authors') or [{}]
        singer_name = authors[0].get('author_name', '未知歌手')

        if not play_url:
            print(f'{audio_name} 没有可用的播放链接,已跳过')
            return

        file_stem = safe_filename(audio_name)
        save_directory = os.path.join('kugou', safe_filename(singer_name, '未知歌手'))
        os.makedirs(save_directory, exist_ok=True)

        # Audio
        music_content = _http_get(play_url).content
        with open(os.path.join(save_directory, f'{file_stem}.mp3'), mode='wb') as f:
            f.write(music_content)
        print(f'{file_stem}.mp3下载完成')

        # Lyrics, with every [..] tag removed
        lyrics_text = re.sub(r"\[.*?\]", "", info['lyrics'])
        with open(os.path.join(save_directory, f'{file_stem}.txt'), 'w', encoding="utf-8") as f:
            f.write(lyrics_text)
        print(f'{file_stem}.txt下载完成')

        # Cover image: the first author's avatar
        music_img = _http_get(authors[0]['sizable_avatar']).content
        with open(os.path.join(save_directory, f'{file_stem}.jpg'), mode='wb') as f:
            f.write(music_img)
        print(f'{file_stem}.jpg下载完成')
    except KeyError as e:
        print(f'获取数据时出错: {e}')


def main():
    """Prompt for a keyword, show the hits, and download the chosen songs."""
    # The API expects a 13-digit timestamp, i.e. milliseconds.
    date_time = int(time.time() * 1000)
    key = input('请输入歌名或歌手:')

    try:
        lis = search(key, date_time)
    except (OSError, ValueError, KeyError, TypeError) as e:
        print(f'获取数据时出错: {e}')
        return
    _print_table(lis)
    if not lis:
        print('没有找到相关歌曲')
        return

    page = input('请输入你要下载的歌曲序号 / 全部下载<0>:')
    if page == '0':
        selected = lis
    else:
        try:
            position = int(page)
            # Reject 0 and negatives explicitly; a negative index would
            # otherwise silently pick a song from the end of the list.
            if not 1 <= position <= len(lis):
                raise IndexError(f'序号超出范围: {position}')
        except (ValueError, IndexError) as e:
            print('你可能输入有误', e)
            return
        selected = [lis[position - 1]]

    for song in selected:
        # One failing song must not abort the remaining downloads.
        try:
            save(audio_id=song['id'], date_time=date_time)
        except (OSError, ValueError) as e:
            print(f"{song['歌名']} 下载失败: {e}")


if __name__ == '__main__':
    main()
二、网易云热榜音乐
1、代码
# Download every song listed on the NetEase Cloud Music hot-song chart page
# into the local "music" directory, one <title>.mp3 file per entry.
import html
import os
import re

# Output directory for the downloaded files.
filename = 'music'

# NetEase Cloud Music hot-song chart page.
url = "https://music.163.com/discover/toplist?id=3778678"

headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0'
}

# Chart entry link: captures the numeric song id and the link text.
_SONG_LINK = re.compile(r'<li><a href="/song\?id=(\d+)">(.*?)</a>')

# Characters that are not allowed in file names.
_FORBIDDEN_CHARS = str.maketrans('', '', '<>:"/\\|?*')

# Seconds to wait on any single HTTP request before giving up.
_REQUEST_TIMEOUT = 15


def clean_filename(name):
    """Return *name* without forbidden file-name characters, whitespace-trimmed."""
    return name.translate(_FORBIDDEN_CHARS).strip()


def parse_chart(page_html):
    """Return ``(song_id, title)`` pairs for every song link in *page_html*.

    Titles are HTML-unescaped so that entities such as ``&amp;`` do not end
    up in file names.
    """
    return [(song_id, html.unescape(title)) for song_id, title in _SONG_LINK.findall(page_html)]


def _http_get(target):
    """GET *target* with the shared headers; raise on failure or 4xx/5xx."""
    # Imported here rather than at module level so the parsing helpers above
    # stay importable without the HTTP dependency.
    import requests

    response = requests.get(url=target, headers=headers, timeout=_REQUEST_TIMEOUT)
    response.raise_for_status()
    return response


def main():
    """Fetch the chart page and download each listed song."""
    os.makedirs(filename, exist_ok=True)
    chart_page = _http_get(url).text

    for song_id, song_name in parse_chart(chart_page):
        # Fall back to the id when the title consists only of forbidden characters.
        song_name_clean = clean_filename(song_name) or song_id
        music_url = f'http://music.163.com/song/media/outer/url?id={song_id}.mp3'

        # Request the playback address to obtain the binary audio data.
        try:
            response = _http_get(music_url)
        except OSError as e:  # requests exceptions derive from OSError
            print(f'Song ID: {song_id} skipped: {e}')
            continue

        # NOTE(review): presumably an HTML body means the track is not
        # available through this endpoint; confirm against real responses.
        if response.headers.get('Content-Type', '').startswith('text/html'):
            print(f'Song ID: {song_id} skipped: response is an HTML page, not audio')
            continue

        with open(os.path.join(filename, f'{song_name_clean}.mp3'), mode='wb') as f:
            f.write(response.content)
        print(f'Song ID: {song_id}, Song Name: {song_name_clean}')


if __name__ == '__main__':
    main()