Python 爬取 MP3 音乐

一、酷狗音乐

1、代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
import requests
import time
# 导入解密模块
import hashlib
import re
import os
import json
# 导入制表模块
import prettytable as pt
 
# Request headers sent with every call so the requests look like they come
# from a desktop browser session.
headers = {
    'User-Agent': ' '.join((
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
        'AppleWebKit/537.36 (KHTML, like Gecko)',
        'Chrome/119.0.0.0',
        'Safari/537.36',
        'Edg/119.0.0.0',
    )),
    # One cookie per entry; joined with "; " into a single Cookie header value.
    'Cookie': '; '.join((
        'kg_mid=3a8e2eda6b55afd434ed43d762bae621',
        'kg_dfid=4XSJ8z0tMH343y3JOZ2ZluzO',
        'kg_dfid_collect=d41d8cd98f00b204e9800998ecf8427e',
        'Hm_lvt_aedee6983d4cfc62f509129360d6bb3d=1701158152,1701163020,1701180349,1701337969',
        'kg_mid_temp=3a8e2eda6b55afd434ed43d762bae621',
        'Hm_lpvt_aedee6983d4cfc62f509129360d6bb3d=1701337996',
    )),
}
 
 
def Hash_md5(audio_id, date_time):
    """Return the MD5 hex digest used as ``signature`` for a song-info request.

    The digest is taken over a fixed salt, the request's ``key=value`` pairs
    concatenated without separators (listed in alphabetical key order), and
    the same salt again.

    Args:
        audio_id: Value substituted into ``encode_album_audio_id``.
        date_time: Value substituted into ``clienttime``.

    Returns:
        The 32-character lowercase hexadecimal MD5 digest.
    """
    salt = "NVPh5oo715z5DIWAeQlhMDsWXXQV4hwt"
    signed_fields = (
        "appid=1014",
        f"clienttime={date_time}",
        "clientver=20000",
        "dfid=4XSJ8z0tMH343y3JOZ2ZluzO",
        f"encode_album_audio_id={audio_id}",
        "mid=3a8e2eda6b55afd434ed43d762bae621",
        "platid=4",
        "srcappid=2919",
        "token=",
        "userid=0",
        "uuid=3a8e2eda6b55afd434ed43d762bae621",
    )
    # The salt brackets the parameter string on both sides.
    payload = salt + ''.join(signed_fields) + salt
    return hashlib.md5(payload.encode('utf-8')).hexdigest()
 
 
def search_MD5(world, date_time):
    """Return the MD5 hex digest used as ``signature`` for a search request.

    The signed text is a fixed salt, every search parameter rendered as
    ``name=value`` and concatenated without separators (in the order listed
    below), then the salt once more.

    Args:
        world: Search keyword substituted into ``keyword``.
        date_time: Value substituted into ``clienttime``.

    Returns:
        The 32-character lowercase hexadecimal MD5 digest.
    """
    salt = "NVPh5oo715z5DIWAeQlhMDsWXXQV4hwt"
    # (name, value) pairs in the exact order they are signed.
    signed_pairs = (
        ("appid", "1014"),
        ("bitrate", "0"),
        ("callback", "callback123"),
        ("clienttime", date_time),
        ("clientver", "1000"),
        ("dfid", "4XSJ8z0tMH343y3JOZ2ZluzO"),
        ("filter", "10"),
        ("inputtype", "0"),
        ("iscorrection", "1"),
        ("isfuzzy", "0"),
        ("keyword", world),
        ("mid", "3a8e2eda6b55afd434ed43d762bae621"),
        ("page", "1"),
        ("pagesize", "30"),
        ("platform", "WebFilter"),
        ("privilege_filter", "0"),
        ("srcappid", "2919"),
        ("token", ""),
        ("userid", "0"),
        ("uuid", "3a8e2eda6b55afd434ed43d762bae621"),
    )
    body = ''.join(f"{name}={value}" for name, value in signed_pairs)
    digest = hashlib.md5(f"{salt}{body}{salt}".encode('utf-8'))
    return digest.hexdigest()
 
 
# Millisecond timestamp shared by every signed request in this script.
# time.time() returns seconds, so it is scaled by 1000 to get the 13-digit
# value used for ``clienttime``.
date_time = int(time.time() * 1000)


key = input('请输入歌名或歌手:')
# Signature over the search parameters.
search_signature = search_MD5(key, date_time)

# Search endpoint.
search_url = 'https://complexsearch.kugou.com/v2/search/song?'
# Query parameters; the values must match the ones signed in search_MD5.
search_data = {
    'callback': 'callback123',
    'srcappid': '2919',
    'clientver': '1000',
    'clienttime': date_time,
    'mid': '3a8e2eda6b55afd434ed43d762bae621',
    'uuid': '3a8e2eda6b55afd434ed43d762bae621',
    'dfid': '4XSJ8z0tMH343y3JOZ2ZluzO',
    'keyword': key,
    'page': '1',
    'pagesize': '30',
    'bitrate': '0',
    'isfuzzy': '0',
    'inputtype': '0',
    'platform': 'WebFilter',
    'userid': '0',
    'iscorrection': '1',
    'privilege_filter': '0',
    'filter': '10',
    'token': '',
    'appid': '1014',
    'signature': search_signature
}

# Send the search request.
response = requests.get(url=search_url, params=search_data, headers=headers)
# The body is JSONP: callback123({...})
search_data = response.text
# Capture everything between the callback's opening parenthesis and the LAST
# closing parenthesis. Removing every ')' instead would corrupt JSON string
# values that themselves contain ')', e.g. a title ending in "(Live)".
jsonp_match = re.search(r'callback123\((.*)\)', search_data, re.S)
if jsonp_match is None:
    raise RuntimeError(f'搜索接口返回了非预期的内容: {search_data[:200]}')
html_data = jsonp_match.group(1)
# Parse the JSON text into a dict.
json_data = json.loads(html_data)

tb = pt.PrettyTable()
tb.field_names = ['序号', '歌名', '歌手', '专辑', 'id']
# One dict per search hit, in display order; the download code indexes into
# this list using the 1-based number shown in the table.
lis = []
for num, index in enumerate(json_data['data']['lists'], start=1):
    dit = {
        '歌名': index['SongName'],
        '歌手': index['SingerName'],
        '专辑': index['AlbumName'],
        'id': index['EMixSongID']
    }
    lis.append(dit)
    tb.add_row([num, dit['歌名'], dit['歌手'], dit['专辑'], dit['id']])
print(tb)
 
 
# audio_id = input('请输入歌曲id:')
# signature = Hash_md5(audio_id, date_time)
 
def _safe_filename(name):
    """Return *name* without characters that are not allowed in file names."""
    return ''.join(ch for ch in name if ch not in '<>:"/\\|?*')


def save(audio_id):
    """Download the audio, lyrics and artist image for one song.

    Requests the song-info endpoint for *audio_id* and writes
    ``<song>.mp3``, ``<song>.txt`` and ``<song>.jpg`` into
    ``kugou/<singer>/``, creating the directory if needed. Progress is
    printed after each file.

    Args:
        audio_id: Song identifier sent as ``encode_album_audio_id``.

    A field missing from the response (``KeyError``) is reported on stdout
    rather than raised; network errors from ``requests`` propagate to the
    caller.
    """
    signature = Hash_md5(audio_id, date_time)
    url = 'https://wwwapi.kugou.com/play/songinfo?'

    data = {
        'srcappid': '2919',
        'clientver': '20000',
        'clienttime': date_time,
        'mid': '3a8e2eda6b55afd434ed43d762bae621',
        'uuid': '3a8e2eda6b55afd434ed43d762bae621',
        'dfid': '4XSJ8z0tMH343y3JOZ2ZluzO',
        'appid': '1014',
        'platid': '4',
        'encode_album_audio_id': audio_id,
        'token': '',
        'userid': '0',
        'signature': signature
    }

    response = requests.get(url=url, params=data, headers=headers)
    json_data = response.json()

    try:
        song = json_data['data']
        # Song title.
        audio_name = song['audio_name']
        # Audio stream URL.
        play_url = song['play_url']

        # Artist info. ``or [{}]`` also covers an empty ``authors`` list,
        # which would otherwise raise IndexError on ``[0]``.
        author_info = song.get('authors') or [{}]
        singer_name = author_info[0].get('author_name', '未知歌手')

        if not play_url:
            # Nothing to download; requests.get('') would raise instead.
            print(f'{audio_name} 没有可用的播放链接,已跳过')
            return

        # Titles and artist names can contain path separators or other
        # characters that are illegal in file names, so strip them before
        # building paths.
        save_directory = os.path.join('kugou', _safe_filename(singer_name))
        os.makedirs(save_directory, exist_ok=True)
        file_stem = _safe_filename(audio_name) or str(audio_id)

        # Save the audio.
        music_content = requests.get(url=play_url, headers=headers).content
        audio_file_path = os.path.join(save_directory, f'{file_stem}.mp3')
        with open(audio_file_path, mode='wb') as f:
            f.write(music_content)
        print(f'{audio_name}.mp3下载完成')

        # Save the lyrics with every [bracketed] tag removed.
        lyrics = song['lyrics']
        song_info_cleaned = re.sub(r"\[(.*?)\]", "", lyrics)
        lyrics_file_path = os.path.join(save_directory, f'{file_stem}.txt')
        with open(lyrics_file_path, 'w', encoding="utf-8") as f:
            f.write(song_info_cleaned)
        print(f'{audio_name}.txt下载完成')

        # Save the picture: the first author's avatar URL is used as the
        # image. A missing avatar raises KeyError and is reported below.
        img = author_info[0]['sizable_avatar']
        music_img = requests.get(url=img, headers=headers).content
        img_file_path = os.path.join(save_directory, f'{file_stem}.jpg')
        with open(img_file_path, mode='wb') as f:
            f.write(music_img)
        print(f'{audio_name}.jpg下载完成')

    except KeyError as e:
        print(f'获取数据时出错: {e}')
 
 
if __name__ == '__main__':
    # Ask which search result to download: a 1-based row number from the
    # printed table, or 0 to download every listed song.
    page = input('请输入你要下载的歌曲序号 / 全部下载<0>:').strip()
    try:
        if page == '0':
            for li in lis:
                save(audio_id=li['id'])
        else:
            position = int(page)
            # Reject out-of-range numbers explicitly: a negative value would
            # otherwise be accepted by Python's negative indexing and
            # silently download a different song.
            if not 1 <= position <= len(lis):
                raise IndexError(f'序号 {page} 超出范围 1-{len(lis)}')
            save(audio_id=lis[position - 1]['id'])
    except Exception as e:
        # Top-level boundary of the script: report the problem instead of
        # showing a traceback.
        print('你可能输入有误', e)

二、网易云热榜音乐

1、代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import requests
import re
import os
 
# Directory that receives the downloaded tracks.
filename = 'music'
# exist_ok=True creates the directory only when it is missing, without a
# separate existence check.
os.makedirs(filename, exist_ok=True)

url = "https://music.163.com/discover/toplist?id=3778678"  # NetEase Cloud Music hot-songs chart page
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0'}

response = requests.get(url, headers=headers)
# Each match is a (song id, song title) pair taken from the chart's links.
# Raw string: "\?" and "\d" are regex escapes, not string escapes.
html_data = re.findall(r'<li><a href="/song\?id=(\d+)">(.*?)</a>', response.text)

for song_id, song_name in html_data:
    # Drop characters that are not allowed in file names.
    song_name_clean = ''.join(c for c in song_name if c not in '<>:"/\\|?*')
    music_url = f'http://music.163.com/song/media/outer/url?id={song_id}.mp3'

    # Request the playback address and take the raw response bytes.
    music_content = requests.get(url=music_url, headers=headers).content

    # Write the bytes to <filename>/<title>.mp3.
    with open(os.path.join(filename, f'{song_name_clean}.mp3'), mode='wb') as f:
        f.write(music_content)

    print(f'Song ID: {song_id}, Song Name: {song_name_clean}')

  

 

posted @   凡人半睁眼  阅读(16)  评论(0)  编辑  收藏  举报
相关博文:
阅读排行:
· DeepSeek 开源周回顾「GitHub 热点速览」
· 记一次.NET内存居高不下排查解决与启示
· 物流快递公司核心技术能力-地址解析分单基础技术分享
· .NET 10首个预览版发布:重大改进与新特性概览!
· .NET10 - 预览版1新功能体验(一)

阅读目录(Content)

此页目录为空

点击右上角即可分享
微信分享提示