3. Python 爬取 mp3 音乐（01-07）

一、酷狗音乐

1、代码

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

import requests
import time
# 导入解密模块
import hashlib
import re
import os
import json
# 导入制表模块
import prettytable as pt
 
# Request headers that imitate a desktop browser (Chrome/Edge User-Agent).
# The Cookie carries the same kg_mid / kg_dfid values that are sent as the
# `mid` / `dfid` request parameters and hashed into the signatures below.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 '
                  'Safari/537.36 Edg/119.0.0.0',
    'Cookie': 'kg_mid=3a8e2eda6b55afd434ed43d762bae621; kg_dfid=4XSJ8z0tMH343y3JOZ2ZluzO; '
              'kg_dfid_collect=d41d8cd98f00b204e9800998ecf8427e; Hm_lvt_aedee6983d4cfc62f509129360d6bb3d=1701158152,'
              '1701163020,1701180349,1701337969; kg_mid_temp=3a8e2eda6b55afd434ed43d762bae621; '
              'Hm_lpvt_aedee6983d4cfc62f509129360d6bb3d=1701337996'
}
 
 
def Hash_md5(audio_id, date_time):
    """Compute the ``signature`` parameter for a song-info request.

    The request fields are written as ``key=value`` strings in alphabetical
    key order, concatenated without separators, wrapped front and back with
    the same fixed key string, and hashed with MD5.

    Args:
        audio_id: Value placed in the ``encode_album_audio_id`` field.
        date_time: Value placed in the ``clienttime`` field.

    Returns:
        The MD5 digest of the concatenated text as a hexadecimal string.
    """
    wrapper = "NVPh5oo715z5DIWAeQlhMDsWXXQV4hwt"
    fields = (
        "appid=1014",
        f"clienttime={date_time}",  # timestamp
        "clientver=20000",
        "dfid=4XSJ8z0tMH343y3JOZ2ZluzO",
        f"encode_album_audio_id={audio_id}",  # song id
        "mid=3a8e2eda6b55afd434ed43d762bae621",
        "platid=4",
        "srcappid=2919",
        "token=",
        "userid=0",
        "uuid=3a8e2eda6b55afd434ed43d762bae621",
    )
    payload = wrapper + ''.join(fields) + wrapper
    return hashlib.md5(payload.encode('utf-8')).hexdigest()
 
 
def search_MD5(world, date_time):
    """Compute the ``signature`` parameter for a song-search request.

    The search fields are written as ``key=value`` strings in alphabetical
    key order, concatenated without separators, wrapped front and back with
    the same fixed key string, and hashed with MD5.

    Args:
        world: Search keyword, placed verbatim in the ``keyword`` field.
        date_time: Value placed in the ``clienttime`` field.

    Returns:
        The MD5 digest of the concatenated text as a hexadecimal string.
    """
    wrapper = "NVPh5oo715z5DIWAeQlhMDsWXXQV4hwt"
    fields = (
        "appid=1014",
        "bitrate=0",
        "callback=callback123",
        f"clienttime={date_time}",
        "clientver=1000",
        "dfid=4XSJ8z0tMH343y3JOZ2ZluzO",
        "filter=10",
        "inputtype=0",
        "iscorrection=1",
        "isfuzzy=0",
        f"keyword={world}",
        "mid=3a8e2eda6b55afd434ed43d762bae621",
        "page=1",
        "pagesize=30",
        "platform=WebFilter",
        "privilege_filter=0",
        "srcappid=2919",
        "token=",
        "userid=0",
        "uuid=3a8e2eda6b55afd434ed43d762bae621",
    )
    payload = wrapper + ''.join(fields) + wrapper
    return hashlib.md5(payload.encode('utf-8')).hexdigest()
 
 
# Millisecond timestamp shared by the search request and the later song-info
# requests. time.time() has second resolution, so it is scaled by 1000 to get
# the 13-digit form the request parameters use.
date_time = int(time.time() * 1000)

key = input('请输入歌名或歌手：')
# Signature that must accompany the search parameters.
search_signature = search_MD5(key, date_time)

# Search endpoint.
search_url = 'https://complexsearch.kugou.com/v2/search/song?'
# Query parameters; every field hashed by search_MD5 is sent here with the
# same value, plus the resulting signature.
search_data = {
    'callback': 'callback123',
    'srcappid': '2919',
    'clientver': '1000',
    'clienttime': date_time,
    'mid': '3a8e2eda6b55afd434ed43d762bae621',
    'uuid': '3a8e2eda6b55afd434ed43d762bae621',
    'dfid': '4XSJ8z0tMH343y3JOZ2ZluzO',
    'keyword': key,
    'page': '1',
    'pagesize': '30',
    'bitrate': '0',
    'isfuzzy': '0',
    'inputtype': '0',
    'platform': 'WebFilter',
    'userid': '0',
    'iscorrection': '1',
    'privilege_filter': '0',
    'filter': '10',
    'token': '',
    'appid': '1014',
    'signature': search_signature
}

# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url=search_url, params=search_data, headers=headers, timeout=30)

# The body is JSONP: callback123({...}). Remove only the wrapper -- the text
# up to the opening parenthesis and the final closing parenthesis -- so that
# parentheses inside song or album names survive intact.
jsonp_match = re.search(r'callback123\((.*)\)', response.text, re.S)
if jsonp_match is None:
    raise SystemExit('搜索接口返回了无法解析的数据')
html_data = jsonp_match.group(1)
# Parse the JSON text into Python objects.
json_data = json.loads(html_data)

tb = pt.PrettyTable()
tb.field_names = ['序号', '歌名', '歌手', '专辑', 'id']
# One dict per search hit, in display order; the download step indexes into
# this list using the row number shown in the table.
lis = []
for num, index in enumerate(json_data['data']['lists'], start=1):
    dit = {
        '歌名': index['SongName'],
        '歌手': index['SingerName'],
        '专辑': index['AlbumName'],
        'id': index['EMixSongID']
    }
    lis.append(dit)
    tb.add_row([num, dit['歌名'], dit['歌手'], dit['专辑'], dit['id']])
print(tb)
 
 
# audio_id = input('请输入歌曲id：')
# signature = Hash_md5(audio_id, date_time)
 
def _safe_filename(name, fallback='unknown'):
    """Return *name* without characters that are illegal in file names."""
    cleaned = ''.join(ch for ch in str(name) if ch not in '<>:"/\\|?*').strip()
    return cleaned or fallback


def save(audio_id):
    """Download the audio, lyrics and artist picture for one Kugou song.

    Requests the song-info endpoint for ``audio_id`` and writes up to three
    files into ``kugou/<singer>/``: ``<song>.mp3``, ``<song>.txt`` (lyrics
    with the ``[...]`` tags removed) and ``<song>.jpg`` (the first author's
    avatar). Song and singer names are stripped of characters that cannot
    appear in file names before they are used in a path.

    Args:
        audio_id: Song id sent as ``encode_album_audio_id``.

    Returns:
        None. Progress and data errors are reported with ``print``. A missing
        field, or a ``data`` payload of an unexpected shape, stops the
        remaining steps for this song; files already written are kept.
        Network errors raised by ``requests`` are not caught here.
    """
    signature = Hash_md5(audio_id, date_time)
    url = 'https://wwwapi.kugou.com/play/songinfo?'

    data = {
        'srcappid': '2919',
        'clientver': '20000',
        'clienttime': date_time,
        'mid': '3a8e2eda6b55afd434ed43d762bae621',
        'uuid': '3a8e2eda6b55afd434ed43d762bae621',
        'dfid': '4XSJ8z0tMH343y3JOZ2ZluzO',
        'appid': '1014',
        'platid': '4',
        'encode_album_audio_id': audio_id,
        'token': '',
        'userid': '0',
        'signature': signature
    }

    response = requests.get(url=url, params=data, headers=headers, timeout=30)
    json_data = response.json()

    try:
        song = json_data['data']
        # Song title
        audio_name = song['audio_name']
        # Audio URL
        play_url = song['play_url']

        # First listed author; a missing or empty list falls back to an empty
        # record so the default singer name below applies.
        authors = song.get('authors') or [{}]
        first_author = authors[0]
        singer_name = first_author.get('author_name', '未知歌手')

        if not play_url:
            # Without a URL there is nothing to download; calling
            # requests.get('') would only raise an unrelated error.
            print(f'获取数据时出错: {audio_name} 没有可用的播放链接')
            return

        # Names come from the server and may contain path separators or other
        # characters that are not valid in file names.
        file_stem = _safe_filename(audio_name)
        save_directory = os.path.join('kugou', _safe_filename(singer_name, '未知歌手'))
        os.makedirs(save_directory, exist_ok=True)

        # Save the audio
        music_content = requests.get(url=play_url, headers=headers, timeout=60).content
        audio_file_path = os.path.join(save_directory, f'{file_stem}.mp3')
        with open(audio_file_path, mode='wb') as f:
            f.write(music_content)
        print(f'{file_stem}.mp3下载完成')

        # Save the lyrics with every [...] tag removed
        lyrics = song['lyrics']
        song_info_cleaned = re.sub(r"\[(.*?)\]", "", lyrics)
        lyrics_file_path = os.path.join(save_directory, f'{file_stem}.txt')
        with open(lyrics_file_path, 'w', encoding="utf-8") as f:
            f.write(song_info_cleaned)
        print(f'{file_stem}.txt下载完成')

        # Save the picture (the first author's avatar URL)
        img = first_author['sizable_avatar']
        music_img = requests.get(url=img, headers=headers, timeout=60).content
        img_file_path = os.path.join(save_directory, f'{file_stem}.jpg')
        with open(img_file_path, mode='wb') as f:
            f.write(music_img)
        print(f'{file_stem}.jpg下载完成')

    except (KeyError, IndexError, TypeError, AttributeError) as e:
        # KeyError: an expected field is absent. The other three cover a
        # `data`/`authors` payload that is not the dict/list shape read above.
        print(f'获取数据时出错: {e}')
 
 
if __name__ == '__main__':
    # Ask which row of the printed table to download; 0 means every row.
    page = input('请输入你要下载的歌曲序号 / 全部下载<0>:').strip()
    try:
        choice = int(page)
        # Check the range explicitly: a negative number would otherwise be
        # accepted by list indexing and silently pick a song from the end.
        if not 0 <= choice <= len(lis):
            raise ValueError(f'序号超出范围: {page}')
    except ValueError as e:
        print('你可能输入有误', e)
    else:
        targets = lis if choice == 0 else [lis[choice - 1]]
        for li in targets:
            # Top-level boundary: report a failed song and carry on, so one
            # bad download does not abort the rest of a "download all" run.
            try:
                save(audio_id=li['id'])
            except Exception as e:
                print(f"{li['歌名']} 下载失败:", e)

二、网易云热榜音乐

1、代码

import requests
import re
import os
 
# Directory that receives the downloaded .mp3 files.
filename = 'music'
os.makedirs(filename, exist_ok=True)

url = "https://music.163.com/discover/toplist?id=3778678"  # NetEase Cloud Music hot-songs chart page
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0'}

# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url, headers=headers, timeout=30)
# Collect (song id, song name) pairs from the <li><a href="/song?id=..."> links
# in the page. The raw string keeps \? and \d as regex escapes instead of
# invalid string escapes.
html_data = re.findall(r'<li><a href="/song\?id=(\d+)">(.*?)</a>', response.text)

for song_id, song_name in html_data:
    # Drop characters that are not allowed in file names; if nothing is left,
    # fall back to the numeric id so the file is not named just ".mp3".
    song_name_clean = ''.join(c for c in song_name if c not in '<>:"/\\|?*') or song_id
    music_url = f'http://music.163.com/song/media/outer/url?id={song_id}.mp3'

    # Request the playback address to obtain the binary audio data.
    music_response = requests.get(url=music_url, headers=headers, timeout=60)
    if not music_response.ok:
        # Do not write an HTTP error body to disk as if it were audio.
        print(f'Song ID: {song_id}, Song Name: {song_name_clean} skipped (HTTP {music_response.status_code})')
        continue
    music_content = music_response.content

    # Save under the output directory using the cleaned name.
    with open(os.path.join(filename, f'{song_name_clean}.mp3'), mode='wb') as f:
        f.write(music_content)

    print(f'Song ID: {song_id}, Song Name: {song_name_clean}')

posted @ 2025-01-07 17:53 凡人半睁眼 阅读(16) 评论(0) 编辑 收藏 举报

刷新页面返回顶部

登录后才能查看或发表评论，立即登录或者逛逛博客园首页

相关博文：

· 爬虫之requests模块

· 爬虫之代理池、爬取视频网站、新闻、bs4

· python爬取音乐小工具

· 爬虫学习之爬取酷狗音乐

· 跟我一起学-Python爬取(酷我)

阅读排行：
· DeepSeek 开源周回顾「GitHub 热点速览」
· 记一次.NET内存居高不下排查解决与启示
· 物流快递公司核心技术能力-地址解析分单基础技术分享
· .NET 10首个预览版发布：重大改进与新特性概览！
· .NET10 - 预览版1新功能体验（一）

阅读目录(Content)

此页目录为空

海棠未雨，梨花先雪，一半春休

想看山海，早也去，晚也去，一个人也去

念两句诗

python 爬取mp3音乐

一、酷狗音乐

二、网易云热榜音乐

公告

个人信息

日历

搜索

常用链接

最新随笔

积分与排名

合集 (7)

随笔分类 (236)

随笔档案 (341)

相册 (5)

阅读排行榜

评论排行榜

推荐排行榜

海棠未雨，梨花先雪，一半春休

想看山海，早也去，晚也去 ，一个人也去

念两句诗

python 爬取mp3音乐

一、酷狗音乐

二、网易云热榜音乐

公告

个人信息

日历

搜索

常用链接

最新随笔

积分与排名

合集 (7)

随笔分类 (236)

随笔档案 (341)

相册 (5)

阅读排行榜

评论排行榜

推荐排行榜

想看山海，早也去，晚也去，一个人也去