爬虫项目
网易云音乐

"""Request encryption helpers for the NetEase Cloud Music ``weapi`` endpoints.

Python port of the four JavaScript helpers (``a``, ``b``, ``c`` and ``d``)
that produce the ``params`` and ``encSecKey`` POST fields.  They were located
by searching for those two field names in the Chrome DevTools "Sources" panel
and setting breakpoints on the code that generates them.
"""
import base64
import binascii
import json
import random

try:
    from Cryptodome.Cipher import AES  # distribution "pycryptodomex"
except ImportError:
    try:
        from Crypto.Cipher import AES  # same library, distribution "pycryptodome"
    except ImportError:
        # Defer the failure to Encryptor.b() so that the helpers which need
        # no AES (a, c, Song) remain importable without the package.
        AES = None


class Encryptor(object):
    """Builds the encrypted ``params`` / ``encSecKey`` form fields.

    The single-letter method names mirror the JavaScript functions they were
    ported from, so the two code bases can be compared side by side.
    """

    modulus = '00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7'
    nonce = '0CoJUm6Qyw8W8jud'
    pub_key = '010001'

    # Character set of the JavaScript original (variable ``b`` in ``a()``).
    _ALPHABET = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
    # Fixed IV used by the JavaScript ``b()``.
    _IV = b"0102030405060708"
    # OS-backed generator: the string produced by a() is used as an AES key.
    _rng = random.SystemRandom()

    @classmethod
    def a(cls, a):
        """Return a random string of ``a`` characters from ``[a-zA-Z0-9]``.

        JavaScript original::

            function a(a) {
                var d, e, b = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789", c = "";
                for (d = 0; a > d; d += 1)
                    e = Math.random() * b.length,  // random number in [0, 1) times len(b)
                    e = Math.floor(e),             // round down to an index
                    c += b.charAt(e);              // append the character at that index
                return c
            }

        The tutorial this port started from used
        ``binascii.hexlify(os.urandom(size))[:16]`` instead; that variant was
        dropped because it can never produce upper-case letters.

        :param a: number of characters to generate (an int; 0 gives ``''``).
        :return: the random string.
        """
        return "".join(cls._rng.choice(cls._ALPHABET) for _ in range(a))

    @classmethod
    def b(cls, a, b):
        """Encrypt ``a`` with AES-CBC under key ``b``; return Base64 text.

        JavaScript original (CryptoJS 3.1.2, see
        https://github.com/brix/crypto-js/blob/master/docs/QuickStartGuide.wiki#AES)::

            function b(a, b) {
                var c = CryptoJS.enc.Utf8.parse(b)                   // key
                  , d = CryptoJS.enc.Utf8.parse("0102030405060708")  // iv
                  , e = CryptoJS.enc.Utf8.parse(a)                   // text to encrypt
                  , f = CryptoJS.AES.encrypt(e, c, { iv: d, mode: CryptoJS.mode.CBC });
                return f.toString()
            }
            // b('12345', '1234567812345678')  -> text and key before parse()

        :param a: plaintext (``str`` is UTF-8 encoded; bytes are used as-is).
        :param b: AES key (``str`` is UTF-8 encoded; bytes are used as-is).
        :return: Base64-encoded ciphertext as ``str``.
        :raises ImportError: if no AES implementation could be imported.
        """
        if AES is None:
            raise ImportError(
                "Encryptor.b() needs AES from 'pycryptodomex' (Cryptodome) "
                "or 'pycryptodome' (Crypto)"
            )
        data = a.encode() if isinstance(a, str) else bytes(a)
        key = b.encode() if isinstance(b, str) else bytes(b)
        # Pad on the *encoded* bytes: counting characters instead of bytes
        # yields a length that is not a multiple of 16 for non-ASCII text.
        pad = 16 - len(data) % 16
        data += bytes([pad]) * pad
        cipher = AES.new(key, AES.MODE_CBC, cls._IV)
        return base64.b64encode(cipher.encrypt(data)).decode()

    @classmethod
    def c(cls, text, pubKey, modulus):
        """Return ``reversed(text) ** pubKey mod modulus`` as 256 hex digits.

        JavaScript original::

            function c(a, b, c) {
                var d, e;
                return setMaxDigits(131),
                d = new RSAKeyPair(b, "", c),
                e = encryptedString(d, a)
            }

        ``binascii.hexlify`` renders every byte as two hex digits, which is
        how the reversed text is turned into one big integer.

        :param text: bytes to encrypt (``str`` is UTF-8 encoded first).
        :param pubKey: exponent as a hex string.
        :param modulus: modulus as a hex string.
        :return: lower-case hex string, left-padded with zeros to 256 digits.
        """
        if isinstance(text, str):
            text = text.encode()
        reversed_hex = binascii.hexlify(text[::-1])
        rs = pow(int(reversed_hex, 16), int(pubKey, 16), int(modulus, 16))
        return format(rs, 'x').zfill(256)

    @classmethod
    def d(cls, d):
        """Build the ``params`` / ``encSecKey`` form data for payload ``d``.

        JavaScript original (``e``, ``f`` and ``g`` are constants there, so
        this port reads them from the class attributes instead)::

            function d(d, e, f, g) {
                var h = {}
                  , i = a(16);
                return h.encText = b(d, g),
                h.encText = b(h.encText, i),
                h.encSecKey = c(i, e, f),
                h
            }

        :param d: JSON-serialisable request payload (``i8a`` in the JS; the
            JS receives it already stringified).
        :return: dict with the keys ``'params'`` and ``'encSecKey'``.
        """
        text = json.dumps(d)
        sec_key = cls.a(16)
        # b() is applied twice: first with the fixed nonce, then with the
        # freshly generated random key.
        enc_text = cls.b(text, cls.nonce)
        params = cls.b(enc_text, sec_key)
        # c() encrypts the random key itself.
        encSecKey = cls.c(sec_key.encode(), cls.pub_key, cls.modulus)
        return {
            'params': params,
            'encSecKey': encSecKey,
        }


class Song(object):
    """One search hit: its id, title, joined artist names and list position."""

    def __init__(self, id, name, singer, index):
        self.index = index
        self.id = id
        self.name = name
        self.singer = singer
        # Filled in later with the download URLs; starts empty so that
        # reading it before the lookup does not raise AttributeError.
        self.url_list = []

"""Interactive command-line search and download client for NetEase Cloud Music."""
import json
import os

try:
    import winreg
except ImportError:
    # Not on Windows: Downloader.mkdir() falls back to ~/Desktop.
    winreg = None

import requests

from utils import Encryptor, Song


# NetEase's encryptor is shared: every POST request that carries the
# params / encSecKey fields is encrypted with it.  The payload dicts below were
# collected by setting breakpoints and inspecting the ``i8a`` argument, then
# tried one by one against each URL to find the matching pairs.
class Downloader(object):
    """Searches songs, resolves their download URLs and saves them as mp3."""

    # Breakpoint testing showed three ``i8a`` payloads being passed to the
    # encryptor during a search: dict1 / dict2 / dict3.  They are kept here as
    # reference samples only.
    dict1 = {
        'csrf_token': '',
        'logs': '[{"action":"searchkeywordclient","json":{"type":"song","keyword":"谢谢你的爱","offset":0}}]',
    }
    dict2 = {
        'csrf_token': '',
        'hlposttag': '</span>',
        'hlpretag': '<span class="s-fc7">',
        'limit': '30',
        'offset': '0',
        's': '谢谢你的爱',
        'total': 'true',
        'type': '1',
    }
    dict3 = {
        'csrf_token': '',
        's': '谢谢你的爱',
    }
    # Sample payload for fetching a song URL.
    dict4 = {
        'csrf_token': '',
        'br': '128000',
        'ids': '[1297493260]',
    }

    # Seconds to wait on any HTTP request before giving up.
    timeout = 15

    # Characters that Windows does not allow in file names.
    _FORBIDDEN = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})

    def __init__(self):
        """Prepare headers and endpoints and open a session on the start page.

        Performs one GET request; network errors from ``requests`` propagate.
        """
        user_agent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
        # Headers for the encrypted API calls.
        self.headers = {
            'Accept': '*/*',
            'Accept-Encoding': 'gzip,deflate,sdch',
            'Accept-Language': 'zh-CN,zh;q=0.8,gl;q=0.6,zh-TW;q=0.4',
            'Connection': 'keep-alive',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Host': 'music.163.com',
            'Referer': 'http://music.163.com/search/',
            'User-Agent': user_agent,
        }
        # Headers for fetching the audio file itself.
        self.headers1 = {
            'Origin': 'https://music.163.com',
            'Range': 'bytes=0-',
            'Referer': 'https://music.163.com',
            'User-Agent': user_agent,
        }
        self.headers2 = {}
        # Target directory; set by mkdir().
        self.folder_path = None
        self.start_url = "https://music.163.com/"
        self.session = requests.session()
        self.response = self.session.get(
            url=self.start_url, headers=self.headers, timeout=self.timeout)
        # url1 + dict2 answers correctly but returns only 4 tracks.
        # url1 + dict1: parameter error.
        # url1 + dict3 looks much the same as dict2.
        self.post_url1 = 'https://music.163.com/weapi/search/suggest/web?csrf_token='
        # url2 + dict2 returns a full page of 30 songs -- the one needed here.
        # url2 + dict1: parameter error.
        # url2 + dict3: parameter error.
        self.post_url2 = 'https://music.163.com/weapi/cloudsearch/get/web?csrf_token='
        # url3 + dict2 does answer, but with a single track only; ruled out.
        # url3 + dict1: parameter error.
        # url3 + dict3 looks much the same as dict2.
        self.post_url3 = 'https://music.163.com/weapi/search/suggest/multimatch?csrf_token='
        # This is the endpoint that returns the song URL.
        self.post_url4 = 'https://music.163.com/weapi/song/enhance/player/url?csrf_token='

    def _search_songs(self, keyword):
        """POST an encrypted search for ``keyword``; return the response text."""
        dict2 = {
            'csrf_token': '',
            'hlposttag': '</span>',
            'hlpretag': '<span class="s-fc7">',
            'limit': '30',
            'offset': '0',
            's': keyword,
            'total': 'true',
            'type': '1',
        }
        data = Encryptor.d(dict2)
        response = self.session.post(
            self.post_url2, headers=self.headers, data=data, timeout=self.timeout)
        return response.text

    def search(self, keyword):
        """Return the search hits for ``keyword`` as a list of ``Song``.

        A response without ``result`` / ``songs`` yields an empty list.
        Artist names are joined with ``_`` into ``Song.singer``.
        """
        obj = json.loads(self._search_songs(keyword))
        entries = (obj.get('result') or {}).get('songs') or []
        song_list = []
        for index, entry in enumerate(entries):
            artists = entry.get('ar') or []
            singer = "_".join(artist.get('name') or '' for artist in artists)
            song_list.append(
                Song(id=entry.get('id'), name=entry.get('name'), singer=singer, index=index))
        return song_list

    def _song_url(self, song):
        """POST an encrypted URL lookup for ``song.id``; return the response text."""
        dict4 = {
            'csrf_token': '',
            'br': '128000',
            'ids': '[%s]' % song.id,
        }
        data = Encryptor.d(dict4)
        response = self.session.post(
            self.post_url4, headers=self.headers, data=data, timeout=self.timeout)
        return response.text

    def song(self, song):
        """Look up the download URLs of ``song`` and store them in ``song.url_list``."""
        payload = json.loads(self._song_url(song))
        song.url_list = [entry.get('url') for entry in payload.get('data') or []]

    @staticmethod
    def _safe_filename(name):
        """Replace characters that are illegal in Windows file names with ``_``."""
        return name.translate(Downloader._FORBIDDEN)

    def download(self, song):
        """Save every URL in ``song.url_list`` as an mp3 in ``self.folder_path``.

        The file is named ``<name>_<singer>.mp3``; when the song has more than
        one URL the position is appended (``<name>_<singer>_<index>.mp3``).
        Entries without a URL are skipped.  Each file is fetched completely
        before it is opened, so a failed request leaves no empty file behind.

        :return: list of the file paths that were written.
        :raises requests.RequestException: on network or HTTP errors.
        :raises OSError: if a file cannot be written.
        """
        if getattr(self, 'folder_path', None) is None:
            self.mkdir()
        base_name = self._safe_filename("%s_%s" % (song.name, song.singer))
        single = len(song.url_list) == 1
        saved = []
        for index, url in enumerate(song.url_list):
            if not url:
                continue
            if single:
                file_name = "%s.mp3" % base_name
            else:
                file_name = "%s_%s.mp3" % (base_name, index)
            response = requests.get(url, headers=self.headers1, timeout=self.timeout)
            response.raise_for_status()
            path = os.path.join(self.folder_path, file_name)
            with open(path, 'wb') as f:
                f.write(response.content)
            saved.append(path)
        return saved

    def mkdir(self):
        """Create the download folder on the desktop and remember its path.

        On Windows the desktop location is read from the registry; where
        ``winreg`` is unavailable ``~/Desktop`` is used instead.  An already
        existing folder is not an error.

        :raises OSError: if the folder cannot be created.
        """
        if winreg is not None:
            # Ask the system registry for the desktop path.
            with winreg.OpenKey(
                    winreg.HKEY_CURRENT_USER,
                    r'Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders',
            ) as reg_key:
                desk_path = winreg.QueryValueEx(reg_key, "Desktop")[0]
        else:
            desk_path = os.path.join(os.path.expanduser("~"), "Desktop")
        self.folder_path = os.path.join(desk_path, "网易云音乐")
        os.makedirs(self.folder_path, exist_ok=True)


def main():
    """Run the interactive loop: search, list the hits, download by number.

    ``q`` quits at either prompt; ``s`` at the download prompt starts a new
    search.  Surrounding whitespace in the answers is ignored.
    """
    song_downloader = Downloader()
    song_downloader.mkdir()
    print('欢迎使用网易云下载器')
    command = None
    while command != 'q':
        keywords = input('搜索歌曲请输入关键字,按“q”退出:')
        if keywords.strip() == "q":
            break
        try:
            song_list = song_downloader.search(keywords)
        except Exception:
            print('发生错误,请检查网络')
            continue
        if not song_list:
            print('没有找到相关歌曲')
            continue
        for i in song_list:
            print("%s %s %s" % (i.index, i.name, i.singer))
            if int(i.index) > 10:
                break
        valid_choices = {str(i.index) for i in song_list}
        while True:
            # Strip once so that 'q', 's' and the numbers are all treated alike
            # and the outer loop sees the bare 'q'.
            command = input('请根据歌曲编号进行下载,按“q”退出,按“s”重新搜索').strip()
            if command in ('q', 's'):
                break
            if command not in valid_choices:
                print('输入错误,请重新输入。')
                continue
            chosen = song_list[int(command)]
            print('正在下载歌曲%s %s' % (chosen.name, chosen.singer))
            try:
                song_downloader.song(chosen)
                saved = song_downloader.download(chosen)
            except (requests.RequestException, OSError, ValueError):
                print('下载失败,请检查网络')
                continue
            if not saved:
                print('该歌曲没有可用的下载地址')


if __name__ == "__main__":
    main()

# Launcher script: imports ``main`` from the ``bin`` module and runs it when
# this file is executed directly.
from bin import main

if __name__ == '__main__':
    main()

网易云音乐API分析(附C#版教程源码):https://www.zhanghuanglong.com/detail/csharp-version-of-netease-cloud-music-api-analysis-(with-source-code)
Python Spider: https://github.com/Jack-Cherish/python-spider
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· SQL Server 2025 AI相关能力初探
· Linux系列:如何用 C#调用 C方法造成内存泄露
· AI与.NET技术实操系列(二):开始使用ML.NET
· 记一次.NET内存居高不下排查解决与启示
· 探究高空视频全景AR技术的实现原理
· 阿里最新开源QwQ-32B,效果媲美deepseek-r1满血版,部署成本又又又降低了!
· 单线程的Redis速度为什么快?
· SQL Server 2025 AI相关能力初探
· AI编程工具终极对决:字节Trae VS Cursor,谁才是开发者新宠?
· 展开说说关于C#中ORM框架的用法!