Python爬虫JS逆向案例教学,看看千元以上的外包都是怎么样的(附源码)
知识点:
- requests
- execjs
第三方库:
- requests >>> pip install requests
- execjs >>> pip install PyExecJS
开发环境:
- 版 本:anaconda5.2.0(python3.6.5)
- 编辑器:pycharm
【付费VIP完整版】只要看了就能学会的教程,80集Python基础入门视频教学
资料分享、解答群:1039649593
分析,单首歌曲所在的链接、加密的规则
选定目标
先点击动态加载,然后再点击播放按钮
先使用检索工具,复制歌曲ID,在检索工具里搜索,最后找到歌曲的数据包
找到加密之后的数据,复制encSecKey到检索工具里查找
找到数据包后,右键点击Open in Sources panel
分析数据,复制代码
运行后得到加密参数
开始写代码
第一个解密
# Step snippet: build the encrypted form data and the headers for the
# "song address" request. `song_id` is not defined here; it comes from the
# search-result loop shown in the later steps of this article.
import requests  # sends the HTTP requests
import execjs  # runs JavaScript code from Python

url = 'https://music.163.com/weapi/song/enhance/player/url/v1?csrf_token=f3cb47b2b44afb5c7d5b267cc60766cb'

# Read the JavaScript source and close the file as soon as it has been read.
with open('demo.js', 'r', encoding='utf-8') as js_file:
    js = js_file.read()
ctx = execjs.compile(js)

# Call a function defined in the JS: the first argument is the function
# name, the remaining arguments are passed to that function.
result = ctx.call('start', song_id)

# The endpoint is sent the two encrypted values returned by the JS function.
data = {
    'params': result['encText'],
    'encSecKey': result['encSecKey']
}
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36',
    # Identifies the user (session cookie).
    # NOTE(review): this is a hard-coded session cookie; presumably it has to
    # be replaced with one from your own logged-in session - confirm.
    'cookie': '_iuqxldmzr_=32; _ntes_nnid=ebf3d77c667fa506d26f9ddb18340c1a,1631792204854; _ntes_nuid=ebf3d77c667fa506d26f9ddb18340c1a; NMTID=00OR9oja7jRdVKsdkWjsSkzfpe6kDoAAAF77mUMdg; WNMCID=xzerkz.1631792205013.01.0; WEVNSM=1.0.0; WM_NI=vBQ2%2Bw17HytDYS1GI3g7DkT7VTr%2B4EXODB%2F33ynkt8Lb7U7Q8x8H6wR%2BOmpKo2yVbM5u9jad1RshFBs0VYnhDdMY6YnieHA%2FXZUX9B0lfGL7deJ0g5xFtC36hy1cLdVJUGs%3D; WM_NIKE=9ca17ae2e6ffcda170e2e6ee82b766aa92b998f05a92b88ab6c84a869f8a84f43af5edfbb0d769989aa7b0c22af0fea7c3b92ab69e9fd3aa7df3e9889af7509b8dffa8cc39a5e98c8cd73d968cbeccb74f9c918ca3ae39f1a7a28cf73490bef9b6b65bfca8f8a9b57d92f5818ef341adeeb7b7e147f7abf78be470f38ebd89c54ab1f09abaed6488bc83bab553aeb7aba9c4259c869bb9bc4af4b4aaa8cf3da8bb86d1e63fbc9abca4b23eaca7ff84cf7096969ba8f237e2a3; WM_TID=GAgBwBSskVpAURVAEAZ73v38OSZWFyaG; __csrf=f3cb47b2b44afb5c7d5b267cc60766cb; MUSIC_U=ef63db2826f2c58d750719abd63a4be1b03d88b788d670ba6fcb33633b202b19993166e004087dd3d78b6050a17a35e705925a4e6992f61dfe3f0151024f9e31; ntes_kaola_ad=1; JSESSIONID-WYYY=RkN1BuKQuGb4%5Csy%5CWRrXIDVpjajXMbdGrsnKDqT%5C9vS2wSxduz%2BcCS4ezpW0%5CCjs3zUBgSUyFA25V1qwv2nYxP%2BzmcKema2fyYpNfdnlpvbkAg%2FcC%2F8giulOPCM4kby5P1u4Xw%2BMXRViAaA4jxtFszXxwSK6MFddUGNo8%2B4dXnFm2NnJ%3A1631797886961; playerid=74269792'
}
第二个解密
# Step snippet: build the encrypted form data, the headers and the URL for
# the search request. The search keyword is read from the console.

# Read the JavaScript source and close the file as soon as it has been read.
with open('demo.js', 'r', encoding='utf-8') as js_file:
    js = js_file.read()
ctx = execjs.compile(js)

# Call a function defined in the JS: the first argument is the function
# name, the remaining arguments are passed to that function.
result = ctx.call('main', input('请输入你想要下载的歌曲名称/歌手名称:'))

# The endpoint is sent the two encrypted values returned by the JS function.
data = {
    'params': result['encText'],
    'encSecKey': result['encSecKey']
}
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36',
    # Identifies the user (session cookie).
    # NOTE(review): this is a hard-coded session cookie; presumably it has to
    # be replaced with one from your own logged-in session - confirm.
    'cookie': '_iuqxldmzr_=32; _ntes_nnid=ebf3d77c667fa506d26f9ddb18340c1a,1631792204854; _ntes_nuid=ebf3d77c667fa506d26f9ddb18340c1a; NMTID=00OR9oja7jRdVKsdkWjsSkzfpe6kDoAAAF77mUMdg; WNMCID=xzerkz.1631792205013.01.0; WEVNSM=1.0.0; WM_NI=vBQ2%2Bw17HytDYS1GI3g7DkT7VTr%2B4EXODB%2F33ynkt8Lb7U7Q8x8H6wR%2BOmpKo2yVbM5u9jad1RshFBs0VYnhDdMY6YnieHA%2FXZUX9B0lfGL7deJ0g5xFtC36hy1cLdVJUGs%3D; WM_NIKE=9ca17ae2e6ffcda170e2e6ee82b766aa92b998f05a92b88ab6c84a869f8a84f43af5edfbb0d769989aa7b0c22af0fea7c3b92ab69e9fd3aa7df3e9889af7509b8dffa8cc39a5e98c8cd73d968cbeccb74f9c918ca3ae39f1a7a28cf73490bef9b6b65bfca8f8a9b57d92f5818ef341adeeb7b7e147f7abf78be470f38ebd89c54ab1f09abaed6488bc83bab553aeb7aba9c4259c869bb9bc4af4b4aaa8cf3da8bb86d1e63fbc9abca4b23eaca7ff84cf7096969ba8f237e2a3; WM_TID=GAgBwBSskVpAURVAEAZ73v38OSZWFyaG; __csrf=f3cb47b2b44afb5c7d5b267cc60766cb; MUSIC_U=ef63db2826f2c58d750719abd63a4be1b03d88b788d670ba6fcb33633b202b19993166e004087dd3d78b6050a17a35e705925a4e6992f61dfe3f0151024f9e31; ntes_kaola_ad=1; JSESSIONID-WYYY=RkN1BuKQuGb4%5Csy%5CWRrXIDVpjajXMbdGrsnKDqT%5C9vS2wSxduz%2BcCS4ezpW0%5CCjs3zUBgSUyFA25V1qwv2nYxP%2BzmcKema2fyYpNfdnlpvbkAg%2FcC%2F8giulOPCM4kby5P1u4Xw%2BMXRViAaA4jxtFszXxwSK6MFddUGNo8%2B4dXnFm2NnJ%3A1631797886961; playerid=74269792'
}
url = 'https://music.163.com/weapi/cloudsearch/get/web?csrf_token='
提取数据
# Step snippet: send the search request and read the fields needed for each
# hit. `url`, `data` and `headers` come from the previous step.
songs = requests.post(url, data=data, headers=headers).json()['result']['songs']
for song in songs:
    song_name = song['name']  # song title
    song_id = song['id']  # song id
    singer_name = song['ar'][0]['name']  # name of the first listed artist
拿到每一首歌的url地址
# Step snippet: post the encrypted form to the song-address endpoint and take
# the address of the first entry in the reply's 'data' list.
response = requests.post(url, data=data, headers=headers)
mp3_url = response.json()['data'][0]['url']
依次请求 歌曲 url
# Audio, images and video are binary data, so read the raw response bytes
# (.content) rather than decoded text.
mp3_data = requests.get(mp3_url, headers=headers).content
保存数据
# Step snippet: write the downloaded bytes to mp3/<song>-<singer>.mp3.
# Binary mode ('wb') is required because mp3_data is bytes.
with open(f'mp3/{song_name}-{singer_name}.mp3', mode='wb') as f:
    f.write(mp3_data)
# Report success only after the file has been written and closed.
print(f'{song_name}-{singer_name}爬取成功!!!')
完整代码
# Complete script: search NetEase Cloud Music for a keyword typed on the
# console and save every hit that has a playable address as an MP3 file.
#
# The request bodies are produced by two functions in demo.js, run through
# PyExecJS: 'main' encrypts the search keyword and 'start' encrypts a song id.
import os
import re

import execjs  # runs JavaScript code from Python
import requests  # sends the HTTP requests

SEARCH_URL = 'https://music.163.com/weapi/cloudsearch/get/web?csrf_token='
SONG_URL = 'https://music.163.com/weapi/song/enhance/player/url/v1?csrf_token=f3cb47b2b44afb5c7d5b267cc60766cb'

# Defined once and reused for every request.
HEADERS = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36',
    # Identifies the user (session cookie).
    # NOTE(review): hard-coded session cookie. Its __csrf value equals the
    # csrf_token in SONG_URL. Presumably both must be replaced with values
    # from your own logged-in session, and a real cookie should not be
    # committed to source control - confirm.
    'cookie': '_iuqxldmzr_=32; _ntes_nnid=ebf3d77c667fa506d26f9ddb18340c1a,1631792204854; _ntes_nuid=ebf3d77c667fa506d26f9ddb18340c1a; NMTID=00OR9oja7jRdVKsdkWjsSkzfpe6kDoAAAF77mUMdg; WNMCID=xzerkz.1631792205013.01.0; WEVNSM=1.0.0; WM_NI=vBQ2%2Bw17HytDYS1GI3g7DkT7VTr%2B4EXODB%2F33ynkt8Lb7U7Q8x8H6wR%2BOmpKo2yVbM5u9jad1RshFBs0VYnhDdMY6YnieHA%2FXZUX9B0lfGL7deJ0g5xFtC36hy1cLdVJUGs%3D; WM_NIKE=9ca17ae2e6ffcda170e2e6ee82b766aa92b998f05a92b88ab6c84a869f8a84f43af5edfbb0d769989aa7b0c22af0fea7c3b92ab69e9fd3aa7df3e9889af7509b8dffa8cc39a5e98c8cd73d968cbeccb74f9c918ca3ae39f1a7a28cf73490bef9b6b65bfca8f8a9b57d92f5818ef341adeeb7b7e147f7abf78be470f38ebd89c54ab1f09abaed6488bc83bab553aeb7aba9c4259c869bb9bc4af4b4aaa8cf3da8bb86d1e63fbc9abca4b23eaca7ff84cf7096969ba8f237e2a3; WM_TID=GAgBwBSskVpAURVAEAZ73v38OSZWFyaG; __csrf=f3cb47b2b44afb5c7d5b267cc60766cb; MUSIC_U=ef63db2826f2c58d750719abd63a4be1b03d88b788d670ba6fcb33633b202b19993166e004087dd3d78b6050a17a35e705925a4e6992f61dfe3f0151024f9e31; ntes_kaola_ad=1; JSESSIONID-WYYY=RkN1BuKQuGb4%5Csy%5CWRrXIDVpjajXMbdGrsnKDqT%5C9vS2wSxduz%2BcCS4ezpW0%5CCjs3zUBgSUyFA25V1qwv2nYxP%2BzmcKema2fyYpNfdnlpvbkAg%2FcC%2F8giulOPCM4kby5P1u4Xw%2BMXRViAaA4jxtFszXxwSK6MFddUGNo8%2B4dXnFm2NnJ%3A1631797886961; playerid=74269792'
}

OUTPUT_DIR = 'mp3'
REQUEST_TIMEOUT = 10  # seconds; without a timeout requests can block forever


def _load_js_context(path='demo.js'):
    """Read the JavaScript file at *path* and return a compiled PyExecJS context."""
    with open(path, 'r', encoding='utf-8') as js_file:
        return execjs.compile(js_file.read())


def _encrypted_form(ctx, function_name, argument):
    """Call *function_name* in the JS context and return the form fields to post."""
    # First argument is the JS function name, then the arguments for it.
    result = ctx.call(function_name, argument)
    return {
        'params': result['encText'],
        'encSecKey': result['encSecKey'],
    }


def _safe_filename(text):
    """Replace characters that are not allowed in file names with '_'."""
    return re.sub(r'[\\/:*?"<>|]', '_', str(text))


def main():
    """Prompt for a keyword, search for it and download every playable hit."""
    # Compile demo.js once; the same context serves the search request and
    # every per-song request.
    ctx = _load_js_context()

    keyword = input('请输入你想要下载的歌曲名称/歌手名称:')
    search_reply = requests.post(
        SEARCH_URL,
        data=_encrypted_form(ctx, 'main', keyword),
        headers=HEADERS,
        timeout=REQUEST_TIMEOUT,
    ).json()
    # A reply without 'result' or 'songs' is treated as "no hits".
    songs = (search_reply.get('result') or {}).get('songs') or []

    # open() does not create missing directories.
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    for song in songs:
        song_name = song['name']  # song title
        song_id = song['id']  # song id
        singer_name = song['ar'][0]['name']  # name of the first listed artist

        # Ask for the address of this song. Per the original author's note, a
        # song can only be downloaded if the account has the rights to it.
        song_reply = requests.post(
            SONG_URL,
            data=_encrypted_form(ctx, 'start', song_id),
            headers=HEADERS,
            timeout=REQUEST_TIMEOUT,
        ).json()
        entries = song_reply.get('data') or []
        mp3_url = entries[0].get('url') if entries else None
        if not mp3_url:
            # No address returned: skip the song instead of requesting None.
            print(f'{song_name}-{singer_name}没有可用的播放地址,已跳过')
            continue

        # Audio is binary data, so read the raw response bytes (.content).
        mp3_data = requests.get(mp3_url, headers=HEADERS, timeout=REQUEST_TIMEOUT).content

        file_name = f'{_safe_filename(song_name)}-{_safe_filename(singer_name)}.mp3'
        with open(f'{OUTPUT_DIR}/{file_name}', mode='wb') as f:
            f.write(mp3_data)
        print(f'{song_name}-{singer_name}爬取成功!!!')


if __name__ == '__main__':
    main()