Python爬虫:从js逆向了解西瓜视频的下载链接的生成
文章内容在CSDN上,链接为:Python爬虫:从js逆向了解西瓜视频的下载链接的生成
参考代码为:
import json
import re

import execjs
import requests
from lxml import etree

from crawlers.userAgent import useragent
# NOTE(review): this cookie appears to embed real session identifiers
# (sid_guard, msToken, csrf_session_id, ...). It is kept verbatim so the
# request is unchanged, but it should probably be loaded from the environment
# or a local config file instead of being committed with the source.
COOKIE = '_ga=GA1.2.1050417450.1614331766; __gads=ID=219beb92a387f147-221a9bb312d00082:T=1642755398:RT=1642755398:S=ALNI_MbBZhXsNEhcjCou62SfFZv3EKypUg; dark=false; odin_tt=e889b4e035114ca274f380a0225a005df6ec13f085ec797a7ac0b6f9fc2dec97bf5328b4c87024a2285ce9add53034952cadf68eb306bf39eefad41902246acc; sid_guard=f6b9c19281ef9716eaa6f5c5a7734253%7C1666671058%7C5183999%7CSat%2C+24-Dec-2022+04%3A10%3A57+GMT; s_v_web_id=verify_lcd3hn9u_b2wJPaqV_gjbL_4hgh_8J3n_QWOnUCTNPFmS; MONITOR_WEB_ID=b4711595-bde2-460d-870d-4e57a1044873; ixigua-a-s=1; support_webp=true; support_avif=true; csrf_session_id=55aad65cfc16c343cca9cb5a7875c790; msToken=icA4BrE2TSaU9VlJYbvVUt3iEB_rko7GiAOsK-RZ0Hn12o5n9bmoZ8QxKgN8wt_o_-TA62249Dm3raTxK2ExmNY9_qexif3868muOvE3SZalaAYDuqCiuIOMqfNCO1o=; tt_scid=9acRPaBKm0oHEeEcBJWXPkZW07ARMqNfn9QvWpVm6WS8PA4ZXibRiKyWKWDNsT-sce27; ttwid=1%7C88hLt65tT2Y_6wV63zp5I3Bf28yx58wVdlSP3Tsr_44%7C1676202266%7Cf5aee47dfc3e0a1d4cdfbf761b6c65dd8fda07f8904927398cdb1e872acfb5e0; __ac_nonce=063e8d16500a52750f20b; __ac_signature=_02B4Z6wo00f01WElQMAAAIDA6m7bI5cpBo1hBURAADulPSpqsuE67pPP4Ky.j3XPBtoiYyaJVp0vqMdZqcQjXAMbwUWgSlEIlTjO0s5W8V7VtN6aoVcxqcVs6.a3Bvv9F8tL9tV5PK5iODa992; __ac_referer=https://www.ixigua.com/'

# Seconds to wait for the page before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10

# Matches either a complete double-quoted string literal (escapes included)
# or a bare `undefined` token. Matching strings first lets the substitution
# skip over them, so text inside string values is never rewritten.
_JS_STRING_OR_UNDEFINED = re.compile(
    r'"[^"\\]*(?:\\.[^"\\]*)*"|\bundefined\b'
)


def parse_hydrated_data(script_text):
    """Parse the object literal embedded in a hydration ``<script>`` body.

    The text between the first ``{`` and the last ``}`` is taken as the
    payload. ``true``/``false``/``null`` are already valid JSON and are left
    to ``json.loads``; the only JavaScript-specific token handled here is a
    bare ``undefined``, which is rewritten to ``null``. String values are
    left untouched, so substrings such as ``=true`` or ``null`` inside URLs
    or encoded payloads are preserved exactly.

    Args:
        script_text: Raw text content of the script element.

    Returns:
        The decoded object (normally a ``dict``).

    Raises:
        ValueError: If no ``{...}`` span exists, or the payload is not valid
            JSON after the ``undefined`` rewrite (``json.JSONDecodeError`` is
            a ``ValueError`` subclass).
    """
    start = script_text.find('{')
    end = script_text.rfind('}')
    if start == -1 or end < start:
        raise ValueError('no JSON object found in script text')
    payload = script_text[start:end + 1]

    def _fix_token(match):
        token = match.group(0)
        # Quoted strings pass through unchanged; only bare `undefined`
        # (which json.loads rejects) is replaced.
        return token if token.startswith('"') else 'null'

    return json.loads(_JS_STRING_OR_UNDEFINED.sub(_fix_token, payload))


def iter_stream_entries(video_info):
    """Yield ``(definition, backup_url_1)`` pairs for one video resource.

    Two layouts are handled, in this priority order:

    * ``video_info['dynamic_video']['dynamic_video_list']`` - a sequence of
      stream entries;
    * ``video_info['video_list']`` - a mapping whose values are stream
      entries.

    Nothing is yielded when neither key is present.

    Args:
        video_info: One dict value taken from ``videoResource``.

    Yields:
        Tuples of the entry's ``definition`` and still-encoded
        ``backup_url_1`` values.
    """
    if 'dynamic_video' in video_info:
        entries = video_info['dynamic_video']['dynamic_video_list']
    elif 'video_list' in video_info:
        entries = video_info['video_list'].values()
    else:
        entries = ()
    for entry in entries:
        yield entry['definition'], entry['backup_url_1']


def main():
    """Prompt for a page URL and print each stream's decoded download link.

    Steps: fetch the page, extract the ``SSR_HYDRATED_DATA`` script, parse
    its embedded object, then decode every ``backup_url_1`` by calling the
    ``base64decode`` function from the local ``decode.js`` through execjs.

    Raises:
        requests.RequestException: On network failure, timeout or a non-2xx
            response.
        RuntimeError: If the page has no ``SSR_HYDRATED_DATA`` script.
        ValueError: If the script payload cannot be parsed.
        KeyError: If the parsed data lacks the expected ``videoResource``
            path.
    """
    url = input('输入链接:')
    headers = {
        'user-agent': useragent().getUserAgent(),
        'cookie': COOKIE,
    }
    rsp = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail here with the HTTP status rather than later with a confusing
    # parse error on an error page.
    rsp.raise_for_status()

    page = etree.HTML(rsp.text)
    scripts = page.xpath('//script[@id="SSR_HYDRATED_DATA"]/text()')
    if not scripts:
        raise RuntimeError(
            'SSR_HYDRATED_DATA script not found in the response'
        )
    script_text = scripts[0]
    print(script_text)

    hydrated = parse_hydrated_data(script_text)
    print(hydrated)

    with open(file='decode.js', mode='r', encoding='utf-8') as f:
        ctx = execjs.compile(f.read())

    video_resource = hydrated['anyVideo']['gidInformation']['packerData']['video']['videoResource']
    for key, video_info in video_resource.items():
        # videoResource mixes scalar fields with per-format dicts; only the
        # dicts can carry stream lists.
        if not isinstance(video_info, dict):
            continue
        print(key)
        for definition, encoded_url in iter_stream_entries(video_info):
            print(definition, ctx.call('base64decode', encoded_url))


if __name__ == '__main__':
    main()