(一)JS逆向——爬取有道翻译
分析发送请求的过程
该动态请求返回的数据是响应数据,想要的结果就在其中,但明显是加密过的,所以下一步要模拟发送请求,并找出是如何加密的。
看一下请求的载荷,多发几次请求,发现只有sign和mysticTime这两个参数是变化的,所以下一步要弄清这两个参数是如何构建的,才能模拟发出请求。
在源代码中搜索,找到发送请求的位置,搜索内容可以是请求的路径,如https://dict.youdao.com/webtranslate 中的webtranslate ,也可以是JSON.stringify,一般请求的响应都伴随着序列化操作。
总之,定位到发送请求的位置。
然后通过打断点的方式,发现sign值是在k(t)函数中生成
进入k(t)函数,就能明确知道sign值是如何构建的,mysticTime值明显为时间戳,因此就能使用python程序模拟发出请求
# Build the plaintext that is hashed into `sign`.
timestamp = get_timestamp()
d = "fanyideskweb"          # sent as `client`
e = str(timestamp)          # sent as `mysticTime`
u = "webfanyi"              # sent as `product`
t = "fsdsogkndfokasodnaso"  # fixed key appended to the plaintext
target = f"client={d}&mysticTime={e}&product={u}&key={t}"
多尝试几次,就能发现只有e在变化,而e是时间戳;sign值就是对target这个字符串计算MD5后得到的十六进制摘要(MD5是哈希算法,并不是可逆的加密)。
搞清楚了参数是怎么构建的,剩下的就是模拟发出请求,得到返回的字符串。
下一步就是搞清楚,数据是如何解密的。
通过调用堆栈,找到上一层的调用函数La
js发送请求获得响应后,一般是调用then函数的内容,从then函数中明显可以看到decodeData函数,这部分就是用于解密的
进入函数后,可以发现数据先经过base64解码,再用AES的CBC模式(aes-128-cbc)解密,所以使用python模拟这一解密过程即可获得结果。
注意返回的结果字符串看着像是base64编码过的字符串,但其中含有"-"和"_",所以是base64的URL安全变种,要先把它标准化(或者在解码时指定替代字符),才能正确解码。
# Base64 normalisation, option 1: map the URL-safe characters back to the standard alphabet.
# data = data.replace("-", "+").replace("_", "/")
# Base64 normalisation, option 2: tell the decoder which alternative characters are in use.
encrpty_data = base64.b64decode(data.encode(), altchars="-_")
使用python逆向
# Pure-Python client: sign the request, post it, then base64-decode and
# AES-CBC-decrypt the response.
import base64
import hashlib
import time

import requests
from Crypto.Cipher import AES
from Crypto.Util.Padding import unpad

session = requests.session()


def gengerate_sign(target):
    """Return the hexadecimal MD5 digest of ``target`` (used as ``sign``)."""
    return hashlib.md5(target.encode()).hexdigest()


def gengerate_md5(target):
    """Return the raw 16-byte MD5 digest of ``target`` (used as AES key / IV)."""
    return hashlib.md5(target.encode()).digest()


def get_timestamp():
    """Return the current Unix time in milliseconds as an int."""
    return int(time.time() * 1000)


url = "https://dict.youdao.com/webtranslate"
headers = {
    "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Mobile Safari/537.36",
    "Referer": "https://fanyi.youdao.com/",
    "Content-Type": "application/x-www-form-urlencoded",
    "Cookie": "OUTFOX_SEARCH_USER_ID=1908778685@58.213.147.125; OUTFOX_SEARCH_USER_ID_NCOO=1210970638.8867629; DICT_DOCTRANS_SESSION_ID=YzE5ZWVlZDUtZjAxNS00OGFjLWExNjktODRjODU0YjJjZWJh",
    "Accept": "application/json, text/plain, */*",
    # Only advertise encodings requests can always decode; br/zstd need
    # optional packages and would otherwise leave res.text unreadable.
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Connection": "keep-alive",
    # No hard-coded Content-Length: requests computes it from the body.
    "Host": "dict.youdao.com",
    "Origin": "https://fanyi.youdao.com",
    "Sec-Ch-Ua": '"Google Chrome";v="123", "Not:A-Brand";v="8", "Chromium";v="123"',
    "Sec-Ch-Ua-Mobile": "?1",
    "Sec-Ch-Ua-Platform": "Android",
    "Sec-Fetch-Dest": "empty",
    "Sec-Fetch-Mode": "cors",
    "Sec-Fetch-Site": "same-site",
}

# Build the sign value: MD5 hex digest of the query-like string below.
timestamp = get_timestamp()
d = "fanyideskweb"
e = str(timestamp)
u = "webfanyi"
t = "fsdsogkndfokasodnaso"
target = f"client={d}&mysticTime={e}&product={u}&key={t}"
sign = gengerate_sign(target)

# Request body
word = "apple"
form = {
    "i": word,
    "from": "auto",
    "to": "",
    "useTerm": "false",
    "dictResult": "true",
    "keyid": "webfanyi",
    "sign": sign,
    "client": "fanyideskweb",
    "product": "webfanyi",
    "appVersion": "1.0.0",
    "vendor": "web",
    "pointParam": "client,mysticTime,product",
    "mysticTime": timestamp,
    "keyfrom": "fanyi.web",
    "mid": "1",
    "screen": "1",
    "model": "1",
    "network": "wifi",
    "abtest": "0",
    "yduuid": "abcdefg",
}

# Send the request; fail loudly on an HTTP error instead of trying to decrypt an error page.
res = session.post(url=url, data=form, headers=headers, timeout=10)
res.raise_for_status()
data = res.text

# Base64 normalisation, option 1
# data = data.replace("-", "+").replace("_", "/")
# Base64 normalisation, option 2
encrpty_data = base64.b64decode(data.encode(), altchars="-_")

# AES-CBC decryption; key and IV are the MD5 digests of the two secrets.
key = gengerate_md5("ydsecret://query/key/B*RGygVywfNBwpmBaZg*WT7SIOUP2T0C9WHMZN39j^DAdaZhAnxvGcCY6VYFwnHl")
iv = gengerate_md5("ydsecret://query/iv/C@lZe2YzHtZ2CYgaXKSVfsb7Y4QWHjITPPZ0nQp87fBeJ!Iv6v^6fvi2WN@bYpJ4")
aes = AES.new(key, AES.MODE_CBC, iv)
# Strip the block padding, otherwise the padding bytes end up in the printed text.
plain = unpad(aes.decrypt(encrpty_data), AES.block_size)
print(plain.decode())
使用js逆向解密数据
有时候使用python程序模拟比较复杂,可以直接使用对方的js代码来解密,使用execjs来模拟js的执行(需要安装node.js)。
需要注意的是,函数中的有些对象并不能明确是什么。类似于alloc,createHash,createDecipheriv这些,一般为一些库的标准方法,自定义方法的概率较小,因此这种可以百度找到类似的标准函数进行替换。
// 16-byte key and IV buffers filled from T(t) and T(o), then the AES-128-CBC decipher.
const a = Buffer.alloc(16, T(t));
const n = Buffer.alloc(16, T(o));
const r = crypto.createDecipheriv("aes-128-cbc", a, n);
如alloc,一般为Buffer.alloc,createDecipheriv一般是crypto库的方法
import requests
import time
import base64
import hashlib
from Crypto.Cipher import AES
from Crypto.Util.Padding import pad,unpad
from functools import partial # partial pre-binds ("locks in") some of a function's arguments
import subprocess
# Rebind subprocess.Popen so that every call defaults to encoding="utf-8".
# NOTE(review): presumably needed so execjs decodes the Node.js output as UTF-8
# instead of the platform default, and placed before `import execjs` on purpose — confirm.
subprocess.Popen = partial(subprocess.Popen, encoding="utf-8") # boilerplate workaround
import execjs
# HTTP session used for the translate request below.
session = requests.session()
def gengerate_sign(target: str) -> str:
    """Return the hexadecimal MD5 digest of ``target``.

    The result is sent as the ``sign`` field of the request body.

    Args:
        target: Text to hash; it is encoded with the default UTF-8 codec.

    Returns:
        The 32-character lowercase hex digest.
    """
    return hashlib.md5(target.encode()).hexdigest()
def gengerate_md5(target: str) -> bytes:
    """Return the raw MD5 digest of ``target``.

    Args:
        target: Text to hash; it is encoded with the default UTF-8 codec.

    Returns:
        The 16-byte binary digest (not hex-encoded).
    """
    return hashlib.md5(target.encode()).digest()
def get_timestamp() -> int:
    """Return the current Unix time in whole milliseconds."""
    return int(time.time() * 1000)
url = "https://dict.youdao.com/webtranslate"
headers = {
    "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Mobile Safari/537.36",
    "Referer": "https://fanyi.youdao.com/",
    "Content-Type": "application/x-www-form-urlencoded",
    "Cookie": "OUTFOX_SEARCH_USER_ID=1908778685@58.213.147.125; OUTFOX_SEARCH_USER_ID_NCOO=1210970638.8867629; DICT_DOCTRANS_SESSION_ID=YzE5ZWVlZDUtZjAxNS00OGFjLWExNjktODRjODU0YjJjZWJh",
    "Accept": "application/json, text/plain, */*",
    # Only advertise encodings requests can always decode; br/zstd need
    # optional packages and would otherwise leave res.text unreadable.
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Connection": "keep-alive",
    # No hard-coded Content-Length: requests computes it from the body.
    "Host": "dict.youdao.com",
    "Origin": "https://fanyi.youdao.com",
    "Sec-Ch-Ua": '"Google Chrome";v="123", "Not:A-Brand";v="8", "Chromium";v="123"',
    "Sec-Ch-Ua-Mobile": "?1",
    "Sec-Ch-Ua-Platform": "Android",
    "Sec-Fetch-Dest": "empty",
    "Sec-Fetch-Mode": "cors",
    "Sec-Fetch-Site": "same-site",
}

# Build the sign value: MD5 hex digest of the query-like string below.
timestamp = get_timestamp()
d = "fanyideskweb"
e = str(timestamp)
u = "webfanyi"
t = "fsdsogkndfokasodnaso"
target = f"client={d}&mysticTime={e}&product={u}&key={t}"
sign = gengerate_sign(target)

# Request body
word = "apple"
form = {
    "i": word,
    "from": "auto",
    "to": "",
    "useTerm": "false",
    "dictResult": "true",
    "keyid": "webfanyi",
    "sign": sign,
    "client": "fanyideskweb",
    "product": "webfanyi",
    "appVersion": "1.0.0",
    "vendor": "web",
    "pointParam": "client,mysticTime,product",
    "mysticTime": timestamp,
    "keyfrom": "fanyi.web",
    "mid": "1",
    "screen": "1",
    "model": "1",
    "network": "wifi",
    "abtest": "0",
    "yduuid": "abcdefg",
}

# Send the request; fail loudly on an HTTP error instead of handing an error page to the decryptor.
res = session.post(url=url, data=form, headers=headers, timeout=10)
res.raise_for_status()
encrypted_text = res.text

# Load the JavaScript source that contains the site's own decryption routine.
with open("youdao.js", encoding="utf-8") as f:
    jscode = f.read()

# Compile the script with execjs.
js = execjs.compile(jscode)

# Call the JS function `jiemi` on the encrypted response text.
ret = js.call("jiemi", encrypted_text)
print(ret)
js代码
const crypto = require('crypto');

/**
 * Return the raw MD5 digest of `e` as a Buffer.
 */
function T(e) {
    return crypto.createHash("md5").update(e).digest();
}

/**
 * Decrypt a base64-encoded AES-128-CBC payload and return it as a UTF-8 string.
 *
 * The key and IV are the MD5 digests of the two hard-coded secrets below.
 *
 * @param {string} e base64 text of the encrypted response
 * @returns {string|null} the decrypted text, or null when `e` is empty/falsy
 */
function jiemi(e) {
    // Guard first so nothing is computed for empty input.
    if (!e) return null;

    // Declared with const: the original assigned these as implicit globals.
    const t = "ydsecret://query/key/B*RGygVywfNBwpmBaZg*WT7SIOUP2T0C9WHMZN39j^DAdaZhAnxvGcCY6VYFwnHl";
    const o = "ydsecret://query/iv/C@lZe2YzHtZ2CYgaXKSVfsb7Y4QWHjITPPZ0nQp87fBeJ!Iv6v^6fvi2WN@bYpJ4";

    const a = Buffer.alloc(16, T(t));
    const n = Buffer.alloc(16, T(o));
    const r = crypto.createDecipheriv("aes-128-cbc", a, n);

    let l = r.update(e, "base64", "utf-8");
    l += r.final("utf-8");
    return l;
}