JS 逆向解析爬虫
将 JS 的 Base64 编码(“加密”)逻辑改写为 Python
import requests
import execjs
import json
import base64
import time

# Endpoint queried by this example.
url = 'http://webapi.cninfo.com.cn/api/sysapi/p_sysapi1007'

# Query parameters; serialized to a JSON string and sent as the request body.
data = {
    "tdate": "2022-04-13",
    "market": "SZE",
}

# Value for the `mcode` header: the current Unix timestamp (whole seconds)
# rendered as a decimal string and base64-encoded. Per the section heading,
# this is the Python port of the site's JS-side base64 step.
mcode = base64.b64encode(str(int(time.time())).encode()).decode()

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.99 Safari/537.36',
    'mcode': mcode,
    'Referer': 'http://webapi.cninfo.com.cn/',
}

# Always pass a timeout: without one, requests waits indefinitely when the
# server stops responding and the script never finishes.
r = requests.post(url=url, headers=headers, data=json.dumps(data), timeout=10)
print(r.text)
Python执行js
# Read the whole JavaScript source file into memory as text.
with open('spider_test.js', encoding='utf-8') as js_file:
    result = js_file.read()
# print(result)  # uncomment to inspect the loaded JS source

# Compile the JS source, then call one of its functions by name.
res = execjs.compile(result)
cc = res.call("xx")  # "xx" is the name of the JS function to invoke
AES解密
import requests
from Crypto.Cipher import AES
import json


def _strip_pkcs7(padded, block_size=16):
    """Return *padded* with its PKCS#7 padding removed.

    The padding is validated on the raw bytes so that a wrong key/IV or a
    corrupted payload fails here with a clear message instead of surfacing
    later as a decoding or JSON error.

    Args:
        padded: Decrypted bytes, including the trailing padding.
        block_size: Cipher block size in bytes (16 for AES).

    Returns:
        The plaintext bytes without padding.

    Raises:
        ValueError: If the data is empty, not block-aligned, or the padding
            bytes are inconsistent.
    """
    if not padded or len(padded) % block_size:
        raise ValueError("decrypted data is empty or not block-aligned")
    pad_len = padded[-1]  # PKCS#7: every padding byte equals the pad length
    if not 1 <= pad_len <= block_size or padded[-pad_len:] != bytes([pad_len]) * pad_len:
        raise ValueError("invalid PKCS#7 padding (wrong key/IV or corrupt data?)")
    return padded[:-pad_len]


url = 'http://jzsc.mohurd.gov.cn/api/webApi/dataservice/query/comp/list?pg=3&pgsz=15&total=0'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36 Edg/93.0.961.38'
}

# Use a timeout so an unresponsive server cannot hang the script, and fail
# fast on HTTP errors rather than feeding an error page to bytes.fromhex().
resp = requests.get(url, headers=headers, timeout=10)
resp.raise_for_status()
response = resp.text  # encrypted payload: hex-encoded ciphertext
# print(response)

# AES key and CBC initialization vector, converted to bytes for the cipher.
key = bytes('jo8j9wGw%6HbxfFn', encoding='utf-8')
iv = bytes('0123456789ABCDEF', encoding='utf-8')

# Build the AES-CBC cipher and decrypt the hex-decoded ciphertext.
cipher = AES.new(key, AES.MODE_CBC, iv)
decrypt_content = cipher.decrypt(bytes.fromhex(response))

# Strip the PKCS#7 padding (applied on the JavaScript side) from the raw
# bytes first, then decode the remaining plaintext as UTF-8.
result = _strip_pkcs7(decrypt_content).decode('utf-8')

# The plaintext is a JSON document; the records live under data -> list.
result = json.loads(result)['data']['list']

for i in result:
    print(i)