Python爬虫-破解有道词典(破解MD5的JS加密算法)
破解有道词典
1.进行普通爬取尝试:
'''
Youdao dictionary scraper
V1: naive attempt that replays a request captured from the browser.
'''

from urllib import request, parse


def youdao(key):
    '''
    POST a translation request for ``key`` and print the raw response body.

    The ``salt`` and ``sign`` form fields are literal values copied from one
    captured browser request; they are not recomputed here.
    NOTE(review): presumably the server only accepts that sign for the
    captured word/salt pair, so other words are expected to be rejected.

    :param key: text to look up
    :return: the decoded response body (also printed)
    '''
    url = "http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule"

    form = {
        "i": key,
        "from": "AUTO",
        "to": "AUTO",
        "smartresult": "dict",
        "client": "fanyideskweb",
        "salt": "1523100789519",
        "sign": "b8a55a436686cd89873fa46514ccedbe",
        "doctype": "json",
        "version": "2.1",
        "keyfrom": "fanyi.web",
        "action": "FY_BY_REALTIME",
        "typoResult": "false",
    }

    # The request body must be bytes.
    data = parse.urlencode(form).encode()

    headers = {
        "Accept": "application/json,text/javascript,*/*;q=0.01",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Connection": "keep-alive",
        # Derived from the real body; a fixed number goes stale as soon as
        # any form field changes length.
        "Content-Length": str(len(data)),
        "Content-Type": "application/x-www-form-urlencoded;charset=UTF-8",
        "Cookie": "OUTFOX_SEARCH_USER_ID=-1548144101@10.168.8.76;JSESSIONID=aaaTLWzfvp5Hfg9mAhFkw;OUTFOX_SEARCH_USER_ID_NCOO=1999296830.4784973;___rl__test__cookies=1523100789517",
        "Host": "fanyi.youdao.com",
        "Origin": "http://fanyi.youdao.com",
        "Referer": "http://fanyi.youdao.com/",
        "User-Agent": "Mozilla/5.0( X11; Linux x86_64) AppleWebKit/537.36(KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36",
        # Was accidentally pasted into the User-Agent value; it is its own header.
        "X-Requested-With": "XMLHttpRequest",
    }

    req = request.Request(url=url, data=data, headers=headers)

    # The context manager closes the connection even if read/decode fails.
    with request.urlopen(req) as rsp:
        html = rsp.read().decode()

    print(html)
    return html


if __name__ == '__main__':
    youdao("boy")
2.破解有道词典的JS加密(MD5签名)算法后爬取数据(处理JS加密代码)
'''
V2
Reproduce the request signing done by the site's JavaScript.

Reading the JS shows how the two dynamic form fields are built:

1. salt:  r = "" + ((new Date).getTime() + parseInt(10 * Math.random(), 10));
2. sign:  n.md5("fanyideskweb" + t + r + "ebSeFb%=XZ%T[KZ)c(sy!");
   The MD5 input is four strings concatenated: the first and the fourth are
   fixed, the third (r) is the salt, and the second (t) is the text being
   looked up.
'''

import hashlib
import random
import time
from urllib import request, parse


def getSalt():
    '''
    Python translation of the JS salt formula:
        "" + ((new Date).getTime() + parseInt(10 * Math.random(), 10))

    :return: current Unix time in milliseconds plus a random integer in 0..9
    '''
    # parseInt(10 * Math.random(), 10) yields 0..9 because Math.random() is
    # always < 1; random.randint includes both ends, so the upper bound is 9.
    return int(time.time() * 1000) + random.randint(0, 9)


def getMD5(v):
    '''
    Return the hexadecimal MD5 digest of the string ``v``.

    :param v: text to hash; it is encoded as UTF-8 first
    :return: 32-character lowercase hex digest
    '''
    # hashlib works on bytes, not str.
    return hashlib.md5(v.encode("utf-8")).hexdigest()


def getSign(key, salt):
    '''
    Build the ``sign`` form field for one request.

    :param key: text being looked up
    :param salt: value from getSalt(); the same value must be sent as ``salt``
    :return: MD5 hex digest of client id + key + salt + fixed secret
    '''
    raw = 'fanyideskweb' + key + str(salt) + "ebSeFb%=XZ%T[KZ)c(sy!"
    return getMD5(raw)


def youdao(key):
    '''
    POST a translation request for ``key`` with a freshly computed salt/sign
    and print the raw response body.

    :param key: text to look up
    :return: the decoded response body (also printed)
    '''
    url = "http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule"

    # One salt per request: it is sent as-is and also mixed into the sign.
    salt = getSalt()

    form = {
        "i": key,
        "from": "AUTO",
        "to": "AUTO",
        "smartresult": "dict",
        "client": "fanyideskweb",
        "salt": str(salt),
        "sign": getSign(key, salt),
        "doctype": "json",
        "version": "2.1",
        "keyfrom": "fanyi.web",
        "action": "FY_BY_REALTIME",
        "typoResult": "false",
    }

    print(form)

    # The request body must be bytes.
    data = parse.urlencode(form).encode()

    headers = {
        "Accept": "application/json,text/javascript,*/*;q=0.01",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Connection": "keep-alive",
        # Header values are text; send the computed length as a string.
        "Content-Length": str(len(data)),
        "Content-Type": "application/x-www-form-urlencoded;charset=UTF-8",
        "Cookie": "OUTFOX_SEARCH_USER_ID=-1548144101@10.168.8.76;JSESSIONID=aaaTLWzfvp5Hfg9mAhFkw;OUTFOX_SEARCH_USER_ID_NCOO=1999296830.4784973;___rl__test__cookies=1523100789517",
        "Host": "fanyi.youdao.com",
        "Origin": "http://fanyi.youdao.com",
        "Referer": "http://fanyi.youdao.com/",
        "User-Agent": "Mozilla/5.0( X11; Linux x86_64) AppleWebKit/537.36(KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36",
        # Was accidentally pasted into the User-Agent value; it is its own header.
        "X-Requested-With": "XMLHttpRequest",
    }

    req = request.Request(url=url, data=data, headers=headers)

    # The context manager closes the connection even if read/decode fails.
    with request.urlopen(req) as rsp:
        html = rsp.read().decode()

    print(html)
    return html


if __name__ == '__main__':
    youdao("boy")
=========================
==================================
==================================
======================================
==========================================
结果示例:
JS代码格式化工具:
http://tool.oschina.net/codeformat/js