# 有道翻译和百度翻译在线爬取 (online scraping of Youdao Translate and Baidu Translate)
import random
import time
from hashlib import md5


def get_salt_sign_ts(word):
    """Build the request-signing parameters the Youdao endpoint expects.

    Args:
        word: The text that is going to be translated.

    Returns:
        A ``(salt, ts, sign)`` tuple of strings, where ``ts`` is the current
        Unix time in milliseconds, ``salt`` is ``ts`` followed by one random
        decimal digit, and ``sign`` is the hex MD5 digest of
        ``"fanyideskweb" + word + salt + <fixed secret>``.
    """
    ts = str(int(time.time() * 1000))
    salt = ts + str(random.randint(0, 9))
    string = "fanyideskweb" + word + salt + "n%A-rKaT5fb[Gy?;N5@Tj"
    sign = md5(string.encode()).hexdigest()
    return salt, ts, sign


def attack_yd(word):
    """POST *word* to the Youdao web-translate endpoint and return the result.

    Args:
        word: The text to translate.

    Returns:
        Element ``[0][0]`` of the response's ``translateResult`` field (the
        sample session below shows a dict with ``tgt`` and ``src`` keys).

    Raises:
        KeyError: If the JSON response has no ``translateResult`` field.
        requests.exceptions.RequestException: On network failure or timeout.
    """
    # Imported here rather than at module level so that get_salt_sign_ts()
    # stays importable when the third-party ``requests`` package is absent.
    import requests

    salt, ts, sign = get_salt_sign_ts(word)
    url = 'http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule'
    # Headers copied from a browser session.
    # NOTE(review): the cookie is a captured, hard-coded session value and
    # has presumably expired — confirm it is still accepted.
    headers = {
        'Accept': 'image/webp,image/apng,image/*,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
        'Connection': 'keep-alive',
        'Cookie': 'OUTFOX_SEARCH_USER_ID=1966607151@10.169.0.83; OUTFOX_SEARCH_USER_ID_NCOO=250069037.7227244; JSESSIONID=aaakkyCArmplF4qJhJHWw; DICT_UGC=be3af0da19b5c5e6aa4e17bd8d90b28a|; JSESSIONID=abc9CfcjVp7bS6v1XUIWw; ___rl__test__cookies=1563952124524',
        'Host': 'fanyi.youdao.com',
        'Referer': 'http://fanyi.youdao.com/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36',
    }
    data = {
        'i': word,
        'from': 'AUTO',
        'to': 'AUTO',
        'smartresult': 'dict',
        'client': 'fanyideskweb',
        'salt': salt,
        'sign': sign,
        'ts': ts,
        # Fields deliberately left out of the request by the original script:
        # 'bv': '6cf12640614e68ba598ee58ceccb0605',
        # 'doctype': 'json',
        # 'version': '2.1',
        'keyfrom': 'fanyi.web',
        # NOTE(review): the lowercase "l" in "REALTlME" is kept exactly as in
        # the original; presumably it mirrors what the site itself sends.
        'action': 'FY_BY_REALTlME',
    }
    # A timeout keeps the script from hanging forever on a stalled connection.
    html_json = requests.post(
        url, data=data, headers=headers, timeout=10
    ).json()
    return html_json['translateResult'][0][0]


if __name__ == '__main__':
    word = input('请输入要翻译的单词:')
    result = attack_yd(word)
    print(result)

# Example session:
#   请输入要翻译的单词:你好
#   {'tgt': 'hello', 'src': '你好'}
import re

import execjs
import requests


class BaiduTranslateSpider:
    """Scrape translations from the Baidu Translate web endpoint.

    Each translation needs two anti-scraping values: a ``token`` embedded in
    the landing page HTML and a ``sign`` computed by the JavaScript function
    ``e`` stored in ``./node.js``.
    """

    # Seconds to wait for Baidu before giving up instead of hanging forever.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        self.get_url = 'https://fanyi.baidu.com/?aldtype=16047'
        # Headers copied from a browser session.
        # NOTE(review): the cookie embeds a captured login session (BDUSS);
        # it should not live in source control and has presumably expired.
        # NOTE(review): 'br' is advertised below; requests can only decode
        # Brotli when a brotli package is installed — confirm or drop it.
        self.headers = {
            'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
            'accept-encoding': 'gzip, deflate, br',
            'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
            'cookie': 'BAIDUID=68E904D92F2C8ACC62C7542C397FBD0B:FG=1; PSTM=1561529234; BIDUPSID=2633580F87BCDFE102C31514DA3EACA6; BDUSS=HdWTDhTajh0ZTd3QmFIbzZjeDdhTTE5Wkd0R1FGcFFSaDFJVVRRSHN3ZjNnbGRkSVFBQUFBJCQAAAAAAAAAAAEAAADE7I5C06LTwrDUxvjLq9fTAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAPf1L1339S9dM; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; locale=zh; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; BDRCVFR[feWj1Vr5u3D]=I67x6TjHwwYf0; delPer=0; PSINO=3; H_PS_PSSID=29546_1466_21083_29578_29519_28518_29099_29568_28835_29221_29460_22157; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1563869496,1563869600,1563953532,1564019873; to_lang_often=%5B%7B%22value%22%3A%22zh%22%2C%22text%22%3A%22%u4E2D%u6587%22%7D%2C%7B%22value%22%3A%22en%22%2C%22text%22%3A%22%u82F1%u8BED%22%7D%5D; from_lang_often=%5B%7B%22value%22%3A%22zh%22%2C%22text%22%3A%22%u4E2D%u6587%22%7D%2C%7B%22value%22%3A%22en%22%2C%22text%22%3A%22%u82F1%u8BED%22%7D%5D; yjs_js_security_passport=47e142eebb082b8c92ef506657211ad704b97215_1564026801_js; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1564026808',
            'upgrade-insecure-requests': '1',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36',
        }

    def get_token(self):
        """Fetch the landing page and return the token embedded in it.

        Returns:
            The first string matched by ``token: '<value>'`` in the page HTML.

        Raises:
            ValueError: If the page contains no token.
            requests.exceptions.RequestException: On network failure/timeout.
        """
        html = requests.get(
            url=self.get_url,
            headers=self.headers,
            timeout=self.REQUEST_TIMEOUT,
        ).text
        # Extract the token with a regular expression.
        match = re.search(r"token: '(.*?)'", html, re.S)
        if match is None:
            raise ValueError('token not found in the Baidu Translate page')
        # The original printed the match list and returned nothing, so
        # get_result() always submitted token=None.
        return match.group(1)

    def get_sign(self, word):
        """Compute the request signature for *word*.

        Loads ``./node.js`` and calls its JavaScript function ``e`` on the
        word.

        Args:
            word: The text that is going to be translated.

        Returns:
            Whatever the JavaScript function ``e`` returns for *word*.
        """
        with open('./node.js', 'r') as f:
            js_data = f.read()
        execjs_obj = execjs.compile(js_data)
        # Pass the word as an argument rather than formatting it into JS
        # source: quotes, backslashes or newlines in the word would otherwise
        # break (or inject into) the evaluated expression.
        return execjs_obj.call('e', word)

    def get_result(self, word, fro, to):
        """Translate *word* from language *fro* to language *to*.

        Args:
            word: The text to translate.
            fro: Source language code (the script uses ``'zh'`` / ``'en'``).
            to: Target language code.

        Returns:
            The ``dst`` field of the first entry in the response's
            ``trans_result.data`` list.

        Raises:
            ValueError: If no token can be extracted from the landing page.
            KeyError: If the JSON response lacks the expected fields.
            requests.exceptions.RequestException: On network failure/timeout.
        """
        token = self.get_token()
        sign = self.get_sign(word)
        # Form fields of the translation request.
        formdata = {
            'from': fro,
            'to': to,
            'query': word,
            'transtype': 'realtime',
            'simple_means_flag': '3',
            'sign': sign,
            'token': token,
        }
        html_json = requests.post(
            url='https://fanyi.baidu.com/v2transapi',
            data=formdata,
            headers=self.headers,
            timeout=self.REQUEST_TIMEOUT,
        ).json()
        return html_json['trans_result']['data'][0]['dst']


if __name__ == '__main__':
    spider = BaiduTranslateSpider()
    num = input('1.翻译英语,2.翻译汉语,请选择(1/2):')
    # NOTE(review): option 1 maps to zh -> en exactly as in the original;
    # confirm this matches the intended meaning of the menu text.
    if num == '1':
        fro, to = 'zh', 'en'
    else:
        fro, to = 'en', 'zh'
    word = input('请输入要翻译的单词:')
    # Use the direction chosen above; the original always passed 'zh', 'en'
    # and so ignored the user's selection.
    result = spider.get_result(word, fro, to)
    print(result)