有道翻译和百度翻译在线爬取

import requests
import time
import random
from hashlib import md5


def get_salt_sign_ts(word):
    """Build the signed request parameters for the Youdao web translator.

    Args:
        word: The text to translate (a str); it is mixed into the signature.

    Returns:
        A ``(salt, ts, sign)`` tuple of strings where
        ``ts`` is the current time in milliseconds,
        ``salt`` is ``ts`` with one random decimal digit appended, and
        ``sign`` is the hex MD5 digest of client id + word + salt + secret.
    """
    ts = '{}'.format(int(time.time() * 1000))
    salt = '{}{}'.format(ts, random.randint(0, 9))
    # The pieces are concatenated in a fixed order before hashing.
    plain = ''.join(("fanyideskweb", word, salt, "n%A-rKaT5fb[Gy?;N5@Tj"))
    sign = md5(plain.encode()).hexdigest()
    return salt, ts, sign


def attack_yd(word, timeout=10):
    """Translate ``word`` through the Youdao web translation endpoint.

    Args:
        word: The text to translate.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout a stalled connection would block forever.

    Returns:
        The first entry of the response's ``translateResult`` field
        (e.g. ``{'tgt': 'hello', 'src': '你好'}``).

    Raises:
        requests.exceptions.RequestException: On network failure or timeout.
        KeyError / IndexError: If the response has no ``translateResult``.
    """
    salt, ts, sign = get_salt_sign_ts(word)
    url = 'http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule'
    # Browser-like headers, including a hard-coded session cookie.
    headers = {
        'Accept': 'image/webp,image/apng,image/*,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
        'Connection': 'keep-alive',
        'Cookie': 'OUTFOX_SEARCH_USER_ID=1966607151@10.169.0.83; OUTFOX_SEARCH_USER_ID_NCOO=250069037.7227244; JSESSIONID=aaakkyCArmplF4qJhJHWw; DICT_UGC=be3af0da19b5c5e6aa4e17bd8d90b28a|; JSESSIONID=abc9CfcjVp7bS6v1XUIWw; ___rl__test__cookies=1563952124524',
        'Host': 'fanyi.youdao.com',
        'Referer': 'http://fanyi.youdao.com/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36',
    }
    data = {
        'i': word,
        'from': 'AUTO',
        'to': 'AUTO',
        'smartresult': 'dict',
        'client': 'fanyideskweb',
        'salt': salt,
        'sign': sign,
        'ts': ts,
        # The following form fields are intentionally left out of the request:
        # 'bv': '6cf12640614e68ba598ee58ceccb0605',
        # 'doctype': 'json',
        # 'version': '2.1',
        'keyfrom': 'fanyi.web',
        # NOTE(review): the value contains a lowercase 'l' ("REALTlME");
        # presumably this mirrors what the site itself sends -- do not "fix".
        'action': 'FY_BY_REALTlME',
    }
    response = requests.post(url, data=data, headers=headers, timeout=timeout)
    html_json = response.json()
    return html_json['translateResult'][0][0]


if __name__ == '__main__':
    # Prompt for a word, translate it via Youdao and print the result entry.
    print(attack_yd(input('请输入要翻译的单词:')))

请输入要翻译的单词:你好
{'tgt': 'hello', 'src': '你好'}

 

import requests
import re
import execjs


class BaiduTranslateSpider:
    """Scraper for the Baidu web translation endpoint.

    A translation needs two values besides the query itself: a ``token``
    scraped from the landing page and a ``sign`` computed by the JavaScript
    function ``e`` stored in ``./node.js``.
    """

    def __init__(self):
        # Landing page from which the token is scraped.
        self.get_url = 'https://fanyi.baidu.com/?aldtype=16047'
        # Seconds to wait on each HTTP request; avoids hanging forever.
        self.timeout = 10
        # Browser-like headers, including a hard-coded session cookie.
        self.headers = {
            'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
            'accept-encoding': 'gzip, deflate, br',
            'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
            'cookie': 'BAIDUID=68E904D92F2C8ACC62C7542C397FBD0B:FG=1; PSTM=1561529234; BIDUPSID=2633580F87BCDFE102C31514DA3EACA6; BDUSS=HdWTDhTajh0ZTd3QmFIbzZjeDdhTTE5Wkd0R1FGcFFSaDFJVVRRSHN3ZjNnbGRkSVFBQUFBJCQAAAAAAAAAAAEAAADE7I5C06LTwrDUxvjLq9fTAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAPf1L1339S9dM; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; locale=zh; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; BDRCVFR[feWj1Vr5u3D]=I67x6TjHwwYf0; delPer=0; PSINO=3; H_PS_PSSID=29546_1466_21083_29578_29519_28518_29099_29568_28835_29221_29460_22157; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1563869496,1563869600,1563953532,1564019873; to_lang_often=%5B%7B%22value%22%3A%22zh%22%2C%22text%22%3A%22%u4E2D%u6587%22%7D%2C%7B%22value%22%3A%22en%22%2C%22text%22%3A%22%u82F1%u8BED%22%7D%5D; from_lang_often=%5B%7B%22value%22%3A%22zh%22%2C%22text%22%3A%22%u4E2D%u6587%22%7D%2C%7B%22value%22%3A%22en%22%2C%22text%22%3A%22%u82F1%u8BED%22%7D%5D; yjs_js_security_passport=47e142eebb082b8c92ef506657211ad704b97215_1564026801_js; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1564026808',
            'upgrade-insecure-requests': '1',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36',
        }

    @staticmethod
    def _extract_token(html):
        """Return the first ``token: '...'`` value found in ``html``.

        Raises:
            ValueError: If the page contains no token.
        """
        match = re.search(r"token: '(.*?)'", html, re.S)
        if match is None:
            raise ValueError('token not found in Baidu translate page')
        return match.group(1)

    # Fetch the token
    def get_token(self):
        """Download the landing page and return the token embedded in it.

        The token is returned as a string so that ``get_result`` can place
        it in the form data.
        """
        html = requests.get(
            url=self.get_url, headers=self.headers, timeout=self.timeout
        ).text
        # Parse the page with a regular expression.
        return self._extract_token(html)

    # Compute the sign
    def get_sign(self, word):
        """Compute the request signature for ``word``.

        Loads ``./node.js`` and calls its JavaScript function ``e`` with
        ``word`` as the single argument.
        """
        with open('./node.js', 'r', encoding='utf-8') as f:
            js_data = f.read()
        execjs_obj = execjs.compile(js_data)
        # Pass the word as a real argument instead of splicing it into JS
        # source, so quotes/backslashes in the input cannot break or inject
        # code into the evaluated expression.
        sign = execjs_obj.call('e', word)

        return sign

    # Fetch the translation result
    def get_result(self, word, fro, to):
        """Translate ``word`` from language ``fro`` to language ``to``.

        Args:
            word: Text to translate.
            fro: Source language code sent as the ``from`` form field.
            to: Target language code sent as the ``to`` form field.

        Returns:
            The ``dst`` field of the first entry in
            ``trans_result.data`` of the JSON response.
        """
        token = self.get_token()
        sign = self.get_sign(word)
        # Form data for the POST request.
        formdata = {
            'from': fro,
            'to': to,
            'query': word,
            'transtype': 'realtime',
            'simple_means_flag': '3',
            'sign': sign,
            'token': token,
        }
        html_json = requests.post(
            url='https://fanyi.baidu.com/v2transapi',
            data=formdata,
            headers=self.headers,
            timeout=self.timeout,
        ).json()
        return html_json['trans_result']['data'][0]['dst']


if __name__ == '__main__':
    spider = BaiduTranslateSpider()
    num = input('1.翻译英语,2.翻译汉语,请选择(1/2):')
    # Choice '1' selects Chinese -> English; any other input selects
    # English -> Chinese.
    if num == '1':
        fro, to = 'zh', 'en'
    else:
        fro, to = 'en', 'zh'
    word = input('请输入要翻译的单词:')
    # Use the direction the user picked; it was previously hard-coded to
    # 'zh' -> 'en', which made the menu choice have no effect.
    result = spider.get_result(word, fro, to)
    print(result)

 

posted @ 2019-07-25 10:42  一如年少模样  阅读(885)  评论(0编辑  收藏  举报