# 爬取有道翻译 (Scraping Youdao Translate)

'''
j---Request URL:http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule
jo--Request URL:http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule
job-Request URL:http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule

三次的请求接口是一样的,由此推断
请求接口:http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule

通过form表单
i:j   需要翻译的内容
from:AUTO  源语言
to:AUTO   目的语言  就是把哪一门语言翻译为哪一门语言
smartresult:dict
client:fanyideskweb
salt:1520339426459
sign:6edf4011d6b587550ef418fc9ba09b5e
doctype:json
version:2.1
keyfrom:fanyi.web
action:FY_BY_REALTIME
typoResult:false

i:jo
from:AUTO
to:AUTO
smartresult:dict
client:fanyideskweb
salt:1520339739042
sign:90d73568704068c16c27f32f4f99a8a5
doctype:json
version:2.1
keyfrom:fanyi.web
action:FY_BY_REALTIME
typoResult:false

要想在静态页面生成东西,可用js来进行
JS:将静态网页变成动态加载的过程
'''
from urllib import request,parse
import time,random
import hashlib #hashlib md5加密的一个包
import json
def getMD5(value):
    """Return the hexadecimal MD5 digest of a string.

    The text is encoded as UTF-8 before it is hashed.

    Args:
        value: The string to hash.

    Returns:
        The digest as a lowercase hexadecimal string.
    """
    # Hash the UTF-8 bytes in one shot and hand back the hex form.
    return hashlib.md5(bytes(value, encoding='utf-8')).hexdigest()

def fanyi(key):
    """Translate ``key`` through the Youdao web endpoint and print the result.

    Builds the form the Youdao web page submits (including the ``salt`` and
    MD5 ``sign`` fields), POSTs it, prints the raw response body, and then
    prints every non-empty entry found under ``smartResult.entries``.

    Args:
        key: The text to translate.

    Returns:
        None. All output is written to stdout.

    Raises:
        urllib.error.URLError: If the HTTP request fails.
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    base_url = 'http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule'

    # Salt: millisecond timestamp plus a random digit, mirroring the page's JS:
    #   i = "" + ((new Date).getTime() + parseInt(10 * Math.random(), 10))
    # parseInt(10 * Math.random()) yields 0..9, hence the upper bound of 9.
    salt = str(int(time.time() * 1000) + random.randint(0, 9))

    # Sign: MD5 over client id + text + salt + fixed secret, mirroring the JS:
    #   o = n.md5("fanyideskweb" + t + i + "ebSeFb%=XZ%T[KZ)c(sy!")
    sign = getMD5("fanyideskweb" + key + salt + "ebSeFb%=XZ%T[KZ)c(sy!")

    data = {
        "i": key,
        "from": "AUTO",
        "to": "AUTO",
        "smartresult": "dict",
        "client": "fanyideskweb",
        "salt": salt,
        "sign": sign,
        "doctype": "json",
        "version": "2.1",
        "keyfrom": "fanyi.web",
        "action": "FY_BY_REALTIME",
        "typoResult": "false"
    }
    # URL-encode the form and convert it to bytes once; the same bytes are
    # used for the request body and for the Content-Length value.
    body = parse.urlencode(data).encode('utf-8')
    headers = {
        "Accept":"application/json, text/javascript, */*; q=0.01",
        # Accept-Encoding is deliberately not sent: the body is decoded as
        # plain UTF-8 below, with no decompression step.
        "Accept-Language":"zh-CN,zh;q=0.9",
        "Connection":"keep-alive",
        "Content-Length":str(len(body)),
        "Content-Type":"application/x-www-form-urlencoded; charset=UTF-8",
        "Cookie":"OUTFOX_SEARCH_USER_ID_NCOO=1135910979.4269547; OUTFOX_SEARCH_USER_ID=-1364254390@10.168.1.8; fanyi-ad-id=40789; fanyi-ad-closed=1; JSESSIONID=aaafp6BJjIzwC4k7Mb5hw; ___rl__test__cookies=1520316265914",
        "Host":"fanyi.youdao.com",
        "Origin":"http://fanyi.youdao.com",
        "Referer":"http://fanyi.youdao.com/",
        "User-Agent":"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36",
        "X-Requested-With":"XMLHttpRequest"
    }

    req = request.Request(base_url, body, headers=headers)
    # The context manager closes the connection even if read() raises.
    with request.urlopen(req) as response:
        content = response.read().decode('utf-8')
    print(content)

    # Post-process the JSON reply.
    json_data = json.loads(content)
    # NOTE(review): the reply is assumed to be a JSON object that may lack
    # "smartResult" or "entries" (e.g. an error reply); treat a missing or
    # null value as "nothing to list" instead of raising KeyError/TypeError.
    smart_result = json_data.get('smartResult') or {}
    for entry in smart_result.get('entries') or []:
        if not entry:
            # Skip empty placeholder entries.
            continue
        print(entry.strip())

if __name__ == '__main__':
    # Interactive loop: translate each line the user types until 'q' is entered.
    while True:
        text = input('请输入翻译内容:')
        # Check the quit sentinel first so that 'q' is not sent to the
        # translation service before the program exits.
        if text == 'q':
            break
        fanyi(text)

 

# posted @ 2018-03-10 21:33  Bob__Zhang  阅读(1356)  评论(0)  编辑  收藏  举报