POST/有道翻译 有bug

1.发现在翻译时地址没有变,那是POST请求。

2.通过Fiddler抓包工具抓取url

3.对data分析,发现每次salt和sign都在变化。

4.查看源码,先用站长工具http://tool.chinaz.com/Tools/jsformat.aspx格式化,再用Sublime Text打开,搜索关键字salt和sign并对其进行分析

5.代码如下,还有bug

# -*- coding:utf-8 -*-

import urllib
import urllib2
import time
from lxml import etree
import random
import hashlib

# First attempt: POST the user's text to the Youdao translate endpoint and try
# to extract the answer from the response with lxml/XPath.
#
# Endpoint captured with a packet-capture tool; it is not the URL shown in the
# browser address bar.
url="http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule&sessionFrom="
# Full set of request headers.
headers = {
        "Accept" : "application/json, text/javascript, */*; q=0.01",
        "X-Requested-With" : "XMLHttpRequest",
        "User-Agent" : "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36",
        "Content-Type" : "application/x-www-form-urlencoded; charset=UTF-8",
    }

# Read the text to translate from the user.
key = raw_input("请输入需要翻译的文字:")

# Pieces that are concatenated and hashed to build the "sign" form field.
# u: same value as the "client" form field.
u = 'fanyideskweb'
# d: the text to translate.
d = key
# f: the salt -- current time in milliseconds plus a random integer in 1..10,
# rendered as a string.
f = str(int(time.time()*1000) + random.randint(1,10))
# c: fixed string appended last when computing the signature.
c = 'rY0D^0\'nM0}g5Mm1z%1G4'

# Form data sent to the web server. "sign" is the MD5 hex digest of
# u + d + f + c.
# NOTE(review): under Python 2 `d` is a byte string, so calling
# .encode('utf-8') on the concatenation presumably triggers an implicit ASCII
# decode first and fails for non-ASCII (e.g. Chinese) input -- confirm.
formdata = {
"from":"AUTO",
"to":"AUTO",
"i" : key,
"smartresult":"dict",
"client":"fanyideskweb",
"salt":f,
"sign":hashlib.md5((u + d + f + c).encode('utf-8')).hexdigest()
,
"doctype":"json",
"version":"2.1", 
"keyfrom":"fanyi.web",
"action":"FY_BY_CLICKBUTTION",
"typoResult":"true" 
}

# URL-encode the form data.
data = urllib.urlencode(formdata)

# When Request() is given a data argument the request is sent as POST;
# without it the request is a GET.
request = urllib2.Request(url, data = data, headers = headers)

content = urllib2.urlopen(request).read()
# Parse the response body as HTML so it can be queried with XPath.
# NOTE(review): the form asks for doctype=json, so the body is presumably JSON
# rather than HTML, and "//div/p/...." does not look like a valid XPath
# expression -- this is the part of the script that is expected to fail.
html = etree.HTML(content)
answer = html.xpath("//div/p/....")
print answer

 6.解决了一部分bug:因为返回来的数据是json数据,不能用xpath解析,只能用json解析

# -*- coding:utf-8 -*-

import urllib
import urllib2
import time
from lxml import etree
import random
import hashlib
import json

# Translate one line of user input through the Youdao web endpoint and print
# the raw JSON reply followed by the first translated segment.
#
# Local import: needed to look up the console encodings used below.
import sys

# Endpoint captured with a packet-capture tool; it is not the URL shown in the
# browser address bar.
url = "http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule&smartresult=ugc&sessionFrom=null"
# Full set of request headers.
headers = {
    "Accept": "application/json, text/javascript, */*; q=0.01",
    "X-Requested-With": "XMLHttpRequest",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36",
    "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
}

# raw_input() exchanges byte strings in the *console's* encoding (for example
# GBK on a Chinese Windows console), which is not necessarily UTF-8. Assuming
# UTF-8 there is what broke Chinese input. Fall back to UTF-8 when a stream
# reports no encoding (e.g. when it is piped).
input_encoding = getattr(sys.stdin, "encoding", None) or "utf-8"
output_encoding = getattr(sys.stdout, "encoding", None) or "utf-8"

# Read the text to translate; the prompt is encoded for the console so it is
# displayed correctly regardless of the source file's encoding.
key = raw_input(u"请输入需要翻译的文字:".encode(output_encoding, "replace"))

# Unicode form of the input, used for both the signature and the form body.
g = key.decode(input_encoding)

# Pieces that are concatenated and hashed to build the "sign" form field.
# u: client identifier, also sent as the "client" form field.
u = 'fanyideskweb'
# f: the salt -- current time in milliseconds plus a random integer in 1..10,
# rendered as a string.
f = str(int(time.time() * 1000) + random.randint(1, 10))
# c: fixed string appended last when computing the signature.
c = 'rY0D^0\'nM0}g5Mm1z%1G4'

# Form data sent to the web server.
formdata = {
    "from": "AUTO",
    "to": "AUTO",
    # Send UTF-8 bytes so the body matches the charset declared in the
    # Content-Type header, whatever encoding the console used.
    "i": g.encode("utf-8"),
    # Was misspelled "josn"; "dict" matches the URL query string.
    "smartresult": "dict",
    "client": u,
    "salt": f,
    # MD5 hex digest of client + text + salt + fixed string, hashed as UTF-8.
    "sign": hashlib.md5((u + g + f + c).encode("utf-8")).hexdigest(),
    "doctype": "json",
    "version": "2.1",
    "keyfrom": "fanyi.web",
    "action": "FY_BY_CLICKBUTTION",
    "typoResult": "true",
}

# URL-encode the form data.
data = urllib.urlencode(formdata)

# When Request() is given a data argument the request is sent as POST;
# without it the request is a GET.
request = urllib2.Request(url, data=data, headers=headers)

# The server answers with a JSON document; always release the connection.
response = urllib2.urlopen(request)
try:
    content = response.read().decode("utf-8")
finally:
    response.close()

# Encode explicitly before printing so that characters the console cannot
# represent are replaced instead of raising UnicodeEncodeError.
print(content.encode(output_encoding, "replace"))

# The reply is JSON, so it has to be parsed with json (not XPath).
target = json.loads(content)
print(target["translateResult"][0][0]["tgt"].encode(output_encoding, "replace"))

# Earlier lxml/XPath attempt, disabled by wrapping it in a bare string literal:
# the string is evaluated and discarded, so nothing inside it runs.
"""
print content
# 把字符串转为html格式,xpath只能解析html格式
html = etree.HTML(content)
answer = html.xpath("//div/div")[0].text
answer = answer.decode("utf-8").encode("gbk")
print answer
"""

bug:不能翻译中文。

posted @ 2017-10-11 21:32  cuzz_z  阅读(621)  评论(0编辑  收藏  举报