百度翻译接口破解

百度翻译网页地址

https://fanyi.baidu.com

代码

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import okhttp3.*;
import org.springframework.util.StringUtils;

import javax.script.Invocable;
import javax.script.ScriptEngine;
import javax.script.ScriptEngineManager;
import javax.script.ScriptException;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

/**
 * Small client for the web endpoints of Baidu Translate (https://fanyi.baidu.com).
 *
 * <p>Requests imitate the ones sent by the web page: a fixed browser-like header set, a
 * {@code BAIDUID} cookie, a page token and a per-query signature computed by the page's own
 * JavaScript routine (see {@link Sign}).
 *
 * <p>The token, cookie and signature seed are hard-coded values copied from a browser session.
 * NOTE(review): presumably they expire or are bound to each other; refresh them from the page
 * ({@code window.common.token}, {@code window.gtk}) if the service starts rejecting requests.
 */
public class TransApi {
    private static final ObjectMapper objectMapper = new ObjectMapper();

    private static final OkHttpClient client = new OkHttpClient.Builder().build();

    /**
     * Upper bound, in characters, for one fragment sent to the translate endpoint.
     * NOTE(review): 8000 is the value used by the original code; it has not been verified
     * against the service.
     */
    private static final int MAX_PART_LENGTH = 8000;

    /**
     * Token, available on the page as {@code window.common.token}.
     */
    private static final String TOKEN = "925160a8745a657e443b666ce5d3c383";

    /**
     * Cookie sent with every request.
     */
    private static final String BAIDUID = "BAIDUID=FAE39815F9431170B8E230B87980567A:FG=1";

    /**
     * Seed for the signature, available on the page as {@code window.gtk}.
     */
    private static final String SEED_SIGN = "320305.131321201";

    /**
     * Builds the form body of a translate request, including the signature for {@code query}.
     *
     * @param query source text
     * @param from  source language code
     * @param to    target language code
     * @return the url-encoded form body
     * @throws Exception if the signature cannot be computed
     */
    private static RequestBody getRequestBody(String query, String from, String to) throws Exception {
        String sign = Sign.getSign(query, SEED_SIGN);
        return new FormBody.Builder()
                .add("from", from)
                .add("to", to)
                .add("query", query)
                .add("transtype", "realtime")
                .add("simple_means_flag", "3")
                .add("sign", sign)
                .add("token", TOKEN)
                .add("domain", "common")
                .build();
    }

    /**
     * Creates a request builder pre-populated with the browser-like headers and the cookie
     * that every call to the service uses.
     *
     * @return a new builder; the caller still has to set the URL and the HTTP method
     */
    private static Request.Builder getRequestBuilder() {
        return new Request.Builder()
                .header("Connection", "keep-alive")
                .header("sec-ch-ua", "\"Chromium\";v=\"94\", \"Google Chrome\";v=\"94\", \";Not A Brand\";v=\"99\"")
                .header("Accept", "*/*")
                .header("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8")
                .header("X-Requested-With", "XMLHttpRequest")
                .header("sec-ch-ua-mobile", "?0")
                .header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36")
                .header("sec-ch-ua-platform", "\"Windows\"")
                .header("Origin", "https://fanyi.baidu.com")
                .header("Sec-Fetch-Site", "same-origin")
                .header("Sec-Fetch-Mode", "cors")
                .header("Sec-Fetch-Dest", "empty")
                .header("Referer", "https://fanyi.baidu.com/")
                .header("Accept-Language", "zh-CN,zh;q=0.9")
                .header("Cookie", BAIDUID);
    }

    /**
     * Executes {@code request} and parses the response body as JSON.
     *
     * <p>The response is always closed, and a non-2xx status is reported as an
     * {@link IOException} instead of being parsed as if it were a normal answer.
     *
     * @param request the fully built request
     * @return the parsed JSON document
     * @throws IOException on transport failure, non-successful HTTP status or invalid JSON
     */
    private static JsonNode executeForJson(Request request) throws IOException {
        try (Response response = client.newCall(request).execute()) {
            if (!response.isSuccessful()) {
                throw new IOException("Unexpected HTTP status " + response.code() + " from " + request.url());
            }
            ResponseBody body = response.body();
            if (body == null) {
                throw new IOException("Empty response body from " + request.url());
            }
            return objectMapper.readTree(body.string());
        }
    }

    /**
     * Translates English text to Chinese.
     *
     * @param query source text
     * @return the translated text
     * @throws Exception if signing or the HTTP exchange fails
     */
    public static String enToZh(String query) throws Exception {
        return translate(query, "en", "zh");
    }

    /**
     * Translates text, detecting the source language with {@link #langDetect(String)} first.
     *
     * @param query source text
     * @param to    target language code
     * @return the translated text
     * @throws Exception if language detection, signing or the HTTP exchange fails
     */
    public static String translate(String query, String to) throws Exception {
        String from = langDetect(query);
        return translate(query, from, to);
    }

    /**
     * Translates text of arbitrary length.
     *
     * <p>The text is split into sentences at {@code .} and {@code 。}; empty sentences are
     * dropped and every remaining sentence is re-terminated with {@code .}. Sentences are then
     * packed into fragments of at most {@link #MAX_PART_LENGTH} characters, each fragment is
     * translated separately and the results are concatenated in order. A single sentence
     * longer than the limit is still sent as one (oversized) fragment.
     *
     * @param text source text; {@code null} or empty yields an empty result
     * @param from source language code
     * @param to   target language code
     * @return the concatenated translation, or an empty string when there is nothing to translate
     * @throws Exception if signing or the HTTP exchange fails
     */
    public static String translate(String text, String from, String to) throws Exception {
        StringBuilder dst = new StringBuilder();
        if (text == null || text.isEmpty()) {
            return dst.toString();
        }
        StringBuilder part = new StringBuilder();
        for (String sentence : text.split("\\.|。")) {
            if (!StringUtils.hasLength(sentence)) {
                continue;
            }
            // Flush BEFORE appending so a fragment never grows past the limit
            // (the +1 accounts for the "." terminator added below).
            if (part.length() > 0 && part.length() + sentence.length() + 1 > MAX_PART_LENGTH) {
                dst.append(translatePart(part.toString(), from, to));
                part.setLength(0);
            }
            part.append(sentence).append(".");
        }
        if (part.length() > 0) {
            dst.append(translatePart(part.toString(), from, to));
        }
        return dst.toString();
    }

    /**
     * Translates one fragment with a single request to {@code /v2transapi}.
     *
     * @param query source fragment, expected to respect {@link #MAX_PART_LENGTH}
     * @param from  source language code
     * @param to    target language code
     * @return the text found at {@code /trans_result/data/0/dst} in the response
     * @throws Exception if signing fails, the HTTP exchange fails, or the response carries no
     *                   translation (reported as {@link IOException})
     */
    private static String translatePart(String query, String from, String to) throws Exception {
        RequestBody requestBody = getRequestBody(query, from, to);
        // Build the URL through HttpUrl so that from/to are percent-encoded.
        HttpUrl url = HttpUrl.get("https://fanyi.baidu.com/v2transapi").newBuilder()
                .addQueryParameter("from", from)
                .addQueryParameter("to", to)
                .build();
        Request request = getRequestBuilder()
                .url(url)
                .post(requestBody)
                .build();
        JsonNode json = executeForJson(request);
        JsonNode translated = json.at("/trans_result/data/0/dst");
        if (translated.isMissingNode()) {
            // Without this check an error answer would silently become an empty translation.
            throw new IOException("Translate response contains no result: " + json);
        }
        return translated.asText();
    }

    /**
     * Detects the language of a text with a request to {@code /langdetect}.
     *
     * @param query text whose language should be detected
     * @return the language code found in the {@code lan} field of the response
     * @throws IOException if the HTTP exchange fails or the response carries no {@code lan} field
     */
    public static String langDetect(String query) throws IOException {
        FormBody formBody = new FormBody.Builder()
                .add("query", query).build();
        Request request = getRequestBuilder()
                .url("https://fanyi.baidu.com/langdetect")
                .post(formBody).build();
        JsonNode json = executeForJson(request);
        JsonNode language = json.at("/lan");
        if (language.isMissingNode()) {
            throw new IOException("Language detection response contains no 'lan' field: " + json);
        }
        return language.asText();
    }

    /**
     * Computes the request signature by running the web page's JavaScript routine through
     * the {@code javax.script} API.
     *
     * <p>No synchronization is performed on the shared engine.
     */
    static class Sign {

        /**
         * JavaScript source of the signing routine. The seed is a function parameter rather
         * than being spliced into the source, so the script is constant and is evaluated once.
         */
        private static final String SIGN_SCRIPT = "function getSign(query, gtk) {\n" +
                "    function a(r, o) {\n" +
                "        for (var t = 0; t < o.length - 2; t += 3) {\n" +
                "            var a = o.charAt(t + 2);\n" +
                "            a = a >= 'a' ? a.charCodeAt(0) - 87 : Number(a), a = '+' === o.charAt(t + 1) ? r >>> a : r << a, r = '+' === o.charAt(t) ? r + a & 4294967295 : r ^ a\n" +
                "        }\n" +
                "        return r\n" +
                "    }\n" +
                "\n" +
                "    var C = null;\n" +
                "    var hash = function (r, _gtk) {\n" +
                "        var o = r.length;\n" +
                "        o > 30 && (r = '' + r.substr(0, 10) + r.substr(Math.floor(o / 2) - 5, 10) + r.substr(-10, 10));\n" +
                "        var t = void 0, t = null !== C ? C : (C = _gtk || '') || '';\n" +
                "        for (var e = t.split('.'), h = Number(e[0]) || 0, i = Number(e[1]) || 0, d = [], f = 0, g = 0; g < r.length; g++) {\n" +
                "            var m = r.charCodeAt(g);\n" +
                "            128 > m ? d[f++] = m : (2048 > m ? d[f++] = m >> 6 | 192 : (55296 === (64512 & m) && g + 1 < r.length && 56320 === (64512 & r.charCodeAt(g + 1)) ? (m = 65536 + ((1023 & m) << 10) + (1023 & r.charCodeAt(++g)), d[f++] = m >> 18 | 240, d[f++] = m >> 12 & 63 | 128) : d[f++] = m >> 12 | 224, d[f++] = m >> 6 & 63 | 128), d[f++] = 63 & m | 128)\n" +
                "        }\n" +
                "        for (var S = h, u = '+-a^+6', l = '+-3^+b+-f', s = 0; s < d.length; s++) S += d[s], S = a(S, u);\n" +
                "        return S = a(S, l), S ^= i, 0 > S && (S = (2147483647 & S) + 2147483648), S %= 1e6, S.toString() + '.' + (S ^ h)\n" +
                "    }\n" +
                "    return hash(query, gtk)\n" +
                "}\n";

        /** Script engine used to run the signing routine; {@code null} if the runtime has none. */
        static ScriptEngine engine = new ScriptEngineManager().getEngineByName("javascript");

        /** The engine instance {@link #SIGN_SCRIPT} was last evaluated in, if any. */
        private static ScriptEngine preparedEngine;

        /**
         * Generates the signature for a query by invoking the JavaScript routine.
         *
         * @param query source text
         * @param gtk   signature seed
         * @return the signature
         * @throws ScriptException       if the script fails to evaluate or run
         * @throws NoSuchMethodException if the engine cannot find the signing function
         * @throws IllegalStateException if the runtime provides no JavaScript engine
         */
        static String getSign(String query, String gtk) throws ScriptException, NoSuchMethodException {
            if (engine == null) {
                throw new IllegalStateException(
                        "No JavaScript ScriptEngine is available; add a javax.script JavaScript engine to the classpath");
            }
            // Define the function only once per engine instead of on every call.
            if (preparedEngine != engine) {
                engine.eval(SIGN_SCRIPT);
                preparedEngine = engine;
            }
            Invocable invocable = (Invocable) engine;
            Object res = invocable.invokeFunction("getSign", query, gtk);
            return res.toString();
        }
    }

    /**
     * Demo: translates an English word to Chinese, then a Japanese phrase to Chinese with
     * automatic source-language detection, printing both results.
     */
    public static void main(String[] args) throws Exception {
        String query = "hello";
        String result = enToZh(query);
        System.out.println(result);
        query = "こんにちは";
        result = translate(query, "zh");
        System.out.println(result);
    }
}

依赖

<dependencies>
    <!--
      NOTE(review): the code above also imports org.springframework.util.StringUtils and uses the
      javax.script API to run JavaScript; neither is covered by the artifacts listed here.
      Presumably spring-core (or an equivalent) and a runtime that ships a JavaScript
      ScriptEngine are required as well - confirm against your build.
    -->
    <!-- Jackson, used for JSON processing -->
    <dependency>
        <groupId>com.fasterxml.jackson.core</groupId>
        <artifactId>jackson-databind</artifactId>
        <version>2.12.3</version>
    </dependency>
    <dependency>
        <groupId>com.fasterxml.jackson.core</groupId>
        <artifactId>jackson-core</artifactId>
        <version>2.12.3</version>
    </dependency>
    <!-- OkHttp 3, used to send HTTP requests -->
    <dependency>
        <groupId>com.squareup.okhttp3</groupId>
        <artifactId>okhttp</artifactId>
        <version>4.9.1</version>
    </dependency>
</dependencies>

解释

  1. 以下是百度翻译接口的完整HTTP请求,格式是idea的http文件格式
POST https://fanyi.baidu.com/v2transapi?from=en&to=zh
Connection: keep-alive
sec-ch-ua: "Chromium";v="94", "Google Chrome";v="94", ";Not A Brand";v="99"
Accept: */*
X-Requested-With: XMLHttpRequest
sec-ch-ua-mobile: ?0
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36
sec-ch-ua-platform: "Windows"
Origin: https://fanyi.baidu.com
Sec-Fetch-Site: same-origin
Sec-Fetch-Mode: cors
Sec-Fetch-Dest: empty
Referer: https://fanyi.baidu.com/
Accept-Language: zh-CN,zh;q=0.9
Cookie: BAIDUID=D7029489CFEEAB420F853746FCA71824:FG=1
Content-Type: application/x-www-form-urlencoded; charset=UTF-8

from=en&to=zh&query=hello&transtype=realtime&simple_means_flag=3&sign=54706.276099&token=1bf7c3c20520a2203e4668c83073bfba&domain=common
  2. 在以上请求中有如下变量
  • cookie
  • from,原文的语种简称,英文是en
  • to,译文的语种简称,中文是zh
  • query,原文(待翻译的文本)
  • sign,签名,原网页中是在js中生成,需要一个种子签名,可在页面通过window.gtk获取
  • token,令牌,原网页中的变量,可在页面通过window.common.token获取
  3. 代码中的Sign类就是网页中生成sign的js方法,利用java去执行
posted @ 2021-11-29 16:09  小小爬虫  阅读(1819)  评论(1编辑  收藏  举报