Python爬虫案例——爬取youdao在线翻译内容

Python爬虫案例——爬取youdao在线翻译内容

 

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2018/3/4 20:45
# @Author  : hyang
# @Site    : 
# @File    : scrapy_youdao.py
# @Software: PyCharm
import json
import hashlib as hasher
import requests
import random
import time
import ssl
import urllib3
# Work around "<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate
# verify failed>" seen in some environments: swap the ssl module's default
# HTTPS context factory for the unverified one (certificate checks are off
# for anything that uses that factory).
ssl._create_default_https_context = ssl._create_unverified_context
urllib3.disable_warnings() # silence urllib3 warnings
# Landing page; fetched first to obtain the cookies sent with the translate request.
start_url = 'http://fanyi.youdao.com/'
# Endpoint the translation form data is POSTed to.
post_url = 'http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule'

# Headers sent with the translation POST: a desktop Chrome User-Agent and a
# Referer pointing at the landing page.
headers = {
        "User-Agent":"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36",
        "Referer": "http://fanyi.youdao.com/"
}

# Build the salt/sign pair that the site's JavaScript would compute
def get_JSKey(r_word):
    """Return the ``salt`` and ``sign`` values for translating *r_word*.

    The salt is the current time in milliseconds plus a random integer in
    0..9. The sign is the hex MD5 digest of the client name, the text, the
    salt and a fixed secret joined together (UTF-8 encoded).

    :param r_word: text to be translated (a ``str``).
    :return: dict with keys ``"salt"`` (int) and ``"sign"`` (hex string).
    """
    millis = int(time.time() * 1000)
    salt = millis + random.randint(0, 9)
    pieces = ("fanyideskweb", r_word, str(salt), "ebSeFb%=XZ%T[KZ)c(sy!")
    digest = hasher.md5("".join(pieces).encode('utf-8')).hexdigest()
    return {"salt": salt, "sign": digest}

def get_cookies(url, timeout=10):
    """Send a GET request to *url* and return the response's cookies.

    :param url: address to fetch.
    :param timeout: seconds to wait for the server before giving up;
        forwarded to ``requests.get``. Defaults to 10.
    :return: the ``cookies`` attribute of the response.
    :raises requests.exceptions.RequestException: if the request fails or
        times out.
    """
    # Without an explicit timeout requests can block indefinitely on an
    # unresponsive server.
    response = requests.get(url, timeout=timeout)
    return response.cookies

def get_content(r_word, url, cookies, js_key, timeout=10):
    """POST a translation request and print and return the decoded JSON reply.

    :param r_word: text to translate; sent as form field ``i``.
    :param url: endpoint the form data is posted to.
    :param cookies: cookies sent along with the request.
    :param js_key: mapping providing the ``"salt"`` and ``"sign"`` form fields.
    :param timeout: seconds to wait for the server before giving up;
        forwarded to ``requests.post``. Defaults to 10.
    :return: the response body decoded from JSON.
    :raises requests.exceptions.RequestException: if the request fails or
        times out.
    :raises ValueError: if the response body is not valid JSON.
    """
    post_data = {
        "i": r_word,
        "from": "AUTO",
        "to": "AUTO",
        "smartresult": "dict",
        "client": "fanyideskweb",
        "salt": js_key["salt"],
        "sign": js_key["sign"],
        "doctype": "json",
        "version": "2.1",
        "keyfrom": "fanyi.web",
        "action": "FY_BY_REALTIME",
        "typoResult": "false",
    }
    # Without an explicit timeout requests can block indefinitely on an
    # unresponsive server.
    response = requests.post(
        url,
        headers=headers,
        data=post_data,
        cookies=cookies,
        timeout=timeout,
    )
    result = response.json()
    print(result)
    # Hand the decoded reply back so callers can use it, not only read it
    # on stdout.
    return result


if __name__ == '__main__':
    # Reference only: excerpt of the site's JS signing code (fanyi.min.js).
    # The string literal below is never used at runtime; it shows the sign
    # being built as md5("fanyideskweb" + text + salt + fixed secret), which
    # is the same composition get_JSKey uses.
    '''
     t.asyRequest = function(e) {
        var t = e.i,
        i = "" + ((new Date).getTime() + parseInt(10 * Math.random(), 10)),
        o = n.md5("fanyideskweb" + t + i + "ebSeFb%=XZ%T[KZ)c(sy!");
        r && r.abort(),
        r = n.ajax({
            type: "POST",
            contentType: "application/x-www-form-urlencoded; charset=UTF-8",
            url: "/bbk/translate_m.do",
            data: {
                i: e.i,
                client: "fanyideskweb",
                salt: i,
                sign: o,
                tgt: e.tgt,
                from: e.from,
                to: e.to,
                doctype: "json",
                version: "3.0",
                cache: !0
            },
    '''
    # Read the text to translate from standard input.
    r_word = input("please input the word you want to translate : ")
    cookies = get_cookies(start_url)  # fetch cookies from the landing page
    print('cookies=>', cookies)
    js_key = get_JSKey(r_word)  # salt/sign pair for this text
    print("js_key=>",js_key)
    get_content(r_word,post_url,cookies,js_key)  # post the request; get_content prints the JSON reply

 

posted @ 2018-03-04 23:13  一只小小的寄居蟹  阅读(999)  评论(0)  编辑  收藏  举报