Python 爬虫案例——爬取有道(youdao)在线翻译内容
Python 爬虫案例——爬取有道(youdao)在线翻译内容
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2018/3/4 20:45
# @Author  : hyang
# @Site    :
# @File    : scrapy_youdao.py
# @Software: PyCharm
"""Query the Youdao online translator from the command line.

The script fetches the site's cookies, computes the ``salt``/``sign`` pair the
web client sends with every request, posts the word to the translate endpoint
and prints the JSON reply.
"""
import hashlib as hasher
import json
import random
import ssl
import time

import requests
import urllib3

# Works around "<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate
# verify failed" seen in some environments.
# NOTE(review): this replaces the stdlib default HTTPS context factory with an
# unverified one for the whole process. Both URLs below are plain http, so it
# looks unnecessary here -- confirm before removing.
ssl._create_default_https_context = ssl._create_unverified_context
urllib3.disable_warnings()  # silence urllib3 warnings

start_url = 'http://fanyi.youdao.com/'
post_url = 'http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule'
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36",
    "Referer": "http://fanyi.youdao.com/",
}

# Seconds to wait for the server before giving up. Without a timeout,
# requests may block forever on an unresponsive host.
REQUEST_TIMEOUT = 10

# Values hard-coded in the site's fanyi.min.js signing routine (see below).
_CLIENT = "fanyideskweb"
_SECRET = "ebSeFb%=XZ%T[KZ)c(sy!"

# Reference: the signing code in the site's fanyi.min.js (excerpt, truncated):
#
# t.asyRequest = function(e) {
#     var t = e.i,
#         i = "" + ((new Date).getTime() + parseInt(10 * Math.random(), 10)),
#         o = n.md5("fanyideskweb" + t + i + "ebSeFb%=XZ%T[KZ)c(sy!");
#     r && r.abort(),
#     r = n.ajax({
#         type: "POST",
#         contentType: "application/x-www-form-urlencoded; charset=UTF-8",
#         url: "/bbk/translate_m.do",
#         data: {
#             i: e.i, client: "fanyideskweb", salt: i, sign: o,
#             tgt: e.tgt, from: e.from, to: e.to,
#             doctype: "json", version: "3.0", cache: !0
#         },
#         ...


def get_JSKey(r_word):
    """Build the ``salt``/``sign`` pair for *r_word*.

    Mirrors the JavaScript excerpt above: ``salt`` is the current time in
    milliseconds plus a random integer in 0..9, and ``sign`` is the MD5 hex
    digest of client id + word + salt + secret, encoded as UTF-8.

    :param r_word: the text to translate.
    :return: ``{"salt": <int>, "sign": <32-char hex str>}``.
    """
    salt = int(time.time() * 1000) + random.randint(0, 9)
    payload = (_CLIENT + r_word + str(salt) + _SECRET).encode('utf-8')
    sign = hasher.md5(payload).hexdigest()
    return {"salt": salt, "sign": sign}


def get_cookies(url, timeout=REQUEST_TIMEOUT):
    """Fetch *url* with a GET request and return the cookies the server set.

    :param url: page to request.
    :param timeout: seconds to wait for the server (passed to ``requests``).
    :return: the response's cookie jar.
    """
    return requests.get(url, timeout=timeout).cookies


def get_content(r_word, url, cookies, js_key, timeout=REQUEST_TIMEOUT):
    """Post *r_word* to the translate endpoint, print and return the reply.

    :param r_word: the text to translate.
    :param url: translate endpoint to post to.
    :param cookies: cookies to send with the request (see ``get_cookies``).
    :param js_key: dict with ``"salt"`` and ``"sign"`` (see ``get_JSKey``).
    :param timeout: seconds to wait for the server (passed to ``requests``).
    :return: the response body parsed as JSON.
    """
    post_data = {
        "i": r_word,
        "from": "AUTO",
        "to": "AUTO",
        "smartresult": "dict",
        "client": "fanyideskweb",
        "salt": js_key["salt"],  # salt
        "sign": js_key["sign"],  # sign
        "doctype": "json",
        "version": "2.1",
        "keyfrom": "fanyi.web",
        "action": "FY_BY_REALTIME",
        "typoResult": "false",
    }
    response = requests.post(
        url, headers=headers, data=post_data, cookies=cookies, timeout=timeout
    )
    json_str = response.json()
    print(json_str)
    # Also hand the parsed reply back so callers can use it, not just read it.
    return json_str


def main():
    """Prompt for a word, translate it and print the intermediate values."""
    r_word = input("please input the word you want to translate : ")
    cookies = get_cookies(start_url)  # obtain the site cookies
    print('cookies=>', cookies)
    js_key = get_JSKey(r_word)
    print("js_key=>", js_key)
    get_content(r_word, post_url, cookies, js_key)  # prints the JSON reply


if __name__ == '__main__':
    main()
【推荐】还在用 ECharts 开发大屏?试试这款永久免费的开源 BI 工具!
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· .NET制作智能桌面机器人:结合BotSharp智能体框架开发语音交互
· 软件产品开发中常见的10个问题及处理方法
· .NET 原生驾驭 AI 新基建实战系列:向量数据库的应用与畅想
· 从问题排查到源码分析:ActiveMQ消费端频繁日志刷屏的秘密
· 一次Java后端服务间歇性响应慢的问题排查记录
· 互联网不景气了那就玩玩嵌入式吧,用纯.NET开发并制作一个智能桌面机器人(四):结合BotSharp
· 一个基于 .NET 开源免费的异地组网和内网穿透工具
· 《HelloGitHub》第 108 期
· Windows桌面应用自动更新解决方案SharpUpdater5发布
· 我的家庭实验室服务器集群硬件清单