python 爬虫1
简单访问有道词典的翻译界面,将页面翻译功能简单呈现
import urllib.request
import urllib.parse
import json
content = input("请输入需要翻译的内容:")
url= 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule&smartresult=ugc&sessionFrom=dict2.index'
data = {}
data['type']='AUTO'
data['i']= content
data['doctype']='json'
data['xmlVersion']='1.8'
data['keyfrom']='fanyi.web'
data['ue']='UTF-8'
data['action']='FY_BY_ENTER'
data['typoResult']='true'
data= urllib.parse.urlencode(data).encode('utf-8')
response = urllib.request.urlopen(url,data)
html = response.read().decode('utf-8')
print (html)
target = json.loads(html)
print("翻译结果: %s" %(target['translateResult'][0][0]['tgt']))
结果如下:
请输入需要翻译的内容:长城
{"type":"ZH_CN2EN","errorCode":0,"elapsedTime":1,"translateResult":[[{"src":"长城","tgt":"The Great Wall"}]],"smartResult":{"type":1,"entries":["","The Great Wall"]}}
翻译结果: The Great Wall
>>> ================================ RESTART ================================
>>>
请输入需要翻译的内容:English
{"type":"EN2ZH_CN","errorCode":0,"elapsedTime":1,"translateResult":[[{"src":"English","tgt":"英语"}]],"smartResult":{"type":1,"entries":["","n. 英语;英国人;英文;英格兰人","adj. 英国人的;英国的;英文的","vt. 把\u2026译成英语"]}}
翻译结果: 英语
增加request header文件
有两种方法:
- Request的headers参数修改
- Request.add_header()方法修改
import urllib.request
import urllib.parse
import json
content = input("请输入需要翻译的内容:")
url= 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule&smartresult=ugc&sessionFrom=dict2.index'
head={}
head['User-Agent']='Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36'
data = {}
data['type']='AUTO'
data['i']= content
data['doctype']='json'
data['xmlVersion']='1.8'
data['keyfrom']='fanyi.web'
data['ue']='UTF-8'
data['action']='FY_BY_ENTER'
data['typoResult']='true'
data= urllib.parse.urlencode(data).encode('utf-8')
req = urllib.request.Request(url,data,head)
'''
req.add_header('User-Agent','Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36')
'''
response = urllib.request.urlopen(req)
html = response.read().decode('utf-8')
print (html)
target = json.loads(html)
print("翻译结果: %s" %(target['translateResult'][0][0]['tgt']))
结果如下:
请输入需要翻译的内容:明月
{"type":"ZH_CN2EN","errorCode":0,"elapsedTime":8,"translateResult":[[{"src":"明月","tgt":"The bright moon"}]],"smartResult":{"type":1,"entries":["","bright moon"]}}
翻译结果: The bright moon
增加time.sleep
import urllib.request
import urllib.parse
import json
import time
while True:
content = input('请输入需要翻译的内容(输入"q!"退出程序):')
if content == 'q!':
break
url= 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule&smartresult=ugc&sessionFrom=dict2.index'
head={}
head['User-Agent']='Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36'
data = {}
data['type']='AUTO'
data['i']= content
data['doctype']='json'
data['xmlVersion']='1.8'
data['keyfrom']='fanyi.web'
data['ue']='UTF-8'
data['action']='FY_BY_ENTER'
data['typoResult']='true'
data= urllib.parse.urlencode(data).encode('utf-8')
req = urllib.request.Request(url,data,head)
'''
req.add_header('User-Agent','Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36')
'''
response = urllib.request.urlopen(req)
html = response.read().decode('utf-8')
print (html)
target = json.loads(html)
print("翻译结果: %s" %(target['translateResult'][0][0]['tgt']))
time.sleep(5)
结果是执行完每一次程序后暂停5秒,再执行下一次:
请输入需要翻译的内容(输入"q!"退出程序):皮卡丘
{"type":"ZH_CN2EN","errorCode":0,"elapsedTime":1,"translateResult":[[{"src":"皮卡丘","tgt":"Pikachu"}]],"smartResult":{"type":1,"entries":["","Pikachu"]}}
翻译结果: Pikachu
请输入需要翻译的内容(输入"q!"退出程序):盗墓笔记
{"type":"ZH_CN2EN","errorCode":0,"elapsedTime":1,"translateResult":[[{"src":"盗墓笔记","tgt":"Grave notes"}]],"smartResult":{"type":1,"entries":["","Time Raiders"]}}
翻译结果: Grave notes
请输入需要翻译的内容(输入"q!"退出程序):
```
专注数据分析
欢迎转载并注明出处
```