python 爬虫1

简单访问有道词典的翻译界面,将页面翻译功能简单呈现

import urllib.request
import urllib.parse
import json

# Minimal translation client: read text from stdin, POST it to the Youdao
# translate endpoint, print the raw reply and then the translated text.
content = input("请输入需要翻译的内容:")
url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule&smartresult=ugc&sessionFrom=dict2.index'

# Form fields sent as the POST body. 'i' carries the user's text; the
# remaining fields are fixed values. Key order is kept as in the original
# so the encoded body is unchanged.
form = {
    'type': 'AUTO',
    'i': content,
    'doctype': 'json',
    'xmlVersion': '1.8',
    'keyfrom': 'fanyi.web',
    'ue': 'UTF-8',
    'action': 'FY_BY_ENTER',
    'typoResult': 'true',
}

# urlopen() requires the body as bytes: URL-encode, then UTF-8 encode.
data = urllib.parse.urlencode(form).encode('utf-8')

# Supplying a data argument makes urlopen() send a POST. The with-block
# closes the HTTP response even if read() or decode() raises.
with urllib.request.urlopen(url, data) as response:
    html = response.read().decode('utf-8')
print(html)

# The reply is parsed as JSON; the translated text is read from the first
# entry of the nested 'translateResult' list.
target = json.loads(html)
print("翻译结果: %s" % (target['translateResult'][0][0]['tgt']))

结果如下:

请输入需要翻译的内容:长城
                                                                                                                                              {"type":"ZH_CN2EN","errorCode":0,"elapsedTime":1,"translateResult":[[{"src":"长城","tgt":"The Great Wall"}]],"smartResult":{"type":1,"entries":["","The Great Wall"]}}

翻译结果: The Great Wall
>>> ================================ RESTART ================================
>>> 
请输入需要翻译的内容:English
                                                                                                                                                                                                                                      {"type":"EN2ZH_CN","errorCode":0,"elapsedTime":1,"translateResult":[[{"src":"English","tgt":"英语"}]],"smartResult":{"type":1,"entries":["","n. 英语;英国人;英文;英格兰人","adj. 英国人的;英国的;英文的","vt. 把\u2026译成英语"]}}

翻译结果: 英语

增加请求头(request headers)

有两种方法:

  • Request的headers参数修改
  • Request.add_header()方法修改
import urllib.request
import urllib.parse
import json

# Translation client that sends a browser-like User-Agent header: read text
# from stdin, POST it to the Youdao translate endpoint, print the raw reply
# and then the translated text.
content = input("请输入需要翻译的内容:")
url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule&smartresult=ugc&sessionFrom=dict2.index'

# Extra request headers, passed to Request() below.
head = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36',
}

# Form fields sent as the POST body. 'i' carries the user's text; the
# remaining fields are fixed values. Key order is kept as in the original
# so the encoded body is unchanged.
form = {
    'type': 'AUTO',
    'i': content,
    'doctype': 'json',
    'xmlVersion': '1.8',
    'keyfrom': 'fanyi.web',
    'ue': 'UTF-8',
    'action': 'FY_BY_ENTER',
    'typoResult': 'true',
}

# The request body must be bytes: URL-encode, then UTF-8 encode.
data = urllib.parse.urlencode(form).encode('utf-8')

# Method 1: pass the headers dict as the third argument of Request().
req = urllib.request.Request(url, data, head)
# Method 2 (alternative): set the header on the Request object afterwards.
# req.add_header('User-Agent','Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36')

# The with-block closes the HTTP response even if read() or decode() raises.
with urllib.request.urlopen(req) as response:
    html = response.read().decode('utf-8')
print(html)

# The reply is parsed as JSON; the translated text is read from the first
# entry of the nested 'translateResult' list.
target = json.loads(html)
print("翻译结果: %s" % (target['translateResult'][0][0]['tgt']))

结果如下:

请输入需要翻译的内容:明月
                                                                                                                                              {"type":"ZH_CN2EN","errorCode":0,"elapsedTime":8,"translateResult":[[{"src":"明月","tgt":"The bright moon"}]],"smartResult":{"type":1,"entries":["","bright moon"]}}

翻译结果: The bright moon

增加time.sleep

import urllib.request
import urllib.parse
import json
import time

# Interactive translation loop: keep prompting for text, POST each input to
# the Youdao translate endpoint, print the raw reply and the translated
# text, then pause before the next prompt. Entering 'q!' ends the loop.

# The endpoint and headers do not change between iterations, so they are
# defined once here instead of being rebuilt on every pass.
url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule&smartresult=ugc&sessionFrom=dict2.index'
head = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36',
}

while True:
    content = input('请输入需要翻译的内容(输入"q!"退出程序):')
    if content == 'q!':
        break

    # Form fields sent as the POST body. 'i' carries the user's text; the
    # remaining fields are fixed values. Key order is kept as in the
    # original so the encoded body is unchanged.
    form = {
        'type': 'AUTO',
        'i': content,
        'doctype': 'json',
        'xmlVersion': '1.8',
        'keyfrom': 'fanyi.web',
        'ue': 'UTF-8',
        'action': 'FY_BY_ENTER',
        'typoResult': 'true',
    }

    # The request body must be bytes: URL-encode, then UTF-8 encode.
    data = urllib.parse.urlencode(form).encode('utf-8')

    # Headers are supplied through the Request constructor.
    req = urllib.request.Request(url, data, head)
    # Alternative: set the header on the Request object afterwards.
    # req.add_header('User-Agent','Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36')

    # The with-block closes each HTTP response even if read() or decode()
    # raises, so responses are not left open across loop iterations.
    with urllib.request.urlopen(req) as response:
        html = response.read().decode('utf-8')
    print(html)

    # The reply is parsed as JSON; the translated text is read from the
    # first entry of the nested 'translateResult' list.
    target = json.loads(html)
    print("翻译结果: %s" % (target['translateResult'][0][0]['tgt']))

    # Wait 5 seconds after each translation before prompting again.
    time.sleep(5)

结果是每完成一次翻译后暂停5秒,再进行下一次:

请输入需要翻译的内容(输入"q!"退出程序):皮卡丘
                                                                                                                                              {"type":"ZH_CN2EN","errorCode":0,"elapsedTime":1,"translateResult":[[{"src":"皮卡丘","tgt":"Pikachu"}]],"smartResult":{"type":1,"entries":["","Pikachu"]}}

翻译结果: Pikachu
请输入需要翻译的内容(输入"q!"退出程序):盗墓笔记
                                                                                                                                              {"type":"ZH_CN2EN","errorCode":0,"elapsedTime":1,"translateResult":[[{"src":"盗墓笔记","tgt":"Grave notes"}]],"smartResult":{"type":1,"entries":["","Time Raiders"]}}

翻译结果: Grave notes
请输入需要翻译的内容(输入"q!"退出程序):
posted @ 2016-06-23 03:14  li_volleyball  阅读(341)  评论(0)  编辑  收藏  举报