两个小应用场景

1.Python 实现有道翻译命令行版

完全参考：http://www.cnblogs.com/BeginMan/p/3644283.html

（1）申请有道翻译API：http://fanyi.youdao.com/openapi?path=data-mode

（2）拷贝代码到文件，填上 API_KEY 和 KEYFORM，运行之（见代码一）

2. Python爬取豆瓣读书

完全参考知乎上面的： http://plough-man.com/?p=379

稍微改了一点代码（BeautifulSoup 的导入方式），可能是因为我用的库版本和原作者的不一样。（见代码二）

代码一：（有道翻译API）

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Date    : 2014-04-03 21:12:16
# @Function: 有道翻译命令行版
# @Author  : BeginMan

import os
import sys
import urllib
import urllib2
# Python 2 idiom: reload sys so that setdefaultencoding is reachable, then
# make implicit str <-> unicode conversions use UTF-8 instead of ASCII.
reload(sys)
sys.setdefaultencoding("utf-8")
import simplejson as json
import platform
import datetime

# Credentials for the Youdao open API; fill in your own values.
# GetTranslate sends them as the 'key' and 'keyfrom' request parameters.
API_KEY = '******'
KEYFORM = '******'

    
def GetTranslate(txt):
    """Query the Youdao open API for *txt* and return the decoded JSON reply.

    Args:
        txt: The text to translate (byte string or unicode).

    Returns:
        The object produced by ``json.loads`` on the response body.

    Raises:
        Nothing is caught here; errors from ``urllib2.urlopen`` and
        ``json.loads`` propagate to the caller.
    """
    # Encode explicitly so the query string is built from UTF-8 bytes and
    # does not depend on the interpreter-wide default encoding.
    if isinstance(txt, unicode):
        txt = txt.encode('utf-8')
    params = {
        'keyfrom': KEYFORM,
        'key': API_KEY,
        'type': 'data',
        'doctype': 'json',
        'version': 1.1,
        'q': txt,
    }
    url = 'http://fanyi.youdao.com/openapi.do' + '?' + urllib.urlencode(params)
    response = urllib2.urlopen(urllib2.Request(url))
    # urllib2 responses are not context managers in Python 2, so the
    # connection is released by hand even when reading or parsing fails.
    try:
        return json.loads(response.read())
    finally:
        response.close()
    
def Sjson(json_data):
    """Print a translation result and optionally append it to today's wordbook.

    Args:
        json_data: Decoded API reply (a dict). The keys read here are
            'query', 'basic' (with 'phonetic' and 'explains') and 'web'
            (a list of dicts with 'key' and 'value').

    Side effects:
        Prints the formatted result, prompts on stdin and, when the answer
        is y/Y, appends an <item> element to a per-day XML file. The target
        directory must already exist; it is not created here.
    """
    query = json_data.get('query', '')      # text that was looked up
    basic = json_data.get('basic', '')      # dictionary entry, may be absent
    sequence = json_data.get('web', [])     # web phrases, may be absent
    phonetic, explains_txt, seq_txt, log_word_explains = '', '', '', ''

    # Extra explanations: one per line on screen, comma-terminated in the file.
    if basic:
        phonetic = basic.get('phonetic', '')
        explains = basic.get('explains', [])
        explains_txt = ''.join(item + '\n' for item in explains)
        log_word_explains = ''.join(item + ',' for item in explains)

    # Web phrases: the phrase on one line, its comma-terminated values on the next.
    if sequence:
        for phrase in sequence:
            seq_txt += phrase['key'] + '\n'
            seq_txt += ''.join(value + ',' for value in phrase['value']) + '\n'

    print_format = '*' * 40 + '\n'
    print_format += u'查询对象:  %s [%s]\n' % (query, phonetic)
    print_format += explains_txt
    print_format += '-' * 20 + '\n' + seq_txt
    print_format += '*' * 40 + '\n'
    print(print_format)

    choices = raw_input(u'是否写入单词本,回复（y/n）:')
    if choices not in ['y', 'Y']:
        return

    today = datetime.date.today()
    if platform.system().lower() == 'windows':
        filepath = r'E:\pyword\%s.xml' % today
    else:
        filepath = r'/home/beginman/pyword/%s.xml' % today

    # Decide whether the opening tag is needed from the file's size on disk
    # rather than by reading back through an append-mode handle.
    is_new = not os.path.exists(filepath) or os.path.getsize(filepath) == 0
    entry = (
        u'    <item>\n'
        u'    <word>%s</word>\n'
        u'    <trans><![CDATA[%s]]></trans>\n'
        u'    <phonetic><![CDATA[[%s]]]></phonetic>\n'
        u'    <tags>%s</tags>\n'
        u'    <progress>1</progress>\n'
        u'    </item>\n\n'
    ) % (query, log_word_explains, phonetic, today)

    with open(filepath, 'a') as fp:
        if is_new:
            fp.write('<wordbook>\n')
        # The entry is written for every saved word, not only for the first
        # one of the day.
        fp.write(entry.encode('utf-8'))
    print(u'写入成功.')

        


def main():
    """Prompt for text in a loop and show the translation of each non-empty input.

    The loop ends when the prompt hits end-of-input or is interrupted with
    Ctrl-C; errors raised while translating or displaying still propagate.
    """
    while True:
        try:
            txt = raw_input(u'请输入要查询的文本：\n')
        except (EOFError, KeyboardInterrupt):
            # Nothing more to read: leave the loop instead of ending with a
            # traceback.
            break
        if txt:
            Sjson(GetTranslate(txt))

# Start the interactive loop only when run as a script, not when imported.
if __name__ == '__main__':
    main()

代码二（爬取豆瓣读书）：

#!/usr/bin/env python
# encoding: utf-8
# Python 2.7.3 test OK
# Switch the default str encoding from ascii to utf8 (or gb18030)
import sys
reload(sys)
sys.setdefaultencoding('utf8')
import time
import requests
#from bs4 import BeautifulSoup
from BeautifulSoup import BeautifulSoup

# Name of the output file written at the end of the script.
file_name = 'book_list.txt'
file_content = '' # the content that will finally be written to the file
# First line of the output: a "generated at" timestamp.
file_content += '生成时间：' + time.asctime()

def _node_text(node, default=''):
    """Return the stripped ``.string`` of a soup node, or *default* if it is missing."""
    if node is None or node.string is None:
        return default
    return node.string.strip()


def book_spider(book_tag):
    """Fetch the Douban book list for *book_tag* and append it to ``file_content``.

    Args:
        book_tag: Tag name interpolated into the listing URL.

    Side effects:
        Performs one HTTP GET and extends the module-level ``file_content``
        with a section header followed by one entry per book found. Books
        without a title link are skipped; a missing rating is shown as
        'N/A'.
    """
    global file_content

    url = "http://www.douban.com/tag/%s/book" % book_tag
    # Only the body text of the response is needed.
    plain_text = requests.get(url).text
    soup = BeautifulSoup(plain_text)

    title_divide = '\n' + '--' * 30 + '\n' + '--' * 30 + '\n'
    file_content += title_divide + '\t' * 4 + \
            book_tag + '：' + title_divide

    # Container that holds the book entries for this tag.
    list_soup = soup.find('div', {'class': 'mod book-list'})
    if list_soup is None:
        # No list container in the page: keep the section header and return
        # so that one tag cannot abort the whole run.
        return

    count = 1
    for book_info in list_soup.findAll('dd'):
        title = _node_text(book_info.find('a', {'class': 'title'}))
        if not title:
            continue

        desc = _node_text(book_info.find('div', {'class': 'desc'}))
        desc_list = desc.split('/')
        # The last three '/'-separated fields are treated as publication
        # info, everything before them as authors/translators.
        author_info = '作者/译者： ' + '/'.join(desc_list[0:-3])
        pub_info = '出版信息： ' + '/'.join(desc_list[-3:])
        rating = _node_text(
            book_info.find('span', {'class': 'rating_nums'}), 'N/A')
        file_content += "*%d\t《%s》\t评分：%s\n\t%s\n\t%s\n\n" % (
                count, title, rating, author_info, pub_info)
        count += 1


def do_spider(book_lists):
    """Crawl every tag in *book_lists*, in order, by calling ``book_spider``."""
    for tag_name in book_lists:
        book_spider(tag_name)

# Tags to crawl; each one is passed to book_spider through do_spider.
book_lists = ['心理学','人物传记','中国历史','旅行','生活','科普']
do_spider(book_lists)

# Write the final result to the file; the with-block closes the handle even
# if the write raises.
with open(file_name, 'w') as f:
    f.write(file_content)

爬取内容局部：

生成时间：Mon Jun 15 16:49:00 2015
------------------------------------------------------------
------------------------------------------------------------
				心理学：
------------------------------------------------------------
------------------------------------------------------------
*1	《沟通的艺术（插图修订第14版）》	评分：8.7
	作者/译者： [美]罗纳德·B·阿德勒 / 拉塞尔·F·普罗科特 / 黄素菲 / 李恩 
	出版信息：  世界图书出版公司·后浪出版公司 / 2015-1 / 68.00元

*2	《心理学与生活》	评分：8.7
	作者/译者： [美] 理查德·格里格 / 菲利普·津巴多 / 王垒 / 王甦 等 
	出版信息：  人民邮电出版社 / 2003-10 / 88.00元

*3	《万万没想到》	评分：8.7
	作者/译者： 万维钢（同人于野） 
	出版信息：  电子工业出版社 / 2014-10-1 / 39.80元

*4	《自控力》	评分：8.3
	作者/译者： [美]  凯利·麦格尼格尔 / 王岑卉 
	出版信息：  文化发展出版社(原印刷工业出版社) / 2012-8 / 39.80元

*5	《学会提问》	评分：8.6
	作者/译者： 布朗 / 赵玉芳 / 向景辉 
	出版信息：  中国轻工业出版社 / 2006-1 / 18.00元

*6	《献给阿尔吉侬的花束》	评分：9.3
	作者/译者： （美）丹尼尔•凯斯 / Daniel Keyes / 陈澄和 
	出版信息：  广西师范大学出版社 / 2015-4 / 36.00元

posted @ 2015-06-15 17:10 baopu 阅读(252) 评论(0) 编辑收藏举报

刷新页面返回顶部

baopu

两个小应用场景

公告