Python 爬取图书图片和地址

#-*- coding:utf-8 -*-

import xlwt
import urllib
import re

def getHtml(url):
    """Download ``url`` and return the raw response body.

    Args:
        url: Address of the page to fetch.

    Returns:
        Whatever ``read()`` on the opened response yields (the page content).

    NOTE: relies on ``urllib.urlopen``, i.e. the Python 2 ``urllib`` API.
    """
    page = urllib.urlopen(url)
    try:
        return page.read()
    finally:
        # Always release the connection, even when read() raises.
        page.close()
def get_book_message(url):
    """Fetch ``url`` and map each book title on the page to an image URL.

    Titles are the link texts that follow ``rel="bookmark">``; image URLs are
    the ``src="...jpg"`` attribute values.

    Args:
        url: Address of the listing page to scrape.

    Returns:
        dict mapping title -> image URL. If a title occurs more than once,
        the last occurrence wins.

    NOTE: titles and images are paired purely by their order of appearance
    in the page, and the pairing stops at the shorter of the two lists. Any
    extra ``.jpg`` image on the page (e.g. a logo) would shift the pairing.
    """
    page = urllib.urlopen(url)
    try:
        html = page.read()
    finally:
        # Close the connection even when read() fails.
        page.close()

    image_urls = re.findall(r'src="(.+?\.jpg)"', html)
    titles = re.findall(r'rel="bookmark">(.+?)</a>', html)
    return dict(zip(titles, image_urls))
def run():
    """Scrape the allitebooks.com front page into ``MyBookMessage.xls``.

    Writes one header row followed by one row per book: column 1 holds the
    image address and column 2 the title, matching their headers. Column 0
    is left empty, as in the original layout.
    """
    value = get_book_message('http://www.allitebooks.com/')
    # UTF-8 is a superset of ASCII, so ASCII-only text is unaffected, while
    # non-ASCII byte strings scraped from the page can still be decoded
    # (assumes the page is UTF-8 encoded -- TODO confirm).
    workbook = xlwt.Workbook(encoding='utf-8')
    worksheet = workbook.add_sheet('MyBookMessage')
    worksheet.write(0, 1, u'地址')  # header: "address"
    worksheet.write(0, 2, u'标题')  # header: "title"
    # `value` maps title -> address; put each under its matching header.
    for row, (title, address) in enumerate(value.items(), 1):
        worksheet.write(row, 1, address)
        worksheet.write(row, 2, title)
    workbook.save('MyBookMessage.xls')
# Script entry point: this call executes at module level, so the scrape also
# runs whenever this file is imported.
run()

 

posted @ 2017-10-17 16:31  探出的头  阅读(209)  评论(0)  编辑  收藏  举报