Handling Excel files with Python

#!/bin/python
#coding=utf-8
import urllib,xlrd,lxml.html,re,pymongo,xlwt
# Module-level log file, opened (and truncated, mode 'w') as soon as the module
# is loaded. readExcelData writes to it the rows whose Baidu lookup raised.
# NOTE(review): nothing in the visible source closes or flushes this handle.
fail=open('fail','w')
def getDocument(url,code='utf-8'):
    """Download *url* and return it parsed as an lxml HTML element.

    The page body is fetched exactly once and decoded with *code*; when that
    decoding fails the same bytes are decoded as GB2312 instead.

    Args:
        url: Address of the page to download.
        code: Name of the codec to try first (defaults to 'utf-8').

    Returns:
        The element built by ``lxml.html.fromstring`` from the decoded text.

    Raises:
        IOError: propagated from ``urllib.urlopen`` when the download fails.
        UnicodeDecodeError: when the body is valid in neither encoding.
    """
    response = urllib.urlopen(url)
    try:
        raw = response.read()
    finally:
        # Always release the socket, even if reading fails half-way.
        response.close()
    try:
        text = raw.decode(code)
        # Debug trace: report the codec that actually worked.
        print(code)
    except (UnicodeDecodeError, LookupError):
        # Wrong or unknown primary codec: retry on the bytes already in
        # memory instead of downloading the page a second time.
        text = raw.decode('Gb2312')
    return lxml.html.fromstring(text)

def getBaiduIndex(keyword):
    """Return the result count Baidu reports for an exact-phrase search.

    Args:
        keyword: Text to search for; it is encoded to UTF-8 and wrapped in
            double quotes so Baidu treats it as a phrase.

    Returns:
        The first integer found in the ``<span class="nums">`` element of the
        result page, or 0 when that element is missing or holds no digits.
    """
    indexXpath = "//span[@class='nums']/text()"
    # Percent-encode the whole quoted phrase so characters such as '&', '#',
    # '+' or '%' inside the keyword cannot break or truncate the query string.
    phrase = urllib.quote('"' + keyword.encode('utf-8') + '"', safe='')
    url = 'http://www.baidu.com/s?wd=' + phrase
    doc = getDocument(url)
    index = doc.xpath(indexXpath)
    if not index:
        return 0
    # Strip the thousands separators before extracting the number.
    digits = re.findall(r'\d+', index[0].replace(',', ''))
    if not digits:
        # The span exists but carries no number; treat it as "no results".
        return 0
    return int(digits[0])

def readExcelData(path='福建111.xls'):
    """Read the first sheet of a workbook, weigh each row and store it.

    For every row the first cell is looked up with ``getBaiduIndex``; the
    row's cells are mapped onto the field names below, the lookup result is
    stored under ``'weight'``, and the record is passed to ``insertDatabase``.

    Rows whose lookup raises are appended (one per line) to the module-level
    ``fail`` file and stored with a weight of 0, so a failed lookup can never
    inherit the weight of the previous row.

    Args:
        path: Workbook to read; defaults to the originally hard-coded file.
    """
    fields = ['name', 'address', 'lawer', 'occupy', 'phone', 'cellphone']
    table = xlrd.open_workbook(path).sheets()[0]
    for i in range(table.nrows):
        print(i)
        line = table.row_values(i)
        try:
            index = getBaiduIndex(line[0])
        except Exception:
            # Best effort: remember the row for later inspection and carry on.
            fail.write(str(line) + '\n')
            index = 0
        record = dict(zip(fields, line))
        # Set the weight explicitly so rows with more or fewer than six cells
        # cannot shift it onto the wrong key.
        record['weight'] = index
        insertDatabase(record)
def readSotedDb():
    """Write the stored records to 'new.xls', ordered by descending weight.

    Reads every document of the ``excel.index`` collection on the local
    MongoDB server (without ``_id``) and writes one spreadsheet row per
    document, with one column per key listed in ``columns``.
    """
    columns = ('name', 'address', 'lawer', 'occupy', 'phone', 'cellphone')
    source = pymongo.Connection('localhost',27017).excel.index
    records = source.find({},{'_id':0}).sort("weight",pymongo.DESCENDING)
    workbook = xlwt.Workbook()
    worksheet = workbook.add_sheet('sheet 1')
    for rowNumber, record in enumerate(records):
        for columnNumber, key in enumerate(columns):
            worksheet.write(rowNumber, columnNumber, record[key])
    workbook.save('new.xls')

def insertDatabase(document):
    """Insert *document* into the ``excel.index`` collection on localhost.

    The collection handle is created on the first call and cached on the
    function object, so inserting many rows reuses one connection instead of
    opening a new one per row.

    Args:
        document: Mapping to store as one MongoDB document.

    Insert failures are reported on stdout and otherwise ignored, so one bad
    record does not abort a bulk import.
    """
    collection = getattr(insertDatabase, '_collection', None)
    if collection is None:
        collection = pymongo.Connection('localhost',27017).excel.index
        insertDatabase._collection = collection
    try:
        collection.insert(document)
    except Exception as error:
        # Keep the best-effort behaviour, but say why the insert failed.
        print('Insert Data Error %s (%s)' % (document, error))
# Script entry point: the import runs as soon as this module is loaded.
# readSotedDb is not called anywhere in the visible source.
readExcelData()

 

posted @ 2013-11-07 17:43  Epirus  阅读(747)  评论(0)  编辑  收藏  举报