Python Excel handling
#!/bin/python
#coding=utf-8
"""Rank the rows of an Excel sheet by their Baidu result count.

Every row of the first sheet of the input workbook is looked up on Baidu
(exact-phrase search on the first column), the reported result count is
stored with the row as ``weight`` in the MongoDB collection ``excel.index``,
and ``readSotedDb`` can export the stored rows to ``new.xls`` ordered by
descending weight.

The script targets Python 2: it uses ``urllib.urlopen``, ``urllib.quote``
and ``pymongo.Connection``.
"""
import re
import urllib

import lxml.html
import pymongo
import xlrd
import xlwt

# Rows whose Baidu lookup failed are logged here, one row per line.
fail=open('fail','w')

# Spreadsheet columns in sheet order; also the keys of the stored documents.
COLUMNS=['name','address','lawer','occupy','phone','cellphone']

# Lazily created handle shared by all database helpers, so the script opens
# one connection instead of one per inserted row.
_collection=None


def _getCollection():
    """Return the shared ``excel.index`` collection, connecting on first use."""
    global _collection
    if _collection is None:
        _collection=pymongo.Connection('localhost',27017).excel.index
    return _collection


def getDocument(url,code='utf-8'):
    """Download *url* and parse the page into an lxml HTML tree.

    The body is decoded with *code*; if that fails it is decoded as GB2312
    instead. The page is downloaded once, and download errors propagate to
    the caller.

    Args:
        url: Address of the page to fetch.
        code: Name of the encoding to try first.

    Returns:
        The root element produced by ``lxml.html.fromstring``.

    Raises:
        IOError: If the page cannot be downloaded.
        UnicodeDecodeError: If the GB2312 fallback cannot decode the body.
    """
    response=urllib.urlopen(url)
    try:
        raw=response.read()
    finally:
        response.close()
    try:
        text=raw.decode(code)
        # Trace output kept from the original script: primary decode worked.
        print('utf-8')
    except (UnicodeDecodeError,LookupError):
        # Undecodable bytes or an unknown codec name: fall back to GB2312.
        text=raw.decode('Gb2312')
    return lxml.html.fromstring(text)


def getBaiduIndex(keyword):
    """Return the number of results Baidu reports for the exact phrase.

    Args:
        keyword: Unicode search phrase.

    Returns:
        The first run of digits (thousands separators removed) found in the
        ``<span class="nums">`` element of the result page, as an int, or 0
        when the element is missing or contains no digits.
    """
    keyword=keyword.encode('utf-8')
    # Percent-encode the quoted phrase so characters such as '&' or '#' in
    # the keyword stay inside the wd parameter instead of ending it.
    url='http://www.baidu.com/s?wd='+urllib.quote('"'+keyword+'"')
    doc=getDocument(url)
    hits=doc.xpath("//span[@class='nums']/text()")
    if not hits:
        return 0
    digits=re.findall(r'\d+',hits[0].replace(',',''))
    if not digits:
        return 0
    return int(digits[0])


def readExcelData():
    """Look up every row of the input workbook and store it in MongoDB.

    For each row of the first sheet, the first cell is searched on Baidu and
    the row is inserted as a document keyed by ``COLUMNS`` plus ``weight``.
    A row whose lookup fails is written to the ``fail`` log and stored with
    weight 0.
    """
    table=xlrd.open_workbook('福建111.xls').sheets()[0]
    for i in range(table.nrows):
        print(i)
        line=table.row_values(i)
        try:
            index=getBaiduIndex(line[0])
        except Exception:
            fail.write(str(line)+'\n')
            # Without a fallback the weight would be unbound on the first
            # row or silently reuse the previous row's value.
            index=0
        record=dict(zip(COLUMNS,line))
        # Set explicitly so the weight cannot land on another key or be
        # dropped when the sheet does not have exactly six columns.
        record['weight']=index
        insertDatabase(record)


def readSotedDb():
    """Export the stored rows to ``new.xls`` ordered by descending weight.

    One spreadsheet row is written per document, with the ``COLUMNS`` fields
    in order; a field missing from a document is written as an empty cell.
    """
    rows=_getCollection().find({},{'_id':0}).sort("weight",pymongo.DESCENDING)
    book=xlwt.Workbook()
    sheet=book.add_sheet('sheet 1')
    for rowNumber,row in enumerate(rows):
        for columnNumber,key in enumerate(COLUMNS):
            sheet.write(rowNumber,columnNumber,row.get(key,''))
    book.save('new.xls')


def insertDatabase(document):
    """Insert *document* into ``excel.index``, reporting failures on stdout.

    Insert errors are printed rather than raised so one bad row does not
    stop the import; connection errors still propagate.

    Args:
        document: Dict to store.
    """
    collection=_getCollection()
    try:
        collection.insert(document)
    except Exception:
        print('Insert Data Error %s' % (document,))


try:
    readExcelData()
finally:
    # Flush and release the failure log even if the import aborts.
    fail.close()