Python笔记:使用pywin32处理excel文件
因为前端同事须要批量的对excel文件做特殊处理,删除指定行,故写了此脚本。同一时候配合config.ini方便不熟悉py的同事使用
#!/usr/bin/env python #-*- coding:utf-8 -*- ####################################################### # 用于批量删除excel的指定行 # # 适用于全部office。前提须要安装pywin32和office软件 # ####################################################### import os import sys import time import glob import shutil import string import os.path import traceback import ConfigParser import win32com.client SPATH = "" #需处理的excel文件文件夹 DPATH = "" #处理后的excel存放文件夹 SKIP_FILE_LIST = [] #须要跳过的文件列表 MAX_SHEET_INDEX = 1 #每一个excel文件的前几个表须要处理 DELETE_ROW_LIST = [] #须要删除的行号 def dealPath(pathname=''): '''deal with windows file path''' if pathname: pathname = pathname.strip() if pathname: pathname = r'%s'%pathname pathname = string.replace(pathname, r'/', '\\') pathname = os.path.abspath(pathname) if pathname.find(":\\") == -1: pathname = os.path.join(os.getcwd(), pathname) return pathname class EasyExcel(object): '''class of easy to deal with excel''' def __init__(self): '''initial excel application''' self.m_filename = '' self.m_exists = False self.m_excel = win32com.client.DispatchEx('Excel.Application') #也能够用Dispatch,前者开启新进程,后者会复用进程中的excel进程 self.m_excel.DisplayAlerts = False #覆盖同名文件时不弹出确认框 def open(self, filename=''): '''open excel file''' if getattr(self, 'm_book', False): self.m_book.Close() self.m_filename = dealPath(filename) or '' self.m_exists = os.path.isfile(self.m_filename) if not self.m_filename or not self.m_exists: self.m_book = self.m_excel.Workbooks.Add() else: self.m_book = self.m_excel.Workbooks.Open(self.m_filename) def reset(self): '''reset''' self.m_excel = None self.m_book = None self.m_filename = '' def save(self, newfile=''): '''save the excel content''' assert type(newfile) is str, 'filename must be type string' newfile = dealPath(newfile) or self.m_filename if not newfile or (self.m_exists and newfile == self.m_filename): self.m_book.Save() return pathname = os.path.dirname(newfile) if not os.path.isdir(pathname): os.makedirs(pathname) self.m_filename = newfile self.m_book.SaveAs(newfile) def close(self): '''close the application''' self.m_book.Close(SaveChanges=1) self.m_excel.Quit() time.sleep(2) self.reset() def addSheet(self, sheetname=None): '''add new sheet, the name of sheet can be modify,but the workbook can't ''' sht = self.m_book.Worksheets.Add() sht.Name = sheetname if sheetname else sht.Name return sht def getSheet(self, sheet=1): '''get the sheet object by the sheet index''' assert sheet > 0, 'the sheet index must bigger then 0' return self.m_book.Worksheets(sheet) def getSheetByName(self, name): '''get the sheet object by the sheet name''' for i in xrange(1, self.getSheetCount()+1): sheet = self.getSheet(i) if name == sheet.Name: return sheet return None def getCell(self, sheet=1, row=1, col=1): '''get the cell object''' assert row>0 and col>0, 'the row and column index must bigger then 0' return self.getSheet(sheet).Cells(row, col) def getRow(self, sheet=1, row=1): '''get the row object''' assert row>0, 'the row index must bigger then 0' return self.getSheet(sheet).Rows(row) def getCol(self, sheet, col): '''get the column object''' assert col>0, 'the column index must bigger then 0' return self.getSheet(sheet).Columns(col) def getRange(self, sheet, row1, col1, row2, col2): '''get the range object''' sht = self.getSheet(sheet) return sht.Range(self.getCell(sheet, row1, col1), self.getCell(sheet, row2, col2)) def getCellValue(self, sheet, row, col): '''Get value of one cell''' return self.getCell(sheet,row, col).Value def setCellValue(self, sheet, row, col, value): '''set value of one cell''' self.getCell(sheet, row, col).Value = value def getRowValue(self, sheet, row): '''get the row values''' return self.getRow(sheet, row).Value def setRowValue(self, sheet, row, values): '''set the row values''' self.getRow(sheet, row).Value = values def getColValue(self, sheet, col): '''get the row values''' return self.getCol(sheet, col).Value def setColValue(self, sheet, col, values): '''set the row values''' self.getCol(sheet, col).Value = values def getRangeValue(self, sheet, row1, col1, row2, col2): '''return a tuples of tuple)''' return self.getRange(sheet, row1, col1, row2, col2).Value def setRangeValue(self, sheet, row1, col1, data): '''set the range values''' row2 = row1 + len(data) - 1 col2 = col1 + len(data[0]) - 1 range = self.getRange(sheet, row1, col1, row2, col2) range.Clear() range.Value = data def getSheetCount(self): '''get the number of sheet''' return self.m_book.Worksheets.Count def getMaxRow(self, sheet): '''get the max row number, not the count of used row number''' return self.getSheet(sheet).Rows.Count def getMaxCol(self, sheet): '''get the max col number, not the count of used col number''' return self.getSheet(sheet).Columns.Count def clearCell(self, sheet, row, col): '''clear the content of the cell''' self.getCell(sheet,row,col).Clear() def deleteCell(self, sheet, row, col): '''delete the cell''' self.getCell(sheet, row, col).Delete() def clearRow(self, sheet, row): '''clear the content of the row''' self.getRow(sheet, row).Clear() def deleteRow(self, sheet, row): '''delete the row''' self.getRow(sheet, row).Delete() def clearCol(self, sheet, col): '''clear the col''' self.getCol(sheet, col).Clear() def deleteCol(self, sheet, col): '''delete the col''' self.getCol(sheet, col).Delete() def clearSheet(self, sheet): '''clear the hole sheet''' self.getSheet(sheet).Clear() def deleteSheet(self, sheet): '''delete the hole sheet''' self.getSheet(sheet).Delete() def deleteRows(self, sheet, fromRow, count=1): '''delete count rows of the sheet''' maxRow = self.getMaxRow(sheet) maxCol = self.getMaxCol(sheet) endRow = fromRow+count-1 if fromRow > maxRow or endRow < 1: return self.getRange(sheet, fromRow, 1, endRow, maxCol).Delete() def deleteCols(self, sheet, fromCol, count=1): '''delete count cols of the sheet''' maxRow = self.getMaxRow(sheet) maxCol = self.getMaxCol(sheet) endCol = fromCol + count - 1 if fromCol > maxCol or endCol < 1: return self.getRange(sheet, 1, fromCol, maxRow, endCol).Delete() def echo(msg): '''echo message''' print msg def dealSingle(excel, sfile, dfile): '''deal with single excel file''' echo("deal with %s"%sfile) basefile = os.path.basename(sfile) excel.open(sfile) sheetcount = excel.getSheetCount() if not (basefile in SKIP_FILE_LIST or file in SKIP_FILE_LIST): for sheet in range(1, sheetcount+1): if sheet > MAX_SHEET_INDEX: continue reduce = 0 for row in DELETE_ROW_LIST: excel.deleteRow(sheet, row-reduce) reduce += 1 #excel.deleteRows(sheet, 2, 2) excel.save(dfile) def dealExcel(spath, dpath): '''deal with excel files''' start = time.time() #check source path exists or not spath = dealPath(spath) if not os.path.isdir(spath): echo("No this directory :%s"%spath) return #check destination path exists or not dpath = dealPath(dpath) if not os.path.isdir(dpath): os.makedirs(dpath) shutil.rmtree(dpath) #list the excel file filelist = glob.glob(os.path.join(spath, '*.xlsx')) if not filelist: echo('The path of %s has no excel file'%spath) return #deal with excel file excel = EasyExcel() for file in filelist: basefile = os.path.basename(file) destfile = os.path.join(dpath, basefile) dealSingle(excel, file, destfile) echo('Use time:%s'%(time.time()-start)) excel.close() def loadConfig(configfile='./config.ini'): '''parse config file''' global SPATH global DPATH global SKIP_FILE_LIST global MAX_SHEET_INDEX global DELETE_ROW_LIST file = dealPath(configfile) if not os.path.isfile(file): echo('Can not find the config.ini') return False parser = ConfigParser.ConfigParser() parser.read(file) SPATH = parser.get('pathconfig', 'spath').strip() DPATH = parser.get('pathconfig', 'dpath').strip() filelist = parser.get('otherconfig', 'filelist').strip() index = parser.get('otherconfig', 'maxindex').strip() rowlist = parser.get('otherconfig', 'deleterows').strip() if filelist: SKIP_FILE_LIST = filelist.split(";") if rowlist: DELETE_ROW_LIST = map(int, rowlist.split(";")) MAX_SHEET_INDEX = int(index) if index else MAX_SHEET_INDEX def main(): '''main function''' loadConfig() if SPATH and DPATH and MAX_SHEET_INDEX: dealExcel(SPATH, DPATH) raw_input("Please press any key to exit!") if __name__=="__main__": main()
config.ini文件例如以下:
[pathconfig] #;spath表示须要处理的excel文件文件夹 spath=./tests #;dpath表示处理后的excel文件文件夹 dpath=./dest [otherconfig] #;filelist表示不须要做特殊处理的excel文件列表,以英文分号分隔 filelist= #;maxindex表示须要处理每一个excel文件的前几张表 maxindex=1 #;deleterows表示须要删除的阿拉伯数字行号,用英文分号分隔 deleterows=2;3