8.通过pandas读取文档,写入文档,同时去除多个sheet重复行数据
1 # encoding:utf-8 2 3 import pandas as pd 4 5 class OpenMyXLS(): 6 def __init__(self, filepath): 7 self.filepath = filepath 8 self.openxls() 9 10 def openxls(self): 11 df = pd.read_excel(self.filepath,sheet_name=None) 12 print("所有的sheet名称:") 13 sheetNameList = list(df) 14 print(sheetNameList) 15 print('获取sheet数量:') 16 sheetNums = len(sheetNameList) 17 print(sheetNums) 18 print("获取索引对应的sheet对应的名称") 19 k = 0 20 writer = pd.ExcelWriter('../totest81.xlsx') 21 while k < len(sheetNameList): 22 sheetName = sheetNameList[k] 23 print(sheetName) 24 df = pd.read_excel(self.filepath, sheet_name=sheetName) 25 print("获取表头") 26 topNames = (df.keys()).values 27 print(topNames) 28 print("查找是否有81BOM列") 29 findArr = self.returnFiindAll(topNames) 30 print("整合后的表头数据") 31 print(findArr) 32 33 if findArr: 34 print("获取对应列的所有数据") 35 d = self.returnDict(findArr, df) 36 todf = pd.DataFrame(d) 37 38 todf.to_excel(writer, sheet_name=sheetName, index=False) 39 else: 40 print("没有找到想要的列表头") 41 k += 1 42 writer.save() 43 writer.close() 44 print("创建excel OK!!!") 45 46 df = pd.read_excel("../totest81.xlsx", sheet_name=None) 47 sheetNameList = list(df) 48 print(sheetNameList) 49 self.qcfxls('../totest81.xlsx', sheetNameList) 50 51 def qcfxls(self, filepath, sheetNameList): 52 writer = pd.ExcelWriter('../totest82.xlsx') 53 # 读取excel中的数据 54 j = 0 55 while j < len(sheetNameList): 56 sheetName = sheetNameList[j] 57 data = pd.DataFrame(pd.read_excel(filepath, sheetName)) 58 # 查看获取的数据 59 #print(data) 60 # 查看去除重复行的数据 61 no_re_row = data.drop_duplicates() 62 print(no_re_row) 63 # 将去除重复行的数据输出到excel表中 64 no_re_row.to_excel(writer, sheet_name=sheetName, index=False) 65 j +=1 66 writer.save() 67 writer.close() 68 print("去除重复数据OK") 69 70 '''返回插入EXCEL数据的字典形式''' 71 def returnDict(self, findArr, df): 72 i = 0 73 d = {} 74 while i < len(findArr): 75 allData1 = df.loc[:, findArr[i]].values 76 allData1a = self.qukongge(allData1) 77 d[findArr[i]] = allData1a 78 i += 1 79 return d 80 81 '''返回符合要求的表头''' 82 def returnFiindAll(self, topNames): 83 findArr = [] 84 for topName in topNames: 85 if topName == '81BOM': 86 findArr.append(topName) 87 print(findArr) 88 if '70 HSG Bin' in topName: 89 findArr.append(topName) 90 print(findArr) 91 if '70 MLB Bin' in topName: 92 findArr.append(topName) 93 print(findArr) 94 return findArr 95 96 '''去取字符串左右空格''' 97 def qukongge(self, lieDatas): 98 allData_strip = [] 99 for allData in lieDatas: 100 if isinstance(allData, str): 101 # print(allData.strip()) 102 allData_strip.append(allData.strip()) 103 else: 104 # print(allData) 105 allData_strip.append(allData) 106 return allData_strip 107 108 109 if __name__ == '__main__': 110 filepath = '../test81.xlsx' 111 oxl = OpenMyXLS(filepath)