8.通过pandas读取文档,写入文档,同时去除多个sheet重复行数据

  1 # encoding:utf-8
  2 
  3 import pandas as pd
  4 
  5 class OpenMyXLS():
  6     def __init__(self, filepath):
  7         self.filepath = filepath
  8         self.openxls()
  9 
 10     def openxls(self):
 11         df = pd.read_excel(self.filepath,sheet_name=None)
 12         print("所有的sheet名称:")
 13         sheetNameList = list(df)
 14         print(sheetNameList)
 15         print('获取sheet数量:')
 16         sheetNums = len(sheetNameList)
 17         print(sheetNums)
 18         print("获取索引对应的sheet对应的名称")
 19         k = 0
 20         writer = pd.ExcelWriter('../totest81.xlsx')
 21         while k < len(sheetNameList):
 22             sheetName = sheetNameList[k]
 23             print(sheetName)
 24             df = pd.read_excel(self.filepath, sheet_name=sheetName)
 25             print("获取表头")
 26             topNames = (df.keys()).values
 27             print(topNames)
 28             print("查找是否有81BOM列")
 29             findArr = self.returnFiindAll(topNames)
 30             print("整合后的表头数据")
 31             print(findArr)
 32 
 33             if findArr:
 34                 print("获取对应列的所有数据")
 35                 d = self.returnDict(findArr, df)
 36                 todf = pd.DataFrame(d)
 37 
 38                 todf.to_excel(writer, sheet_name=sheetName, index=False)
 39             else:
 40                 print("没有找到想要的列表头")
 41             k += 1
 42         writer.save()
 43         writer.close()
 44         print("创建excel OK!!!")
 45 
 46         df = pd.read_excel("../totest81.xlsx", sheet_name=None)
 47         sheetNameList = list(df)
 48         print(sheetNameList)
 49         self.qcfxls('../totest81.xlsx', sheetNameList)
 50 
 51     def qcfxls(self, filepath, sheetNameList):
 52         writer = pd.ExcelWriter('../totest82.xlsx')
 53         # 读取excel中的数据
 54         j = 0
 55         while j < len(sheetNameList):
 56             sheetName = sheetNameList[j]
 57             data = pd.DataFrame(pd.read_excel(filepath, sheetName))
 58             # 查看获取的数据
 59             #print(data)
 60             # 查看去除重复行的数据
 61             no_re_row = data.drop_duplicates()
 62             print(no_re_row)
 63             # 将去除重复行的数据输出到excel表中
 64             no_re_row.to_excel(writer, sheet_name=sheetName, index=False)
 65             j +=1
 66         writer.save()
 67         writer.close()
 68         print("去除重复数据OK")
 69 
 70     '''返回插入EXCEL数据的字典形式'''
 71     def returnDict(self, findArr, df):
 72         i = 0
 73         d = {}
 74         while i < len(findArr):
 75             allData1 = df.loc[:, findArr[i]].values
 76             allData1a = self.qukongge(allData1)
 77             d[findArr[i]] = allData1a
 78             i += 1
 79         return d
 80 
 81     '''返回符合要求的表头'''
 82     def returnFiindAll(self, topNames):
 83         findArr = []
 84         for topName in topNames:
 85             if topName == '81BOM':
 86                 findArr.append(topName)
 87                 print(findArr)
 88             if '70 HSG Bin' in topName:
 89                 findArr.append(topName)
 90                 print(findArr)
 91             if '70 MLB Bin' in topName:
 92                 findArr.append(topName)
 93                 print(findArr)
 94         return findArr
 95 
 96     '''去取字符串左右空格'''
 97     def qukongge(self, lieDatas):
 98         allData_strip = []
 99         for allData in lieDatas:
100             if isinstance(allData, str):
101                 # print(allData.strip())
102                 allData_strip.append(allData.strip())
103             else:
104                 # print(allData)
105                 allData_strip.append(allData)
106         return allData_strip
107 
108 
109 if __name__ == '__main__':
110     filepath = '../test81.xlsx'
111     oxl = OpenMyXLS(filepath)

 

posted @ 2022-03-18 13:22  种太阳  阅读(182)  评论(0编辑  收藏  举报