9.使用pandas去除重复,空行,第一列表头为空的数据行
# encoding:utf-8 import pandas as pd class OpenMyXLS(): def __init__(self, filepath): self.filepath = filepath self.openxls() def openxls(self): df = pd.read_excel(self.filepath,sheet_name=None) print("所有的sheet名称:") sheetNameList = list(df) print(sheetNameList) print('获取sheet数量:') sheetNums = len(sheetNameList) print(sheetNums) print("获取索引对应的sheet对应的名称") k = 0 writer = pd.ExcelWriter('../totest81.xlsx') while k < len(sheetNameList): sheetName = sheetNameList[k] print(sheetName) df = pd.read_excel(self.filepath, sheet_name=sheetName) print("获取表头") topNames = (df.keys()).values print(topNames) print("查找是否有81BOM列") findArr = self.returnFiindAll(topNames) print("整合后的表头数据") print(findArr) if findArr: print("获取对应列的所有数据") d = self.returnDict(findArr, df) todf = pd.DataFrame(d) print("删除空行") todf = todf.dropna(how='all') print("删除该列中有nan的行") todf = todf.dropna(subset='81BOM') todf.to_excel(writer, sheet_name=sheetName, index=False) else: print("没有找到想要的列表头") k += 1 writer.save() writer.close() print("创建excel OK!!!") df = pd.read_excel("../totest81.xlsx", sheet_name=None) sheetNameList = list(df) print(sheetNameList) self.qcfxls('../totest81.xlsx', sheetNameList) def qcfxls(self, filepath, sheetNameList): writer = pd.ExcelWriter('../totest82.xlsx') # 读取excel中的数据 j = 0 while j < len(sheetNameList): sheetName = sheetNameList[j] data = pd.DataFrame(pd.read_excel(filepath, sheetName)) # 查看获取的数据 #print(data) # 查看去除重复行的数据 no_re_row = data.drop_duplicates() print(no_re_row) # 将去除重复行的数据输出到excel表中 no_re_row.to_excel(writer, sheet_name=sheetName, index=False) j +=1 writer.save() writer.close() print("去除重复数据OK") '''返回插入EXCEL数据的字典形式''' def returnDict(self, findArr, df): i = 0 d = {} while i < len(findArr): allData1 = df.loc[:, findArr[i]].values allData1a = self.qukongge(allData1) d[findArr[i]] = allData1a i += 1 return d '''返回符合要求的表头''' def returnFiindAll(self, topNames): findArr = [] for topName in topNames: if topName == '81BOM': findArr.append(topName) print(findArr) if '70 HSG Bin' in topName: findArr.append(topName) print(findArr) if '70 MLB Bin' in topName: findArr.append(topName) print(findArr) return findArr '''去取字符串左右空格''' def qukongge(self, lieDatas): allData_strip = [] for allData in lieDatas: if isinstance(allData, str): # print(allData.strip()) allData_strip.append(allData.strip()) else: # print(allData) allData_strip.append(allData) return allData_strip if __name__ == '__main__': filepath = '../test81.xlsx' oxl = OpenMyXLS(filepath)