9.使用pandas去除重复,空行,第一列表头为空的数据行

# encoding:utf-8

import pandas as pd

class OpenMyXLS():
    def __init__(self, filepath):
        self.filepath = filepath
        self.openxls()

    def openxls(self):
        df = pd.read_excel(self.filepath,sheet_name=None)
        print("所有的sheet名称:")
        sheetNameList = list(df)
        print(sheetNameList)
        print('获取sheet数量:')
        sheetNums = len(sheetNameList)
        print(sheetNums)
        print("获取索引对应的sheet对应的名称")
        k = 0
        writer = pd.ExcelWriter('../totest81.xlsx')
        while k < len(sheetNameList):
            sheetName = sheetNameList[k]
            print(sheetName)
            df = pd.read_excel(self.filepath, sheet_name=sheetName)
            print("获取表头")
            topNames = (df.keys()).values
            print(topNames)
            print("查找是否有81BOM列")
            findArr = self.returnFiindAll(topNames)
            print("整合后的表头数据")
            print(findArr)

            if findArr:
                print("获取对应列的所有数据")
                d = self.returnDict(findArr, df)
                todf = pd.DataFrame(d)
                print("删除空行")
                todf = todf.dropna(how='all')
                print("删除该列中有nan的行")
                todf = todf.dropna(subset='81BOM')
                todf.to_excel(writer, sheet_name=sheetName, index=False)
            else:
                print("没有找到想要的列表头")
            k += 1
        writer.save()
        writer.close()
        print("创建excel OK!!!")

        df = pd.read_excel("../totest81.xlsx", sheet_name=None)
        sheetNameList = list(df)
        print(sheetNameList)
        self.qcfxls('../totest81.xlsx', sheetNameList)

    def qcfxls(self, filepath, sheetNameList):
        writer = pd.ExcelWriter('../totest82.xlsx')
        # 读取excel中的数据
        j = 0
        while j < len(sheetNameList):
            sheetName = sheetNameList[j]
            data = pd.DataFrame(pd.read_excel(filepath, sheetName))
            # 查看获取的数据
            #print(data)
            # 查看去除重复行的数据
            no_re_row = data.drop_duplicates()
            print(no_re_row)
            # 将去除重复行的数据输出到excel表中
            no_re_row.to_excel(writer, sheet_name=sheetName, index=False)
            j +=1
        writer.save()
        writer.close()
        print("去除重复数据OK")

    '''返回插入EXCEL数据的字典形式'''
    def returnDict(self, findArr, df):
        i = 0
        d = {}
        while i < len(findArr):
            allData1 = df.loc[:, findArr[i]].values
            allData1a = self.qukongge(allData1)
            d[findArr[i]] = allData1a
            i += 1
        return d

    '''返回符合要求的表头'''
    def returnFiindAll(self, topNames):
        findArr = []
        for topName in topNames:
            if topName == '81BOM':
                findArr.append(topName)
                print(findArr)
            if '70 HSG Bin' in topName:
                findArr.append(topName)
                print(findArr)
            if '70 MLB Bin' in topName:
                findArr.append(topName)
                print(findArr)
        return findArr

    '''去取字符串左右空格'''
    def qukongge(self, lieDatas):
        allData_strip = []
        for allData in lieDatas:
            if isinstance(allData, str):
                # print(allData.strip())
                allData_strip.append(allData.strip())
            else:
                # print(allData)
                allData_strip.append(allData)
        return allData_strip


if __name__ == '__main__':
    filepath = '../test81.xlsx'
    oxl = OpenMyXLS(filepath)

 

posted @ 2022-03-18 14:39  种太阳  阅读(993)  评论(0编辑  收藏  举报