13.pandas去除第二列为空的整行数据
1 # encoding:utf-8 2 3 import pandas as pd 4 from openpyxl import load_workbook 5 6 ''' 7 本案例获取所有符合的列数据 8 去除了每个栏位的空格 9 去除了空行数据 10 去除了第一列为空的整行数据 11 多个sheet 12 13 对多个sheet数据整合在一个sheet中 14 15 对整合在一个sheet中的数据只获取想要的列 16 17 获取对应行对应列的数据写到excel中 18 即第一列与后面每一列一一对应放入到新的一行中 19 组成一个2维数组,写入倒新的EXCEL中 20 21 去出重复,去除第二列有空行的数据 22 ''' 23 24 class OpenMyXLS(): 25 def __init__(self, filepath): 26 self.filepath = filepath 27 self.openxls() 28 29 def openxls(self): 30 '''获取第一个sheet的数据''' 31 df = pd.read_excel(self.filepath, sheet_name=0) 32 '''获取行数列数''' 33 hanglie = df.shape 34 hang = hanglie[0] 35 lie = hanglie[1] 36 print(hanglie) 37 38 '''将第i行第j列的值加到excel中,组成只有两列的数据''' 39 tofilepath = "../HSGtotest81_zh2_dange.xlsx" 40 result2 = self.get8170lists(df, hang, lie) 41 self.writerexcel(tofilepath,result2) 42 43 '''去除重复的行''' 44 readfilepath = tofilepath 45 sheetNameList = self.getSheetNames(readfilepath) 46 tofilepath = "../HSGtotest81_zh2_dange_qc.xlsx" 47 self.qcfxls(readfilepath, sheetNameList, tofilepath) 48 49 '''去除第二列有nan的整行数据''' 50 #findArr = [0, 1] 51 readfilepath = tofilepath 52 tofilepath = "../HSGtotest81_zh2_dange_qc_nan.xlsx" 53 sheetNameList = self.getSheetNames(readfilepath) 54 df = pd.read_excel(readfilepath, sheet_name=sheetNameList[0]) 55 topNames = (df.keys()).values 56 print(topNames) 57 self.quchunan(topNames, sheetNameList, readfilepath, tofilepath) 58 59 '''去除第二列有为空的整行数据''' 60 def quchunan(self, findArr, sheetNameList, readfilepath, tofilepath): 61 writer = pd.ExcelWriter(tofilepath) 62 df = pd.read_excel(readfilepath, sheet_name=sheetNameList[0]) 63 d = self.returnDict(findArr, df) 64 todf = pd.DataFrame(d) 65 todf = todf.dropna(subset=findArr[1]) 66 todf.to_excel(writer, sheet_name=sheetNameList[0], index=False) 67 writer.save() 68 writer.close() 69 print("创建excel OK!!!") 70 71 '''将二维数据写入excel中''' 72 def writerexcel(self, tofilepath, result2): 73 writer = pd.ExcelWriter(tofilepath) 74 df = pd.DataFrame(result2) 75 df.to_excel(writer, sheet_name="allhsgmlb", index=False) 76 writer.save() 77 writer.close() 78 79 '''获取81跟70配对,组合成2维数组''' 80 def get8170lists(self, df, hang, lie): 81 n = 0 82 m = 1 83 h_list = [] 84 h_lists = [] 85 while n < hang: 86 while m < lie: 87 valueij = df.values[n, 0] 88 h_list.append(valueij) 89 90 valueij = df.values[n, m] 91 h_list.append(valueij) 92 # print(h_list) 93 94 h_lists.append(h_list) 95 print(h_lists) 96 h_list = [] 97 m += 1 98 n += 1 99 m = 1 100 return h_lists 101 102 '''获取excel中所有sheet的名称''' 103 def getSheetNames(self, filepath): 104 df = pd.read_excel(filepath, sheet_name=None) 105 print("所有的sheet名称:") 106 sheetNameList = list(df) 107 print(sheetNameList) 108 return sheetNameList 109 110 '''根据sheet名称获取对应列名的数据,写入新的excel中''' 111 def foundtopnamedata(self, sheetNameList, file1): 112 k = 0 113 writer = pd.ExcelWriter(file1) 114 while k < len(sheetNameList): 115 sheetName = sheetNameList[k] 116 print(sheetName) 117 df = pd.read_excel(self.filepath, sheet_name=sheetName) 118 print("获取表头") 119 topNames = (df.keys()).values 120 print(topNames) 121 print("查找是否有81BOM列") 122 findArr = self.returnFiindHSG(topNames) 123 # findArr = self.returnFiindMLB(topNames) 124 print("整合后的表头数据") 125 print(findArr) 126 127 if findArr: 128 print("获取对应列的所有数据") 129 d = self.returnDict(findArr, df) 130 todf = pd.DataFrame(d) 131 print("删除空行") 132 todf = todf.dropna(how='all') 133 print("删除81BOM列中有nan的行") 134 todf = todf.dropna(subset='81BOM') 135 todf.to_excel(writer, sheet_name=sheetName, index=False) 136 else: 137 print("没有找到想要的列表头") 138 k += 1 139 writer.save() 140 writer.close() 141 print("创建excel OK!!!") 142 143 '''整合所有sheet放在第一个sheet中''' 144 def zhengheALLsheet(self, readfilepath, tofilepath1, tofilepath2): 145 writer1 = pd.ExcelWriter(tofilepath1) 146 writer2 = pd.ExcelWriter(tofilepath2) 147 148 sheetNameList = self.getSheetNames(readfilepath) 149 print(len(sheetNameList)) 150 151 k = 0 152 frames = [] 153 while k < len(sheetNameList): 154 sheetName = sheetNameList[k] 155 print(sheetName) 156 df1 = pd.read_excel(readfilepath, sheet_name=sheetName) 157 #print(df1) 158 frames.append(df1) 159 k += 1 160 print(frames) 161 162 result1 = pd.concat(frames, keys=sheetNameList) 163 result1.to_excel(writer1, sheet_name="allhsgmlb") 164 writer1.save() 165 writer1.close() 166 167 result2 = pd.concat(frames) 168 result2.to_excel(writer2, sheet_name="allhsgmlb", index=False) 169 writer2.save() 170 writer2.close() 171 print("数据整合到一个sheet中成功!") 172 173 '''去除重复行的数据''' 174 def qcfxls(self, readfilepath, sheetNameList, tofilepath): 175 writer = pd.ExcelWriter(tofilepath) 176 # 读取excel中的数据 177 j = 0 178 while j < len(sheetNameList): 179 sheetName = sheetNameList[j] 180 data = pd.DataFrame(pd.read_excel(readfilepath, sheetName)) 181 # 查看获取的数据 182 #print(data) 183 # 查看去除重复行的数据 184 no_re_row = data.drop_duplicates() 185 print(no_re_row) 186 # 将去除重复行的数据输出到excel表中 187 no_re_row.to_excel(writer, sheet_name=sheetName, index=False) 188 j += 1 189 writer.save() 190 writer.close() 191 print("去除重复数据OK") 192 193 '''返回插入EXCEL数据的字典形式''' 194 def returnDict(self, findArr, df): 195 i = 0 196 d = {} 197 while i < len(findArr): 198 allData1 = df.loc[:, findArr[i]].values 199 allData1a = self.qukongge(allData1) 200 d[findArr[i]] = allData1a 201 i += 1 202 return d 203 204 '''返回符合要求的表头''' 205 def returnFiindMLB(self, topNames): 206 findArr = [] 207 for topName in topNames: 208 if topName == '81BOM': 209 findArr.append(topName) 210 print(findArr) 211 if '70 MLB Bin' in topName: 212 findArr.append(topName) 213 print(findArr) 214 return findArr 215 216 '''返回符合要求的表头''' 217 def returnFiindHSG(self, topNames): 218 findArr = [] 219 for topName in topNames: 220 if topName == '81BOM': 221 findArr.append(topName) 222 print(findArr) 223 if '70 HSG Bin' in topName: 224 findArr.append(topName) 225 print(findArr) 226 return findArr 227 228 '''返回符合要求的表头''' 229 def returnFiindAll(self, topNames): 230 findArr = [] 231 for topName in topNames: 232 if topName == '81BOM': 233 findArr.append(topName) 234 print(findArr) 235 if '70 HSG Bin' in topName: 236 findArr.append(topName) 237 print(findArr) 238 if '70 MLB Bin' in topName: 239 findArr.append(topName) 240 print(findArr) 241 return findArr 242 243 '''去取字符串左右空格''' 244 def qukongge(self, lieDatas): 245 allData_strip = [] 246 for allData in lieDatas: 247 if isinstance(allData, str): 248 # print(allData.strip()) 249 allData_strip.append(allData.strip()) 250 else: 251 # print(allData) 252 allData_strip.append(allData) 253 return allData_strip 254 255 256 if __name__ == '__main__': 257 filepath = '../HSGtotest81_zh2.xlsx' 258 oxl = OpenMyXLS(filepath)