12.pandas将二维数组写入excel中
1 # encoding:utf-8 2 3 import pandas as pd 4 from openpyxl import load_workbook 5 6 ''' 7 本案例获取所有符合的列数据 8 去除了每个栏位的空格 9 去除了空行数据 10 去除了第一列为空的整行数据 11 多个sheet 12 13 对多个sheet数据整合在一个sheet中 14 15 对整合在一个sheet中的数据只获取想要的列 16 17 获取对应行对应列的数据写到excel中 18 即第一列与后面每一列一一对应放入到新的一行中 19 组成一个2维数组,写入倒新的EXCEL中 20 ''' 21 22 class OpenMyXLS(): 23 def __init__(self, filepath): 24 self.filepath = filepath 25 self.openxls() 26 27 def openxls(self): 28 '''获取第一个sheet的数据''' 29 df = pd.read_excel(self.filepath, sheet_name=0) 30 '''获取行数列数''' 31 hanglie = df.shape 32 hang = hanglie[0] 33 lie = hanglie[1] 34 print(hanglie) 35 36 '''将第i行第j列的值加到excel中,组成只有两列的数据''' 37 tofilepath = "../HSGtotest81_zh2_dange.xlsx" 38 result2 = self.get8170lists(df, hang, lie) 39 self.writerexcel(tofilepath,result2) 40 41 '''将二维数据写入excel中''' 42 def writerexcel(self, tofilepath, result2): 43 writer = pd.ExcelWriter(tofilepath) 44 df = pd.DataFrame(result2) 45 df.to_excel(writer, sheet_name="allhsgmlb", index=False) 46 writer.save() 47 writer.close() 48 49 '''获取81跟70配对,组合成2维数组''' 50 def get8170lists(self, df, hang, lie): 51 n = 0 52 m = 1 53 h_list = [] 54 h_lists = [] 55 while n < hang: 56 while m < lie: 57 valueij = df.values[n, 0] 58 h_list.append(valueij) 59 60 valueij = df.values[n, m] 61 h_list.append(valueij) 62 # print(h_list) 63 64 h_lists.append(h_list) 65 print(h_lists) 66 h_list = [] 67 m += 1 68 n += 1 69 m = 1 70 return h_lists 71 72 '''获取excel中所有sheet的名称''' 73 def getSheetNames(self, filepath): 74 df = pd.read_excel(filepath, sheet_name=None) 75 print("所有的sheet名称:") 76 sheetNameList = list(df) 77 return sheetNameList 78 79 '''根据sheet名称获取对应列名的数据,写入新的excel中''' 80 def foundtopnamedata(self, sheetNameList, file1): 81 k = 0 82 writer = pd.ExcelWriter(file1) 83 while k < len(sheetNameList): 84 sheetName = sheetNameList[k] 85 print(sheetName) 86 df = pd.read_excel(self.filepath, sheet_name=sheetName) 87 print("获取表头") 88 topNames = (df.keys()).values 89 print(topNames) 90 print("查找是否有81BOM列") 91 findArr = self.returnFiindHSG(topNames) 92 # findArr = self.returnFiindMLB(topNames) 93 print("整合后的表头数据") 94 print(findArr) 95 96 if findArr: 97 print("获取对应列的所有数据") 98 d = self.returnDict(findArr, df) 99 todf = pd.DataFrame(d) 100 print("删除空行") 101 todf = todf.dropna(how='all') 102 print("删除81BOM列中有nan的行") 103 todf = todf.dropna(subset='81BOM') 104 todf.to_excel(writer, sheet_name=sheetName, index=False) 105 else: 106 print("没有找到想要的列表头") 107 k += 1 108 writer.save() 109 writer.close() 110 print("创建excel OK!!!") 111 112 '''整合所有sheet放在第一个sheet中''' 113 def zhengheALLsheet(self, readfilepath, tofilepath1, tofilepath2): 114 writer1 = pd.ExcelWriter(tofilepath1) 115 writer2 = pd.ExcelWriter(tofilepath2) 116 117 sheetNameList = self.getSheetNames(readfilepath) 118 print(len(sheetNameList)) 119 120 k = 0 121 frames = [] 122 while k < len(sheetNameList): 123 sheetName = sheetNameList[k] 124 print(sheetName) 125 df1 = pd.read_excel(readfilepath, sheet_name=sheetName) 126 #print(df1) 127 frames.append(df1) 128 k += 1 129 print(frames) 130 131 result1 = pd.concat(frames, keys=sheetNameList) 132 result1.to_excel(writer1, sheet_name="allhsgmlb") 133 writer1.save() 134 writer1.close() 135 136 result2 = pd.concat(frames) 137 result2.to_excel(writer2, sheet_name="allhsgmlb", index=False) 138 writer2.save() 139 writer2.close() 140 print("数据整合到一个sheet中成功!") 141 142 '''去除重复行的数据''' 143 def qcfxls(self, readfilepath, sheetNameList, tofilepath): 144 writer = pd.ExcelWriter(tofilepath) 145 # 读取excel中的数据 146 j = 0 147 while j < len(sheetNameList): 148 sheetName = sheetNameList[j] 149 data = pd.DataFrame(pd.read_excel(readfilepath, sheetName)) 150 # 查看获取的数据 151 #print(data) 152 # 查看去除重复行的数据 153 no_re_row = data.drop_duplicates() 154 print(no_re_row) 155 # 将去除重复行的数据输出到excel表中 156 no_re_row.to_excel(writer, sheet_name=sheetName, index=False) 157 j +=1 158 writer.save() 159 writer.close() 160 print("去除重复数据OK") 161 162 '''返回插入EXCEL数据的字典形式''' 163 def returnDict(self, findArr, df): 164 i = 0 165 d = {} 166 while i < len(findArr): 167 allData1 = df.loc[:, findArr[i]].values 168 allData1a = self.qukongge(allData1) 169 d[findArr[i]] = allData1a 170 i += 1 171 return d 172 173 '''返回符合要求的表头''' 174 def returnFiindMLB(self, topNames): 175 findArr = [] 176 for topName in topNames: 177 if topName == '81BOM': 178 findArr.append(topName) 179 print(findArr) 180 if '70 MLB Bin' in topName: 181 findArr.append(topName) 182 print(findArr) 183 return findArr 184 185 '''返回符合要求的表头''' 186 def returnFiindHSG(self, topNames): 187 findArr = [] 188 for topName in topNames: 189 if topName == '81BOM': 190 findArr.append(topName) 191 print(findArr) 192 if '70 HSG Bin' in topName: 193 findArr.append(topName) 194 print(findArr) 195 return findArr 196 197 '''返回符合要求的表头''' 198 def returnFiindAll(self, topNames): 199 findArr = [] 200 for topName in topNames: 201 if topName == '81BOM': 202 findArr.append(topName) 203 print(findArr) 204 if '70 HSG Bin' in topName: 205 findArr.append(topName) 206 print(findArr) 207 if '70 MLB Bin' in topName: 208 findArr.append(topName) 209 print(findArr) 210 return findArr 211 212 '''去取字符串左右空格''' 213 def qukongge(self, lieDatas): 214 allData_strip = [] 215 for allData in lieDatas: 216 if isinstance(allData, str): 217 # print(allData.strip()) 218 allData_strip.append(allData.strip()) 219 else: 220 # print(allData) 221 allData_strip.append(allData) 222 return allData_strip 223 224 225 if __name__ == '__main__': 226 filepath = '../HSGtotest81_zh2.xlsx' 227 oxl = OpenMyXLS(filepath)