backup-analysis-barcode-distribute-recv-py
analysis_barcode.py
---------
# -*- coding:utf-8 -*-

# python3

"""Analyze the barcodes that were distributed to and received from machines.

For one day's pair of log files the script reports:

1. how many barcodes were distributed,
2. how many barcodes were received,
3. which barcodes were distributed but never received,
4. which barcodes were received but never distributed.

Distributed record format (one per line)::

    barcode=1234567890, machineNo=8, allowToFillCode=1, ctime=2020-07-22 07:27:39

Received record format (one per line)::

    time=2020-07-22 06:18:19, barcode=1234567890, machineNo=2, ...
"""

import re
import sys
from collections import Counter

distribute_file_name_pattern = "RecordBarcodeDistribute-2020-{}.txt"
recv_file_name_pattern = "RecordRecvData-2020-{}.txt"

# Compiled once at import time; the readers apply them to every row.
_DISTRIBUTE_ROW_RE = re.compile(
    r'^barcode=(.{10}), machineNo=(\d+), allowToFillCode=(\d)')
_RECV_ROW_RE = re.compile(
    r'^time=(.{19}), barcode=([^,]+), machineNo=(\d+)')


def read_distributed_info(date):
    """Read the distribution log for *date*.

    Rows that do not match the distributed record format are reported with a
    ``[WARN]`` line on stdout. Matching rows are kept only when their
    ``allowToFillCode`` field is ``"1"``; other matching rows are ignored.

    Args:
        date: Text substituted into ``distribute_file_name_pattern``
            (the script passes a month-day string such as ``"04-01"``).

    Returns:
        A tuple ``(rowCount, infos)`` where ``rowCount`` is the number of
        lines in the file and ``infos`` maps each machine number (a string)
        to the list of barcodes distributed to it, in file order::

            (rowCount, {"1": ["barcode0", "barcode1", ...], ...})
    """
    infos = {}
    row_count = 0

    file_name = distribute_file_name_pattern.format(date)
    with open(file_name, "r") as distribute_file:
        for row in distribute_file:
            row_count += 1

            match_info = _DISTRIBUTE_ROW_RE.match(row)
            if match_info is None:
                print("[WARN] not matched distribute info:", row)
                continue

            barcode, machine_no, allow_to_fill_code = match_info.groups()
            if allow_to_fill_code == "1":
                infos.setdefault(machine_no, []).append(barcode)
            # Rows with any other allowToFillCode value are ignored.

    return (row_count, infos)


def read_received_info(date):
    """Read the received-data log for *date*.

    Rows that do not match the received record format are reported with a
    ``[WARN]`` line on stdout and otherwise skipped.

    Args:
        date: Text substituted into ``recv_file_name_pattern``
            (the script passes a month-day string such as ``"04-01"``).

    Returns:
        A tuple ``(rowCount, infos)`` where ``rowCount`` is the number of
        lines in the file and ``infos`` maps each machine number (a string)
        to the list of records received from it, in file order::

            (rowCount, {"1": [{"barcode": "barcode-value",
                               "time": "yyyy-MM-dd HH:mm:ss"},
                              ...],
                        ...})
    """
    infos = {}
    row_count = 0

    file_name = recv_file_name_pattern.format(date)
    with open(file_name, "r") as recv_file:
        for row in recv_file:
            row_count += 1

            match_info = _RECV_ROW_RE.match(row)
            if match_info is None:
                print("[WARN] not matched recv info:", row)
                continue

            time, barcode, machine_no = match_info.groups()
            infos.setdefault(machine_no, []).append(
                {"barcode": barcode, "time": time})

    return (row_count, infos)


def calculate_data_count(data):
    """Return the total number of items across every list in *data*."""
    return sum(len(a_list) for a_list in data)


def _machine_sort_key(machine_no):
    """Sort key ordering machine numbers numerically, non-numeric keys last."""
    try:
        return (0, int(machine_no), str(machine_no))
    except (TypeError, ValueError):
        return (1, 0, str(machine_no))


def calculate_count_by_machine(distributeInfo, recvInfo):
    """Summarize distributed and received counts for every machine.

    Args:
        distributeInfo: Mapping of machine number to a list of distributed
            barcodes.
        recvInfo: Mapping of machine number to a list of received records.

    Returns:
        A list with one dict per machine that appears in either mapping,
        ordered by machine number (numerically when the key is numeric)::

            {"machineNo": ..., "distCount": ..., "recvCount": ..., "diff": ...}

        where ``diff`` is ``recvCount - distCount``.
    """
    machine_nos = sorted(set(distributeInfo) | set(recvInfo),
                         key=_machine_sort_key)

    result = []
    for machine_no in machine_nos:
        # Use the function's own arguments; a machine missing from one side
        # simply counts as zero there.
        dist_count = len(distributeInfo.get(machine_no, ()))
        recv_count = len(recvInfo.get(machine_no, ()))
        result.append({"machineNo": machine_no,
                       "distCount": dist_count,
                       "recvCount": recv_count,
                       "diff": recv_count - dist_count})

    return result


def get_repeated_recv_barcodes(recvInfo):
    """Split received records into de-duplicated records and repeats.

    Two records of the same machine are duplicates when both their
    ``"barcode"`` and ``"time"`` values are equal. The first occurrence is
    kept; every later occurrence is reported as a repeat.

    Args:
        recvInfo: Mapping of machine number to a list of
            ``{"barcode": ..., "time": ...}`` records.

    Returns:
        A tuple ``(deDuplicationInfo, repeatedInfo)``: the first has the same
        shape as *recvInfo* with duplicates removed, the second is a list of
        ``(machineNo, barcode, time)`` tuples, one per dropped record.
    """
    repeated_info = []
    de_duplication_info = {}

    for machine_no, items in recvInfo.items():
        seen = set()  # (barcode, time) pairs already kept for this machine
        unique_items = []
        for item in items:
            key = (item["barcode"], item["time"])
            if key in seen:
                repeated_info.append((machine_no, item["barcode"], item["time"]))
            else:
                seen.add(key)
                unique_items.append(item)
        de_duplication_info[machine_no] = unique_items

    return (de_duplication_info, repeated_info)


def get_recv_but_not_distributed_barcodes(distributeInfo, recvInfo):
    """Find received barcodes that have no matching distribution.

    Matching is done per machine and one-to-one: each distributed barcode can
    account for at most one received record, so a barcode received more often
    than it was distributed is reported once per surplus record.

    Args:
        distributeInfo: Mapping of machine number to a list of distributed
            barcodes.
        recvInfo: Mapping of machine number to a list of
            ``{"barcode": ..., "time": ...}`` records.

    Returns:
        A dict mapping machine number to the list of unmatched received
        barcodes, in received order. Machines with no unmatched barcode are
        absent. Neither argument is modified.
    """
    result = {}

    for machine_no, items in recvInfo.items():
        # A machine with no distribution at all has nothing left to match.
        remaining = Counter(distributeInfo.get(machine_no, ()))
        for item in items:
            barcode = item["barcode"]
            if remaining[barcode] > 0:
                remaining[barcode] -= 1
            else:
                result.setdefault(machine_no, []).append(barcode)

    return result


def get_distributed_but_not_recv_barcodes(distributeInfo, recvInfo):
    """Find distributed barcodes that were never received.

    Matching is done per machine and one-to-one: each received record can
    account for at most one distributed barcode, so a barcode distributed
    more often than it was received is reported once per surplus entry.

    Args:
        distributeInfo: Mapping of machine number to a list of distributed
            barcodes.
        recvInfo: Mapping of machine number to a list of
            ``{"barcode": ..., "time": ...}`` records.

    Returns:
        A dict mapping machine number to the list of unmatched distributed
        barcodes, in distribution order. Machines with no unmatched barcode
        are absent. Neither argument is modified.
    """
    result = {}

    for machine_no, barcodes in distributeInfo.items():
        # A machine that returned nothing has an empty pool of records.
        remaining = Counter(
            item["barcode"] for item in recvInfo.get(machine_no, ()))
        for barcode in barcodes:
            if remaining[barcode] > 0:
                remaining[barcode] -= 1
            else:
                result.setdefault(machine_no, []).append(barcode)

    return result


def main(argv=None):
    """Run the full report for the date given as the first argument.

    Args:
        argv: Argument vector including the program name; defaults to
            ``sys.argv``. ``argv[1]`` selects the files to analyze and is a
            month-day string such as ``04-01``.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        print("no input, stopped.")
        sys.exit(0)

    date = argv[1]

    # Read both log files.
    disFileRowCount, distributedInfo = read_distributed_info(date)
    recvFileRowCount, recvInfo = read_received_info(date)

    # Total number of lines in each file.
    print("distributed file lines count:", disFileRowCount)
    print("received file lines count:", recvFileRowCount)

    # Total number of distributed and received records.
    print("\ndistributed count:", calculate_data_count(distributedInfo.values()))
    print("received count:", calculate_data_count(recvInfo.values()))

    # Per-machine distributed/received counts and their difference.
    print("\ncount by machine:")
    print("machine | distributed-count | recv-count | recv-count - distributed-count")
    for machineCountInfo in calculate_count_by_machine(distributedInfo, recvInfo):
        print("{machineNo:2} | {distCount:4} | {recvCount:4} | {diff:4}".format(**machineCountInfo))

    # Which received records are repeated, plus the de-duplicated records.
    print("\nrepeated recv barcode:\nmachine | barcode | time")
    deDuplicationRecvInfo, repeatInfo = get_repeated_recv_barcodes(recvInfo)
    for item in repeatInfo:
        print("{:2} | {:10} | {}".format(*item))
    print("\ncount:", len(repeatInfo))

    # Per-machine comparison again, after removing the repeats.
    print("\ncount by machine after de duplication:")
    print("machine | distributed-count | recv-count | recv-count - distributed-count")
    diffCountInfo = {"distCount": 0, "recvCount": 0, "diff": 0}
    for machineCountInfo in calculate_count_by_machine(distributedInfo, deDuplicationRecvInfo):
        diffCountInfo["distCount"] += machineCountInfo["distCount"]
        diffCountInfo["recvCount"] += machineCountInfo["recvCount"]
        diffCountInfo["diff"] += machineCountInfo["diff"]
        print("{machineNo:5} | {distCount:4} | {recvCount:4} | {diff:4}".format(**machineCountInfo))
    print("count | {distCount:4} | {recvCount:4} | {diff:4}".format(**diffCountInfo))

    # Barcodes that were received but never distributed.
    print("\nreceived but not distributed barcodes:")
    recvNonDistributedCount = 0
    for k, v in get_recv_but_not_distributed_barcodes(distributedInfo, deDuplicationRecvInfo).items():
        print(k, ": count:", len(v))
        recvNonDistributedCount += len(v)
        for barcode in v:
            print(" [{}]".format(barcode))
    print("count: ", recvNonDistributedCount)

    # Barcodes that were distributed but never received.
    print("\ndistributed but not received barcodes:")
    distributedNonRecvCount = 0
    for k, v in get_distributed_but_not_recv_barcodes(distributedInfo, deDuplicationRecvInfo).items():
        print(k, ": count:", len(v))
        distributedNonRecvCount += len(v)
        for barcode in v:
            print(" [{}]".format(barcode))
    print("count: ", distributedNonRecvCount)


if __name__ == "__main__":
    main()
--------- THE END ---------