# -*- coding: utf-8 -*-
# maskrcnn_crop
# ----------------------------
#! Copyright(C) 2022
# All rights reserved.
# File name: xxx.py
# Summary: xxx
# Current version: 1.0
# Author: Liu Enfu (刘恩甫)
# Completion date: 2022-x-x
# -----------------------------
"""Run a frozen Mask R-CNN graph on images and crop the detected truck.

The script letterboxes each image to ``image_shape``, runs the frozen
TensorFlow graph, maps boxes/masks back to the (down-scaled) source image,
merges all ``truck`` masks, fits a quadrilateral to the largest truck contour
and shows the perspective-warped crop.
"""
import collections
import math
import os
import shutil
from time import time

import cv2
import numpy as np
import PIL.ImageColor as ImageColor
import PIL.ImageDraw as ImageDraw
import PIL.ImageFont as ImageFont
import tensorflow as tf
from PIL import Image
from scipy.spatial import distance as dist

# Network input size as (height, width, channels).
image_shape = (600, 600, 3)

# Mapping from class id to category description.
category_index = {
    1: {'id': 1, 'name': 'truck'},
    2: {'id': 2, 'name': 'crane'},
    3: {'id': 3, 'name': 'claw'},
}


def load_pb_and_get_input_output_node(pb_path):
    """Load a frozen graph and look up its input and output tensors.

    :param pb_path: path of the frozen ``.pb`` graph file.
    :return: ``(image_tensor, tensor_dict, detection_graph)`` where
        ``tensor_dict`` maps each output key that exists in the graph to its
        tensor.
    """
    # Load the model.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(pb_path, 'rb') as fid:
            od_graph_def.ParseFromString(fid.read())
        tf.import_graph_def(od_graph_def, name='')

    # Input image node. The graph object is queried directly so the lookup
    # does not depend on which graph happens to be the default one.
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')

    # Output nodes: keep only the ones that are present in this graph.
    all_tensor_names = {
        output.name
        for op in detection_graph.get_operations()
        for output in op.outputs
    }
    tensor_dict = {}
    for key in ['num_detections', 'detection_boxes', 'detection_scores',
                'detection_classes', 'detection_masks_reframed']:
        tensor_name = key + ':0'
        if tensor_name in all_tensor_names:
            tensor_dict[key] = detection_graph.get_tensor_by_name(tensor_name)
    return image_tensor, tensor_dict, detection_graph


def resize_img(img, dst_img_size):
    """Resize ``img`` keeping its aspect ratio and zero-pad it to the target.

    :param img: image array indexed as (height, width, channels).
    :param dst_img_size: target size; index 0 is height, index 1 is width.
    :return: ``(padded_image, [before_y, after_y, before_x, after_x, scale])``
        where the list records the padding added on each side and the scale
        factor that was applied.
    """
    height_scale = dst_img_size[0] / img.shape[0]
    width_scale = dst_img_size[1] / img.shape[1]
    scale = min(height_scale, width_scale)

    resize_height = int(round(scale * img.shape[0]))
    resize_width = int(round(scale * img.shape[1]))
    resized_img = cv2.resize(img, (resize_width, resize_height))

    # Split the remaining space so the image is centred.
    before_y = int((dst_img_size[0] - resize_height) / 2)
    after_y = dst_img_size[0] - resize_height - before_y
    before_x = int((dst_img_size[1] - resize_width) / 2)
    after_x = dst_img_size[1] - resize_width - before_x

    pad_width = ((before_y, after_y), (before_x, after_x), (0, 0))
    padded = np.pad(resized_img, pad_width, 'constant', constant_values=0)
    return padded, [before_y, after_y, before_x, after_x, scale]


def load_image_into_numpy_array(image):
    """Convert a 3-channel PIL image into a ``uint8`` (height, width, 3) array."""
    (im_width, im_height) = image.size
    return np.array(image.getdata()).reshape(
        (im_height, im_width, 3)).astype(np.uint8)


STANDARD_COLORS = [
    'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque',
    'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite',
    'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan',
    'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange',
    'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet',
    'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite',
    'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod',
    'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki',
    'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue',
    'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey',
    'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue',
    'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime',
    'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid',
    'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen',
    'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin',
    'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed',
    'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed',
    'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple',
    'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown',
    'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue',
    'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow',
    'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White',
    'WhiteSmoke', 'Yellow', 'YellowGreen'
]


def visualize_boxes_and_labels_on_image_array(
        image, boxes, classes, scores, category_index, instance_masks=None,
        groundtruth_box_visualization_color='black',
        use_normalized_coordinates=False, max_boxes_to_draw=20,
        min_score_thresh=.6, agnostic_mode=False, line_thickness=4,
        skip_scores=False, skip_labels=False):
    """Collect the detections that pass the score threshold.

    Despite its name this function currently draws nothing: the drawing calls
    are disabled and a list of results is returned instead. ``image``,
    ``use_normalized_coordinates`` and ``line_thickness`` are kept for
    interface compatibility and are not used.

    :param image: image the detections belong to (unused).
    :param boxes: array of boxes, one row ``(ymin, xmin, ymax, xmax)`` each.
    :param classes: class id per box.
    :param scores: score per box, or ``None`` to accept every box.
    :param category_index: mapping ``class id -> {'id': ..., 'name': ...}``.
    :param instance_masks: optional mask per box.
    :param groundtruth_box_visualization_color: colour used when ``scores``
        is ``None``.
    :param use_normalized_coordinates: unused.
    :param max_boxes_to_draw: at most this many leading boxes are examined.
    :param min_score_thresh: a box is kept when its score is strictly greater.
    :param agnostic_mode: when true, class names are omitted from the label.
    :param line_thickness: unused.
    :param skip_scores: when true, the score is omitted from the label.
    :param skip_labels: when true, the class name is omitted from the label.
    :return: list of ``[cls, (xmin, ymin, xmax, ymax), mask]``. ``cls`` is the
        label text before the first ``':'`` (empty when the box has no label)
        and ``mask`` is the instance mask multiplied by 255 (``None`` when no
        masks were supplied).
    """
    box_to_display_str_map = collections.defaultdict(list)  # label strings per box
    box_to_color_map = collections.defaultdict(str)  # colour per box
    box_to_instance_masks_map = {}  # mask per box

    # Examine at most max_boxes_to_draw boxes.
    for i in range(min(max_boxes_to_draw, boxes.shape[0])):
        if scores is not None and not scores[i] > min_score_thresh:
            continue
        box = tuple(boxes[i].tolist())
        if instance_masks is not None:
            box_to_instance_masks_map[box] = instance_masks[i]
        if scores is None:
            box_to_color_map[box] = groundtruth_box_visualization_color
            continue

        display_str = ''
        if not skip_labels and not agnostic_mode:
            if classes[i] in category_index:
                class_name = category_index[classes[i]]['name']
            else:
                class_name = 'N/A'
            display_str = str(class_name)
        if not skip_scores:
            if not display_str:
                display_str = '{}%'.format(int(100 * scores[i]))
            else:
                display_str = '{}: {}%'.format(display_str, int(100 * scores[i]))
        box_to_display_str_map[box].append(display_str)

        if agnostic_mode:
            box_to_color_map[box] = 'DarkOrange'
        else:
            box_to_color_map[box] = STANDARD_COLORS[classes[i] % len(STANDARD_COLORS)]

    # NOTE: drawing is intentionally disabled. draw_mask_on_image_array and
    # draw_bounding_box_on_image_array can be called here (with the colour
    # from box_to_color_map) to render the results onto `image`.
    res_list = []
    for box in box_to_color_map:
        ymin, xmin, ymax, xmax = box
        int_box = int(xmin), int(ymin), int(xmax), int(ymax)

        # .get() avoids inserting into the defaultdict and avoids crashing
        # when scores/instance_masks were not supplied.
        labels = box_to_display_str_map.get(box)
        cls = labels[0].split(':')[0] if labels else ''
        raw_mask = box_to_instance_masks_map.get(box)
        mask = raw_mask * 255 if raw_mask is not None else None
        res_list.append([cls, int_box, mask])
    return res_list


def draw_mask_on_image_array(image, mask, color='red', alpha=0.4):
    """Blend a binary mask onto ``image`` in place.

    :param image: ``uint8`` image array; modified in place.
    :param mask: ``uint8`` array with the image's height and width whose
        values are all 0 or 1.
    :param color: colour name understood by ``PIL.ImageColor.getrgb``.
    :param alpha: opacity of the mask colour.
    :raises ValueError: if the dtypes, mask values or shapes are invalid.
    """
    if image.dtype != np.uint8:
        raise ValueError('`image` not of type np.uint8')
    if mask.dtype != np.uint8:
        raise ValueError('`mask` not of type np.uint8')
    if np.any(np.logical_and(mask != 1, mask != 0)):
        raise ValueError('`mask` elements should be in [0, 1]')
    if image.shape[:2] != mask.shape:
        raise ValueError('The image has spatial dimensions %s but the mask has '
                         'dimensions %s' % (image.shape[:2], mask.shape))
    rgb = ImageColor.getrgb(color)
    pil_image = Image.fromarray(image)

    solid_color = (np.expand_dims(np.ones_like(mask), axis=2)
                   * np.reshape(list(rgb), [1, 1, 3]))
    pil_solid_color = Image.fromarray(np.uint8(solid_color)).convert('RGBA')
    pil_mask = Image.fromarray(np.uint8(255.0 * alpha * mask)).convert('L')
    # Composite the solid colour over the image using the mask as alpha.
    pil_image = Image.composite(pil_solid_color, pil_image, pil_mask)
    np.copyto(image, np.array(pil_image.convert('RGB')))


def draw_bounding_box_on_image_array(image, ymin, xmin, ymax, xmax, color='red',
                                     thickness=4, display_str_list=(),
                                     use_normalized_coordinates=True):
    """Adds a bounding box to an image (numpy array).

    Bounding box coordinates can be specified in either absolute (pixel) or
    normalized coordinates by setting the use_normalized_coordinates argument.

    Args:
      image: a numpy array with shape [height, width, 3]; modified in place.
      ymin: ymin of bounding box.
      xmin: xmin of bounding box.
      ymax: ymax of bounding box.
      xmax: xmax of bounding box.
      color: color to draw bounding box. Default is red.
      thickness: line thickness. Default value is 4.
      display_str_list: list of strings to display in box
        (each to be shown on its own line).
      use_normalized_coordinates: If True (default), treat coordinates
        ymin, xmin, ymax, xmax as relative to the image. Otherwise treat
        coordinates as absolute.
    """
    image_pil = Image.fromarray(np.uint8(image)).convert('RGB')
    draw_bounding_box_on_image(image_pil, ymin, xmin, ymax, xmax, color,
                               thickness, display_str_list,
                               use_normalized_coordinates)
    np.copyto(image, np.array(image_pil))


def _text_size(font, text):
    """Return ``(width, height)`` of ``text`` for ``font`` on any Pillow version."""
    # font.getsize() was removed in Pillow 10; getbbox() is its replacement.
    if hasattr(font, 'getbbox'):
        bbox = font.getbbox(text)
        return bbox[2], bbox[3]
    return font.getsize(text)


def draw_bounding_box_on_image(image, ymin, xmin, ymax, xmax, color='red',
                               thickness=4, display_str_list=(),
                               use_normalized_coordinates=True):
    """Draw a bounding box and its label strings on a PIL image.

    :param image: PIL image; drawn on in place.
    :param ymin: top of the box.
    :param xmin: left of the box.
    :param ymax: bottom of the box.
    :param xmax: right of the box.
    :param color: colour of the box outline and the label background.
    :param thickness: outline width.
    :param display_str_list: label strings, each shown on its own line.
    :param use_normalized_coordinates: when true the coordinates are
        multiplied by the image size, otherwise they are used as pixels.
    """
    draw = ImageDraw.Draw(image)
    im_width, im_height = image.size
    if use_normalized_coordinates:
        (left, right, top, bottom) = (xmin * im_width, xmax * im_width,
                                      ymin * im_height, ymax * im_height)
    else:
        (left, right, top, bottom) = (xmin, xmax, ymin, ymax)

    # Draw the bounding box.
    draw.line([(left, top), (left, bottom), (right, bottom), (right, top),
               (left, top)], width=thickness, fill=color)
    try:
        font = ImageFont.truetype('arial.ttf', 24)
    except IOError:
        font = ImageFont.load_default()

    # If the total height of the display strings added to the top of the
    # bounding box exceeds the top of the image, stack the strings below the
    # bounding box instead of above.
    display_str_heights = [_text_size(font, ds)[1] for ds in display_str_list]
    # Each display_str has a top and bottom margin of 0.05x.
    total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights)

    # Bottom edge of the lowest label.
    if top > total_display_str_height:
        text_bottom = top
    else:
        text_bottom = bottom + total_display_str_height

    # Reverse list and print from bottom to top.
    for display_str in display_str_list[::-1]:
        text_width, text_height = _text_size(font, display_str)
        margin = np.ceil(0.05 * text_height)
        # Label background: [(left, top), (right, bottom)].
        draw.rectangle([(left, text_bottom - text_height - 2 * margin),
                        (left + text_width, text_bottom)], fill=color)
        draw.text((left + margin, text_bottom - text_height - margin),
                  display_str, fill='black', font=font)
        # Move up by the full label height (text plus both margins) so that
        # consecutive labels do not overlap.
        text_bottom -= text_height + 2 * margin


def Perspective_transform(image, pts):
    """Warp the quadrilateral ``pts`` of ``image`` to an upright rectangle.

    :param image: source image.
    :param pts: four points ordered top-left, top-right, bottom-right,
        bottom-left; singleton dimensions are squeezed away.
    :return: the warped image, sized by the longer of each pair of opposite
        edges.
    """
    pts = pts.squeeze().astype(np.float32)
    (tl, tr, br, bl) = pts

    widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
    widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
    maxWidth = max(int(widthA), int(widthB))

    heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
    heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
    maxHeight = max(int(heightA), int(heightB))

    # in the top-left, top-right, bottom-right, and bottom-left order
    dst = np.array([[0, 0],
                    [maxWidth - 1, 0],
                    [maxWidth - 1, maxHeight - 1],
                    [0, maxHeight - 1]], dtype="float32")

    M = cv2.getPerspectiveTransform(pts, dst)
    warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))
    return warped


def post_process(output_dict, resize_info=None, src_shape=None):
    """Convert raw network outputs back to source-image coordinates.

    ``output_dict`` is updated in place (batch dimension removed, types
    converted); the ``detection_boxes`` array it holds is the same array that
    is returned, so it ends up in pixel coordinates as well.

    :param output_dict: result of ``sess.run`` holding ``num_detections``,
        ``detection_classes``, ``detection_boxes``, ``detection_scores`` and
        ``detection_masks_reframed``.
    :param resize_info: ``[before_y, after_y, before_x, after_x, scale]`` as
        returned by ``resize_img``. When omitted, the module-level
        ``resize_info_`` is used (legacy behaviour).
    :param src_shape: shape of the image that was passed to ``resize_img``
        (at least ``(height, width)``). When omitted, the shape of the
        module-level ``image_np`` is used (legacy behaviour).
    :return: ``(detection_boxes, detection_classes, detection_scores,
        new_detection_masks)`` with boxes in source-image pixels and masks as
        ``uint8`` arrays of the source-image size.
    """
    if resize_info is None:
        resize_info = resize_info_  # legacy fallback: global set by the script
    if src_shape is None:
        src_shape = image_np.shape  # legacy fallback: global set by the script
    pad_top, pad_bottom, pad_left, pad_right, scale = resize_info
    src_height, src_width = src_shape[0], src_shape[1]

    # Drop the batch dimension and convert types.
    output_dict['num_detections'] = int(output_dict['num_detections'][0])
    output_dict['detection_classes'] = output_dict['detection_classes'][0].astype(np.uint8)
    output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
    output_dict['detection_scores'] = output_dict['detection_scores'][0]
    output_dict['detection_masks'] = output_dict['detection_masks_reframed'][0]

    detection_boxes = output_dict['detection_boxes']  # normalized coordinates
    detection_classes = output_dict['detection_classes']
    detection_scores = output_dict['detection_scores']
    detection_masks = output_dict.get('detection_masks')

    # Boxes: un-normalize to network-input pixels, remove the padding offset,
    # then undo the resize scale.
    detection_boxes[:, [0, 2]] *= image_shape[0]  # ymin, ymax
    detection_boxes[:, [1, 3]] *= image_shape[1]  # xmin, xmax
    detection_boxes[:, [0, 2]] -= pad_top
    detection_boxes[:, [1, 3]] -= pad_left
    detection_boxes /= scale

    # Masks: cut away the padded border, then resize to the source image.
    new_detection_masks = np.zeros(
        (detection_masks.shape[0], src_height, src_width), dtype=np.uint8)
    for i, full_mask in enumerate(detection_masks):
        new_mask = full_mask[pad_top:(image_shape[0] - pad_bottom),
                             pad_left:(image_shape[1] - pad_right)]
        new_detection_masks[i] = cv2.resize(new_mask, (src_width, src_height))

    return detection_boxes, detection_classes, detection_scores, new_detection_masks


def get_truck_mask(res_list, image_np):
    """Merge the masks of all detections labelled ``truck``.

    :param res_list: list of ``[cls, box, mask]`` entries.
    :param image_np: 3-channel image that defines the mask size and dtype.
    :return: ``(truck_mask, truck_list)`` where ``truck_mask`` is a 3-channel
        image holding the union of the truck masks and ``truck_list`` holds
        the matching entries of ``res_list``.
    """
    truck_list = [r for r in res_list if r[0] == 'truck']

    truck_mask = cv2.cvtColor(np.zeros_like(image_np), cv2.COLOR_BGR2GRAY)
    # Merge the truck masks.
    for entry in truck_list:
        if entry[2] is not None:
            truck_mask = cv2.bitwise_or(truck_mask, entry[2])
    truck_mask = cv2.cvtColor(truck_mask, cv2.COLOR_GRAY2BGR)
    return truck_mask, truck_list


def cos_dist(a, b):
    """Return the cosine of the angle between vectors ``a`` and ``b``.

    :return: the cosine, or ``None`` when the lengths differ or either vector
        has zero norm.
    """
    if len(a) != len(b):
        return None
    part_up = 0.0
    a_sq = 0.0
    b_sq = 0.0
    for a1, b1 in zip(a, b):
        part_up += a1 * b1
        a_sq += a1 ** 2
        b_sq += b1 ** 2
    part_down = math.sqrt(a_sq * b_sq)
    if part_down == 0.0:
        return None
    return part_up / part_down


def _angle_between(v0, v1):
    """Return the angle between two vectors; raise ValueError if undefined."""
    cosine = cos_dist(v0, v1)
    if cosine is None:
        raise ValueError('cannot order points: degenerate (zero-length) edge')
    # Rounding can push the cosine marginally outside [-1, 1].
    return np.arccos(np.clip(cosine, -1.0, 1.0))


def clockwise(pts):
    """Order corner points as top-left, top-right, bottom-right, bottom-left.

    :param pts: array-like of at least four ``(x, y)`` points. Only the two
        left-most points and the next two points by x are used.
    :return: ``float32`` array of shape ``(4, 2)``.
    :raises ValueError: if fewer than four points are given or the points are
        degenerate.
    """
    # Callers pass both arrays and plain lists of points.
    pts = np.asarray(pts)
    if pts.ndim != 2 or pts.shape[0] < 4:
        raise ValueError('clockwise() needs at least four (x, y) points')

    # sort the points based on their x-coordinates
    xSorted = pts[np.argsort(pts[:, 0]), :]

    # grab the left-most and right-most points from the sorted
    # x-coordinate points
    leftMost = xSorted[:2, :]
    rightMost = xSorted[2:, :]

    # now, sort the left-most coordinates according to their
    # y-coordinates so we can grab the top-left and bottom-left
    # points, respectively
    leftMost = leftMost[np.argsort(leftMost[:, 1]), :]
    (tl, bl) = leftMost

    # now that we have the top-left and bottom-left coordinate, use it as a
    # base vector to calculate the angles between the other two vectors
    vector_0 = np.array(bl - tl)
    vector_1 = np.array(rightMost[0] - tl)
    vector_2 = np.array(rightMost[1] - tl)

    angle = [_angle_between(vector_0, vector_1),
             _angle_between(vector_0, vector_2)]
    # The smaller angle to the tl->bl edge belongs to the bottom-right point.
    (br, tr) = rightMost[np.argsort(angle), :]

    # return the coordinates in top-left, top-right, bottom-right, and
    # bottom-left order
    return np.array([tl, tr, br, bl], dtype="float32")


def getDist_P2P(Point0, PointA):
    """Return the Euclidean distance between two 2-D points."""
    distance = math.pow((Point0[0] - PointA[0]), 2) + math.pow((Point0[1] - PointA[1]), 2)
    distance = math.sqrt(distance)
    return distance


def get_rotated_rect(approx, truck_mask_copy):
    """Compute the minimum-area rectangle of ``approx`` and draw it.

    :param approx: polygon points; singleton dimensions are squeezed away.
    :param truck_mask_copy: image the rectangle outline is drawn on (green).
    :return: ``(rotated_rect, approx)``: the rectangle corners and four of the
        polygon points, both ordered by ``clockwise``.
    """
    approx = approx.squeeze()
    min_area_rect = cv2.minAreaRect(approx)
    # np.int0 no longer exists in NumPy 2; np.intp is the type it aliased.
    rotated_rect = cv2.boxPoints(min_area_rect).astype(np.intp)
    rotated_rect = clockwise(rotated_rect)
    approx = np.array(clockwise(approx)).astype(np.float32)

    # Show the minimum-area rectangle.
    print('rotated_rect:', rotated_rect)
    corners = [tuple(int(v) for v in pt) for pt in rotated_rect]
    for start, end in zip(corners, corners[1:] + corners[:1]):
        cv2.line(truck_mask_copy, start, end, (0, 255, 0))

    return rotated_rect, approx


def search_approx(epi_thres_list, cnt, truck_mask_copy):
    """Find four corner points describing the contour ``cnt``.

    The contour is approximated once per threshold. The first approximation
    with exactly four vertices wins. If none has four vertices, each corner of
    the minimum-area rectangle of the first approximation is replaced by the
    nearest vertex of that approximation.

    :param epi_thres_list: approximation tolerances as fractions of the
        contour perimeter; must not be empty.
    :param cnt: contour to approximate.
    :param truck_mask_copy: image used for debug drawing.
    :return: ``float32`` corner array, shape ``(4, 1, 2)`` for a direct fit and
        ``(4, 2)`` for the fallback.
    :raises ValueError: if ``epi_thres_list`` is empty.
    """
    fitting_record = []
    for epi_thres in epi_thres_list:
        # Polygon approximation.
        epsilon = epi_thres * cv2.arcLength(cnt, True)
        fitting_points = cv2.approxPolyDP(cnt, epsilon, True)
        fitting_record.append((epi_thres, fitting_points))
    if not fitting_record:
        raise ValueError('epi_thres_list must contain at least one threshold')

    for record in fitting_record:
        if len(record[1]) == 4:
            approx = np.array(clockwise(record[1].squeeze())).astype(np.float32)
            approx = approx.reshape((-1, 1, 2))
            print("approx 1", approx)
            break
    else:
        # No approximation had four vertices: snap the corners of the
        # minimum-area rectangle to the nearest polygon vertices. All vertices
        # are candidates, not only the four that clockwise() would keep.
        finest_fit = fitting_record[0][1]
        candidates = finest_fit.reshape(-1, 2).astype(np.float32)
        rotated_rect, _ = get_rotated_rect(finest_fit, truck_mask_copy)

        min_points = []
        for rr in rotated_rect:
            nearest = min(candidates, key=lambda bb: getDist_P2P(rr, bb))
            min_points.append(nearest)

        print('min_points', min_points)
        approx = np.array(clockwise(np.array(min_points))).astype(np.float32)
        print("approx 2", approx)
    return approx


def get_warpPerspective(truck_mask, src_img,
                        epi_thres_list=(0.01, 0.02, 0.03, 0.04, 0.05, 0.075, 0.1)):
    """Find the four truck corners and return the perspective-warped image.

    The fitted outline is also shown in the ``truck_mask_copy`` window.

    :param truck_mask: 3-channel truck mask.
    :param src_img: image to warp; must share the mask's coordinate system.
    :param epi_thres_list: outline-fitting tolerances; smaller values fit the
        contour more closely.
    :return: the warped crop, rotated so that it is not taller than wide, or
        an all-zero single-channel image when the mask has no contour.
    """
    # Binarize.
    gray = cv2.cvtColor(truck_mask, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    # OpenCV 3 returns (image, contours, hierarchy), OpenCV 4 returns
    # (contours, hierarchy); the last two items are the same in both.
    contours, _ = cv2.findContours(binary, cv2.RETR_TREE,
                                   cv2.CHAIN_APPROX_SIMPLE)[-2:]

    if len(contours) == 0:  # no truck
        return np.zeros_like(gray)

    truck_mask_copy = truck_mask.copy()
    # Use the truck contour with the largest area as the main truck.
    cnt = contours[np.argmax([cv2.contourArea(c) for c in contours])]

    # Find the four fitted corner points.
    approx = search_approx(epi_thres_list, cnt, truck_mask_copy)
    approx = np.array(clockwise(approx.squeeze()))

    # cv2.polylines requires 32-bit integer points.
    cv2.polylines(truck_mask_copy, [approx.astype(np.int32)], True, (0, 0, 255), 2)
    for p in approx.squeeze():
        cv2.circle(truck_mask_copy, (int(p[0]), int(p[1])), 5, (255, 255, 255), 5)
    cv2.imshow('truck_mask_copy', truck_mask_copy)

    # Perspective transform.
    warpPerspective = Perspective_transform(src_img, approx)
    if warpPerspective.shape[0] > warpPerspective.shape[1]:
        warpPerspective = np.rot90(warpPerspective)

    return warpPerspective


if __name__ == '__main__':
    # Paths.
    PATH_TO_FROZEN_GRAPH = 'frozen_model/frozen_inference_graph.pb'
    PATH_TO_TEST_IMAGES_DIR = 'test_imgs'
    PATH_TO_TEST_RESULT_DIR = 'test_results'
    tmp_save_path = r'D:\liu_projects\package_maskrcnn_20220628\maskrcnn_inference\tmp_warpPerspective\\'

    # Parameters.
    resize_ratio = .1  # factor of the first down-scaling
    multiplying = int(1 / resize_ratio)  # factor to undo the first down-scaling

    # Start from an empty result directory.
    if os.path.exists(PATH_TO_TEST_RESULT_DIR):
        shutil.rmtree(PATH_TO_TEST_RESULT_DIR)
    os.makedirs(PATH_TO_TEST_RESULT_DIR)

    # Load the frozen model and get its input/output nodes.
    image_tensor, tensor_dict, detection_graph = load_pb_and_get_input_output_node(PATH_TO_FROZEN_GRAPH)

    with detection_graph.as_default():
        with tf.Session() as sess:
            for image_p in os.listdir(PATH_TO_TEST_IMAGES_DIR):
                print(image_p)
                t1 = time()
                image_src = cv2.imread(PATH_TO_TEST_IMAGES_DIR + os.sep + image_p)
                if image_src is None:
                    # cv2.imread returns None for files it cannot decode.
                    print("skip unreadable image:", image_p)
                    continue

                # First resize: shrink the image to speed up post-processing.
                image_np = cv2.resize(image_src, dsize=None, fx=resize_ratio, fy=resize_ratio)

                # Second resize: letterbox to the network input, (600,600,3).
                image_np_resize, resize_info_ = resize_img(image_np, image_shape)

                # Inference.
                output_dict = sess.run(tensor_dict, feed_dict={image_tensor: np.expand_dims(image_np_resize, 0)})

                # Post-processing.
                detection_boxes, detection_classes, detection_scores, new_detection_masks = post_process(
                    output_dict, resize_info_, image_np.shape)

                # Results, each entry ordered as cls, box, mask.
                res_list = visualize_boxes_and_labels_on_image_array(
                    image_np, detection_boxes, detection_classes,
                    detection_scores, category_index,
                    instance_masks=new_detection_masks,
                    use_normalized_coordinates=False, line_thickness=2)
                cv2.imshow('image_np', image_np)

                truck_mask, truck_list = get_truck_mask(res_list, image_np)

                if len(truck_list) != 0:
                    # Find the four corners and warp the image.
                    try:
                        warpPerspective = get_warpPerspective(truck_mask, image_np)

                        # Alternative: scale the mask back up and warp the
                        # full-resolution image instead.
                        # truck_mask = cv2.resize(truck_mask, dsize=None, fx=multiplying, fy=multiplying)
                        # warpPerspective=get_warpPerspective(truck_mask, image_src)
                        # warpPerspective=cv2.resize(warpPerspective,dsize=(2100,600),fx=None,fy=None)

                        # cv2.imwrite(tmp_save_path+image_p,warpPerspective)
                        cv2.imshow('warpPerspective', warpPerspective)
                    except Exception as exc:
                        # Best effort: show empty windows, but report what
                        # actually went wrong instead of hiding it.
                        print("图中没有卡车", repr(exc))
                        cv2.imshow('truck_mask_copy', np.zeros_like(image_np))
                        cv2.imshow('warpPerspective', np.zeros_like(image_np))
                else:
                    print("图中没有卡车")
                    cv2.imshow('truck_mask_copy', np.zeros_like(image_np))
                    cv2.imshow('warpPerspective', np.zeros_like(image_np))

                cv2.waitKey(0)
                print("cost time:", time() - t1)
                # break