mmdetection 模型转换为 ncnn 后，在 C++ 端补充后处理，得到完整的检测输出
CMakeList 内容:
cmake_minimum_required(VERSION 3.14)
project(demo)

set(CMAKE_CXX_STANDARD 17)

# Single-header stb utilities shipped with the project.
include_directories(3rd_party/stb)

# OpenCV is mandatory; configuration stops here when it is missing.
find_package(OpenCV REQUIRED)
if (OpenCV_FOUND)
    message("${OpenCV_DIR}")
    message(STATUS "The OpenCV lib is found!")
endif ()
include_directories(${OpenCV_INCLUDE_DIRS})

# -w is a compiler flag (suppress all warnings), not a preprocessor
# definition, so it belongs in add_compile_options().
add_compile_options(-w)

set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads)

# Prebuilt ncnn release (headers + static library).
include_directories(3rd_party/ncnn-20220729/include)
link_directories(3rd_party/ncnn-20220729/lib)

add_executable(...)
target_link_libraries(demo ${OpenCV_LIBS} ${CMAKE_THREAD_LIBS_INIT} ncnn)
// 准备一个 后处理头文件 #ifndef _NCNN_POSTPROCESS_H #define _NCNN_POSTPROCESS_H #include "ncnn/net.h" #ifndef FLT_MIN #define FLT_MIN 1.175494351e-38F #endif #ifndef FLT_MAX #define FLT_MAX 3.402823466e+38F #endif #define MIN(a, b) (a<b?a:b) #define MAX(a, b) (a>b?a:b) // 参考 ncnn/example/nanodet.cpp // 参考 ncnn/src/layer/detectionoutput.cpp struct BBoxRect { float score; float xmin; float ymin; float xmax; float ymax; float area; int label; }; static inline int ncnn_softmax_inplace(ncnn::Mat &bottom_blob, const int axis = 1) { // value = exp( value - global max value ) // sum all value // value = value / sum ncnn::Layer *_layer = ncnn::create_layer("Softmax"); ncnn::ParamDict pd; pd.set(0, axis); // axis pd.set(1, 1); _layer->load_param(pd); ncnn::Option opt; opt.num_threads = 1; opt.use_packing_layout = false; _layer->create_pipeline(opt); _layer->forward_inplace(bottom_blob, opt); _layer->destroy_pipeline(opt); delete _layer; return 0; } static inline int ncnn_sigmoid_inplace(ncnn::Mat &bottom_blob) { ncnn::Layer *_layer = ncnn::create_layer("Sigmoid"); ncnn::ParamDict pd; _layer->load_param(pd); ncnn::Option opt; opt.num_threads = 1; opt.use_packing_layout = false; _layer->create_pipeline(opt); _layer->forward_inplace(bottom_blob, opt); _layer->destroy_pipeline(opt); delete _layer; return 0; } static inline int ncnn_permute(const ncnn::Mat &bottom_blob, ncnn::Mat &top_blob) { // [c,h,w] --> [h,w,c] ncnn::ParamDict pd; pd.set(0,3); ncnn::Layer *_layer = ncnn::create_layer("Permute"); _layer->load_param(pd); ncnn::Option opt; opt.num_threads = 1; _layer->create_pipeline(opt); _layer->forward(bottom_blob, top_blob, opt); _layer->destroy_pipeline(opt); delete _layer; return 0; } static inline int ncnn_reshape(const ncnn::Mat &bottom_blob, ncnn::Mat &top_blob, const ncnn::ParamDict &pd) { // [h,w,c] --> [-1, c] ncnn::Layer *_layer = ncnn::create_layer("Reshape"); _layer->load_param(pd); ncnn::Option opt; opt.num_threads = 1; _layer->create_pipeline(opt); 
_layer->forward(bottom_blob, top_blob, opt); _layer->destroy_pipeline(opt); delete _layer; return 0; } // copy from ncnn === begin static inline float intersection_area(const BBoxRect &a, const BBoxRect &b) { if (a.xmin > b.xmax || a.xmax < b.xmin || a.ymin > b.ymax || a.ymax < b.ymin) { // no intersection return 0.f; } float inter_width = std::min(a.xmax, b.xmax) - std::max(a.xmin, b.xmin); float inter_height = std::min(a.ymax, b.ymax) - std::max(a.ymin, b.ymin); return inter_width * inter_height; } static inline void qsort_descent_inplace(std::vector<BBoxRect> &datas, int left, int right) { int i = left; int j = right; float p = datas[(left + right) / 2].score; while (i <= j) { while (datas[i].score > p) i++; while (datas[j].score < p) j--; if (i <= j) { // swap std::swap(datas[i], datas[j]); i++; j--; } } if (left < j) qsort_descent_inplace(datas, left, j); if (i < right) qsort_descent_inplace(datas, i, right); } static inline void nms_sorted_bboxes(const std::vector<BBoxRect> &bboxes, std::vector<int> &picked, float nms_threshold) { picked.clear(); const size_t n = bboxes.size(); for (size_t i = 0; i < n; i++) { const BBoxRect &a = bboxes[i]; int keep = 1; for (int j = 0; j < (int) picked.size(); j++) { const BBoxRect &b = bboxes[picked[j]]; // intersection over union float inter_area = intersection_area(a, b); float union_area = a.area + b.area - inter_area; // float IoU = inter_area / union_area if (inter_area > nms_threshold * union_area) { keep = 0; break; } } if (keep) picked.push_back(i); } } // yolo的模式, 在线生成anchor, 从中心点生成, 每个中心点生成1个 static void generate_proposals(const ncnn::Mat &cls_pred, const ncnn::Mat &dis_pred, int stride, const ncnn::Mat &in_pad, float prob_threshold, std::vector<BBoxRect> &objects) { const int num_grid = cls_pred.h; int num_grid_x; int num_grid_y; if (in_pad.w > in_pad.h) { num_grid_x = in_pad.w / stride; num_grid_y = num_grid / num_grid_x; } else { num_grid_y = in_pad.h / stride; num_grid_x = num_grid / num_grid_y; } const int 
num_class = cls_pred.w; const int reg_max_1 = dis_pred.w / 4; for (int i = 0; i < num_grid_y; i++) { for (int j = 0; j < num_grid_x; j++) { const int idx = i * num_grid_x + j; const float *scores = cls_pred.row(idx); // find label with max score int label = -1; float score = -FLT_MAX; for (int k = 0; k < num_class; k++) { if (scores[k] > score) { label = k; score = scores[k]; } } if (score >= prob_threshold) { ncnn::Mat bbox_pred(reg_max_1, 4, (void *) dis_pred.row(idx)); { ncnn::Layer *softmax = ncnn::create_layer("Softmax"); ncnn::ParamDict pd; pd.set(0, 1); // axis pd.set(1, 1); softmax->load_param(pd); ncnn::Option opt; opt.num_threads = 1; opt.use_packing_layout = false; softmax->create_pipeline(opt); softmax->forward_inplace(bbox_pred, opt); softmax->destroy_pipeline(opt); delete softmax; } float pred_ltrb[4]; for (int k = 0; k < 4; k++) { float dis = 0.f; const float *dis_after_sm = bbox_pred.row(k); for (int l = 0; l < reg_max_1; l++) { dis += l * dis_after_sm[l]; } pred_ltrb[k] = dis * stride; } float pb_cx = (j + 0.5f) * stride; float pb_cy = (i + 0.5f) * stride; BBoxRect _bbox{0}; _bbox.xmin = pb_cx - pred_ltrb[0]; _bbox.ymin = pb_cy - pred_ltrb[1]; _bbox.xmax = pb_cx + pred_ltrb[2]; _bbox.ymax = pb_cy + pred_ltrb[3]; _bbox.label = label; _bbox.score = score; _bbox.area = 0; objects.push_back(_bbox); } } } } // copy from ncnn === end static inline int vector_2_priorbox(const std::vector<std::vector<std::vector<float>>> &src_anchors, std::vector<ncnn::Mat> &dst_priorboxes) { // 转化vector 到 ncnn::mat, 保持原来的纬度 int num_levels = src_anchors.size(); dst_priorboxes.clear(); for (int i = 0; i < num_levels; ++i) { int h = src_anchors[i].size(); ncnn::Mat _boxes; _boxes.create(4, h, sizeof(float)); for (int j = 0; j < h; ++j) { float *data = _boxes.row(j); for (int k = 0; k < 4; ++k) { data[k] = src_anchors[i][j][k]; } } dst_priorboxes.push_back(_boxes); // printf("%d,%d,%d,%d\n", _boxes.c, _boxes.h, _boxes.w, _boxes.d); } return 0; } // 离线anchor模式, 
使用已经生成的anchor数据文件, 进行解码 static void generate_proposals2(const ncnn::Mat &cls_scores, const ncnn::Mat &bboxes_offset, int stride, const ncnn::Mat &prior_boxes, const float prob_threshold, const int max_w, const int max_h, const float scale_w, const float scale_h, std::vector<BBoxRect> &objects) { // prior_boxes.shape: [N,4] // cls_pred.shape: [N, N_CLASS] // dis_preD.shape: [N, 4] objects.clear(); const int num_class = cls_scores.w; const int num_prior = prior_boxes.h; const float *location_ptr = bboxes_offset; const float *priorbox_ptr = prior_boxes; const float mean[4] = {0., 0., 0., 0.}; const float var[4] = {0.1, 0.1, 0.2, 0.2}; for (int i = 0; i < num_prior; i++) { // find label with max score const float *scores = cls_scores.row(i); // find label with max score int label = -1; float score = -FLT_MAX; for (int k = 0; k < num_class; k++) { if (scores[k] > score) { label = k; score = scores[k]; } } if (score < prob_threshold) continue; const float *delta = location_ptr + i * 4; const float *pb = priorbox_ptr + i * 4; float delta_cx = var[0] * delta[0] + mean[0]; float delta_cy = var[1] * delta[1] + mean[1]; float delta_w = var[2] * delta[2] + mean[2]; float delta_h = var[3] * delta[3] + mean[3]; // printf("xxx %.4f, %.4f, %.4f, %.4f, %.4f\n", delta_cx,delta_cy,delta_w,delta_h, 1-score); float pb_cx = (pb[0] + pb[2]) * 0.5f; float pb_cy = (pb[1] + pb[3]) * 0.5f; float pb_w = pb[2] - pb[0]; float pb_h = pb[3] - pb[1]; delta_cx *= pb_w; delta_cy *= pb_h; float gx = pb_cx + delta_cx; float gy = pb_cy + delta_cy; float gw = pb_w * exp(delta_w); float gh = pb_h * exp(delta_h); BBoxRect _bbox{0}; _bbox.xmin = MAX(0, gx - gw * 0.5f) / scale_w; _bbox.ymin = MAX(0, gy - gh * 0.5f) / scale_h; _bbox.xmax = MIN(max_w - 1, gx + gw * 0.5f) / scale_w; _bbox.ymax = MIN(max_h - 1, gy + gh * 0.5f) / scale_h; _bbox.label = label; _bbox.score = score; _bbox.area = 0; objects.push_back(_bbox); } } static inline float sigmoid(float x) { return static_cast<float>(1.f / (1.f + exp(-x))); 
} #endif //_NCNN_POSTPROCESS_H
针对mmdetection的anchor生成脚本如下:
import os
import os.path as osp
import numpy as np
from mmcv import Config
from mmdet.models import build_detector
import math
import argparse
import pickle


def save_cplus_h(multi_level_anchors, savefilepath):
    """Write per-level anchors as a C++ header defining ``ssd_anchor_all_levels``.

    Args:
        multi_level_anchors: sequence with one entry per feature level; each
            entry exposes ``.shape`` and iterates as rows ``(x1, y1, x2, y2)``.
            Layout: ``[[box1, box2, ...], [...], ...]``.
        savefilepath: destination path of the generated header.
    """
    _template = '''
#ifndef _ANCHOR_H
#define _ANCHOR_H

#include "stdlib.h"
#include <vector>

/*
 * 目标检测的anchor框 头文件
 * */
static std::vector<std::vector<std::vector<float>>> ssd_anchor_all_levels = {
//        // stride 8
//        {
//                {-10., 0, -10., 20.}, // x1 y1 x2 y2
//                {-10., 0, -10., 20.},
//                {-10., 0, -10., 20.},
//        },
//
//        // stride 16
//        {
//                {-10., 0, -10., 20.}, // x1 y1 x2 y2
//                {-10., 0, -10., 20.},
//                {-10., 0, -10., 20.},
//        },
//
//        // stride 32
//        {
//                {-10., 0, -10., 20.}, // x1 y1 x2 y2
//                {-10., 0, -10., 20.},
//                {-10., 0, -10., 20.},
//        },
//
//        //...
@here
};
#endif //_ANCHOR_H
'''
    num_levels = len(multi_level_anchors)
    level_blocks = []
    for i, one_level_anchors in enumerate(multi_level_anchors):
        # one_level_anchors: [N, 4]
        print(f'=============level {i} =============')
        print(one_level_anchors.shape)
        rows = ['{' + f'{x1}, {y1}, {x2}, {y2}' + '},\n'
                for x1, y1, x2, y2 in one_level_anchors]
        # No comma after the last level so the initializer list stays tidy.
        closing = '},\n' if i < num_levels - 1 else '}\n'
        level_blocks.append('{\n' + ''.join(rows) + closing)

    content = _template.replace('@here', ''.join(level_blocks))
    with open(savefilepath, 'w', encoding='utf-8') as f:
        f.write(content)


def parse_args():
    """Parse the command line: config path, output file name and input shape."""
    parser = argparse.ArgumentParser(
        description='Export MMDetection anchors for ncnn C++ post-processing')
    parser.add_argument('config', help='test config file path')
    parser.add_argument('--output-file', type=str, default='anchor.npy')
    parser.add_argument(
        '--shape',
        type=int,
        nargs='+',
        default=[3, 224, 320],
        help='input image size (CHW)')
    args = parser.parse_args()
    return args


def main():
    """Build the detector, generate its anchors and dump them in several formats.

    Outputs (all share the stem of ``--output-file``):
        <stem>.bin            float32, normalized anchors followed by target stds
        <stem>.npy            normalized anchors
        <stem>_per_level.pkl  list of per-level anchor arrays (pixel units)
        <stem>_per_level.h    C++ header with the per-level anchors
    """
    args = parse_args()
    CONFIG = args.config
    assert args.output_file is not None
    assert len(args.shape) == 3, '--shape expects exactly three values (C H W)'
    INPUT_C, INPUT_H, INPUT_W = tuple(args.shape)

    # Strip the real extension instead of blindly cutting four characters, so
    # names such as "anchors" or "out.json" produce sensible output paths.
    stem = osp.splitext(args.output_file)[0]

    cfg = Config.fromfile(CONFIG)
    model = build_detector(cfg.model)
    model.eval()

    assert hasattr(model, 'bbox_head')
    strides_tuple_list = model.bbox_head.anchor_generator.strides

    featmap_sizes = []
    c, h, w = INPUT_C, INPUT_H, INPUT_W
    for sh, sw in strides_tuple_list:
        featmap_sizes.append((math.ceil(h / sh), math.ceil(w / sw)))
        # featmap_sizes.append((math.ceil(h / sh + 0.5), math.ceil(w / sw + 0.5)))
        print([sh, sw], featmap_sizes)

    multi_level_anchors = model.bbox_head.anchor_generator.grid_anchors(featmap_sizes, 'cpu')
    # Convert to NumPy once, up front, so every later step works on arrays.
    multi_level_anchors = [level.cpu().numpy() for level in multi_level_anchors]

    total_anchors = np.concatenate(multi_level_anchors, axis=0)
    print(total_anchors.shape)
    print(total_anchors[:10])

    # Normalize x by the input width and y by the input height.
    wh = np.array([[w, h] * 2])
    total_anchors_norm = total_anchors / wh

    # ---------------------------- binary dump ----------------------------
    anchor_num = total_anchors.shape[0]
    total_anchors_bin_file = total_anchors_norm.reshape((1, 1, -1))
    # todo: bbox_coder
    target_stds = cfg.model.bbox_head.bbox_coder.target_stds
    target_stds = np.array(list(target_stds) * anchor_num).reshape((1, 1, -1))

    # [1, 2, anchor_nums * 4]
    total_anchors_bin_file_res = np.concatenate((total_anchors_bin_file, target_stds), axis=1)
    _savefilepath_bin = f'{stem}.bin'
    total_anchors_bin_file_res.astype('float32').tofile(_savefilepath_bin)

    # .npy for python caffe
    _savefilepath_npy = f'{stem}.npy'
    np.save(_savefilepath_npy, total_anchors_norm)
    print(_savefilepath_bin)
    print(_savefilepath_npy)

    # Save the anchors of every level separately:
    # [[box1, box2, ...], [...], ...]
    _savefilepath_pkl_per_levels = f'{stem}_per_level.pkl'
    with open(_savefilepath_pkl_per_levels, 'wb') as f:
        pickle.dump(multi_level_anchors, f)
    print(_savefilepath_pkl_per_levels)

    # Save the C++ header.
    _savefilepath_h_per_levels = f'{stem}_per_level.h'
    save_cplus_h(multi_level_anchors, _savefilepath_h_per_levels)
    print(_savefilepath_h_per_levels)

    print('done.')


if __name__ == '__main__':
    main()
调用案例:
int NCNN_TEST_SIGMOID() { std::vector<ncnn::Mat> priorboxes; int ret = vector_2_priorbox(ssd_anchor_all_levels, priorboxes); assert(ret == 0); int num_levels = priorboxes.size(); std::cout << "num_levels: " << num_levels << std::endl; // 网络参数设置: const int target_sizeW = 224; const int target_sizeH = 224; const int target_Channel = 1; const int NUM_CLASS = 1; // 前景类为0, sigmoid const float confidence_thresh = 0.7f; const float nms_threshold = 0.5f; const float means[4] = {0.f, 0.f, 0.f, 0.f}; const float variances[4] = {0.1f, 0.1f, 0.2f, 0.2f}; const float mean_vals[3] = {128.f, 128.f, 128.f}; const float norm_vals[3] = {1 / 128.f, 1 / 128.f, 1 / 128.f}; const std::string model_root = "../ncnn_models/repvgg"; const std::string param = model_root + "/epoch_635.deploy.pth.sim-opt.param"; const std::string bin = model_root + "/epoch_635.deploy.pth.sim-opt.bin"; ncnn::Net _net; _net.opt.use_vulkan_compute = false; _net.opt.num_threads = 1; _net.opt.lightmode = false; ret = _net.load_param(param.c_str()); assert(ret == 0); ret = _net.load_model(bin.c_str()); assert(ret == 0); std::vector<const char *> inputs = _net.input_names(); std::vector<const char *> ouputs = _net.output_names(); for (int k = 0; k < inputs.size(); ++k) { printf("input name: %s\n", inputs[k]); } // // for (int i = 0; i < _net.layers().size(); ++i) { // std::cout << 'layer: ' << _net.layers()[i]->name << std::endl; // } std::string root = "/media/xx"; std::vector<cv::String> files; cv::glob(root + "/*.png", files, true); assert(files.size() > 0); for (auto &f: files) { std::string fname = getFilename(f); std::cout << "read file: " << fname << std::endl; cv::Mat img = cv::imread(f, 0); assert(!img.empty()); cv::cvtColor(img, img, cv::COLOR_BGR2GRAY); // std::cout << img.channels() << std::endl; // preprocess const float scale_w = target_sizeW * 1.0 / img.cols; const float scale_h = target_sizeH * 1.0 / img.rows; ncnn::Mat in = ncnn::Mat::from_pixels_resize(img.data, ncnn::Mat::PIXEL_GRAY, img.cols, 
img.rows, target_sizeW, target_sizeH); in.substract_mean_normalize(mean_vals, norm_vals); ncnn::Extractor ex = _net.create_extractor(); ex.set_num_threads(4); ex.input("input", in); auto t1 = GET_CURRENT_MISEC(); // forward std::vector<BBoxRect> proposals; std::vector<std::vector<std::string>> output_names = { {"65", "66"}, // stride 16 {"67", "68"} // stride 32 }; assert(output_names.size() == priorboxes.size()); for (int j = 0; j < priorboxes.size(); ++j) { printf("=======================level:%d========================\n", j); int level_idx = j; ncnn::Mat cls_pred; // [N*M, H,W] ncnn::Mat dis_pred; // [N*4, H,W] ret = ex.extract(output_names[level_idx][0].c_str(), cls_pred); assert(0 == ret); ret = ex.extract(output_names[level_idx][1].c_str(), dis_pred); assert(0 == ret); printf("cls_pred.shape: %d,%d,%d\n", cls_pred.c, cls_pred.h, cls_pred.w); printf("dis_pred.shape: %d,%d,%d\n", dis_pred.c, dis_pred.h, dis_pred.w); assert(cls_pred.dims == dis_pred.dims); assert(cls_pred.dims == 3); // permute --> reshape --> scores-sigmoid --> filter --> bbox_decode --> nms ncnn::Mat cls_pred_permute, dis_pred_permute; ret = ncnn_permute(cls_pred, cls_pred_permute); assert(0 == ret); ret = ncnn_permute(dis_pred, dis_pred_permute); assert(0 == ret); printf("cls_pred_permute.dims: %d\n", cls_pred_permute.dims); printf("cls_pred_permute.shape: %d,%d,%d\n", cls_pred_permute.c, cls_pred_permute.h, cls_pred_permute.w); printf("dis_pred_permute.dims: %d\n", dis_pred_permute.dims); printf("dis_pred_permute.shape: %d,%d,%d\n", dis_pred_permute.c, dis_pred_permute.h, dis_pred_permute.w); // [h,w,c] --> [-1, N_CLASS] // [h,w,c] --> [-1, 4] int _h1 = cls_pred_permute.w * cls_pred_permute.c * cls_pred_permute.h / NUM_CLASS; ncnn::Mat cls_pred_reshape = cls_pred_permute.reshape(NUM_CLASS, _h1); int _h2 = dis_pred_permute.w * dis_pred_permute.c * dis_pred_permute.h / 4; ncnn::Mat dis_pred_reshape = dis_pred_permute.reshape(4, _h2); printf("cls_pred_reshape.dims: %d\n", 
cls_pred_reshape.dims); printf("cls_pred_reshape.shape: %d,%d,%d\n", cls_pred_reshape.c, cls_pred_reshape.h, cls_pred_reshape.w); printf("dis_pred_reshape.dims: %d\n", dis_pred_reshape.dims); printf("dis_pred_reshape.shape: %d,%d,%d\n", dis_pred_reshape.c, dis_pred_reshape.h, dis_pred_reshape.w); //softmax or sigmoid assert(cls_pred_reshape.dims == 2); assert(dis_pred_reshape.dims == 2); ret = ncnn_sigmoid_inplace(cls_pred_reshape);
//ret = ncnn_softmax_inplace(cls_pred_reshape, 1); assert(0 == ret); std::vector<BBoxRect> objects_stride_; assert(priorboxes[level_idx].h == cls_pred_reshape.h); generate_proposals2(cls_pred_reshape, dis_pred_reshape, level_idx, priorboxes[level_idx], confidence_thresh, target_sizeW, target_sizeH, scale_w, scale_h, objects_stride_); proposals.insert(proposals.end(), objects_stride_.begin(), objects_stride_.end()); } auto span = GET_CURRENT_MISEC() - t1; printf("cnn forward() elapsed: %.5f ms\n", span); if (proposals.empty()) { printf("warning: no detection.\n"); continue; } in.release(); ex.clear(); // postprocess // sort all proposals by score from highest to lowest qsort_descent_inplace(proposals, 0, proposals.size() - 1); // apply nms with nms_threshold std::vector<int> picked; nms_sorted_bboxes(proposals, picked, nms_threshold); int count = picked.size(); std::vector<BBoxRect> objects; objects.resize(count); for (int i = 0; i < count; i++) { objects[i] = proposals[picked[i]]; } // show bboxes if (img.channels() == 1) { cv::cvtColor(img, img, cv::COLOR_GRAY2BGR); } for (int i = 0; i < objects.size(); ++i) { float x0 = objects[i].xmin; float y0 = objects[i].ymin; float x1 = objects[i].xmax; float y1 = objects[i].ymax; printf("bbox %d(x1,y1,x2,y2,label,score): [%.2f,%.2f,%.2f,%.2f, %d, %.2f]\n", i, x0, y0, x1, y1, objects[i].label, objects[i].score); cv::rectangle(img, cv::Point2f(x0, y0), cv::Point2f(x1, y1), cv::Scalar(0, 0, 255), 2); char tmp[200]; sprintf(tmp, "%d(%.2f)", objects[i].label, objects[i].score); cv::putText(img, tmp, cv::Point2f(x0, y0), cv::FONT_HERSHEY_COMPLEX, 0.5, cv::Scalar(0, 0, 255)); } cv::imshow("1", img); cv::waitKey(0); objects.clear(); img.release(); } //release _net.clear(); priorboxes.clear(); return 0; }