// C++ TensorRT engine inference example (PeopleNet)

//g++ inferPeopleNet.cpp `pkg-config --cflags --libs opencv4` -I /usr/local/cuda-10.2/include -I /usr/include/aarch64-linux-gnu/ -L /usr/lib/aarch64-linux-gnu -lnvinfer -lnvinfer_plugin -L /usr/local/cuda-10.2/lib64 -lcudart -lcublas -lcurand

/* OpenCV headers */
#include <opencv2/core/core.hpp>
#include <opencv2/dnn/dnn.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/imgcodecs/imgcodecs.hpp>

#include <iostream>
#include <fstream>
#include <cudnn.h>
#include "NvInfer.h"
#include "NvInferPlugin.h"

// Small arithmetic helper macros.
// MIN/MAX are defined only when no previously included header already provides
// them, so CLIP no longer depends on another library's macros being present.
#ifndef MIN
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#endif
#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif
// Clamp a into [lo, hi]. Arguments are parenthesized so that expressions such
// as `w - 1` expand correctly; note that arguments may be evaluated more than once.
#define CLIP(a,lo,hi) (MAX(MIN((a), (hi)), (lo)))
// Integer ceiling division: smallest k with k * b >= a (for positive a, b).
#define DIVIDE_AND_ROUND_UP(a, b) (((a) + (b) - 1) / (b))
  
using namespace cv;
using namespace std;

// Logger handed to TensorRT: writes every message except info-level ones to
// stderr, each prefixed with a tag naming its severity.
class Logger : public nvinfer1::ILogger
{
public:
	// severity: level reported by TensorRT for this message.
	// msg:      message text, written verbatim after the severity tag.
	void log(nvinfer1::ILogger::Severity severity, const char* msg) override
	{
		// Info-level messages are dropped entirely.
		if (severity == Severity::kINFO)
		{
			return;
		}

		// Any severity without an explicit tag below is reported as UNKNOWN.
		const char* tag = "UNKNOWN: ";
		if (severity == Severity::kINTERNAL_ERROR)
		{
			tag = "INTERNAL_ERROR: ";
		}
		else if (severity == Severity::kERROR)
		{
			tag = "ERROR: ";
		}
		else if (severity == Severity::kWARNING)
		{
			tag = "WARNING: ";
		}

		std::cerr << tag;
		std::cerr << msg << std::endl;
	}
};


int main()
{
	std::string engineFilePath = "/opt/nvidia/deepstream/deepstream-5.0/samples/models/tlt_pretrained_models/peoplenet/resnet34_peoplenet_pruned.etlt_b1_gpu0_fp16.engine";
	std::string imagePath = "test.jpg";

	// General parameters
	uint16_t m_InputH, m_OutputW;
	uint16_t m_InputW, m_OutputH;
	uint16_t m_InputC;
	uint64_t m_InputSize, m_OutputBBoxSize, m_OutputConfidenceSize;
	uint16_t m_NumOutputClasses;

	// TRT specific parameters
	uint16_t m_maxBatchSize = 1;
	int m_InputIndex = -1;
	int m_OutputBboxIndex = -1, m_OutputClassIndex = -1;

	Logger m_Logger;
	nvinfer1::ICudaEngine* m_Engine;
	nvinfer1::IExecutionContext* m_Context;
	nvinfer1::IRuntime* runtime;

	std::vector<void*> m_Bindings;
	std::vector<float*> m_TrtOutputBuffers;  
	cudaStream_t m_CudaStream;


	m_InputW = 960;
	m_InputH = 544;
	m_InputC = 3;
	m_OutputW = 60;
	m_OutputH = 34;
	m_NumOutputClasses = 3;

	m_InputSize = m_InputW * m_InputH * m_InputC;
	m_OutputBBoxSize = m_OutputW * m_OutputH * m_NumOutputClasses * 4;
	m_OutputConfidenceSize = m_OutputW * m_OutputH * m_NumOutputClasses;
	
	
	// Deserializing engine	
	// reading the model in memory
	std::cout << "[Info] Loading TRT Engine...\n";
	std::stringstream trtModelStream;
	trtModelStream.seekg(0, trtModelStream.beg);
	std::ifstream cache(engineFilePath);
	assert(cache.good());
	trtModelStream << cache.rdbuf();
	cache.close();

	// calculating model size
	trtModelStream.seekg(0, std::ios::end);
	const int modelSize = trtModelStream.tellg();
	trtModelStream.seekg(0, std::ios::beg);
	void* modelMem = malloc(modelSize);
	trtModelStream.read((char*) modelMem, modelSize);

	runtime = nvinfer1::createInferRuntime(m_Logger);
	m_Engine = runtime->deserializeCudaEngine(modelMem, modelSize, nullptr);
	free(modelMem);
	runtime->destroy();
	std::cout << "[Info] Loading Complete!\n";

	if(m_Engine == nullptr)
	{
		std::cout << "[Error] TensorRT engine loading failed\n";
		return -1;	
	}

	m_Context = m_Engine->createExecutionContext();
	if(m_Context == nullptr)
	{
		std::cout << "[Error] TensorRT getting context failed\n";
		return -2;	
	}

	// Get the bindings
	std::cout << "[Info] Getting the Bindings...\n";
	m_Bindings.resize(m_Engine->getNbBindings(), nullptr);
	m_TrtOutputBuffers.resize(m_Engine->getNbBindings() - 1, nullptr);	
	m_InputIndex = m_Engine->getBindingIndex("input_1");
	m_OutputBboxIndex = m_Engine->getBindingIndex("output_bbox/BiasAdd");
	m_OutputClassIndex = m_Engine->getBindingIndex("output_cov/Sigmoid");
	if (m_InputIndex == -1 || m_OutputBboxIndex == -1 || m_OutputClassIndex == -1)
	{
		std::cout << "[Error] TensorRT binding not found\n";
		return -3;
	}
	std::cout << "[Info] Bindings size : " << m_Engine->getNbBindings() << "\n";
	std::cout << "[Info] Bindings " << m_InputIndex << " " << m_OutputBboxIndex << " " << m_OutputClassIndex << "\n";
	
	// Allocate Buffers	
	(cudaMalloc(&m_Bindings.at(m_InputIndex), m_maxBatchSize * m_InputSize * sizeof(float)));
	(cudaMalloc(&m_Bindings.at(m_OutputBboxIndex), m_maxBatchSize * m_OutputBBoxSize * sizeof(float)));
	(cudaMalloc(&m_Bindings.at(m_OutputClassIndex), m_maxBatchSize * m_OutputConfidenceSize * sizeof(float)));
	(cudaMallocHost(&m_TrtOutputBuffers[0], m_OutputBBoxSize * m_maxBatchSize * sizeof(float)));
	(cudaMallocHost(&m_TrtOutputBuffers[1], m_OutputConfidenceSize * m_maxBatchSize * sizeof(float)));
	(cudaStreamCreate(&m_CudaStream));


	// Loading input image to device
	std::cout << "[Info] Loading input image\n";
	Mat inputImage = imread(imagePath);
	Mat inferImage = cv::dnn::blobFromImage(inputImage, 0.0039215697906911373, cv::Size(m_InputW, m_InputH), cv::Scalar(0.0, 0.0, 0.0), true, false);
	//Mat inferImage = cv::dnn::blobFromImage(inputImage, 1.0, cv::Size(m_InputW, m_InputH), cv::Scalar(0.0, 0.0, 0.0), false, false);
	cudaMemcpyAsync(m_Bindings.at(m_InputIndex), inputImage.data,
								  m_maxBatchSize * m_InputSize * sizeof(float), cudaMemcpyHostToDevice,
								  m_CudaStream);

	// Running Inference
	std::cout << "[Info] Running Inference\n";
	m_Context->enqueue(m_maxBatchSize, m_Bindings.data(), m_CudaStream, nullptr);

	cudaMemcpyAsync(m_TrtOutputBuffers.at(0), m_Bindings.at(m_OutputBboxIndex),
								  m_maxBatchSize * m_OutputBBoxSize * sizeof(float),
								  cudaMemcpyDeviceToHost, m_CudaStream);
	cudaMemcpyAsync(m_TrtOutputBuffers.at(1), m_Bindings.at(m_OutputClassIndex),
								  m_maxBatchSize * m_OutputConfidenceSize * sizeof(float),
								  cudaMemcpyDeviceToHost, m_CudaStream);

	// Decoding output buffers
	std::cout << "[Info] Decoding the output Buffers\n";
	int gridW = m_OutputW;
	int gridH = m_OutputH;
	int gridSize = gridW * gridH;
	float gcCentersX[gridW];
	float gcCentersY[gridH];
	float bboxNormX = 35.0;
	float bboxNormY = 35.0;
	float* outputBboxBuf = &m_TrtOutputBuffers.at(0)[0];
	float* outputCovBuf = &m_TrtOutputBuffers.at(1)[0];

	int strideX = DIVIDE_AND_ROUND_UP(m_InputW, gridW);
	int strideY = DIVIDE_AND_ROUND_UP(m_InputH, gridH);

	for (int i = 0; i < gridW; i++)
	{
		gcCentersX[i] = (float)(i * strideX + 0.5);
		gcCentersX[i] /= (float)bboxNormX;
	}
	for (int i = 0; i < gridH; i++)
	{
		gcCentersY[i] = (float)(i * strideY + 0.5);
		gcCentersY[i] /= (float)bboxNormY;
	}

	for (int c = 0; c < m_NumOutputClasses; c++)
	{
		float *outputX1 = outputBboxBuf + (c * 4 * gridW * gridH);

		float *outputY1 = outputX1 + gridSize;
		float *outputX2 = outputY1 + gridSize;
		float *outputY2 = outputX2 + gridSize;

		float threshold = 0.1;//detectionParams.perClassPreclusterThreshold[c];
		for (int h = 0; h < gridH; h++)
		{
			for (int w = 0; w < gridW; w++)
			{
				int i = w + h * gridW;
				float confidence = outputCovBuf[c * gridSize + i];
				if (confidence >= threshold)
				{
					//NvDsInferObjectDetectionInfo object;
					float rectX1f, rectY1f, rectX2f, rectY2f;

					rectX1f = (outputX1[w + h * gridW] - gcCentersX[w]) * -bboxNormX;
					rectY1f = (outputY1[w + h * gridW] - gcCentersY[h]) * -bboxNormY;
					rectX2f = (outputX2[w + h * gridW] + gcCentersX[w]) * bboxNormX;
					rectY2f = (outputY2[w + h * gridW] + gcCentersY[h]) * bboxNormY;

					//object.classId = c;
					//object.detectionConfidence = outputCovBuf[c * gridSize + i];

					/* Clip object box co-ordinates to network resolution */
					rectX1f = CLIP(rectX1f, 0, m_InputW - 1);
					rectY1f = CLIP(rectY1f, 0, m_InputH - 1);
					rectX2f = CLIP(rectX2f, 0, m_InputW - 1);
					rectY2f = CLIP(rectY2f, 0, m_InputH - 1);

					//Prevent underflows
					if(((rectX2f - rectX1f) < 0) || ((rectY2f - rectY1f) < 0))
						continue;

					// Detected boxes
					std::cout << "[Info] ClassIdx : " << c << " BBox : " << rectX1f << "," << rectY1f << "," << (rectX2f) << "," << (rectY2f) << "," << confidence << "\n";          
				}
			}
		}
	}
	//imshow("Display", inputImage);
	//waitKey();

	return 0;
}

 

posted on 2020-07-04 16:58  cdekelon  阅读(442)  评论(0编辑  收藏  举报

导航