TensorRT&Sample&Python[introductory_parser_samples]
本文是基于TensorRT 5.0.2基础上,关于其内部的introductory_parser_samples例子的分析和介绍。
1 引言
假设当前路径为:
TensorRT-5.0.2.6/samples
其对应当前例子文件目录树为:
# tree python
python/
├── common.py
├── introductory_parser_samples
│ ├── caffe_resnet50.py
│ ├── onnx_resnet50.py
│ ├── README.md
│ ├── requirements.txt
│ └── uff_resnet50.py
该例子展示如何使用TensorRT和包含的对应解析器(UFF,Caffe,ONNX解析器),基于在不同框架下训练的ResNet-50结构来进行inference。
- caffe_resnet50: 该例子展示如何构建基于Caffe解析器去解析Caffe训练的模型,并构建引擎然后进行inference;
- onnx_resnet50:该例子展示如何基于开源的ONNX解析器去解析ONNX模型,并构建引擎然后进行inference;
- uff_resnet50: 该例子展示如何从一个UFF模型文件(从一个tf protobuf转换过来)构建引擎,然后inference。
2 caffe_resnet50
所需要的文件内容包含:
/TensorRT-5.0.2.6/python/data/resnet50/
├── binoculars-cc0.jpeg
├── binoculars.jpeg
├── canon-cc0.jpeg
├── class_labels.txt
├── mug-cc0.jpeg
├── reflex_camera.jpeg
├── ResNet50_fp32.caffemodel
├── resnet50-infer-5.uff
├── ResNet50_N2.prototxt
├── ResNet50.onnx
└── tabby_tiger_cat.jpg
先上完整代码,从main函数开始,逐个调用外部的函数完成整个流程,整个代码还是挺简单的:
# 该例子使用Caffe ResNet50 模型去创建一个TensorRT Inference Engine
import random
import argparse
from collections import namedtuple
from PIL import Image
import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit # 该import会让pycuda自动管理CUDA上下文的创建和清理工作
import tensorrt as trt
import sys, os
# sys.path.insert(1, os.path.join(sys.path[0], ".."))
# import common
# 这里将common中的GiB和find_sample_data函数移动到该py文件中,保证自包含。
def GiB(val):
    """Convert a size given in gibibytes into a number of bytes.

    Args:
        val: Size in GiB. Integers give an integer result; fractional
            values such as ``0.5`` are also accepted.

    Returns:
        ``val`` multiplied by 2**30.
    """
    # The parentheses matter: `val * 1 << 30` parses as `(val * 1) << 30`,
    # which only works for integers and raises TypeError for floats.
    return val * (1 << 30)
def find_sample_data(description="Runs a TensorRT Python sample", subfolder="", find_files=None):
    """Locate the sample data directory and resolve file names inside it.

    The data root is hard-coded in this self-contained copy; no command-line
    arguments are parsed and ``description`` is not used by the body.

    Args:
        description (str): Description of the sample (unused here).
        subfolder (str): Sub-directory of the data root to prefer. When it
            does not exist a warning is printed and the data root is used.
        find_files (list): File names to look up. The list is updated in
            place: every entry is replaced by its absolute path. ``None``
            (the default) means "no files".

    Returns:
        str: The data directory, when no files were requested.
        tuple: ``(data_directory, find_files)`` when files were requested.

    Raises:
        FileNotFoundError: If the data directory or a requested file is
            missing.
    """
    # A fresh list per call instead of a shared mutable default argument.
    if find_files is None:
        find_files = []

    # NOTE(review): the root already ends in "resnet50", so callers passing
    # subfolder="resnet50" presumably always hit the fallback below - confirm.
    data_root = os.path.abspath("/TensorRT-5.0.2.6/python/data/resnet50/")
    subfolder_path = os.path.join(data_root, subfolder)

    if os.path.exists(subfolder_path):
        data_path = subfolder_path
    else:
        print("WARNING: " + subfolder_path + " does not exist. Using " + data_root + " instead.")
        data_path = data_root
        if not os.path.exists(data_path):
            raise FileNotFoundError(data_path + " does not exist.")

    for index, name in enumerate(find_files):
        resolved = os.path.abspath(os.path.join(data_path, name))
        # Written back into the caller's list to keep the in-place contract.
        find_files[index] = resolved
        if not os.path.exists(resolved):
            raise FileNotFoundError(resolved + " does not exist. ")

    if find_files:
        return data_path, find_files
    return data_path
#-----------------
# Static description of the Caffe ResNet-50 model files and tensors used by
# this sample.
_ModelData = namedtuple(
    '_ModelData',
    ['MODEL_PATH', 'DEPLOY_PATH', 'INPUT_SHAPE', 'OUTPUT_NAME', 'DTYPE'],
)
ModelData = _ModelData(
    MODEL_PATH="ResNet50_fp32.caffemodel",
    DEPLOY_PATH="ResNet50_N2.prototxt",
    INPUT_SHAPE=(3, 224, 224),
    OUTPUT_NAME="prob",
    # TensorRT data types can be converted to numpy types with trt.nptype().
    DTYPE=trt.float32,
)

# Logger shared by the TensorRT objects created in this script.
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
'''main中第二步:构建一个tensorRT engine '''
# The Caffe path is used for Caffe2 models.
def build_engine_caffe(model_file, deploy_file):
    """Build a TensorRT engine from a Caffe model (step 2 of ``main``).

    Args:
        model_file: Path of the ``.caffemodel`` weights file.
        deploy_file: Path of the ``.prototxt`` deploy file.

    Returns:
        Whatever ``builder.build_cuda_engine`` returns for the parsed network.
    """
    with trt.Builder(TRT_LOGGER) as trt_builder:
        with trt_builder.create_network() as net:
            with trt.CaffeParser() as caffe_parser:
                # Upper bound on the memory the builder may use while
                # building the engine.
                trt_builder.max_workspace_size = GiB(1)
                # Parsing fills `net`; the returned object lets tensors be
                # looked up by name.
                blob_lookup = caffe_parser.parse(deploy=deploy_file, model=model_file,
                                                 network=net, dtype=ModelData.DTYPE)
                # Caffe networks need their output marked explicitly; the
                # output tensor name is known up front.
                output_tensor = blob_lookup.find(ModelData.OUTPUT_NAME)
                net.mark_output(output_tensor)
                return trt_builder.build_cuda_engine(net)
'''main中第三步:分配host和device端的buffers,然后创建一个流 '''
def allocate_buffers(engine):
    """Allocate host/device buffers and a CUDA stream (step 3 of ``main``).

    Bindings 0 and 1 of ``engine`` are used as input and output respectively.

    Returns:
        tuple: ``(h_input, d_input, h_output, d_output, stream)``.
    """
    np_dtype = trt.nptype(ModelData.DTYPE)
    input_size = trt.volume(engine.get_binding_shape(0))
    output_size = trt.volume(engine.get_binding_shape(1))

    # Page-locked host buffers (not swapped to disk) for input and output.
    h_input = cuda.pagelocked_empty(input_size, dtype=np_dtype)
    h_output = cuda.pagelocked_empty(output_size, dtype=np_dtype)

    # Device buffers of matching byte size.
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output = cuda.mem_alloc(h_output.nbytes)

    # Stream used for the copies and for running inference.
    stream = cuda.Stream()
    return h_input, d_input, h_output, d_output, stream
'''main中第四步:读取测试样本,并归一化 '''
def load_normalized_test_case(test_image, pagelocked_buffer):
    """Load an image file into ``pagelocked_buffer`` (step 4 of ``main``).

    Args:
        test_image: Path of the image file to load.
        pagelocked_buffer: Destination array that receives the flattened
            image (assumed to match the flattened input size and dtype -
            confirm against ``allocate_buffers``).

    Returns:
        ``test_image``, unchanged.
    """
    def normalize_image(image):
        # Resize to the model's width/height, reorder the axes to CHW, cast
        # to the model dtype and flatten. Despite the name, pixel values are
        # not rescaled in this variant.
        c, h, w = ModelData.INPUT_SHAPE
        resized = image.resize((w, h), Image.ANTIALIAS)
        chw = np.asarray(resized).transpose([2, 0, 1])
        return chw.astype(trt.nptype(ModelData.DTYPE)).ravel()

    # The context manager closes the underlying file once the pixel data has
    # been copied, instead of leaving the handle open.
    with Image.open(test_image) as image:
        np.copyto(pagelocked_buffer, normalize_image(image))
    return test_image
'''main中第五步:执行inference '''
def do_inference(context, h_input, d_input, h_output, d_output, stream):
    """Run one inference on ``stream`` (step 5 of ``main``).

    The input is copied host-to-device, the execution context is run, the
    output is copied device-to-host, and the stream is synchronized before
    returning. The result is left in ``h_output``.
    """
    device_bindings = [int(d_input), int(d_output)]
    # Host -> device copy of the input.
    cuda.memcpy_htod_async(d_input, h_input, stream)
    # Launch inference on the same stream.
    context.execute_async(bindings=device_bindings, stream_handle=stream.handle)
    # Device -> host copy of the result.
    cuda.memcpy_dtoh_async(h_output, d_output, stream)
    # Wait for everything queued on the stream to finish.
    stream.synchronize()
def main():
    """Classify one randomly chosen sample image with a Caffe-built engine.

    Locates the data files, builds the engine, allocates buffers, runs
    inference and prints whether the predicted label matches the image's
    file name.
    """
    # 1 - Resolve the test images, the model files and the label file.
    _, data_files = find_sample_data(
        description="Runs a ResNet50 network with a TensorRT inference engine.",
        subfolder="resnet50",
        find_files=["binoculars.jpeg",
                    "reflex_camera.jpeg",
                    "tabby_tiger_cat.jpg",
                    ModelData.MODEL_PATH,
                    ModelData.DEPLOY_PATH,
                    "class_labels.txt"])
    test_images = data_files[0:3]  # the three test pictures
    # Caffe weights file, deploy file and labels file.
    caffe_model_file, caffe_deploy_file, labels_file = data_files[3:]
    # Read the labels through a context manager so the file is closed.
    with open(labels_file, 'r') as labels_fh:
        labels = labels_fh.read().split('\n')

    # 2 - Build a TensorRT engine with build_engine_caffe.
    with build_engine_caffe(caffe_model_file, caffe_deploy_file) as engine:
        # From here on the flow is the same whichever parser built the engine.
        # 3 - Allocate buffers and create a CUDA stream.
        h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)
        # 4 - The execution context is what actually runs inference.
        with engine.create_execution_context() as context:
            # Pick a test image and load it into the page-locked host buffer.
            test_image = random.choice(test_images)
            test_case = load_normalized_test_case(test_image, h_input)
            # Run the engine; the result is written to h_output.
            do_inference(context, h_input, d_input, h_output, d_output, stream)
            # Map the index of the highest score to its label.
            pred = labels[np.argmax(h_output)]
            # The prediction counts as correct when the label (spaces
            # replaced by underscores) occurs in the image file's base name.
            if "_".join(pred.split()) in os.path.splitext(os.path.basename(test_case))[0]:
                print("Correctly recognized " + test_case + " as " + pred)
            else:
                print("Incorrectly recognized " + test_case + " as " + pred)


if __name__ == '__main__':
    main()
3 onnx_resnet50
从下面的代码和上面例子代码进行对比,发现还是相对一致的流程,就是其中个别函数有所不同。
# 该例子使用ONNX ResNet50 模型去创建一个TensorRT Inference Engine
import random
from PIL import Image
from collections import namedtuple
import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit # 该import会让pycuda自动管理CUDA上下文的创建和清理工作
import tensorrt as trt
import sys, os
# import common
# 这里将common中的GiB和find_sample_data函数移动到该py文件中,保证自包含。
def GiB(val):
    """Return the number of bytes in ``val`` gibibytes.

    Args:
        val: Size in GiB, integer or fractional.

    Returns:
        ``val`` multiplied by 2**30 (an ``int`` for integer input).
    """
    # `val * 1 << 30` groups as `(val * 1) << 30` and fails for floats;
    # multiply by the precomputed power of two instead.
    return val * (1 << 30)
def find_sample_data(description="Runs a TensorRT Python sample", subfolder="", find_files=None):
    """Locate the sample data directory and resolve file names inside it.

    The data root is hard-coded in this self-contained copy; no command-line
    arguments are parsed and ``description`` is not used by the body.

    Args:
        description (str): Description of the sample (unused here).
        subfolder (str): Sub-directory of the data root to prefer. When it
            does not exist a warning is printed and the data root is used.
        find_files (list): File names to look up. The list is updated in
            place: every entry is replaced by its absolute path. ``None``
            (the default) means "no files".

    Returns:
        str: The data directory, when no files were requested.
        tuple: ``(data_directory, find_files)`` when files were requested.

    Raises:
        FileNotFoundError: If the data directory or a requested file is
            missing.
    """
    # A fresh list per call instead of a shared mutable default argument.
    if find_files is None:
        find_files = []

    # NOTE(review): the root already ends in "resnet50", so callers passing
    # subfolder="resnet50" presumably always hit the fallback below - confirm.
    data_root = os.path.abspath("/TensorRT-5.0.2.6/python/data/resnet50/")
    subfolder_path = os.path.join(data_root, subfolder)

    if os.path.exists(subfolder_path):
        data_path = subfolder_path
    else:
        print("WARNING: " + subfolder_path + " does not exist. Using " + data_root + " instead.")
        data_path = data_root
        if not os.path.exists(data_path):
            raise FileNotFoundError(data_path + " does not exist.")

    for index, name in enumerate(find_files):
        resolved = os.path.abspath(os.path.join(data_path, name))
        # Written back into the caller's list to keep the in-place contract.
        find_files[index] = resolved
        if not os.path.exists(resolved):
            raise FileNotFoundError(resolved + " does not exist. ")

    if find_files:
        return data_path, find_files
    return data_path
#-----------------
# Static description of the ONNX ResNet-50 model used by this sample.
_ModelData = namedtuple(
    '_ModelData',
    ['MODEL_PATH', 'INPUT_SHAPE', 'DTYPE'],
)
ModelData = _ModelData(
    MODEL_PATH="ResNet50.onnx",
    INPUT_SHAPE=(3, 224, 224),
    # TensorRT data types can be converted to numpy types with trt.nptype().
    DTYPE=trt.float32,
)

# Logger shared by the TensorRT objects created in this script.
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
'''main中第二步:构建一个tensorRT engine '''
# The Onnx path is used for Onnx models.
def build_engine_onnx(model_file):
    """Build a TensorRT engine from an ONNX model file (step 2 of ``main``).

    Args:
        model_file: Path of the ``.onnx`` model.

    Returns:
        Whatever ``builder.build_cuda_engine`` returns for the parsed network.

    Raises:
        RuntimeError: If the ONNX parser reports that parsing failed.
    """
    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network() as network, \
            trt.OnnxParser(network, TRT_LOGGER) as parser:
        # Upper bound on the memory the builder may use while building the
        # engine.
        builder.max_workspace_size = GiB(1)
        # Load the ONNX model and let the parser populate the network.
        with open(model_file, 'rb') as model:
            parsed_ok = parser.parse(model.read())
        # Previously the parse result was ignored, so a broken model only
        # surfaced later as an unusable engine. Report the parser's own
        # diagnostics instead.
        if not parsed_ok:
            problems = [parser.get_error(i).desc() for i in range(parser.num_errors)]
            raise RuntimeError("Failed to parse ONNX model " + str(model_file)
                               + ": " + "; ".join(problems))
        return builder.build_cuda_engine(network)
'''main中第三步:分配host和device端的buffers,然后创建一个流 '''
def allocate_buffers(engine):
    """Create the host/device buffer pairs and the CUDA stream (step 3).

    Binding 0 of ``engine`` is treated as the input and binding 1 as the
    output.

    Returns:
        tuple: ``(h_input, d_input, h_output, d_output, stream)``.
    """
    host_dtype = trt.nptype(ModelData.DTYPE)
    n_in = trt.volume(engine.get_binding_shape(0))
    n_out = trt.volume(engine.get_binding_shape(1))

    # Host side: page-locked buffers (not swapped to disk).
    h_input = cuda.pagelocked_empty(n_in, dtype=host_dtype)
    h_output = cuda.pagelocked_empty(n_out, dtype=host_dtype)

    # Device side: allocations with the same byte sizes.
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output = cuda.mem_alloc(h_output.nbytes)

    # One stream for both the copies and the inference itself.
    stream = cuda.Stream()
    return h_input, d_input, h_output, d_output, stream
'''main中第四步:读取测试样本,并归一化 '''
def load_normalized_test_case(test_image, pagelocked_buffer):
    """Load and normalize an image into ``pagelocked_buffer`` (step 4).

    Args:
        test_image: Path of the image file to load.
        pagelocked_buffer: Destination array that receives the flattened,
            normalized image (assumed to match the flattened input size and
            dtype - confirm against ``allocate_buffers``).

    Returns:
        ``test_image``, unchanged.
    """
    def normalize_image(image):
        # Unlike the Caffe variant, this one also rescales the pixel values.
        c, h, w = ModelData.INPUT_SHAPE
        resized = image.resize((w, h), Image.ANTIALIAS)
        # Reorder the axes to CHW, cast to the model dtype and flatten.
        image_arr = np.asarray(resized).transpose([2, 0, 1]).astype(trt.nptype(ModelData.DTYPE)).ravel()
        # This ResNet-50 expects preprocessed input: scale to [0, 1], then
        # shift and scale by fixed constants.
        return (image_arr / 255.0 - 0.45) / 0.225

    # The context manager closes the underlying file once the pixel data has
    # been copied, instead of leaving the handle open.
    with Image.open(test_image) as image:
        np.copyto(pagelocked_buffer, normalize_image(image))
    return test_image
'''main中第五步:执行inference '''
def do_inference(context, h_input, d_input, h_output, d_output, stream):
    """Execute the engine once and fetch the result (step 5 of ``main``).

    All work is queued on ``stream``: input upload, execution, output
    download. The function returns after the stream has been synchronized,
    with the result available in ``h_output``.
    """
    binding_addresses = [int(d_input), int(d_output)]
    # Upload the input to the device.
    cuda.memcpy_htod_async(d_input, h_input, stream)
    # Run inference.
    context.execute_async(bindings=binding_addresses, stream_handle=stream.handle)
    # Download the output to the host.
    cuda.memcpy_dtoh_async(h_output, d_output, stream)
    # Block until the stream has drained.
    stream.synchronize()
def main():
    """Classify one randomly chosen sample image with an ONNX-built engine.

    Locates the data files, builds the engine, allocates buffers, runs
    inference and prints whether the predicted label matches the image's
    file name.
    """
    # 1 - Resolve the test images, the model file and the label file.
    _, data_files = find_sample_data(
        description="Runs a ResNet50 network with a TensorRT inference engine.",
        subfolder="resnet50",
        find_files=["binoculars.jpeg",
                    "reflex_camera.jpeg",
                    "tabby_tiger_cat.jpg",
                    ModelData.MODEL_PATH,
                    "class_labels.txt"])
    test_images = data_files[0:3]  # the three test pictures
    onnx_model_file, labels_file = data_files[3:]  # ONNX model and labels
    # Read the labels through a context manager so the file is closed.
    with open(labels_file, 'r') as labels_fh:
        labels = labels_fh.read().split('\n')

    # 2 - Build a TensorRT engine with build_engine_onnx.
    with build_engine_onnx(onnx_model_file) as engine:
        # From here on the flow is the same whichever parser built the engine.
        # 3 - Allocate buffers and create a CUDA stream.
        h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)
        # 4 - The execution context is what actually runs inference.
        with engine.create_execution_context() as context:
            # Pick a test image and load it into the page-locked host buffer.
            test_image = random.choice(test_images)
            test_case = load_normalized_test_case(test_image, h_input)
            # Run the engine; the result is written to h_output.
            do_inference(context, h_input, d_input, h_output, d_output, stream)
            # Map the index of the highest score to its label.
            pred = labels[np.argmax(h_output)]
            # The prediction counts as correct when the label (spaces
            # replaced by underscores) occurs in the image file's base name.
            if "_".join(pred.split()) in os.path.splitext(os.path.basename(test_case))[0]:
                print("Correctly recognized " + test_case + " as " + pred)
            else:
                print("Incorrectly recognized " + test_case + " as " + pred)


if __name__ == '__main__':
    main()
4 uff_resnet50
从下面的例子可以看出,这三个例子流程大致一致,只有个别区域有少许变化。
UFF是TensorRT内部使用的统一框架格式,用于表示优化前的网络结构图,可以将诸如pb等模型格式先转换成uff格式(参见NVIDIA博客文章 tensorrt-3-faster-tensorflow-inference)。
# 该例子使用UFF ResNet50 模型去创建一个TensorRT Inference Engine
import random
from PIL import Image
import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit # 该import会让pycuda自动管理CUDA上下文的创建和清理工作
import tensorrt as trt
import sys, os
#sys.path.insert(1, os.path.join(sys.path[0], ".."))
# import common
# 这里将common中的GiB和find_sample_data函数移动到该py文件中,保证自包含。
def GiB(val):
    """Translate a GiB count into bytes.

    Args:
        val: Size in GiB; both integers and floats are supported.

    Returns:
        ``val`` multiplied by 2**30.
    """
    # Operator precedence makes `val * 1 << 30` mean `(val * 1) << 30`,
    # which rejects floats; the explicit grouping avoids that.
    return val * (1 << 30)
def find_sample_data(description="Runs a TensorRT Python sample", subfolder="", find_files=None):
    """Locate the sample data directory and resolve file names inside it.

    The data root is hard-coded in this self-contained copy; no command-line
    arguments are parsed and ``description`` is not used by the body.

    Args:
        description (str): Description of the sample (unused here).
        subfolder (str): Sub-directory of the data root to prefer. When it
            does not exist a warning is printed and the data root is used.
        find_files (list): File names to look up. The list is updated in
            place: every entry is replaced by its absolute path. ``None``
            (the default) means "no files".

    Returns:
        str: The data directory, when no files were requested.
        tuple: ``(data_directory, find_files)`` when files were requested.

    Raises:
        FileNotFoundError: If the data directory or a requested file is
            missing.
    """
    # A fresh list per call instead of a shared mutable default argument.
    if find_files is None:
        find_files = []

    # NOTE(review): the root already ends in "resnet50", so callers passing
    # subfolder="resnet50" presumably always hit the fallback below - confirm.
    data_root = os.path.abspath("/TensorRT-5.0.2.6/python/data/resnet50/")
    subfolder_path = os.path.join(data_root, subfolder)

    if os.path.exists(subfolder_path):
        data_path = subfolder_path
    else:
        print("WARNING: " + subfolder_path + " does not exist. Using " + data_root + " instead.")
        data_path = data_root
        if not os.path.exists(data_path):
            raise FileNotFoundError(data_path + " does not exist.")

    for index, name in enumerate(find_files):
        resolved = os.path.abspath(os.path.join(data_path, name))
        # Written back into the caller's list to keep the in-place contract.
        find_files[index] = resolved
        if not os.path.exists(resolved):
            raise FileNotFoundError(resolved + " does not exist. ")

    if find_files:
        return data_path, find_files
    return data_path
#-----------------
# This script's import list does not include namedtuple, so it is imported
# here; without it the definition below fails with NameError.
from collections import namedtuple

# Static description of the UFF ResNet-50 model used by this sample. The
# earlier class-based ModelData carried the same values and was immediately
# shadowed by this namedtuple instance, so only this definition is kept.
_ModelData = namedtuple('_ModelData', ['MODEL_PATH', 'INPUT_NAME', 'INPUT_SHAPE', 'OUTPUT_NAME', 'DTYPE'])
ModelData = _ModelData(
    MODEL_PATH="resnet50-infer-5.uff",
    INPUT_NAME="input",
    INPUT_SHAPE=(3, 224, 224),
    OUTPUT_NAME="GPU_0/tower_0/Softmax",
    # TensorRT data types can be converted to numpy types with trt.nptype().
    DTYPE=trt.float32,
)

# Logger shared by the TensorRT objects created in this script.
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
'''main中第二步:构建一个tensorRT engine '''
# The UFF path is used for TensorFlow models. You can convert a frozen TensorFlow graph to UFF using the included convert-to-uff utility.
def build_engine_uff(model_file):
    """Build a TensorRT engine from a UFF model file (step 2 of ``main``).

    Args:
        model_file: Path of the ``.uff`` model.

    Returns:
        Whatever ``builder.build_cuda_engine`` returns for the parsed network.
    """
    with trt.Builder(TRT_LOGGER) as trt_builder:
        with trt_builder.create_network() as net:
            with trt.UffParser() as uff_parser:
                # Upper bound on the memory the builder may use while
                # building the engine.
                trt_builder.max_workspace_size = GiB(1)
                # UFF needs the input and output nodes registered by hand.
                uff_parser.register_input(ModelData.INPUT_NAME, ModelData.INPUT_SHAPE)
                uff_parser.register_output(ModelData.OUTPUT_NAME)
                # Load the UFF model and populate the network from it.
                uff_parser.parse(model_file, net)
                return trt_builder.build_cuda_engine(net)
'''main中第三步:分配host和device端的buffers,然后创建一个流 '''
def allocate_buffers(engine):
    """Set up input/output buffers on host and device plus a stream (step 3).

    The shapes of engine bindings 0 (input) and 1 (output) determine the
    buffer sizes.

    Returns:
        tuple: ``(h_input, d_input, h_output, d_output, stream)``.
    """
    element_type = trt.nptype(ModelData.DTYPE)
    input_elems = trt.volume(engine.get_binding_shape(0))
    output_elems = trt.volume(engine.get_binding_shape(1))

    # Page-locked host memory (not swapped to disk) for input and output.
    h_input = cuda.pagelocked_empty(input_elems, dtype=element_type)
    h_output = cuda.pagelocked_empty(output_elems, dtype=element_type)

    # Device memory sized from the host buffers.
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output = cuda.mem_alloc(h_output.nbytes)

    # Stream for the transfers and for executing inference.
    stream = cuda.Stream()
    return h_input, d_input, h_output, d_output, stream
'''main中第四步:读取测试样本,并归一化 '''
def load_normalized_test_case(test_image, pagelocked_buffer):
    """Load an image file into ``pagelocked_buffer`` (step 4 of ``main``).

    Args:
        test_image: Path of the image file to load.
        pagelocked_buffer: Destination array that receives the flattened
            image (assumed to match the flattened input size and dtype -
            confirm against ``allocate_buffers``).

    Returns:
        ``test_image``, unchanged.
    """
    def normalize_image(image):
        # Resize to the model's width/height, reorder the axes to CHW, cast
        # to the model dtype and flatten. Despite the name, pixel values are
        # not rescaled in this variant.
        c, h, w = ModelData.INPUT_SHAPE
        resized = image.resize((w, h), Image.ANTIALIAS)
        chw = np.asarray(resized).transpose([2, 0, 1])
        return chw.astype(trt.nptype(ModelData.DTYPE)).ravel()

    # The context manager closes the underlying file once the pixel data has
    # been copied, instead of leaving the handle open.
    with Image.open(test_image) as image:
        np.copyto(pagelocked_buffer, normalize_image(image))
    return test_image
'''main中第五步:执行inference '''
def do_inference(context, h_input, d_input, h_output, d_output, stream):
    """Push the input through the engine and pull back the output (step 5).

    The upload, the execution and the download are all issued on ``stream``;
    the call returns once the stream has been synchronized, leaving the
    result in ``h_output``.
    """
    bindings = [int(d_input), int(d_output)]
    # Move the input data to the GPU.
    cuda.memcpy_htod_async(d_input, h_input, stream)
    # Execute inference.
    context.execute_async(bindings=bindings, stream_handle=stream.handle)
    # Move the result back to the host.
    cuda.memcpy_dtoh_async(h_output, d_output, stream)
    # Wait for the queued operations to complete.
    stream.synchronize()
def main():
    """Classify one randomly chosen sample image with a UFF-built engine.

    Locates the data files, builds the engine, allocates buffers, runs
    inference and prints whether the predicted label matches the image's
    file name.
    """
    # 1 - Resolve the test images, the model file and the label file.
    _, data_files = find_sample_data(
        description="Runs a ResNet50 network with a TensorRT inference engine.",
        subfolder="resnet50",
        find_files=["binoculars.jpeg",
                    "reflex_camera.jpeg",
                    "tabby_tiger_cat.jpg",
                    ModelData.MODEL_PATH,
                    "class_labels.txt"])
    test_images = data_files[0:3]  # the three test pictures
    uff_model_file, labels_file = data_files[3:]  # UFF model and labels
    # Read the labels through a context manager so the file is closed.
    with open(labels_file, 'r') as labels_fh:
        labels = labels_fh.read().split('\n')

    # 2 - Build a TensorRT engine with build_engine_uff.
    with build_engine_uff(uff_model_file) as engine:
        # From here on the flow is the same whichever parser built the engine.
        # 3 - Allocate buffers and create a CUDA stream.
        h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)
        # 4 - The execution context is what actually runs inference.
        with engine.create_execution_context() as context:
            # Pick a test image and load it into the page-locked host buffer.
            test_image = random.choice(test_images)
            test_case = load_normalized_test_case(test_image, h_input)
            # Run the engine; the result is written to h_output.
            do_inference(context, h_input, d_input, h_output, d_output, stream)
            # Map the index of the highest score to its label.
            pred = labels[np.argmax(h_output)]
            # The prediction counts as correct when the label (spaces
            # replaced by underscores) occurs in the image file's base name.
            if "_".join(pred.split()) in os.path.splitext(os.path.basename(test_case))[0]:
                print("Correctly recognized " + test_case + " as " + pred)
            else:
                print("Incorrectly recognized " + test_case + " as " + pred)


if __name__ == '__main__':
    main()
.