This article analyzes and walks through the introductory_parser_samples example that ships with TensorRT 5.0.2.
Assume the current working directory is:

```
TensorRT-5.0.2.6/samples
```
The file tree for this example is:
```
# tree python
python/
├── common.py
├── introductory_parser_samples
│   ├── caffe_resnet50.py
│   ├── onnx_resnet50.py
│   ├── README.md
│   ├── requirements.txt
│   └── uff_resnet50.py
```
This example shows how to use TensorRT and its bundled parsers (the UFF, Caffe, and ONNX parsers) to run inference with a ResNet-50 network trained in different frameworks; the parser-specific step that distinguishes the three scripts is sketched right after this list.
- caffe_resnet50: shows how to use the Caffe parser to parse a Caffe-trained model, build an engine, and then run inference;
- onnx_resnet50: shows how to parse an ONNX model with the open-source ONNX parser and then run inference;
- uff_resnet50: shows how to build an engine from a UFF model file (converted from a TensorFlow protobuf) and then run inference.
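All three scripts share the same surrounding scaffolding; the genuinely parser-specific part is how the TensorRT network gets populated. As a quick orientation before the full listings, here is a minimal sketch of that step, distilled from the three samples below (populate_network is a hypothetical helper; the file and tensor names are the ones used by the sample data):

```python
import tensorrt as trt

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

def populate_network(network, fmt):
    '''Hypothetical helper: the parser-specific step of each sample, minus error handling.'''
    if fmt == "caffe":
        with trt.CaffeParser() as parser:
            # Caffe needs both the deploy prototxt and the weights; outputs must be marked by hand.
            tensors = parser.parse(deploy="ResNet50_N2.prototxt", model="ResNet50_fp32.caffemodel",
                                   network=network, dtype=trt.float32)
            network.mark_output(tensors.find("prob"))
    elif fmt == "onnx":
        # The ONNX parser is bound to the network at construction and consumes the serialized model bytes.
        with trt.OnnxParser(network, TRT_LOGGER) as parser, open("ResNet50.onnx", 'rb') as model:
            parser.parse(model.read())
    elif fmt == "uff":
        with trt.UffParser() as parser:
            # UFF needs the input shape and the input/output node names registered before parsing.
            parser.register_input("input", (3, 224, 224))
            parser.register_output("GPU_0/tower_0/Softmax")
            parser.parse("resnet50-infer-5.uff", network)
```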
The required data files are:
```
/TensorRT-5.0.2.6/python/data/resnet50/
├── binoculars-cc0.jpeg
├── binoculars.jpeg
├── canon-cc0.jpeg
├── class_labels.txt
├── mug-cc0.jpeg
├── reflex_camera.jpeg
├── ResNet50_fp32.caffemodel
├── resnet50-infer-5.uff
├── ResNet50_N2.prototxt
├── ResNet50.onnx
└── tabby_tiger_cat.jpg
```
Here is the complete code first. Starting from main, it calls each helper in turn to complete the whole flow; the code is fairly straightforward:
```python
# This sample uses a Caffe ResNet50 model to create a TensorRT inference engine.
import random
import argparse
from collections import namedtuple
from PIL import Image
import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit  # This import lets pycuda manage CUDA context creation and cleanup automatically.
import tensorrt as trt

import sys, os
# sys.path.insert(1, os.path.join(sys.path[0], ".."))
# import common
# The GiB and find_sample_data functions from common.py are inlined here so the file is self-contained.

def GiB(val):
    '''Computes a size in bytes from a value in GiB: shifting left by 10 bits gives KiB, 20 bits MiB, 30 bits GiB.'''
    return val * 1 << 30

def find_sample_data(description="Runs a TensorRT Python sample", subfolder="", find_files=[]):
    '''Parses sample arguments.

    Args:
        description (str): Description of the sample.
        subfolder (str): The subfolder containing data relevant to this sample.
        find_files (list of str): A list of filenames to find. Each filename will be replaced with an absolute path.

    Returns:
        str: Path of data directory.

    Raises:
        FileNotFoundError
    '''
    # For brevity, the data path is hard-coded here.
    data_root = kDEFAULT_DATA_ROOT = os.path.abspath("/TensorRT-5.0.2.6/python/data/resnet50/")

    subfolder_path = os.path.join(data_root, subfolder)
    if not os.path.exists(subfolder_path):
        print("WARNING: " + subfolder_path + " does not exist. Using " + data_root + " instead.")
    data_path = subfolder_path if os.path.exists(subfolder_path) else data_root

    if not (os.path.exists(data_path)):
        raise FileNotFoundError(data_path + " does not exist.")

    for index, f in enumerate(find_files):
        find_files[index] = os.path.abspath(os.path.join(data_path, f))
        if not os.path.exists(find_files[index]):
            raise FileNotFoundError(find_files[index] + " does not exist. ")

    if find_files:
        return data_path, find_files
    else:
        return data_path
#-----------------

_ModelData = namedtuple('_ModelData', ['MODEL_PATH', 'DEPLOY_PATH', 'INPUT_SHAPE', 'OUTPUT_NAME', 'DTYPE'])
ModelData = _ModelData(MODEL_PATH = "ResNet50_fp32.caffemodel",
                       DEPLOY_PATH = "ResNet50_N2.prototxt",
                       INPUT_SHAPE = (3, 224, 224),
                       OUTPUT_NAME = "prob",
                       DTYPE = trt.float32)  # TensorRT data types can be converted to numpy types with trt.nptype().

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

'''Step 2 of main: build a TensorRT engine.'''
# The Caffe path is used for Caffe2 models.
def build_engine_caffe(model_file, deploy_file):
    with trt.Builder(TRT_LOGGER) as builder, \
         builder.create_network() as network, \
         trt.CaffeParser() as parser:
        # Workspace size is the maximum amount of memory the builder may use when building an engine; higher is better.
        builder.max_workspace_size = GiB(1)
        # Load the Caffe model and parse it, populating the TensorRT network. The returned object lets us look up tensors by name.
        model_tensors = parser.parse(deploy=deploy_file, model=model_file, network=network, dtype=ModelData.DTYPE)
        # For Caffe, the network outputs must be marked manually; since we know the output tensor's name up front, we can find it directly.
        network.mark_output(model_tensors.find(ModelData.OUTPUT_NAME))
        return builder.build_cuda_engine(network)

'''Step 3 of main: allocate host and device buffers, then create a stream.'''
def allocate_buffers(engine):
    # Determine dimensions and create page-locked memory buffers (i.e. won't be swapped to disk) to hold host inputs/outputs.
    h_input = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(0)), dtype=trt.nptype(ModelData.DTYPE))
    h_output = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(1)), dtype=trt.nptype(ModelData.DTYPE))
    # Allocate device memory for inputs and outputs.
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output = cuda.mem_alloc(h_output.nbytes)
    # Create a stream in which to copy inputs/outputs and run inference.
    stream = cuda.Stream()
    return h_input, d_input, h_output, d_output, stream

'''Step 4 of main: read a test sample and normalize it.'''
def load_normalized_test_case(test_image, pagelocked_buffer):
    # Converts the input image to a CHW Numpy array.
    def normalize_image(image):
        c, h, w = ModelData.INPUT_SHAPE
        return np.asarray(image.resize((w, h), Image.ANTIALIAS)).transpose([2, 0, 1]).astype(trt.nptype(ModelData.DTYPE)).ravel()

    # Normalize the image and copy it into the page-locked host buffer.
    np.copyto(pagelocked_buffer, normalize_image(Image.open(test_image)))
    return test_image

'''Step 5 of main: run inference.'''
def do_inference(context, h_input, d_input, h_output, d_output, stream):
    # Transfer the input data to the GPU.
    cuda.memcpy_htod_async(d_input, h_input, stream)
    # Run inference.
    context.execute_async(bindings=[int(d_input), int(d_output)], stream_handle=stream.handle)
    # Transfer predictions back from the device to the host.
    cuda.memcpy_dtoh_async(h_output, d_output, stream)
    # Synchronize the stream.
    stream.synchronize()

def main():
    '''1 - Locate the model files, test samples, and so on.'''
    data_path, data_files = find_sample_data(
        description="Runs a ResNet50 network with a TensorRT inference engine.",
        subfolder="resnet50",
        find_files=["binoculars.jpeg", "reflex_camera.jpeg", "tabby_tiger_cat.jpg",
                    ModelData.MODEL_PATH, ModelData.DEPLOY_PATH, "class_labels.txt"])
    test_images = data_files[0:3]                                      # Three test images.
    caffe_model_file, caffe_deploy_file, labels_file = data_files[3:]  # Caffe model file, deploy file, and labels file.
    labels = open(labels_file, 'r').read().split('\n')                 # Read the labels.

    '''2 - Build a TensorRT engine with build_engine_caffe.'''
    with build_engine_caffe(caffe_model_file, caffe_deploy_file) as engine:
        # Inference follows the same flow no matter which parser built the engine.
        '''3 - Allocate buffers and create a CUDA stream.'''
        h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)
        '''4 - The context below is used to run inference.'''
        with engine.create_execution_context() as context:
            '''Pick a test sample, normalize it, and copy it into the host page-locked buffer.'''
            test_image = random.choice(test_images)
            test_case = load_normalized_test_case(test_image, h_input)
            # Run the engine. The output is a 1000-element vector where each value is the probability of one class.
            do_inference(context, h_input, d_input, h_output, d_output, stream)
            # Extract the element with the highest probability and map its index to the corresponding label.
            pred = labels[np.argmax(h_output)]
            if "_".join(pred.split()) in os.path.splitext(os.path.basename(test_case))[0]:
                print("Correctly recognized " + test_case + " as " + pred)
            else:
                print("Incorrectly recognized " + test_case + " as " + pred)

if __name__ == '__main__':
    main()
```
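Building the engine (parsing plus optimization) dominates the runtime of this script, so a common follow-up, not shown in the sample, is to serialize the engine once and deserialize it on later runs. A minimal sketch; the engine file name is an assumption:

```python
# Hypothetical: cache the built engine so later runs can skip parsing and building.
with build_engine_caffe(caffe_model_file, caffe_deploy_file) as engine, \
        open("resnet50_caffe.engine", "wb") as f:  # output file name is an assumption
    f.write(engine.serialize())

# Later runs deserialize instead of rebuilding:
with open("resnet50_caffe.engine", "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())
```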
Comparing the code below against the example above, the flow is largely the same; only a few individual functions differ.
```python
# This sample uses an ONNX ResNet50 model to create a TensorRT inference engine.
import random
from PIL import Image
from collections import namedtuple
import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit  # This import lets pycuda manage CUDA context creation and cleanup automatically.
import tensorrt as trt

import sys, os
# import common
# The GiB and find_sample_data functions from common.py are inlined here so the file is self-contained.

def GiB(val):
    '''Computes a size in bytes from a value in GiB: shifting left by 10 bits gives KiB, 20 bits MiB, 30 bits GiB.'''
    return val * 1 << 30

def find_sample_data(description="Runs a TensorRT Python sample", subfolder="", find_files=[]):
    '''Parses sample arguments.

    Args:
        description (str): Description of the sample.
        subfolder (str): The subfolder containing data relevant to this sample.
        find_files (list of str): A list of filenames to find. Each filename will be replaced with an absolute path.

    Returns:
        str: Path of data directory.

    Raises:
        FileNotFoundError
    '''
    # For brevity, the data path is hard-coded here.
    data_root = kDEFAULT_DATA_ROOT = os.path.abspath("/TensorRT-5.0.2.6/python/data/resnet50/")

    subfolder_path = os.path.join(data_root, subfolder)
    if not os.path.exists(subfolder_path):
        print("WARNING: " + subfolder_path + " does not exist. Using " + data_root + " instead.")
    data_path = subfolder_path if os.path.exists(subfolder_path) else data_root

    if not (os.path.exists(data_path)):
        raise FileNotFoundError(data_path + " does not exist.")

    for index, f in enumerate(find_files):
        find_files[index] = os.path.abspath(os.path.join(data_path, f))
        if not os.path.exists(find_files[index]):
            raise FileNotFoundError(find_files[index] + " does not exist. ")

    if find_files:
        return data_path, find_files
    else:
        return data_path
#-----------------

_ModelData = namedtuple('_ModelData', ['MODEL_PATH', 'INPUT_SHAPE', 'DTYPE'])
ModelData = _ModelData(MODEL_PATH = "ResNet50.onnx",
                       INPUT_SHAPE = (3, 224, 224),
                       DTYPE = trt.float32)  # TensorRT data types can be converted to numpy types with trt.nptype().

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

'''Step 2 of main: build a TensorRT engine.'''
# The Onnx path is used for Onnx models.
def build_engine_onnx(model_file):
    with trt.Builder(TRT_LOGGER) as builder, \
         builder.create_network() as network, \
         trt.OnnxParser(network, TRT_LOGGER) as parser:
        # Workspace size is the maximum amount of memory the builder may use when building an engine; higher is better.
        builder.max_workspace_size = GiB(1)
        # Load the ONNX model and parse it, populating the TensorRT network.
        with open(model_file, 'rb') as model:
            parser.parse(model.read())
        return builder.build_cuda_engine(network)

'''Step 3 of main: allocate host and device buffers, then create a stream.'''
def allocate_buffers(engine):
    # Determine dimensions and create page-locked memory buffers (i.e. won't be swapped to disk) to hold host inputs/outputs.
    h_input = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(0)), dtype=trt.nptype(ModelData.DTYPE))
    h_output = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(1)), dtype=trt.nptype(ModelData.DTYPE))
    # Allocate device memory for inputs and outputs.
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output = cuda.mem_alloc(h_output.nbytes)
    # Create a stream in which to copy inputs/outputs and run inference.
    stream = cuda.Stream()
    return h_input, d_input, h_output, d_output, stream

'''Step 4 of main: read a test sample and normalize it.'''
def load_normalized_test_case(test_image, pagelocked_buffer):
    # Converts the input image to a CHW Numpy array.
    def normalize_image(image):
        '''This function differs slightly from the one in the first sample.'''
        c, h, w = ModelData.INPUT_SHAPE
        image_arr = np.asarray(image.resize((w, h), Image.ANTIALIAS)).transpose([2, 0, 1]).astype(trt.nptype(ModelData.DTYPE)).ravel()
        # This particular ResNet-50 needs some preprocessing, specifically mean normalization.
        return (image_arr / 255.0 - 0.45) / 0.225

    # Normalize the image and copy it into the page-locked host buffer.
    np.copyto(pagelocked_buffer, normalize_image(Image.open(test_image)))
    return test_image

'''Step 5 of main: run inference.'''
def do_inference(context, h_input, d_input, h_output, d_output, stream):
    # Transfer the input data to the GPU.
    cuda.memcpy_htod_async(d_input, h_input, stream)
    # Run inference.
    context.execute_async(bindings=[int(d_input), int(d_output)], stream_handle=stream.handle)
    # Transfer predictions back from the device to the host.
    cuda.memcpy_dtoh_async(h_output, d_output, stream)
    # Synchronize the stream.
    stream.synchronize()

def main():
    '''1 - Locate the model files, test samples, and so on.'''
    data_path, data_files = find_sample_data(
        description="Runs a ResNet50 network with a TensorRT inference engine.",
        subfolder="resnet50",
        find_files=["binoculars.jpeg", "reflex_camera.jpeg", "tabby_tiger_cat.jpg",
                    ModelData.MODEL_PATH, "class_labels.txt"])
    test_images = data_files[0:3]                        # Three test images.
    onnx_model_file, labels_file = data_files[3:]        # ONNX model file and labels file.
    labels = open(labels_file, 'r').read().split('\n')   # Read the labels.

    '''2 - Build a TensorRT engine with build_engine_onnx.'''
    with build_engine_onnx(onnx_model_file) as engine:
        # Inference follows the same flow no matter which parser built the engine, since every variant here is a ResNet-50.
        '''3 - Allocate buffers and create a CUDA stream.'''
        h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)
        '''4 - The context below is used to run inference.'''
        with engine.create_execution_context() as context:
            '''Pick a test sample, normalize it, and copy it into the host page-locked buffer.'''
            test_image = random.choice(test_images)
            test_case = load_normalized_test_case(test_image, h_input)
            # Run the engine. The output is a 1000-element vector where each value is the probability of one class.
            do_inference(context, h_input, d_input, h_output, d_output, stream)
            # Extract the element with the highest probability and map its index to the corresponding label.
            pred = labels[np.argmax(h_output)]
            if "_".join(pred.split()) in os.path.splitext(os.path.basename(test_case))[0]:
                print("Correctly recognized " + test_case + " as " + pred)
            else:
                print("Incorrectly recognized " + test_case + " as " + pred)

if __name__ == '__main__':
    main()
```
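One caveat: the sample ignores the boolean that parser.parse returns, so a malformed ONNX file only surfaces later as a confusing build failure. A variant that reports parser errors directly might look like this (build_engine_onnx_checked is a hypothetical name; num_errors/get_error are the ONNX parser's error-reporting interface):

```python
def build_engine_onnx_checked(model_file):
    '''Hypothetical variant of build_engine_onnx that surfaces parser errors.'''
    with trt.Builder(TRT_LOGGER) as builder, \
         builder.create_network() as network, \
         trt.OnnxParser(network, TRT_LOGGER) as parser:
        builder.max_workspace_size = GiB(1)
        with open(model_file, 'rb') as model:
            if not parser.parse(model.read()):
                # Print every error the parser recorded before giving up.
                for i in range(parser.num_errors):
                    print(parser.get_error(i))
                return None
        return builder.build_cuda_engine(network)
```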
As the example below confirms, all three samples follow roughly the same flow, with only a few small local differences.
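To make that shared structure explicit, the three main functions could be collapsed into one driver parameterized by the builder function. This is a sketch only (run_sample is a hypothetical helper, not part of the samples; it reuses allocate_buffers, load_normalized_test_case, and do_inference exactly as defined above):

```python
import random
import numpy as np

def run_sample(build_engine, model_files, test_images, labels):
    '''Hypothetical driver for the flow shared by all three samples.'''
    with build_engine(*model_files) as engine:                                    # step 2: parser-specific build
        h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)  # step 3
        with engine.create_execution_context() as context:                       # step 4
            test_case = load_normalized_test_case(random.choice(test_images), h_input)
            do_inference(context, h_input, d_input, h_output, d_output, stream)  # step 5
            return test_case, labels[np.argmax(h_output)]

# e.g. run_sample(build_engine_caffe, (caffe_model_file, caffe_deploy_file), test_images, labels)
# or   run_sample(build_engine_onnx, (onnx_model_file,), test_images, labels)
```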
UFF is the unified framework format that TensorRT uses internally to represent the network graph before optimization; model formats such as TensorFlow's pb can first be converted to UFF (see tensorrt-3-faster-tensorflow-inference).
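For reference, a frozen TensorFlow graph can be converted either with the convert-to-uff utility shipped with TensorRT or from Python via the uff package. A minimal sketch, assuming the frozen graph sits at resnet50-infer-5.pb (a hypothetical path) and uses the output node this sample registers:

```python
import uff

# Convert a frozen TensorFlow graph (.pb) into the UFF file the sample below consumes.
# The input .pb path is hypothetical; the output node name matches ModelData.OUTPUT_NAME.
uff.from_tensorflow_frozen_model(
    "resnet50-infer-5.pb",
    output_nodes=["GPU_0/tower_0/Softmax"],
    output_filename="resnet50-infer-5.uff")
```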
```python
# This sample uses a UFF ResNet50 model to create a TensorRT inference engine.
import random
from collections import namedtuple
from PIL import Image
import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit  # This import lets pycuda manage CUDA context creation and cleanup automatically.
import tensorrt as trt

import sys, os
# sys.path.insert(1, os.path.join(sys.path[0], ".."))
# import common
# The GiB and find_sample_data functions from common.py are inlined here so the file is self-contained.

def GiB(val):
    '''Computes a size in bytes from a value in GiB: shifting left by 10 bits gives KiB, 20 bits MiB, 30 bits GiB.'''
    return val * 1 << 30

def find_sample_data(description="Runs a TensorRT Python sample", subfolder="", find_files=[]):
    '''Parses sample arguments.

    Args:
        description (str): Description of the sample.
        subfolder (str): The subfolder containing data relevant to this sample.
        find_files (list of str): A list of filenames to find. Each filename will be replaced with an absolute path.

    Returns:
        str: Path of data directory.

    Raises:
        FileNotFoundError
    '''
    # For brevity, the data path is hard-coded here.
    data_root = kDEFAULT_DATA_ROOT = os.path.abspath("/TensorRT-5.0.2.6/python/data/resnet50/")

    subfolder_path = os.path.join(data_root, subfolder)
    if not os.path.exists(subfolder_path):
        print("WARNING: " + subfolder_path + " does not exist. Using " + data_root + " instead.")
    data_path = subfolder_path if os.path.exists(subfolder_path) else data_root

    if not (os.path.exists(data_path)):
        raise FileNotFoundError(data_path + " does not exist.")

    for index, f in enumerate(find_files):
        find_files[index] = os.path.abspath(os.path.join(data_path, f))
        if not os.path.exists(find_files[index]):
            raise FileNotFoundError(find_files[index] + " does not exist. ")

    if find_files:
        return data_path, find_files
    else:
        return data_path
#-----------------

_ModelData = namedtuple('_ModelData', ['MODEL_PATH', 'INPUT_NAME', 'INPUT_SHAPE', 'OUTPUT_NAME', 'DTYPE'])
ModelData = _ModelData(MODEL_PATH = "resnet50-infer-5.uff",
                       INPUT_NAME = "input",
                       INPUT_SHAPE = (3, 224, 224),
                       OUTPUT_NAME = "GPU_0/tower_0/Softmax",
                       DTYPE = trt.float32)  # TensorRT data types can be converted to numpy types with trt.nptype().

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

'''Step 2 of main: build a TensorRT engine.'''
# The UFF path is used for TensorFlow models. You can convert a frozen TensorFlow graph to UFF using the included convert-to-uff utility.
def build_engine_uff(model_file):
    with trt.Builder(TRT_LOGGER) as builder, \
         builder.create_network() as network, \
         trt.UffParser() as parser:
        # Workspace size is the maximum amount of memory the builder may use when building an engine; higher is better.
        builder.max_workspace_size = GiB(1)
        # The input and output nodes must be registered with the UFF parser manually.
        parser.register_input(ModelData.INPUT_NAME, ModelData.INPUT_SHAPE)
        parser.register_output(ModelData.OUTPUT_NAME)
        # Load the UFF model and parse it, populating the TensorRT network.
        parser.parse(model_file, network)
        return builder.build_cuda_engine(network)

'''Step 3 of main: allocate host and device buffers, then create a stream.'''
def allocate_buffers(engine):
    # Determine dimensions and create page-locked memory buffers (i.e. won't be swapped to disk) to hold host inputs/outputs.
    h_input = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(0)), dtype=trt.nptype(ModelData.DTYPE))
    h_output = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(1)), dtype=trt.nptype(ModelData.DTYPE))
    # Allocate device memory for inputs and outputs.
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output = cuda.mem_alloc(h_output.nbytes)
    # Create a stream in which to copy inputs/outputs and run inference.
    stream = cuda.Stream()
    return h_input, d_input, h_output, d_output, stream

'''Step 4 of main: read a test sample and normalize it.'''
def load_normalized_test_case(test_image, pagelocked_buffer):
    # Converts the input image to a CHW Numpy array.
    def normalize_image(image):
        c, h, w = ModelData.INPUT_SHAPE
        return np.asarray(image.resize((w, h), Image.ANTIALIAS)).transpose([2, 0, 1]).astype(trt.nptype(ModelData.DTYPE)).ravel()

    # Normalize the image and copy it into the page-locked host buffer.
    np.copyto(pagelocked_buffer, normalize_image(Image.open(test_image)))
    return test_image

'''Step 5 of main: run inference.'''
def do_inference(context, h_input, d_input, h_output, d_output, stream):
    # Transfer the input data to the GPU.
    cuda.memcpy_htod_async(d_input, h_input, stream)
    # Run inference.
    context.execute_async(bindings=[int(d_input), int(d_output)], stream_handle=stream.handle)
    # Transfer predictions back from the device to the host.
    cuda.memcpy_dtoh_async(h_output, d_output, stream)
    # Synchronize the stream.
    stream.synchronize()

def main():
    '''1 - Locate the model files, test samples, and so on.'''
    data_path, data_files = find_sample_data(
        description="Runs a ResNet50 network with a TensorRT inference engine.",
        subfolder="resnet50",
        find_files=["binoculars.jpeg", "reflex_camera.jpeg", "tabby_tiger_cat.jpg",
                    ModelData.MODEL_PATH, "class_labels.txt"])
    test_images = data_files[0:3]                        # Three test images.
    uff_model_file, labels_file = data_files[3:]         # UFF model file and labels file.
    labels = open(labels_file, 'r').read().split('\n')   # Read the labels.

    '''2 - Build a TensorRT engine with build_engine_uff.'''
    with build_engine_uff(uff_model_file) as engine:
        # Inference follows the same flow no matter which parser built the engine, since every variant here is a ResNet-50.
        '''3 - Allocate buffers and create a CUDA stream.'''
        h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)
        '''4 - The context below is used to run inference.'''
        with engine.create_execution_context() as context:
            '''Pick a test sample, normalize it, and copy it into the host page-locked buffer.'''
            test_image = random.choice(test_images)
            test_case = load_normalized_test_case(test_image, h_input)
            # Run the engine. The output is a 1000-element vector where each value is the probability of one class.
            do_inference(context, h_input, d_input, h_output, d_output, stream)
            # Extract the element with the highest probability and map its index to the corresponding label.
            pred = labels[np.argmax(h_output)]
            if "_".join(pred.split()) in os.path.splitext(os.path.basename(test_case))[0]:
                print("Correctly recognized " + test_case + " as " + pred)
            else:
                print("Incorrectly recognized " + test_case + " as " + pred)

if __name__ == '__main__':
    main()
```