PyTorch is an open-source machine learning framework, and TensorRT is NVIDIA's high-performance inference library for deep learning. Converting a PyTorch model to a TensorRT engine can significantly reduce its inference latency. The steps are as follows:
Step 1: Install TensorRT
Download the TensorRT release that matches your CUDA (Compute Unified Device Architecture) version from the NVIDIA website. After installing it, complete the following steps:
- (1) Add TensorRT to the library search path
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/TensorRT-VERSION/lib:/usr/local/cuda-VERSION/lib64
where VERSION is the installed TensorRT or CUDA version number, respectively.
- (2) Test TensorRT with the bundled trtexec tool
/usr/local/TensorRT-VERSION/bin/trtexec --onnx=<model_path> --saveEngine=<engine_path>
where <model_path> is the path to an ONNX model and <engine_path> is the path where the serialized engine will be written. If the build completes successfully, the installation works.
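You can also verify the Python bindings directly; a minimal check, assuming the tensorrt wheel that ships with your TensorRT release has been installed into your Python environment:
import tensorrt as trt

# Importing and printing the version confirms the Python bindings are usable
print(trt.__version__)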
Step 2: Export the PyTorch model to ONNX format
TensorRT can parse the ONNX format, so the PyTorch model must first be exported to ONNX. Here is an example:
import torch
import torchvision
import onnx
# Note: in onnx >= 1.9 the optimizer was moved to the separate "onnxoptimizer"
# package; on newer versions use "import onnxoptimizer" instead.
from onnx import optimizer

model = torchvision.models.resnet50(pretrained=True)
model.eval()  # switch to inference mode before exporting
dummy_input = torch.randn(1, 3, 224, 224)
input_names = ["input1"]
output_names = ["output1"]
torch.onnx.export(model, dummy_input, "resnet50.onnx", verbose=True,
                  input_names=input_names, output_names=output_names)

# Run a few graph-simplification passes on the exported model
onnx_model = onnx.load("resnet50.onnx")
passes = ["eliminate_unused_initializer", "eliminate_nop_dropout",
          "eliminate_nop_transpose", "eliminate_identity"]
opt_model = optimizer.optimize(onnx_model, passes)
onnx.save(opt_model, "resnet50_opt.onnx")
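Before handing the file to TensorRT, it is worth validating it with the ONNX checker; a minimal sketch:
import onnx

# check_model raises an exception if the graph or opset is malformed
onnx.checker.check_model(onnx.load("resnet50_opt.onnx"))
print("ONNX model is well formed")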
Step 3: Load and optimize the ONNX model
Load the ONNX model into TensorRT and configure the builder so that it generates an efficient TensorRT engine. Here is an example:
import sys
import tensorrt as trt

# Load the ONNX model into TensorRT and build a serialized engine
model_path = "resnet50_opt.onnx"
engine_path = "resnet50.trt"
max_batch_size = 1

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
# The ONNX parser requires an explicit-batch network definition
network_creation_flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)

with trt.Builder(TRT_LOGGER) as builder, \
        builder.create_network(network_creation_flag) as network, \
        trt.OnnxParser(network, TRT_LOGGER) as parser, \
        builder.create_builder_config() as config:
    config.max_workspace_size = 1 << 30  # 1 GiB of builder scratch memory
    with open(model_path, 'rb') as model:
        if not parser.parse(model.read()):
            print('ERROR: Failed to parse the ONNX file {}.'.format(model_path))
            for error in range(parser.num_errors):
                print(parser.get_error(error))
            sys.exit(1)
    # Pin the input shape (the batch dimension is explicit)
    network.get_input(0).shape = [max_batch_size, 3, 224, 224]
    engine = builder.build_engine(network, config)
    with open(engine_path, "wb") as f:
        f.write(engine.serialize())
    print("TensorRT engine has been saved to file: {}".format(engine_path))
Example
The following example walks through converting a custom PyTorch model to TensorRT.
Suppose we have the following PyTorch model:
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, 3)
        self.conv2 = nn.Conv2d(64, 128, 3)
        self.dropout1 = nn.Dropout2d(0.25)
        # 128 * 54 * 54 = 373248 features after two conv + pool stages on a 224x224 input
        self.fc1 = nn.Linear(128 * 54 * 54, 256)
        self.fc2 = nn.Linear(256, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = x.view(-1, 128 * 54 * 54)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)
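A quick forward pass confirms that the flattened feature size matches the 224x224 input used throughout this walkthrough:
import torch

net = Net().eval()
with torch.no_grad():
    out = net(torch.randn(1, 3, 224, 224))
print(out.shape)  # expected: torch.Size([1, 10])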
First, export the model to ONNX format:
import torch
import onnx
# As above: on onnx >= 1.9 use the separate "onnxoptimizer" package instead
from onnx import optimizer

model = Net()
model.eval()  # disable dropout before exporting
dummy_input = torch.randn(1, 3, 224, 224)
input_names = ["input1"]
output_names = ["output1"]
torch.onnx.export(model, dummy_input, "model.onnx", verbose=True,
                  input_names=input_names, output_names=output_names)

onnx_model = onnx.load("model.onnx")
passes = ["eliminate_unused_initializer", "eliminate_nop_dropout",
          "eliminate_nop_transpose", "eliminate_identity"]
opt_model = optimizer.optimize(onnx_model, passes)
onnx.save(opt_model, "model_opt.onnx")
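Optionally, compare the ONNX model's output against PyTorch before building the engine; a minimal sketch, assuming the onnxruntime package is installed:
import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("model_opt.onnx")
onnx_out = sess.run(None, {"input1": dummy_input.numpy()})[0]
torch_out = model(dummy_input).detach().numpy()
# The two results should agree to within floating-point tolerance
print(np.allclose(onnx_out, torch_out, atol=1e-4))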
Then, load the exported ONNX model into TensorRT and build the engine:
import sys
import tensorrt as trt

# Load the ONNX model into TensorRT and build a serialized engine
model_path = "model_opt.onnx"
engine_path = "model.trt"
max_batch_size = 1

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
# The ONNX parser requires an explicit-batch network definition
network_creation_flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)

with trt.Builder(TRT_LOGGER) as builder, \
        builder.create_network(network_creation_flag) as network, \
        trt.OnnxParser(network, TRT_LOGGER) as parser, \
        builder.create_builder_config() as config:
    config.max_workspace_size = 1 << 30  # 1 GiB of builder scratch memory
    with open(model_path, 'rb') as model:
        if not parser.parse(model.read()):
            print('ERROR: Failed to parse the ONNX file {}.'.format(model_path))
            for error in range(parser.num_errors):
                print(parser.get_error(error))
            sys.exit(1)
    # Pin the input shape (the batch dimension is explicit)
    network.get_input(0).shape = [max_batch_size, 3, 224, 224]
    engine = builder.build_engine(network, config)
    with open(engine_path, "wb") as f:
        f.write(engine.serialize())
    print("TensorRT engine has been saved to file: {}".format(engine_path))
Once the model has been converted to a TensorRT engine, you can run inference with the following code:
import numpy as np
import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit  # initializes the CUDA context

# Load the serialized TensorRT engine
engine_file = "model.trt"
with open(engine_file, "rb") as f, trt.Runtime(trt.Logger(trt.Logger.WARNING)) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())

# Allocate device buffers for the input and output bindings
input_shape = [1, 3, 224, 224]
output_shape = [1, 10]
itemsize = np.dtype(np.float32).itemsize
device_input = cuda.mem_alloc(trt.volume(input_shape) * itemsize)
device_output = cuda.mem_alloc(trt.volume(output_shape) * itemsize)
stream = cuda.Stream()

# Run inference
with engine.create_execution_context() as context:
    input_data = np.random.random(size=input_shape).astype(np.float32)
    cuda.memcpy_htod_async(device_input, input_data, stream)
    context.execute_async_v2(bindings=[int(device_input), int(device_output)],
                             stream_handle=stream.handle)
    output_data = np.empty(output_shape, dtype=np.float32)
    cuda.memcpy_dtoh_async(output_data, device_output, stream)
    stream.synchronize()
    print("Output shape: {}".format(output_data.shape))
    print("Output: {}".format(output_data))
Here PyCUDA is used to manage the input and output device buffers while TensorRT executes the engine. Note that TensorRT is stricter about input shapes than PyTorch: the shape fixed at engine-build time must match the shape of the data fed at inference time, otherwise engine building or inference will fail.
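To guard against such mismatches, you can query the shapes the engine actually expects before copying any data; a minimal sketch using the binding-inspection API:
# Print the name and build-time shape of every binding in the engine
for i in range(engine.num_bindings):
    kind = "input" if engine.binding_is_input(i) else "output"
    print("{} '{}': {}".format(kind, engine.get_binding_name(i), engine.get_binding_shape(i)))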
This completes the walkthrough of converting a PyTorch model to TensorRT, illustrated with the ResNet-50 and custom-network examples.