1 平台:
jetson Nano + 128G TF卡 + jetpack: jetson-nano-sd-r32.1-2019-03-18.zip
sampleSSD目录:/usr/src/tensorrt/sample/sampleSSD/
使用的data文件在:/usr/src/tensorrt/data/ssd/
编译之后的可执行文件在:/usr/src/tensorrt/bin/
2 环境准备:
2.1 主要为了装上pillow
sudo apt-get -y install cmake
sudo apt-get install python3
sudo apt-get install python3-pip
sudo apt-get install python3-dev
sudo apt install libopenblas-dev libatlas-dev liblapack-dev
sudo apt install liblapacke-dev checkinstall
sudo pip3 install numpy scipy
sudo pip3 install pyyaml
sudo pip3 install scikit-build
sudo apt install libffi-dev
sudo pip3 install cffi
sudo apt-get install -qq protobuf-compiler python-pil python-lxml python-tk
sudo pip3 install -q Cython contextlib2 lxml matplotlib
sudo pip3 install -q pycocotools
sudo pip3 install -q pillow
2.2 8GB交换空间设置
sudo fallocate -l 8G /swapfile
sudo chmod 600 /swapfile
sudo mkswap /swapfile
sudo swapon /swapfile
echo "/swapfile swap swap defaults 0 0" | sudo tee --append /etc/fstab > /dev/null
3 文件准备
(这一步可以把PrepareINT8CalibrationBatches.sh和batchPrepare.py文件拷贝到其他主机上进行):
3.1a 下载VOC数据集并且转行成batches文件
sampleSSD目录:/usr/src/tensorrt/sample/sampleSSD/ 下的 PrepareINT8CalibrationBatches.sh
sudo /usr/src/tensorrt/sample/sampleSSD/PrepareINT8CalibrationBatches.sh
等待下载voc数据集 并且转换batches(/usr/src/tensorrt/data/ssd/batches/)
3.1b如果你已经有了voc数据集(2007或者2012)
可以直接运行batchPrepare.py 直接进行batches转换:
python batchPrepare.py --inDir ./VOC2007/JPEGImages/ --outDir /usr/src/tensorrt/data/ssd/
转换完确认.batch 文件在/usr/src/tensorrt/data/ssd/batches/中
4.caffe模型下载并准备
4.1 model
models_VGGNet_VOC0712_SSD_300x300.tar.gz 链接
https://drive.google.com/file/d/0BzKzrI_SkD1_WVVTSmQxU0dVRzA/view
解压:
把其中的VGG_VOC0712_SSD_300x300_iter_120000.caffemodel 移动到/usr/src/tensorrt/data/ssd/
4.2 ssd.prototxt
修改上面解压的里面的deploy.prototxt 文件名为ssd.prototxt 并且按照官方文档修改内容:
Edit the deploy.prototxt file and change all the Flatten layers to Reshape operations with the following parameters:
把所有的Flatten层 都改成以下内容,type名记得更改为Reshape
reshape_param {
shape {
dim: 0
dim: -1
dim: 1
dim: 1
}
}
Update the detection_out layer by adding the keep_count output, for example, add:
把以下内容加在top: "detection_out"下面
top: "keep_count"
Rename the deploy.prototxt file to ssd.prototxt and run the sample.
本文末尾处附上我修改好的ssd.prototxt!
5 编译运行
5.1 编译
cd /usr/src/tensorrt/sample/sampleSSD/
sudo make
5.2 运行
直接运行会卡死,必须关闭图形界面
ctrl+alt+ f3
登陆
sudo su
init 3#关闭图形 init 5 重启图形
cd /usr/src/tensorrt/bin/
./sample_ssd --mode INT8
加载网络很慢很慢,大概等待10分钟出结果
会输出检测bus图里面的一辆巴士汽车的结果,输出分数和边框坐标
在/usr/src/tensorrt/bin/中会有检测的结果图
附上我自己的测速结果:
jetson Nano + ssd+ int8 tensorrt+300*300*3 = 单张图像检测延时:300ms
本文末尾会附上我自己修改的sampleSSD.cpp
重新make 然后运行 repeat 是重复次数,重复多次以计算代码运算速度
还加上了对流程中各个部分的时间花费计算
./sample_ssd --mode INT8 --repeat 2000
6.附件
6.1修改好的ssd.prototxt:
name: "VGG_VOC0712_SSD_300x300_deploy"
input: "data"
input_shape {
dim: 1
dim: 3
dim: 300
dim: 300
}
layer {
name: "conv1_1"
type: "Convolution"
bottom: "data"
top: "conv1_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu1_1"
type: "ReLU"
bottom: "conv1_1"
top: "conv1_1"
}
layer {
name: "conv1_2"
type: "Convolution"
bottom: "conv1_1"
top: "conv1_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu1_2"
type: "ReLU"
bottom: "conv1_2"
top: "conv1_2"
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1_2"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv2_1"
type: "Convolution"
bottom: "pool1"
top: "conv2_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 1
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu2_1"
type: "ReLU"
bottom: "conv2_1"
top: "conv2_1"
}
layer {
name: "conv2_2"
type: "Convolution"
bottom: "conv2_1"
top: "conv2_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 1
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu2_2"
type: "ReLU"
bottom: "conv2_2"
top: "conv2_2"
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2_2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv3_1"
type: "Convolution"
bottom: "pool2"
top: "conv3_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu3_1"
type: "ReLU"
bottom: "conv3_1"
top: "conv3_1"
}
layer {
name: "conv3_2"
type: "Convolution"
bottom: "conv3_1"
top: "conv3_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu3_2"
type: "ReLU"
bottom: "conv3_2"
top: "conv3_2"
}
layer {
name: "conv3_3"
type: "Convolution"
bottom: "conv3_2"
top: "conv3_3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu3_3"
type: "ReLU"
bottom: "conv3_3"
top: "conv3_3"
}
layer {
name: "pool3"
type: "Pooling"
bottom: "conv3_3"
top: "pool3"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv4_1"
type: "Convolution"
bottom: "pool3"
top: "conv4_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu4_1"
type: "ReLU"
bottom: "conv4_1"
top: "conv4_1"
}
layer {
name: "conv4_2"
type: "Convolution"
bottom: "conv4_1"
top: "conv4_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu4_2"
type: "ReLU"
bottom: "conv4_2"
top: "conv4_2"
}
layer {
name: "conv4_3"
type: "Convolution"
bottom: "conv4_2"
top: "conv4_3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu4_3"
type: "ReLU"
bottom: "conv4_3"
top: "conv4_3"
}
layer {
name: "pool4"
type: "Pooling"
bottom: "conv4_3"
top: "pool4"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv5_1"
type: "Convolution"
bottom: "pool4"
top: "conv5_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
dilation: 1
}
}
layer {
name: "relu5_1"
type: "ReLU"
bottom: "conv5_1"
top: "conv5_1"
}
layer {
name: "conv5_2"
type: "Convolution"
bottom: "conv5_1"
top: "conv5_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
dilation: 1
}
}
layer {
name: "relu5_2"
type: "ReLU"
bottom: "conv5_2"
top: "conv5_2"
}
layer {
name: "conv5_3"
type: "Convolution"
bottom: "conv5_2"
top: "conv5_3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
dilation: 1
}
}
layer {
name: "relu5_3"
type: "ReLU"
bottom: "conv5_3"
top: "conv5_3"
}
layer {
name: "pool5"
type: "Pooling"
bottom: "conv5_3"
top: "pool5"
pooling_param {
pool: MAX
kernel_size: 3
stride: 1
pad: 1
}
}
layer {
name: "fc6"
type: "Convolution"
bottom: "pool5"
top: "fc6"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 1024
pad: 6
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
dilation: 6
}
}
layer {
name: "relu6"
type: "ReLU"
bottom: "fc6"
top: "fc6"
}
layer {
name: "fc7"
type: "Convolution"
bottom: "fc6"
top: "fc7"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 1024
kernel_size: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu7"
type: "ReLU"
bottom: "fc7"
top: "fc7"
}
layer {
name: "conv6_1"
type: "Convolution"
bottom: "fc7"
top: "conv6_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv6_1_relu"
type: "ReLU"
bottom: "conv6_1"
top: "conv6_1"
}
layer {
name: "conv6_2"
type: "Convolution"
bottom: "conv6_1"
top: "conv6_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 2
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv6_2_relu"
type: "ReLU"
bottom: "conv6_2"
top: "conv6_2"
}
layer {
name: "conv7_1"
type: "Convolution"
bottom: "conv6_2"
top: "conv7_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv7_1_relu"
type: "ReLU"
bottom: "conv7_1"
top: "conv7_1"
}
layer {
name: "conv7_2"
type: "Convolution"
bottom: "conv7_1"
top: "conv7_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
stride: 2
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv7_2_relu"
type: "ReLU"
bottom: "conv7_2"
top: "conv7_2"
}
layer {
name: "conv8_1"
type: "Convolution"
bottom: "conv7_2"
top: "conv8_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv8_1_relu"
type: "ReLU"
bottom: "conv8_1"
top: "conv8_1"
}
layer {
name: "conv8_2"
type: "Convolution"
bottom: "conv8_1"
top: "conv8_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 0
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv8_2_relu"
type: "ReLU"
bottom: "conv8_2"
top: "conv8_2"
}
layer {
name: "conv9_1"
type: "Convolution"
bottom: "conv8_2"
top: "conv9_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv9_1_relu"
type: "ReLU"
bottom: "conv9_1"
top: "conv9_1"
}
layer {
name: "conv9_2"
type: "Convolution"
bottom: "conv9_1"
top: "conv9_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 0
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv9_2_relu"
type: "ReLU"
bottom: "conv9_2"
top: "conv9_2"
}
layer {
name: "conv4_3_norm"
type: "Normalize"
bottom: "conv4_3"
top: "conv4_3_norm"
norm_param {
across_spatial: false
scale_filler {
type: "constant"
value: 20
}
channel_shared: false
}
}
layer {
name: "conv4_3_norm_mbox_loc"
type: "Convolution"
bottom: "conv4_3_norm"
top: "conv4_3_norm_mbox_loc"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 16
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv4_3_norm_mbox_loc_perm"
type: "Permute"
bottom: "conv4_3_norm_mbox_loc"
top: "conv4_3_norm_mbox_loc_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv4_3_norm_mbox_loc_flat"
type: "Reshape"
bottom: "conv4_3_norm_mbox_loc_perm"
top: "conv4_3_norm_mbox_loc_flat"
reshape_param {
shape {
dim: 0
dim: -1
dim: 1
dim: 1
}
}
}
layer {
name: "conv4_3_norm_mbox_conf"
type: "Convolution"
bottom: "conv4_3_norm"
top: "conv4_3_norm_mbox_conf"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 84
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv4_3_norm_mbox_conf_perm"
type: "Permute"
bottom: "conv4_3_norm_mbox_conf"
top: "conv4_3_norm_mbox_conf_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv4_3_norm_mbox_conf_flat"
type: "Reshape"
bottom: "conv4_3_norm_mbox_conf_perm"
top: "conv4_3_norm_mbox_conf_flat"
reshape_param {
shape {
dim: 0
dim: -1
dim: 1
dim: 1
}
}
}
layer {
name: "conv4_3_norm_mbox_priorbox"
type: "PriorBox"
bottom: "conv4_3_norm"
bottom: "data"
top: "conv4_3_norm_mbox_priorbox"
prior_box_param {
min_size: 30.0
max_size: 60.0
aspect_ratio: 2
flip: true
clip: false
variance: 0.1
variance: 0.1
variance: 0.2
variance: 0.2
step: 8
offset: 0.5
}
}
layer {
name: "fc7_mbox_loc"
type: "Convolution"
bottom: "fc7"
top: "fc7_mbox_loc"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 24
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "fc7_mbox_loc_perm"
type: "Permute"
bottom: "fc7_mbox_loc"
top: "fc7_mbox_loc_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "fc7_mbox_loc_flat"
type: "Reshape"
bottom: "fc7_mbox_loc_perm"
top: "fc7_mbox_loc_flat"
reshape_param {
shape {
dim: 0
dim: -1
dim: 1
dim: 1
}
}
}
layer {
name: "fc7_mbox_conf"
type: "Convolution"
bottom: "fc7"
top: "fc7_mbox_conf"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 126
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "fc7_mbox_conf_perm"
type: "Permute"
bottom: "fc7_mbox_conf"
top: "fc7_mbox_conf_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "fc7_mbox_conf_flat"
type: "Reshape"
bottom: "fc7_mbox_conf_perm"
top: "fc7_mbox_conf_flat"
reshape_param {
shape {
dim: 0
dim: -1
dim: 1
dim: 1
}
}
}
layer {
name: "fc7_mbox_priorbox"
type: "PriorBox"
bottom: "fc7"
bottom: "data"
top: "fc7_mbox_priorbox"
prior_box_param {
min_size: 60.0
max_size: 111.0
aspect_ratio: 2
aspect_ratio: 3
flip: true
clip: false
variance: 0.1
variance: 0.1
variance: 0.2
variance: 0.2
step: 16
offset: 0.5
}
}
layer {
name: "conv6_2_mbox_loc"
type: "Convolution"
bottom: "conv6_2"
top: "conv6_2_mbox_loc"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 24
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv6_2_mbox_loc_perm"
type: "Permute"
bottom: "conv6_2_mbox_loc"
top: "conv6_2_mbox_loc_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv6_2_mbox_loc_flat"
type: "Reshape"
bottom: "conv6_2_mbox_loc_perm"
top: "conv6_2_mbox_loc_flat"
reshape_param {
shape {
dim: 0
dim: -1
dim: 1
dim: 1
}
}
}
layer {
name: "conv6_2_mbox_conf"
type: "Convolution"
bottom: "conv6_2"
top: "conv6_2_mbox_conf"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 126
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv6_2_mbox_conf_perm"
type: "Permute"
bottom: "conv6_2_mbox_conf"
top: "conv6_2_mbox_conf_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv6_2_mbox_conf_flat"
type: "Reshape"
bottom: "conv6_2_mbox_conf_perm"
top: "conv6_2_mbox_conf_flat"
reshape_param {
shape {
dim: 0
dim: -1
dim: 1
dim: 1
}
}
}
layer {
name: "conv6_2_mbox_priorbox"
type: "PriorBox"
bottom: "conv6_2"
bottom: "data"
top: "conv6_2_mbox_priorbox"
prior_box_param {
min_size: 111.0
max_size: 162.0
aspect_ratio: 2
aspect_ratio: 3
flip: true
clip: false
variance: 0.1
variance: 0.1
variance: 0.2
variance: 0.2
step: 32
offset: 0.5
}
}
layer {
name: "conv7_2_mbox_loc"
type: "Convolution"
bottom: "conv7_2"
top: "conv7_2_mbox_loc"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 24
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv7_2_mbox_loc_perm"
type: "Permute"
bottom: "conv7_2_mbox_loc"
top: "conv7_2_mbox_loc_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv7_2_mbox_loc_flat"
type: "Reshape"
bottom: "conv7_2_mbox_loc_perm"
top: "conv7_2_mbox_loc_flat"
reshape_param {
shape {
dim: 0
dim: -1
dim: 1
dim: 1
}
}
}
layer {
name: "conv7_2_mbox_conf"
type: "Convolution"
bottom: "conv7_2"
top: "conv7_2_mbox_conf"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 126
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv7_2_mbox_conf_perm"
type: "Permute"
bottom: "conv7_2_mbox_conf"
top: "conv7_2_mbox_conf_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv7_2_mbox_conf_flat"
type: "Reshape"
bottom: "conv7_2_mbox_conf_perm"
top: "conv7_2_mbox_conf_flat"
reshape_param {
shape {
dim: 0
dim: -1
dim: 1
dim: 1
}
}
}
layer {
name: "conv7_2_mbox_priorbox"
type: "PriorBox"
bottom: "conv7_2"
bottom: "data"
top: "conv7_2_mbox_priorbox"
prior_box_param {
min_size: 162.0
max_size: 213.0
aspect_ratio: 2
aspect_ratio: 3
flip: true
clip: false
variance: 0.1
variance: 0.1
variance: 0.2
variance: 0.2
step: 64
offset: 0.5
}
}
layer {
name: "conv8_2_mbox_loc"
type: "Convolution"
bottom: "conv8_2"
top: "conv8_2_mbox_loc"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 16
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv8_2_mbox_loc_perm"
type: "Permute"
bottom: "conv8_2_mbox_loc"
top: "conv8_2_mbox_loc_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv8_2_mbox_loc_flat"
type: "Reshape"
bottom: "conv8_2_mbox_loc_perm"
top: "conv8_2_mbox_loc_flat"
reshape_param {
shape {
dim: 0
dim: -1
dim: 1
dim: 1
}
}
}
layer {
name: "conv8_2_mbox_conf"
type: "Convolution"
bottom: "conv8_2"
top: "conv8_2_mbox_conf"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 84
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv8_2_mbox_conf_perm"
type: "Permute"
bottom: "conv8_2_mbox_conf"
top: "conv8_2_mbox_conf_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv8_2_mbox_conf_flat"
type: "Reshape"
bottom: "conv8_2_mbox_conf_perm"
top: "conv8_2_mbox_conf_flat"
reshape_param {
shape {
dim: 0
dim: -1
dim: 1
dim: 1
}
}
}
layer {
name: "conv8_2_mbox_priorbox"
type: "PriorBox"
bottom: "conv8_2"
bottom: "data"
top: "conv8_2_mbox_priorbox"
prior_box_param {
min_size: 213.0
max_size: 264.0
aspect_ratio: 2
flip: true
clip: false
variance: 0.1
variance: 0.1
variance: 0.2
variance: 0.2
step: 100
offset: 0.5
}
}
layer {
name: "conv9_2_mbox_loc"
type: "Convolution"
bottom: "conv9_2"
top: "conv9_2_mbox_loc"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 16
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv9_2_mbox_loc_perm"
type: "Permute"
bottom: "conv9_2_mbox_loc"
top: "conv9_2_mbox_loc_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv9_2_mbox_loc_flat"
type: "Reshape"
bottom: "conv9_2_mbox_loc_perm"
top: "conv9_2_mbox_loc_flat"
reshape_param {
shape {
dim: 0
dim: -1
dim: 1
dim: 1
}
}
}
layer {
name: "conv9_2_mbox_conf"
type: "Convolution"
bottom: "conv9_2"
top: "conv9_2_mbox_conf"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 84
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv9_2_mbox_conf_perm"
type: "Permute"
bottom: "conv9_2_mbox_conf"
top: "conv9_2_mbox_conf_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv9_2_mbox_conf_flat"
type: "Reshape"
bottom: "conv9_2_mbox_conf_perm"
top: "conv9_2_mbox_conf_flat"
reshape_param {
shape {
dim: 0
dim: -1
dim: 1
dim: 1
}
}
}
layer {
name: "conv9_2_mbox_priorbox"
type: "PriorBox"
bottom: "conv9_2"
bottom: "data"
top: "conv9_2_mbox_priorbox"
prior_box_param {
min_size: 264.0
max_size: 315.0
aspect_ratio: 2
flip: true
clip: false
variance: 0.1
variance: 0.1
variance: 0.2
variance: 0.2
step: 300
offset: 0.5
}
}
layer {
name: "mbox_loc"
type: "Concat"
bottom: "conv4_3_norm_mbox_loc_flat"
bottom: "fc7_mbox_loc_flat"
bottom: "conv6_2_mbox_loc_flat"
bottom: "conv7_2_mbox_loc_flat"
bottom: "conv8_2_mbox_loc_flat"
bottom: "conv9_2_mbox_loc_flat"
top: "mbox_loc"
concat_param {
axis: 1
}
}
layer {
name: "mbox_conf"
type: "Concat"
bottom: "conv4_3_norm_mbox_conf_flat"
bottom: "fc7_mbox_conf_flat"
bottom: "conv6_2_mbox_conf_flat"
bottom: "conv7_2_mbox_conf_flat"
bottom: "conv8_2_mbox_conf_flat"
bottom: "conv9_2_mbox_conf_flat"
top: "mbox_conf"
concat_param {
axis: 1
}
}
layer {
name: "mbox_priorbox"
type: "Concat"
bottom: "conv4_3_norm_mbox_priorbox"
bottom: "fc7_mbox_priorbox"
bottom: "conv6_2_mbox_priorbox"
bottom: "conv7_2_mbox_priorbox"
bottom: "conv8_2_mbox_priorbox"
bottom: "conv9_2_mbox_priorbox"
top: "mbox_priorbox"
concat_param {
axis: 2
}
}
layer {
name: "mbox_conf_reshape"
type: "Reshape"
bottom: "mbox_conf"
top: "mbox_conf_reshape"
reshape_param {
shape {
dim: 0
dim: -1
dim: 21
}
}
}
layer {
name: "mbox_conf_softmax"
type: "Softmax"
bottom: "mbox_conf_reshape"
top: "mbox_conf_softmax"
softmax_param {
axis: 2
}
}
layer {
name: "mbox_conf_flatten"
type: "Reshape"
bottom: "mbox_conf_softmax"
top: "mbox_conf_flatten"
reshape_param {
shape {
dim: 0
dim: -1
dim: 1
dim: 1
}
}
}
layer {
name: "detection_out"
type: "DetectionOutput"
bottom: "mbox_loc"
bottom: "mbox_conf_flatten"
bottom: "mbox_priorbox"
top: "detection_out"
top: "keep_count"
include {
phase: TEST
}
detection_output_param {
num_classes: 21
share_location: true
background_label_id: 0
nms_param {
nms_threshold: 0.45
top_k: 400
}
save_output_param {
label_map_file: "data/VOC0712/labelmap_voc.prototxt"
}
code_type: CENTER_SIZE
keep_top_k: 200
confidence_threshold: 0.01
}
}
6.2 我修改的sampleSSD.cpp
#include <cassert>
#include <cmath>
#include <cstring>
#include <cuda_runtime_api.h>
#include <unordered_map>
#include "BatchStream.h"
#include "NvCaffeParser.h"
#include "NvInferPlugin.h"
#include "common.h"
#include <sys/time.h>
timeval starttime,endtime;
timeval timer,current;
using namespace nvinfer1;
using namespace nvcaffeparser1;
using namespace plugin;
using std::vector;
static Logger gLogger;
// Network details
const char* gNetworkName = "ssd"; // Network name
static const int kINPUT_C = 3; // Input image channels
static const int kINPUT_H = 300; // Input image height
static const int kINPUT_W = 300; // Input image width
static const int kOUTPUT_CLS_SIZE = 21; // Number of classes
static const int kKEEP_TOPK = 200; // Number of total bboxes to be kept per image after NMS step. It is same as detection_output_param.keep_top_k in prototxt file
int repeat_times=1000;
enum MODE
{
kFP32,
kFP16,
kINT8,
kUNKNOWN
};
struct Param
{
MODE modelType{MODE::kFP32}; // Default run FP32 precision
} params;
std::ostream& operator<<(std::ostream& o, MODE dt)
{
switch (dt)
{
case kFP32: o << "FP32"; break;
case kFP16: o << "FP16"; break;
case kINT8: o << "INT8"; break;
case kUNKNOWN: o << "UNKNOWN"; break;
}
return o;
}
static const std::vector<std::string> kDIRECTORIES{"data/samples/ssd/", "data/ssd/"}; // Data directory
const std::string gCLASSES[kOUTPUT_CLS_SIZE]{"background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"}; // List of class labels
static const char* kINPUT_BLOB_NAME = "data"; // Input blob name
static const char* kOUTPUT_BLOB_NAME0 = "detection_out"; // Output blob name
static const char* kOUTPUT_BLOB_NAME1 = "keep_count"; // Output blob name
// INT8 calibration variables
static const int kCAL_BATCH_SIZE = 1; // Batch size
static const int kFIRST_CAL_BATCH = 0; // First batch
static const int kNB_CAL_BATCHES = 500; // Number of batches
#define CalibrationMode 1 //Set to '0' for Legacy calibrator and any other value for Entropy calibrator
// Visualization
const float kVISUAL_THRESHOLD = 0.6f;
static int gUseDLACore{-1};
class Int8LegacyCalibrator : public nvinfer1::IInt8LegacyCalibrator
{
public:
Int8LegacyCalibrator(BatchStream& stream, int firstBatch, double cutoff, double quantile, const char* networkName, bool readCache = true)
: mStream(stream)
, mFirstBatch(firstBatch)
, mReadCache(readCache)
, mNetworkName(networkName)
{
nvinfer1::Dims dims = mStream.getDims();
mInputCount = mStream.getBatchSize() * dims.d[1] * dims.d[2] * dims.d[3];
CHECK(cudaMalloc(&mDeviceInput, mInputCount * sizeof(float)));
reset(cutoff, quantile);
}
virtual ~Int8LegacyCalibrator()
{
CHECK(cudaFree(mDeviceInput));
}
int getBatchSize() const override { return mStream.getBatchSize(); }
double getQuantile() const override { return mQuantile; }
double getRegressionCutoff() const override { return mCutoff; }
bool getBatch(void* bindings[], const char* names[], int nbBindings) override
{
if (!mStream.next())
return false;
CHECK(cudaMemcpy(mDeviceInput, mStream.getBatch(), mInputCount * sizeof(float), cudaMemcpyHostToDevice));
bindings[0] = mDeviceInput;
return true;
}
const void* readCalibrationCache(size_t& length) override
{
mCalibrationCache.clear();
std::ifstream input(calibrationTableName(), std::ios::binary);
input >> std::noskipws;
if (mReadCache && input.good())
{
std::copy(std::istream_iterator<char>(input), std::istream_iterator<char>(), std::back_inserter(mCalibrationCache));
}
length = mCalibrationCache.size();
return length ? &mCalibrationCache[0] : nullptr;
}
void writeCalibrationCache(const void* cache, size_t length) override
{
std::ofstream output(calibrationTableName(), std::ios::binary);
output.write(reinterpret_cast<const char*>(cache), length);
}
const void* readHistogramCache(size_t& length) override
{
length = mHistogramCache.size();
return length ? &mHistogramCache[0] : nullptr;
}
void writeHistogramCache(const void* cache, size_t length) override
{
mHistogramCache.clear();
std::copy_n(reinterpret_cast<const char*>(cache), length, std::back_inserter(mHistogramCache));
}
void reset(double cutoff, double quantile)
{
mCutoff = cutoff;
mQuantile = quantile;
mStream.reset(mFirstBatch);
}
private:
std::string calibrationTableName()
{
assert(mNetworkName != NULL);
return std::string("CalibrationTable") + mNetworkName;
}
BatchStream mStream;
int mFirstBatch;
double mCutoff, mQuantile;
bool mReadCache{true};
const char* mNetworkName;
size_t mInputCount;
void* mDeviceInput{nullptr};
std::vector<char> mCalibrationCache, mHistogramCache;
};
class Int8EntropyCalibrator : public IInt8EntropyCalibrator
{
public:
Int8EntropyCalibrator(BatchStream& stream, int firstBatch, bool readCache = true)
: mStream(stream)
, mReadCache(readCache)
{
nvinfer1::Dims dims = mStream.getDims();
mInputCount = mStream.getBatchSize() * dims.d[1] * dims.d[2] * dims.d[3];
CHECK(cudaMalloc(&mDeviceInput, mInputCount * sizeof(float)));
mStream.reset(firstBatch);
}
virtual ~Int8EntropyCalibrator()
{
CHECK(cudaFree(mDeviceInput));
}
int getBatchSize() const override { return mStream.getBatchSize(); }
bool getBatch(void* bindings[], const char* names[], int nbBindings) override
{
if (!mStream.next())
{
return false;
}
CHECK(cudaMemcpy(mDeviceInput, mStream.getBatch(), mInputCount * sizeof(float), cudaMemcpyHostToDevice));
assert(!strcmp(names[0], kINPUT_BLOB_NAME));
bindings[0] = mDeviceInput;
return true;
}
const void* readCalibrationCache(size_t& length) override
{
mCalibrationCache.clear();
std::ifstream input(calibrationTableName(), std::ios::binary);
input >> std::noskipws;
if (mReadCache && input.good())
{
std::copy(std::istream_iterator<char>(input), std::istream_iterator<char>(), std::back_inserter(mCalibrationCache));
}
length = mCalibrationCache.size();
return length ? &mCalibrationCache[0] : nullptr;
}
virtual void writeCalibrationCache(const void* cache, size_t length) override
{
std::ofstream output(calibrationTableName(), std::ios::binary);
output.write(reinterpret_cast<const char*>(cache), length);
}
private:
static std::string calibrationTableName()
{
assert(gNetworkName);
return std::string("CalibrationTable") + gNetworkName;
}
BatchStream mStream;
size_t mInputCount;
bool mReadCache{true};
void* mDeviceInput{nullptr};
std::vector<char> mCalibrationCache;
};
std::string locateFile(const std::string& input)
{
return locateFile(input, kDIRECTORIES);
}
void caffeToTRTModel(const std::string& deployFile, // Name for caffe prototxt
const std::string& modelFile, // Name for model
const std::vector<std::string>& outputs, // Network outputs
unsigned int maxBatchSize, // Batch size - NB must be at least as large as the batch we want to run with)
MODE mode, // Precision mode
IHostMemory** trtModelStream) // Output stream for the TensorRT model
{
// Create the builder
IBuilder* builder = createInferBuilder(gLogger);
// Parse the caffe model to populate the network, then set the outputs
INetworkDefinition* network = builder->createNetwork();
ICaffeParser* parser = createCaffeParser();
DataType dataType = DataType::kFLOAT;
if (mode == kFP16)
dataType = DataType::kHALF;
std::cout << "Begin parsing model..." << std::endl;
std::cout << mode << " mode running..." << std::endl;
const IBlobNameToTensor* blobNameToTensor = parser->parse(locateFile(deployFile).c_str(),
locateFile(modelFile).c_str(),
*network,
dataType);
std::cout << "End parsing model..." << std::endl;
// Specify which tensors are outputs
for (auto& s : outputs)
network->markOutput(*blobNameToTensor->find(s.c_str()));
// Build the engine
builder->setMaxBatchSize(maxBatchSize);
builder->setMaxWorkspaceSize(36 << 20);
// Calibrator life time needs to last until after the engine is built.
std::unique_ptr<IInt8Calibrator> calibrator;
ICudaEngine* engine;
if (mode == kINT8)
{
#if CalibrationMode == 0
std::cout << "Using Legacy Calibrator" << std::endl;
BatchStream calibrationStream(kCAL_BATCH_SIZE, kNB_CAL_BATCHES, "./batches/batch_calibration", kDIRECTORIES);
calibrator.reset(new Int8LegacyCalibrator(calibrationStream, 0, kCUTOFF, kQUANTILE, gNetworkName, true));
#else
std::cout << "Using Entropy Calibrator" << std::endl;
BatchStream calibrationStream(kCAL_BATCH_SIZE, kNB_CAL_BATCHES, "./batches/batch_calibration", kDIRECTORIES);
calibrator.reset(new Int8EntropyCalibrator(calibrationStream, kFIRST_CAL_BATCH));
#endif
builder->setInt8Mode(true);
builder->setInt8Calibrator(calibrator.get());
}
else
{
builder->setFp16Mode(mode == kFP16);
}
std::cout << "Begin building engine..." << std::endl;
samplesCommon::enableDLA(builder, gUseDLACore);
engine = builder->buildCudaEngine(*network);
assert(engine);
std::cout << "End building engine..." << std::endl;
// Once the engine is built. Its safe to destroy the calibrator.
calibrator.reset();
// We don't need the network any more, and we can destroy the parser
network->destroy();
parser->destroy();
// Serialize the engine, then close everything down
(*trtModelStream) = engine->serialize();
engine->destroy();
builder->destroy();
}
// Runs one synchronous inference pass: copies inputData to the GPU, executes
// the engine for `batchSize` images, then copies the two outputs back.
//
// inputData    - host buffer of batchSize * kINPUT_C * kINPUT_H * kINPUT_W floats
// detectionOut - host buffer receiving batchSize * kKEEP_TOPK * 7 floats (Detection_out)
// keepCount    - host buffer receiving batchSize ints (boxes kept per image)
void doInference(IExecutionContext& context, float* inputData, float* detectionOut, int* keepCount, int batchSize)
{
    const ICudaEngine& engine = context.getEngine();
    // The engine requires exactly IEngine::getNbBindings() buffer pointers;
    // this network has exactly 1 input and 2 outputs.
    assert(engine.getNbBindings() == 3);
    void* buffers[3];

    // Binding indices are looked up by tensor name and are guaranteed to be
    // smaller than IEngine::getNbBindings().
    const int inputIndex = engine.getBindingIndex(kINPUT_BLOB_NAME);
    const int outputIndex0 = engine.getBindingIndex(kOUTPUT_BLOB_NAME0);
    const int outputIndex1 = engine.getBindingIndex(kOUTPUT_BLOB_NAME1);

    // Byte sizes of the three bindings, computed once so the mallocs and the
    // async copies cannot drift out of sync.
    const size_t inputBytes = static_cast<size_t>(batchSize) * kINPUT_C * kINPUT_H * kINPUT_W * sizeof(float);
    const size_t detectionBytes = static_cast<size_t>(batchSize) * kKEEP_TOPK * 7 * sizeof(float);
    const size_t keepCountBytes = static_cast<size_t>(batchSize) * sizeof(int);

    // Create GPU buffers and a stream.
    CHECK(cudaMalloc(&buffers[inputIndex], inputBytes));     // Data
    CHECK(cudaMalloc(&buffers[outputIndex0], detectionBytes)); // Detection_out
    CHECK(cudaMalloc(&buffers[outputIndex1], keepCountBytes)); // KeepCount (boxes left per image)
    cudaStream_t stream;
    CHECK(cudaStreamCreate(&stream));

    // DMA the input to the GPU, execute the batch asynchronously, DMA results back.
    CHECK(cudaMemcpyAsync(buffers[inputIndex], inputData, inputBytes, cudaMemcpyHostToDevice, stream));
    context.enqueue(batchSize, buffers, stream, nullptr);
    CHECK(cudaMemcpyAsync(detectionOut, buffers[outputIndex0], detectionBytes, cudaMemcpyDeviceToHost, stream));
    CHECK(cudaMemcpyAsync(keepCount, buffers[outputIndex1], keepCountBytes, cudaMemcpyDeviceToHost, stream));
    // Fix: the synchronize result was previously unchecked, unlike every other
    // CUDA call here; an async copy failure would have gone unnoticed.
    CHECK(cudaStreamSynchronize(stream));

    // Release the stream and the device buffers.
    cudaStreamDestroy(stream);
    CHECK(cudaFree(buffers[inputIndex]));
    CHECK(cudaFree(buffers[outputIndex0]));
    CHECK(cudaFree(buffers[outputIndex1]));
}
// Prints command-line usage and terminates the process. Called for --help
// and on any argument-parsing error (see parseOptions).
void printHelp()
{
    // Fix: document the --repeat option, which parseOptions accepts but the
    // old usage string omitted.
    printf("Usage: ./sampleSSD [--mode FP32|FP16|INT8] [--useDLACore id] [--repeat times]\n");
    exit(0);
}
// Parses command-line options into the process-wide settings:
//   --mode FP32|FP16|INT8  -> params.modelType
//   --useDLACore id        -> gUseDLACore
//   --repeat times         -> repeat_times
// --help, an unknown option, a missing value, or a malformed value all print
// usage and exit via printHelp() (which never returns).
void parseOptions(int argc, char** argv)
{
    for (int i = 1; i < argc; i++)
    {
        const char* optName = argv[i];
        if (0 == strcmp(optName, "--help"))
        {
            printHelp();
        }
        else if (0 == strcmp(optName, "--mode"))
        {
            if (++i == argc)
            {
                printf("Specify the mode \n");
                printHelp();
            }
            params.modelType = (strcmp(argv[i], "FP32") == 0 ? kFP32 : (strcmp(argv[i], "FP16") == 0 ? kFP16 : (strcmp(argv[i], "INT8") == 0 ? kINT8 : kUNKNOWN)));
            if (params.modelType == kUNKNOWN)
            {
                printf("Mode type %s is Unknown!\n", argv[i]);
                printHelp();
            }
        }
        else if (0 == strcmp(optName, "--useDLACore"))
        {
            if (++i == argc)
            {
                printf("Specify the DLA core id to use.\n");
                printHelp();
            }
            gUseDLACore = std::atoi(argv[i]);
        }
        else if (0 == strcmp(optName, "--repeat"))
        {
            if (++i == argc)
            {
                printf("Specify the repeat times.\n");
                printHelp();
            }
            // Fix: the sscanf result was previously ignored, so a malformed
            // value (e.g. "--repeat abc") silently kept the old repeat count.
            if (sscanf(argv[i], "%d", &repeat_times) != 1)
            {
                printf("Specify the repeat times.\n");
                printHelp();
            }
        }
        else
        {
            // Unknown option.
            printHelp();
        }
    }
}
// Entry point. Builds a TensorRT engine from the Caffe SSD model, then runs
// inference on bus.ppm `repeat_times` times, printing wall-clock timings for
// each pipeline stage and an epoch time every 100 repeats.
//
// NOTE: all occurrences of gettimeofday(&current, 0) below restore what the
// published listing had HTML-mangled into the "¤t" token ("&curren" entity).
int main(int argc, char** argv)
{
    double time_cost;
    printf("**** Modified by lidawei0124 ****\n");
    printf("default repeat times: %d\n", repeat_times);
    parseOptions(argc, argv);
    printf("now repeat times: %d\n", repeat_times);

    // --- time plugin initialization ---
    gettimeofday(&timer, 0);
    initLibNvInferPlugins(&gLogger, "");
    gettimeofday(&current, 0);
    time_cost = 1000000 * (current.tv_sec - timer.tv_sec) + current.tv_usec - timer.tv_usec;
    time_cost /= 1000; // divide by 1000 -> ms, by 1000000 -> s, by 1 -> us
    // Fix: the original adjacent string literals ("...(&gLogger, ""):...")
    // concatenated to print nothing between the parentheses; escape the
    // quotes so the intended "" actually appears in the log.
    printf("initLibNvInferPlugins(&gLogger, \"\"):time: %f ms\n", time_cost);

    // --- time the (trivial) stream declaration, kept for log parity ---
    gettimeofday(&timer, 0);
    IHostMemory* trtModelStream{nullptr};
    gettimeofday(&current, 0);
    time_cost = 1000000 * (current.tv_sec - timer.tv_sec) + current.tv_usec - timer.tv_usec;
    time_cost /= 1000; // ms
    printf("IHostMemory* trtModelStream{nullptr}:time: %f ms\n", time_cost);

    // --- build the TensorRT model from the Caffe model and serialize it ---
    gettimeofday(&timer, 0);
    const int N = 1; // Batch size
    caffeToTRTModel("ssd.prototxt",
                    "VGG_VOC0712_SSD_300x300_iter_120000.caffemodel",
                    std::vector<std::string>{kOUTPUT_BLOB_NAME0, kOUTPUT_BLOB_NAME1},
                    N, params.modelType, &trtModelStream);
    std::vector<std::string> imageList = {"bus.ppm"}; // Input image list
    std::vector<samplesCommon::PPM<kINPUT_C, kINPUT_H, kINPUT_W>> ppms(N);
    gettimeofday(&current, 0);
    time_cost = 1000000 * (current.tv_sec - timer.tv_sec) + current.tv_usec - timer.tv_usec;
    time_cost /= 1000; // ms
    printf("caffeToTRTModel and serialize:time: %f ms\n", time_cost);

    // --- deserialize the engine ---
    gettimeofday(&timer, 0);
    std::cout << "*** deserializing" << std::endl;
    IRuntime* runtime = createInferRuntime(gLogger);
    ICudaEngine* engine = runtime->deserializeCudaEngine(trtModelStream->data(), trtModelStream->size(), nullptr);
    assert(engine != nullptr);
    IExecutionContext* context = engine->createExecutionContext();
    assert(context != nullptr);
    trtModelStream->destroy();
    gettimeofday(&current, 0);
    time_cost = 1000000 * (current.tv_sec - timer.tv_sec) + current.tv_usec - timer.tv_usec;
    time_cost /= 1000; // ms
    printf("Deserialize the engine:time: %f ms\n", time_cost);

    // --- benchmark loop ---
    double epoch_time = 0;
    gettimeofday(&starttime, 0);
    gettimeofday(&timer, 0);
    printf("start epochs......\n");
    for (int repeat = 1; repeat <= repeat_times; repeat++)
    {
        // Re-read the input image each repeat (deliberately kept inside the
        // loop: the per-epoch timing includes the full read->infer pipeline).
        for (int i = 0; i < N; ++i)
        {
            readPPMFile(locateFile(imageList[i]), ppms[i]);
        }
        float pixelMean[3]{104.0f, 117.0f, 123.0f}; // In BGR order

        // Host memory for the input buffer: convert interleaved RGB pixels to
        // planar BGR and subtract the per-channel mean.
        float* data = new float[N * kINPUT_C * kINPUT_H * kINPUT_W];
        for (int i = 0, volImg = kINPUT_C * kINPUT_H * kINPUT_W; i < N; ++i)
        {
            for (int c = 0; c < kINPUT_C; ++c)
            {
                for (unsigned j = 0, volChl = kINPUT_H * kINPUT_W; j < volChl; ++j)
                {
                    // "2 - c" flips the channel order so the input is BGR.
                    data[i * volImg + c * volChl + j] = float(ppms[i].buffer[j * kINPUT_C + 2 - c]) - pixelMean[c];
                }
            }
        }

        // Host memory for outputs.
        float* detectionOut = new float[N * kKEEP_TOPK * 7];
        int* keepCount = new int[N];

        // Run inference.
        doInference(*context, data, detectionOut, keepCount, N);

        // Report detections every 100 repeats.
        for (int p = 0; p < N; ++p)
        {
            for (int i = 0; i < keepCount[p]; ++i)
            {
                float* det = detectionOut + (p * kKEEP_TOPK + i) * 7;
                if (det[2] < kVISUAL_THRESHOLD)
                    continue;
                assert((int) det[1] < kOUTPUT_CLS_SIZE);
                std::string storeName = gCLASSES[(int) det[1]] + "-" + std::to_string(det[2]) + "-" + std::to_string(repeat) + ".ppm";
                if (repeat % 100 == 0)
                {
                    gettimeofday(&current, 0);
                    epoch_time = 1000000 * (current.tv_sec - timer.tv_sec) + current.tv_usec - timer.tv_usec;
                    epoch_time /= 1000; // ms
                    printf("repeat : %d ,epoch_time of : %f ms\n", repeat, epoch_time);
                    gettimeofday(&timer, 0);
                    std::cout << " Image name:" << ppms[p].fileName.c_str() << ", Label :" << gCLASSES[(int) det[1]].c_str() << ","
                              << " confidence: " << det[2] * 100.f
                              << " xmin: " << det[3] * kINPUT_W
                              << " ymin: " << det[4] * kINPUT_H
                              << " xmax: " << det[5] * kINPUT_W
                              << " ymax: " << det[6] * kINPUT_H
                              << std::endl;
                    samplesCommon::writePPMFileWithBBox(storeName, ppms[p], {det[3] * kINPUT_W, det[4] * kINPUT_H, det[5] * kINPUT_W,
                                                                             det[6] * kINPUT_H});
                }
            }
        }

        // Fix: the original never freed these per-iteration buffers — the
        // delete[] calls after the loop were commented out (and referenced
        // out-of-scope pointers anyway), leaking ~3*N buffers per repeat.
        delete[] data;
        delete[] detectionOut;
        delete[] keepCount;
    }

    // --- total time ---
    gettimeofday(&endtime, 0);
    double timeuse = 1000000 * (endtime.tv_sec - starttime.tv_sec) + endtime.tv_usec - starttime.tv_usec;
    timeuse /= 1000000; // divide by 1000 -> ms, by 1000000 -> s, by 1 -> us
    // Fix: the original printed `repeat - 1`, but `repeat` is declared inside
    // the for loop and is out of scope here (a compile error). After a full
    // run the completed count is exactly repeat_times.
    printf("Time to do %d repeats is: %f seconds\n", repeat_times, timeuse);

    // Destroy the engine.
    context->destroy();
    engine->destroy();
    runtime->destroy();
    // Note: once shutdownProtobufLibrary is called, the parsers cannot be used anymore.
    shutdownProtobufLibrary();
    return EXIT_SUCCESS;
}
参考:https://blog.csdn.net/lidawei0124/article/details/90116124
来源:CSDN
作者:今天依旧要努力
链接:https://blog.csdn.net/alphonse2017/article/details/103694720