Skip to content

Commit 859108b

Browse files
committed
support cuda preprocess
1 parent 8191599 commit 859108b

26 files changed

Lines changed: 459 additions & 623 deletions

README.md

Lines changed: 60 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,61 @@
11
# TensorRT_Inference_Demo
2-
A repo that uses TensorRT to deploy well-trained models.
2+
3+
<div align="center">
4+
5+
[![Cuda](https://img.shields.io/badge/CUDA-11.3-%2376B900?logo=nvidia)](https://developer.nvidia.com/cuda-toolkit-archive)
6+
[![](https://img.shields.io/badge/TensorRT-8.6.0.12-%2376B900.svg?style=flat&logo=tensorrt)](https://developer.nvidia.com/nvidia-tensorrt-8x-download)
7+
[![](https://img.shields.io/badge/ubuntu-20.04-orange.svg?style=flat&logo=ubuntu)](https://releases.ubuntu.com/20.04/)
8+
</div>
9+
10+
## 1. Introduction
11+
This repo uses TensorRT-8.x to deploy well-trained models.
12+
13+
## 2. Updates
14+
15+
- [x] [YOLOv5](https://github.com/ultralytics/yolov5)
16+
- [x] [YOLOv5-seg](https://github.com/ultralytics/yolov5)
17+
- [x] [YOLOv7](https://github.com/WongKinYiu/yolov7)
18+
- [x] [YOLOv8](https://github.com/ultralytics/ultralytics)
19+
- [ ] [YOLOv8-seg](https://github.com/ultralytics/ultralytics)
20+
21+
22+
## 3. Supported Models
23+
24+
| Models | Device | BatchSize | Mode | Input Shape(HxW) | FPS |
25+
|-|-|:-:|:-:|:-:|:-:|
26+
| YOLOv5-n v7.0 |RTX3090 | 1 | FP32 | 640x640 | 264 |
27+
| YOLOv5-s v7.0 |RTX3090 | 1 | FP32 | 640x640 | 210 |
28+
| YOLOv5-s v7.0 |RTX3090 | 32 | FP32 | 640x640 | - |
29+
| YOLOv5-m v7.0 |RTX3090 | 1 | FP32 | 640x640 | 140 |
30+
| YOLOv5-l v7.0 |RTX3090 | 1 | FP32 | 640x640 | 105 |
31+
| YOLOv5-x v7.0 |RTX3090 | 1 | FP32 | 640x640 | 75 |
32+
| YOLOv7 |RTX3090 | 1 | FP32 | 640x640 | 115 |
33+
| YOLOv7x |RTX3090 | 1 | FP32 | 640x640 | - |
34+
| YOLOv8-n |RTX3090 | 1 | FP32 | 640x640 | 222 |
35+
| YOLOv8-s |RTX3090 | 1 | FP32 | 640x640 | 171 |
36+
| YOLOv8-m |RTX3090 | 1 | FP32 | 640x640 | 122 |
37+
| YOLOv8-l |RTX3090 | 1 | FP32 | 640x640 | 88 |
38+
| YOLOv8-x |RTX3090 | 1 | FP32 | 640x640 | 68 |
39+
| RT-DETR |RTX3090 | 1 | FP32 | 640x640 | - |
40+
| RT-DETR |RTX3090 | 1 | FP32 | 640x640 | - |
41+
| SOLO(r50) |RTX3090 | 1 | FP32 | 480x640 | - |
42+
| SOLOv2(r50) |RTX3090 | 1 | INT8 | 480x640 | - |
43+
44+
## 4.Install
45+
1. Clone the repo.
46+
```
47+
git clone https://github.com/Li-Hongda/TensorRT_Inference_Demo.git
48+
cd TensorRT_Inference_Demo/object_detection
49+
```
50+
2. Change the TensorRT path and the CUDA path in the CMake configuration to match your local installation. Then,
51+
```
52+
mkdir build && cd build
53+
cmake ..
54+
make -j$(nproc)
55+
```
56+
3. The executable file will be generated in `bin` in the repo directory if compiled successfully. Then enjoy yourself with a command like this:
57+
```
58+
cd bin
59+
./object_detection yolov5 /path/to/input/dir false
60+
```
61+

configs/yolov7.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
yolov7:
2-
onnx_file: "../weights/yolov7/yolov7.onnx"
3-
engine_file: "../weights/yolov7/yolov7.trt"
2+
onnx_file: "../weights/yolov7/yolov7x.onnx"
3+
engine_file: "../weights/yolov7/yolov7x.trt"
44
type: "coco80"
55
mode: "fp32"
66
batchSize: 1
77
inputChannel: 3
88
imageWidth: 640
99
imageHeight: 640
10-
obj_threshold: 0.4
10+
obj_threshold: 0.25
1111
nms_threshold: 0.45
1212
strides: [8, 16, 32]
1313
imgMean: [ 0, 0, 0 ]

include/basemodel.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#define BASEMODEL_H
33

44
#include "common.h"
5+
#include "cuda_preprocess.h"
56

67
class Model
78
{
@@ -16,16 +17,18 @@ class Model
1617
bool ReadTrtFile();
1718
void OnnxToTRTModel();
1819
std::vector<float> PreProcess(std::vector<cv::Mat> &image);
20+
void batch_preprocess(std::vector<cv::Mat> &image);
1921
std::string onnx_file;
2022
std::string engine_file;
2123
std::string mode;
24+
AffineMatrix dst2src;
2225
int batchSize;
2326
int inputChannel;
2427
int imageWidth;
2528
int imageHeight;
2629
std::string names[10];
2730
float **cpu_buffers = new float* [10];
28-
void *gpu_buffers[10]{};
31+
float *gpu_buffers[10]{};
2932
std::vector<int64_t> bufferSize;
3033
std::shared_ptr<nvinfer1::ICudaEngine> engine;
3134
std::unique_ptr<nvinfer1::IExecutionContext> context;

include/classification.h

Lines changed: 0 additions & 26 deletions
This file was deleted.

include/cuda_preprocess.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#pragma once

#include "common.h"
#include <cstdint>

// 2x3 affine transform matrix, stored as two row-major rows:
//   [ v0 v1 v2 ]
//   [ v3 v4 v5 ]
// Used to map pixel coordinates between the destination (network input)
// and source image spaces during preprocessing (see the `dst2src` member
// added to `Model` in basemodel.h).
struct AffineMatrix {
  float v0, v1, v2;
  float v3, v4, v5;
};

// Set up the CUDA preprocessing resources for images of up to
// `max_image_size` pixels (callers pass kMaxInputImageSize = 1024*1024).
// NOTE(review): presumably allocates pinned-host/device staging buffers —
// confirm against the .cu implementation, which is not visible here.
void cuda_preprocess_init(int max_image_size);

// Release whatever cuda_preprocess_init allocated. Call once at shutdown.
void cuda_preprocess_destroy();

// Warp `src` (src_width x src_height, uint8 pixel data — presumably packed
// BGR as produced by cv::Mat; TODO confirm) into `dst` (dst_width x
// dst_height float buffer) using the dest-to-source affine matrix `d2s`,
// enqueued asynchronously on `stream`. The caller owns both buffers and
// must synchronize the stream before reading `dst`.
void preprocess(uint8_t* src, AffineMatrix d2s, int src_width, int src_height,
                float* dst, int dst_width, int dst_height,
                cudaStream_t stream);
18+
19+

include/detection.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ struct Box {
7171
int label;
7272
float score;
7373
};
74-
74+
const static int kMaxInputImageSize = 1024 * 1024;
7575
struct Detections {
7676
std::vector<Box> dets;
7777
};
@@ -88,6 +88,7 @@ class Detection : public Model
8888
std::vector<std::string> image_names);
8989
void Visualize(const std::vector<Detections> &detections, std::vector<cv::Mat> &imgBatch,
9090
cv::String save_name, int fps, cv::Size size);
91+
cv::Rect get_rect(cv::Mat& img, float bbox[4]);
9192
static float DIoU(const Box &det_a, const Box &det_b);
9293

9394
protected:

include/instance_segmentation.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ class InstanceSegmentation : public Model
2929
void Visualize(const std::vector<Segmentations> &segmentations, std::vector<cv::Mat> &imgBatch,
3030
cv::String save_name, int fps, cv::Size size);
3131
static float DIoU(const Instance &det_a, const Instance &det_b);
32+
cv::Mat scale_mask(cv::Mat mask, cv::Mat img);
3233

3334
protected:
3435
virtual std::vector<Segmentations> PostProcess(const std::vector<cv::Mat> &vec_Mat, float *output1, float *output2)=0;

include/yolov8.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ class YOLOv8_seg : public YOLO_seg {
1313
public:
1414
explicit YOLOv8_seg(const YAML::Node &config);
1515
protected:
16-
std::vector<Detections> PostProcess(const std::vector<cv::Mat> &imgBatch, float *output);
16+
std::vector<Segmentations> PostProcess(const std::vector<cv::Mat> &imgBatch, float *output1, float *output2);
1717
};
1818

1919
#endif

instance_segmentation/CMakeLists.txt

Lines changed: 0 additions & 57 deletions
This file was deleted.

instance_segmentation/main.cpp

Lines changed: 0 additions & 34 deletions
This file was deleted.

0 commit comments

Comments
 (0)