Skip to content

Commit 859108b

Browse files
committed
support cuda preprocess
1 parent 8191599 commit 859108b

26 files changed

Lines changed: 459 additions & 623 deletions

README.md

Lines changed: 60 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,61 @@
11
# TensorRT_Inference_Demo
2-
A repo that uses TensorRT to deploy well-trained models.
2+
3+
<div align="center">
4+
5+
[![Cuda](https://img.shields.io/badge/CUDA-11.3-%2376B900?logo=nvidia)](https://developer.nvidia.com/cuda-toolkit-archive)
6+
[![](https://img.shields.io/badge/TensorRT-8.6.0.12-%2376B900.svg?style=flat&logo=tensorrt)](https://developer.nvidia.com/nvidia-tensorrt-8x-download)
7+
[![](https://img.shields.io/badge/ubuntu-20.04-orange.svg?style=flat&logo=ubuntu)](https://releases.ubuntu.com/20.04/)
8+
</div>
9+
10+
## 1. Introduction
11+
This repo uses TensorRT-8.x to deploy well-trained models.
12+
13+
## 2. Updates
14+
15+
- [x] [YOLOv5](https://github.com/ultralytics/yolov5)
16+
- [x] [YOLOv5-seg](https://github.com/ultralytics/yolov5)
17+
- [x] [YOLOv7](https://github.com/WongKinYiu/yolov7)
18+
- [x] [YOLOv8](https://github.com/ultralytics/ultralytics)
19+
- [ ] [YOLOv8-seg](https://github.com/ultralytics/ultralytics)
20+
21+
22+
## 3. Supported Models
23+
24+
| Models | Device | BatchSize | Mode | Input Shape(HxW) | FPS |
25+
|-|-|:-:|:-:|:-:|:-:|
26+
| YOLOv5-n v7.0 |RTX3090 | 1 | FP32 | 640x640 | 264 |
27+
| YOLOv5-s v7.0 |RTX3090 | 1 | FP32 | 640x640 | 210 |
28+
| YOLOv5-s v7.0 |RTX3090 | 32 | FP32 | 640x640 | - |
29+
| YOLOv5-m v7.0 |RTX3090 | 1 | FP32 | 640x640 | 140 |
30+
| YOLOv5-l v7.0 |RTX3090 | 1 | FP32 | 640x640 | 105 |
31+
| YOLOv5-x v7.0 |RTX3090 | 1 | FP32 | 640x640 | 75 |
32+
| YOLOv7 |RTX3090 | 1 | FP32 | 640x640 | 115 |
33+
| YOLOv7x |RTX3090 | 1 | FP32 | 640x640 | - |
34+
| YOLOv8-n |RTX3090 | 1 | FP32 | 640x640 | 222 |
35+
| YOLOv8-s |RTX3090 | 1 | FP32 | 640x640 | 171 |
36+
| YOLOv8-m |RTX3090 | 1 | FP32 | 640x640 | 122 |
37+
| YOLOv8-l |RTX3090 | 1 | FP32 | 640x640 | 88 |
38+
| YOLOv8-x |RTX3090 | 1 | FP32 | 640x640 | 68 |
39+
| RT-DETR |RTX3090 | 1 | FP32 | 640x640 | - |
40+
| RT-DETR |RTX3090 | 1 | FP32 | 640x640 | - |
41+
| SOLO(r50) |RTX3090 | 1 | FP32 | 480x640 | - |
42+
| SOLOv2(r50) |RTX3090 | 1 | INT8 | 480x640 | - |
43+
44+
## 4.Install
45+
1. Clone the repo.
46+
```
47+
git clone https://github.com/Li-Hongda/TensorRT_Inference_Demo.git
48+
cd TensorRT_Inference_Demo/object_detection
49+
```
50+
2. Change the TensorRT path and the CUDA path in the CMake configuration to match your local installation. Then,
51+
```
52+
mkdir build && cd build
53+
cmake ..
54+
make -j$(nproc)
55+
```
56+
3. The executable file will be generated in `bin` in the repo directory if compiled successfully. Then enjoy yourself with a command like this:
57+
```
58+
cd bin
59+
./object_detection yolov5 /path/to/input/dir false
60+
```
61+

configs/yolov7.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
yolov7:
2-
onnx_file: "../weights/yolov7/yolov7.onnx"
3-
engine_file: "../weights/yolov7/yolov7.trt"
2+
onnx_file: "../weights/yolov7/yolov7x.onnx"
3+
engine_file: "../weights/yolov7/yolov7x.trt"
44
type: "coco80"
55
mode: "fp32"
66
batchSize: 1
77
inputChannel: 3
88
imageWidth: 640
99
imageHeight: 640
10-
obj_threshold: 0.4
10+
obj_threshold: 0.25
1111
nms_threshold: 0.45
1212
strides: [8, 16, 32]
1313
imgMean: [ 0, 0, 0 ]

include/basemodel.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#define BASEMODEL_H
33

44
#include "common.h"
5+
#include "cuda_preprocess.h"
56

67
class Model
78
{
@@ -16,16 +17,18 @@ class Model
1617
bool ReadTrtFile();
1718
void OnnxToTRTModel();
1819
std::vector<float> PreProcess(std::vector<cv::Mat> &image);
20+
void batch_preprocess(std::vector<cv::Mat> &image);
1921
std::string onnx_file;
2022
std::string engine_file;
2123
std::string mode;
24+
AffineMatrix dst2src;
2225
int batchSize;
2326
int inputChannel;
2427
int imageWidth;
2528
int imageHeight;
2629
std::string names[10];
2730
float **cpu_buffers = new float* [10];
28-
void *gpu_buffers[10]{};
31+
float *gpu_buffers[10]{};
2932
std::vector<int64_t> bufferSize;
3033
std::shared_ptr<nvinfer1::ICudaEngine> engine;
3134
std::unique_ptr<nvinfer1::IExecutionContext> context;

include/classification.h

Lines changed: 0 additions & 26 deletions
This file was deleted.

include/cuda_preprocess.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#pragma once

#include "common.h"
#include <cstdint>

// 2x3 affine transform matrix, stored as two row-major rows:
//   [ v0 v1 v2 ]
//   [ v3 v4 v5 ]
// Used to map pixel coordinates between the destination (network input)
// and source image spaces during preprocessing (see the `dst2src` member
// added to `Model` in basemodel.h).
struct AffineMatrix {
  float v0, v1, v2;
  float v3, v4, v5;
};

// Set up the CUDA preprocessing resources for images of up to
// `max_image_size` pixels (callers pass kMaxInputImageSize = 1024*1024).
// NOTE(review): presumably allocates pinned-host/device staging buffers —
// confirm against the .cu implementation, which is not visible here.
void cuda_preprocess_init(int max_image_size);

// Release whatever cuda_preprocess_init allocated. Call once at shutdown.
void cuda_preprocess_destroy();

// Warp `src` (src_width x src_height, uint8 pixel data — presumably packed
// BGR as produced by cv::Mat; TODO confirm) into `dst` (dst_width x
// dst_height float buffer) using the dest-to-source affine matrix `d2s`,
// enqueued asynchronously on `stream`. The caller owns both buffers and
// must synchronize the stream before reading `dst`.
void preprocess(uint8_t* src, AffineMatrix d2s, int src_width, int src_height,
                float* dst, int dst_width, int dst_height,
                cudaStream_t stream);
18+
19+

include/detection.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ struct Box {
7171
int label;
7272
float score;
7373
};
74-
74+
const static int kMaxInputImageSize = 1024 * 1024;
7575
struct Detections {
7676
std::vector<Box> dets;
7777
};
@@ -88,6 +88,7 @@ class Detection : public Model
8888
std::vector<std::string> image_names);
8989
void Visualize(const std::vector<Detections> &detections, std::vector<cv::Mat> &imgBatch,
9090
cv::String save_name, int fps, cv::Size size);
91+
cv::Rect get_rect(cv::Mat& img, float bbox[4]);
9192
static float DIoU(const Box &det_a, const Box &det_b);
9293

9394
protected:

include/instance_segmentation.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ class InstanceSegmentation : public Model
2929
void Visualize(const std::vector<Segmentations> &segmentations, std::vector<cv::Mat> &imgBatch,
3030
cv::String save_name, int fps, cv::Size size);
3131
static float DIoU(const Instance &det_a, const Instance &det_b);
32+
cv::Mat scale_mask(cv::Mat mask, cv::Mat img);
3233

3334
protected:
3435
virtual std::vector<Segmentations> PostProcess(const std::vector<cv::Mat> &vec_Mat, float *output1, float *output2)=0;

include/yolov8.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ class YOLOv8_seg : public YOLO_seg {
1313
public:
1414
explicit YOLOv8_seg(const YAML::Node &config);
1515
protected:
16-
std::vector<Detections> PostProcess(const std::vector<cv::Mat> &imgBatch, float *output);
16+
std::vector<Segmentations> PostProcess(const std::vector<cv::Mat> &imgBatch, float *output1, float *output2);
1717
};
1818

1919
#endif

instance_segmentation/CMakeLists.txt

Lines changed: 0 additions & 57 deletions
This file was deleted.

instance_segmentation/main.cpp

Lines changed: 0 additions & 34 deletions
This file was deleted.

0 commit comments

Comments
 (0)