support yolov6

Li-Hongda · Li-Hongda · commit 6c744b651f61 · 2023-05-21T22:25:32.000+08:00
diff --git a/README.md b/README.md
@@ -25,6 +25,7 @@ This repo use TensorRT-8.x to deploy well-trained models, both image preprocessi
 + 2023.05.12 🚀 Support cuda preprocess for speed up.
 + 2023.05.16 🚀 Support cuda box postprocess.
 + 2023.05.19 🚀 Support cuda mask postprocess and support rtdetr.
++ 2023.05.21 🚀 Support yolov6.
 </details>
 
 ## 3.Support Models
@@ -37,7 +38,7 @@ This repo use TensorRT-8.x to deploy well-trained models, both image preprocessi
 - [x] [YOLOv8](https://github.com/ultralytics/ultralytics)<br>
 - [x] [YOLOv8-seg](https://github.com/ultralytics/ultralytics)<br>
 - [x] [RT-DETR](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/rtdetr)<br>
-- [ ] [YOLOv6](https://github.com/meituan/YOLOv6) (to be continued)<br>
+- [x] [YOLOv6](https://github.com/meituan/YOLOv6)<br>
 - [ ] [YOLO-NAS](https://github.com/Deci-AI/super-gradients) (to be continued)<br>
 </details>
 
@@ -46,13 +47,13 @@ All speed tests were performed on RTX 3090 with COCO Val set.The time calculated
 
 | Models | BatchSize | Mode | Resolution |  FPS  |
 |-|-|:-:|:-:|:-:|
-| YOLOv5-s v7.0  | 1 | FP32 | 640x640 | 468 |
+| YOLOv5-s v7.0  | 1 | FP32 | 640x640 | 200 |
 | YOLOv5-s v7.0  | 32 | FP32 | 640x640 | - |
-| YOLOv5-seg-s v7.0  | 1 | FP32 | 640x640 | - |
-| YOLOv7  | 1 | FP32 | 640x640 | 154 |
+| YOLOv5-seg-s v7.0  | 1 | FP32 | 640x640 | 155 |
+| YOLOv6-s v3  | 1 | FP32 | 640x640 | 163 |
+| YOLOv7  | 1 | FP32 | 640x640 | 107 |
 | YOLOv8-s  | 1 | FP32 | 640x640 | 171 |
-| YOLOv8-s  | 1 | FP32 | 640x640 | - |
-| RT-DETR  | 1 | FP32 | 640x640 | - |
+| YOLOv8-seg-s  | 1 | FP32 | 640x640 | 122 |
 | RT-DETR  | 1 | FP32 | 640x640 | - |
 </div>
 
@@ -96,7 +97,7 @@ cd bin
 ```
 
 > Notes:
-> 1. The output of the model is required for post-processing is num_bboxes (imageHeight x image Width) x num_pred(num_cls + coordinates + confidence),while the output of YOLOv8 is num_pred * num_bboxes,which means the predicted values of the same box are not contiguous in memory.For convenience, the corresponding dimensions of the original pytorch output need to be transposed when exporting to ONNX model.
+> 1. Post-processing requires the model output to be laid out as num_bboxes (imageHeight x imageWidth) x num_pred (num_cls + coordinates + confidence), while the output of YOLOv8 is num_pred x num_bboxes, which means the predicted values of the same box are not contiguous in memory. For convenience, the corresponding dimensions of the original PyTorch output need to be transposed when exporting to an ONNX model.
 
 
 
diff --git a/include/yolov6.h b/include/yolov6.h
@@ -3,7 +3,7 @@
 
 #include "yolov8.h"
 
-class YOLOv6 : public YOLOv8 {
+class YOLOv6 : public YOLO {
 public:
     explicit YOLOv6(const YAML::Node &config);
 };
diff --git a/src/detection.cpp b/src/detection.cpp
@@ -89,9 +89,9 @@ void Detection::Inference(const std::string &input_path, const std::string &save
         if (imgBatch.size() == batchSize or index == image_list.size()){
             auto infer_start = std::chrono::high_resolution_clock::now();
             auto det_results = InferenceImages(imgBatch);
-            Visualize(det_results, imgBatch, imgInfo);
             auto infer_end = std::chrono::high_resolution_clock::now();
             total_time += std::chrono::duration<float, std::milli>(infer_end - infer_start).count();            
+            Visualize(det_results, imgBatch, imgInfo);
             imgBatch.clear();
             imgInfo.clear(); 
         }
diff --git a/src/instance_segmentation.cpp b/src/instance_segmentation.cpp
@@ -81,7 +81,10 @@ void InstanceSegmentation::Inference(const std::string &input_path, const std::s
     for (const std::string &image_name : image_list) {
         index++;
         // TODO: figure out why double free.
+        auto load_start = std::chrono::high_resolution_clock::now();
         cv::Mat img = cv::imread(image_name);
+        auto load_end = std::chrono::high_resolution_clock::now();
+        total_time += std::chrono::duration<float, std::milli>(load_end - load_start).count();
         imgBatch.emplace_back(img.clone());
         auto save_name = replace(image_name, input_path, save_path);
         imgInfo.emplace_back(save_name);
diff --git a/src/yolov6.cpp b/src/yolov6.cpp
@@ -1,3 +1,11 @@
 #include "yolov6.h"
 
-YOLOv6::YOLOv6(const YAML::Node &config) : YOLOv8(config) {}
+YOLOv6::YOLOv6(const YAML::Node &config) : YOLO(config) {
+    int index = 0;
+    num_bboxes = 0;
+    for (const int &stride : strides)
+    {
+        num_bboxes += int(imageHeight / stride) * int(imageWidth / stride);
+        index+=1;
+    }
+}