Fix bugs: init CUDA preprocessing for video inference, redraw box labels, derive YOLOv6 from YOLOv8

Li-Hongda · Li-Hongda · commit 2829a90c49fd · 2023-05-21T15:31:05.000+08:00
diff --git a/include/detection.h b/include/detection.h
@@ -56,7 +56,7 @@ struct Box {
     float score;
     int label;
 };
-const static int kMaxInputImageSize = 1024 * 1024;
+const static int maxImageSize = 2048 * 2048;
 struct Detections {
     std::vector<Box> dets;
 };
diff --git a/include/instance_segmentation.h b/include/instance_segmentation.h
@@ -11,7 +11,6 @@ struct Instance {
     int label;
     float score;    
     cv::Mat mask;
-    float pred_mask[32];
 };
 
 struct Segmentations {
diff --git a/include/yolov6.h b/include/yolov6.h
@@ -1,9 +1,9 @@
 #ifndef YOLOV6_H
 #define YOLOV6_H
 
-#include "yolo.h"
+#include "yolov8.h"
 
-class YOLOv6 : public YOLO {
+class YOLOv6 : public YOLOv8 {
 public:
     explicit YOLOv6(const YAML::Node &config);
 };
diff --git a/object_detection/main.cpp b/object_detection/main.cpp
@@ -13,12 +13,12 @@ int main(int argc, char **argv)
     auto cfg = cfg_dir + "/" + model_arch + cfg_suffix;
     auto model = build_model(model_arch, cfg);
     check_dir(savepath, false);
-    if (model == nullptr)
-        return -1;
+    if (model == nullptr) return -1;
     model->LoadEngine();
-    if (video)
+    if (video) {
+        savepath = savepath + "test.mp4";
         model->Inference(inputpath, savepath, true);
-    else
+    } else
         model->Inference(inputpath, savepath);
     return 0;
 }
diff --git a/object_detection/rtdetr/CMakeLists.txt b/object_detection/rtdetr/CMakeLists.txt
@@ -1,6 +1,7 @@
 cmake_minimum_required(VERSION 3.10)
 
-set(CMAKE_BUILD_TYPE "Release")
+set(CMAKE_BUILD_TYPE "Debug")
+# set(CMAKE_BUILD_TYPE "Release")
 
 project(rtdetr)
 
diff --git a/object_detection/yolov5/CMakeLists.txt b/object_detection/yolov5/CMakeLists.txt
@@ -1,6 +1,7 @@
 cmake_minimum_required(VERSION 3.10)
 
-set(CMAKE_BUILD_TYPE "Release")
+set(CMAKE_BUILD_TYPE "Debug")
+# set(CMAKE_BUILD_TYPE "Release")
 
 project(yolov5)
 
diff --git a/object_detection/yolov7/CMakeLists.txt b/object_detection/yolov7/CMakeLists.txt
@@ -1,6 +1,7 @@
 cmake_minimum_required(VERSION 3.10)
 
-set(CMAKE_BUILD_TYPE "Release")
+set(CMAKE_BUILD_TYPE "Debug")
+# set(CMAKE_BUILD_TYPE "Release")
 
 project(yolov7)
 
diff --git a/object_detection/yolov8/CMakeLists.txt b/object_detection/yolov8/CMakeLists.txt
@@ -1,6 +1,7 @@
 cmake_minimum_required(VERSION 3.10)
 
-set(CMAKE_BUILD_TYPE "Release")
+set(CMAKE_BUILD_TYPE "Debug")
+# set(CMAKE_BUILD_TYPE "Release")
 
 project(yolov8)
 
diff --git a/src/detection.cpp b/src/detection.cpp
@@ -17,7 +17,6 @@ Detection::Detection(const YAML::Node &config) : Model(config) {
 }
 
 void Detection::Inference(const std::string &input_path, const cv::String &save_path, const bool video) {
-
     cv::VideoCapture capture;
     capture.open(input_path);
     cv::Size size = cv::Size((int)capture.get(cv::CAP_PROP_FRAME_WIDTH), (int)capture.get(cv::CAP_PROP_FRAME_HEIGHT));        
@@ -34,33 +33,26 @@ void Detection::Inference(const std::string &input_path, const cv::String &save_
     int index = 0;
     float total_time = 0;
     cv::Mat frame;
-
-    while (capture.isOpened())
-    {
+    cuda_preprocess_init(maxImageSize);
+    while (capture.isOpened()) {
         index++;
-        if (imgBatch.size() < batchSize) // get input
-        {
+        if (imgBatch.size() < batchSize) {
             capture.read(frame);
-
-            if (frame.empty())
-            {
-                sample::gLogWarning << "no more video or camera frame" << std::endl;
+            if (frame.empty()) {
+                std::cout << "no more video or camera frame" << std::endl;
                 auto start_time = std::chrono::high_resolution_clock::now();
                 std::vector<Detections> det_results = InferenceImages(imgBatch);
                 auto end_time = std::chrono::high_resolution_clock::now();
                 dets.insert(dets.end(), det_results.begin(), det_results.end());
                 imgs.insert(imgs.end(), imgBatch.begin(), imgBatch.end());                    
-                imgBatch.clear(); // clear
+                imgBatch.clear(); 
                 total_time += std::chrono::duration<float, std::milli>(end_time - start_time).count();
                 break;
-            }
-            else
-            {
+            } else {
                 imgBatch.emplace_back(frame.clone());
             }
         }
-        else // infer
-        {
+        else {
             auto start_time = std::chrono::high_resolution_clock::now();
             auto det_results = InferenceImages(imgBatch);
             auto end_time = std::chrono::high_resolution_clock::now();
@@ -71,7 +63,6 @@ void Detection::Inference(const std::string &input_path, const cv::String &save_
         }
     }
     Visualize(dets, imgs, save_path, fps, size);
-
 }
 
 void Detection::Inference(const std::string &input_path, const std::string &save_path) {    
@@ -83,7 +74,7 @@ void Detection::Inference(const std::string &input_path, const std::string &save
     std::vector<std::string> imgInfo;
     imgInfo.reserve(batchSize);
     float total_time = 0;
-    cuda_preprocess_init(kMaxInputImageSize);
+    cuda_preprocess_init(maxImageSize);
     for (const std::string &image_name : image_list) {
         index++;
         // TODO: figure out why double free.
@@ -147,19 +138,26 @@ void Detection::Visualize(const std::vector<Detections> &detections,
                           const cv::String save_name, 
                           int fps, cv::Size size) {
     auto fourcc = cv::VideoWriter::fourcc('m','p','4','v');
+    int font_face = cv::FONT_HERSHEY_SIMPLEX;
+    double font_scale = 0.5f;
+    float thickness = 0.5;      
     cv::VideoWriter writer(save_name, fourcc, fps, size, true);
     for (int i = 0; i < (int)frames.size(); i++){
         auto frame = frames[i];
-        if (!frame.data)
-            continue;
+        if (!frame.data) continue;
         auto bboxes = detections[i].dets;
         for(const auto &bbox : bboxes) {
             auto score = cv::format("%.3f", bbox.score);
             std::string text = class_labels[bbox.label] + "|" + score;
-            cv::putText(frame, text, cv::Point(bbox.x - bbox.w / 2, bbox.y - bbox.h / 2 - 5),
-                    cv::FONT_HERSHEY_SIMPLEX, 0.7, class_colors[bbox.label], 2);
-            cv::Rect rect(bbox.x - bbox.w / 2, bbox.y - bbox.h / 2, bbox.w, bbox.h);
-            cv::rectangle(frame, rect, class_colors[bbox.label], 2, cv::LINE_8, 0);
+            cv::Size text_size = cv::getTextSize(text, font_face, font_scale, thickness, nullptr);
+            cv::Point org;
+            org.x = bbox.x;
+            org.y = bbox.y + text_size.height + 2; 
+            cv::Rect text_back = cv::Rect(org.x, org.y - text_size.height, text_size.width, text_size.height + 5); 
+            cv::rectangle(frame, text_back, class_colors[bbox.label], -1);
+            cv::putText(frame, text, org, font_face, font_scale, cv::Scalar(255, 255, 255), thickness);
+            cv::Rect rect(bbox.x, bbox.y, bbox.w, bbox.h);
+            cv::rectangle(frame, rect, class_colors[bbox.label], 2, cv::LINE_8, 0); 
         }        
         writer.write(frame);
     }
diff --git a/src/instance_segmentation.cpp b/src/instance_segmentation.cpp
@@ -18,7 +18,6 @@ InstanceSegmentation::InstanceSegmentation(const YAML::Node &config) : Model(con
 }
 
 void InstanceSegmentation::Inference(const std::string &input_path, const cv::String &save_path, const bool video) {
-
     cv::VideoCapture capture;
     capture.open(input_path);
     cv::Size size = cv::Size((int)capture.get(cv::CAP_PROP_FRAME_WIDTH), (int)capture.get(cv::CAP_PROP_FRAME_HEIGHT));        
@@ -35,33 +34,26 @@ void InstanceSegmentation::Inference(const std::string &input_path, const cv::St
     int index = 0;
     float total_time = 0;
     cv::Mat frame;
-
-    while (capture.isOpened())
-    {
+    cuda_preprocess_init(maxImageSize);
+    while (capture.isOpened()) {
         index++;
-        if (imgBatch.size() < batchSize) // get input
-        {
+        if (imgBatch.size() < batchSize) {
             capture.read(frame);
-
-            if (frame.empty())
-            {
-                sample::gLogWarning << "no more video or camera frame" << std::endl;
+            if (frame.empty()) {
+                std::cout << "no more video or camera frame" << std::endl;
                 auto start_time = std::chrono::high_resolution_clock::now();
                 std::vector<Segmentations> seg_results = InferenceImages(imgBatch);
                 auto end_time = std::chrono::high_resolution_clock::now();
                 segs.insert(segs.end(), seg_results.begin(), seg_results.end());
                 imgs.insert(imgs.end(), imgBatch.begin(), imgBatch.end());                    
-                imgBatch.clear(); // clear
+                imgBatch.clear(); 
                 total_time += std::chrono::duration<float, std::milli>(end_time - start_time).count();
                 break;
-            }
-            else
-            {
+            } else {
                 imgBatch.emplace_back(frame.clone());
             }
         }
-        else // infer
-        {
+        else {
             auto start_time = std::chrono::high_resolution_clock::now();
             auto seg_results = InferenceImages(imgBatch);
             auto end_time = std::chrono::high_resolution_clock::now();
@@ -84,7 +76,7 @@ void InstanceSegmentation::Inference(const std::string &input_path, const std::s
     std::vector<std::string> imgInfo;
     imgInfo.reserve(batchSize);
     float total_time = 0;
-    cuda_preprocess_init(kMaxInputImageSize);
+    cuda_preprocess_init(maxImageSize);
     // cuda_postprocess_init(39, imageWidth, imageHeight);
     for (const std::string &image_name : image_list) {
         index++;
@@ -176,11 +168,39 @@ void InstanceSegmentation::Visualize(const std::vector<Segmentations> &segmentat
             continue;
         auto instances = segmentations[i].segs;
         for(const auto &ins : instances) {
+            cv::Mat mask = ins.mask;
+            cv::Mat img_mask = scale_mask(mask, frame);
+            cv::Mat reg_img = cv::Mat::zeros(frame.rows, frame.cols, CV_8UC1);
+            for (int row = ins.y; row < ins.y + ins.h; row++) {	
+                if (row < 0 || row >= frame.rows) continue;
+                cv::Vec<uint8_t, 1> *data_Ptr = reg_img.ptr<cv::Vec<uint8_t, 1>> (row);
+                for (int col = ins.x; col < ins.x + ins.w; col++)
+                {
+                    if (col < 0 || col >= frame.cols) continue;
+                    data_Ptr[col][0] = 1;
+                }
+            } 
+            cv::bitwise_and(img_mask, reg_img, img_mask);
+
+            std::vector<cv::Mat> contours;
+            cv::Mat hierarchy;
+            cv::Mat colored_img = frame.clone();
+            cv::findContours(img_mask, contours, hierarchy, 
+                             cv::RETR_CCOMP, cv::CHAIN_APPROX_SIMPLE);
+            cv::drawContours(colored_img, contours, -1, class_colors[ins.label], -1, cv::LINE_8,
+                             hierarchy, 100);                
+            frame = 0.4 * colored_img + 0.6 * frame;
+
             auto score = cv::format("%.3f", ins.score);
             std::string text = class_labels[ins.label] + "|" + score;
-            cv::putText(frame, text, cv::Point(ins.x - ins.w / 2, ins.y - ins.h / 2 - 5),
-                    cv::FONT_HERSHEY_SIMPLEX, 0.7, class_colors[ins.label], 2);
-            cv::Rect rect(ins.x - ins.w / 2, ins.y - ins.h / 2, ins.w, ins.h);
+            cv::Size text_size = cv::getTextSize(text, font_face, font_scale, thickness, nullptr);
+            cv::Point org;
+            org.x = ins.x ;
+            org.y = ins.y + text_size.height + 2;
+            cv::Rect text_back = cv::Rect(org.x, org.y - text_size.height, text_size.width, text_size.height + 5); 
+            cv::rectangle(frame, text_back, class_colors[ins.label], -1);
+            cv::putText(frame, text, org, font_face, font_scale, cv::Scalar(255, 255, 255), thickness);
+            cv::Rect rect(ins.x, ins.y, ins.w, ins.h);
             cv::rectangle(frame, rect, class_colors[ins.label], 2, cv::LINE_8, 0);
         }        
         writer.write(frame);
diff --git a/src/yolov6.cpp b/src/yolov6.cpp
@@ -1,3 +1,3 @@
 #include "yolov6.h"
 
-YOLOv6::YOLOv6(const YAML::Node &config) : YOLO(config) {}
+YOLOv6::YOLOv6(const YAML::Node &config) : YOLOv8(config) {}