Skip to content

Commit 2829a90

Browse files
committed
fix bugs
1 parent 374af6f commit 2829a90

11 files changed

Lines changed: 78 additions & 57 deletions

File tree

include/detection.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -56,7 +56,7 @@ struct Box {
5656
float score;
5757
int label;
5858
};
59-
const static int kMaxInputImageSize = 1024 * 1024;
59+
const static int maxImageSize = 2048 * 2048;
6060
struct Detections {
6161
std::vector<Box> dets;
6262
};

include/instance_segmentation.h

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,6 @@ struct Instance {
1111
int label;
1212
float score;
1313
cv::Mat mask;
14-
float pred_mask[32];
1514
};
1615

1716
struct Segmentations {

include/yolov6.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,9 @@
11
#ifndef YOLOV6_H
22
#define YOLOV6_H
33

4-
#include "yolo.h"
4+
#include "yolov8.h"
55

6-
class YOLOv6 : public YOLO {
6+
class YOLOv6 : public YOLOv8 {
77
public:
88
explicit YOLOv6(const YAML::Node &config);
99
};

object_detection/main.cpp

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -13,12 +13,12 @@ int main(int argc, char **argv)
1313
auto cfg = cfg_dir + "/" + model_arch + cfg_suffix;
1414
auto model = build_model(model_arch, cfg);
1515
check_dir(savepath, false);
16-
if (model == nullptr)
17-
return -1;
16+
if (model == nullptr) return -1;
1817
model->LoadEngine();
19-
if (video)
18+
if (video) {
19+
savepath = savepath + "test.mp4";
2020
model->Inference(inputpath, savepath, true);
21-
else
21+
} else
2222
model->Inference(inputpath, savepath);
2323
return 0;
2424
}

object_detection/rtdetr/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
cmake_minimum_required(VERSION 3.10)
22

3-
set(CMAKE_BUILD_TYPE "Release")
3+
set(CMAKE_BUILD_TYPE "Debug")
4+
# set(CMAKE_BUILD_TYPE "Release")
45

56
project(rtdetr)
67

object_detection/yolov5/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
cmake_minimum_required(VERSION 3.10)
22

3-
set(CMAKE_BUILD_TYPE "Release")
3+
set(CMAKE_BUILD_TYPE "Debug")
4+
# set(CMAKE_BUILD_TYPE "Release")
45

56
project(yolov5)
67

object_detection/yolov7/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
cmake_minimum_required(VERSION 3.10)
22

3-
set(CMAKE_BUILD_TYPE "Release")
3+
set(CMAKE_BUILD_TYPE "Debug")
4+
# set(CMAKE_BUILD_TYPE "Release")
45

56
project(yolov7)
67

object_detection/yolov8/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
cmake_minimum_required(VERSION 3.10)
22

3-
set(CMAKE_BUILD_TYPE "Release")
3+
set(CMAKE_BUILD_TYPE "Debug")
4+
# set(CMAKE_BUILD_TYPE "Release")
45

56
project(yolov8)
67

src/detection.cpp

Lines changed: 22 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,6 @@ Detection::Detection(const YAML::Node &config) : Model(config) {
1717
}
1818

1919
void Detection::Inference(const std::string &input_path, const cv::String &save_path, const bool video) {
20-
2120
cv::VideoCapture capture;
2221
capture.open(input_path);
2322
cv::Size size = cv::Size((int)capture.get(cv::CAP_PROP_FRAME_WIDTH), (int)capture.get(cv::CAP_PROP_FRAME_HEIGHT));
@@ -34,33 +33,26 @@ void Detection::Inference(const std::string &input_path, const cv::String &save_
3433
int index = 0;
3534
float total_time = 0;
3635
cv::Mat frame;
37-
38-
while (capture.isOpened())
39-
{
36+
cuda_preprocess_init(maxImageSize);
37+
while (capture.isOpened()) {
4038
index++;
41-
if (imgBatch.size() < batchSize) // get input
42-
{
39+
if (imgBatch.size() < batchSize) {
4340
capture.read(frame);
44-
45-
if (frame.empty())
46-
{
47-
sample::gLogWarning << "no more video or camera frame" << std::endl;
41+
if (frame.empty()) {
42+
std::cout << "no more video or camera frame" << std::endl;
4843
auto start_time = std::chrono::high_resolution_clock::now();
4944
std::vector<Detections> det_results = InferenceImages(imgBatch);
5045
auto end_time = std::chrono::high_resolution_clock::now();
5146
dets.insert(dets.end(), det_results.begin(), det_results.end());
5247
imgs.insert(imgs.end(), imgBatch.begin(), imgBatch.end());
53-
imgBatch.clear(); // clear
48+
imgBatch.clear();
5449
total_time += std::chrono::duration<float, std::milli>(end_time - start_time).count();
5550
break;
56-
}
57-
else
58-
{
51+
} else {
5952
imgBatch.emplace_back(frame.clone());
6053
}
6154
}
62-
else // infer
63-
{
55+
else {
6456
auto start_time = std::chrono::high_resolution_clock::now();
6557
auto det_results = InferenceImages(imgBatch);
6658
auto end_time = std::chrono::high_resolution_clock::now();
@@ -71,7 +63,6 @@ void Detection::Inference(const std::string &input_path, const cv::String &save_
7163
}
7264
}
7365
Visualize(dets, imgs, save_path, fps, size);
74-
7566
}
7667

7768
void Detection::Inference(const std::string &input_path, const std::string &save_path) {
@@ -83,7 +74,7 @@ void Detection::Inference(const std::string &input_path, const std::string &save
8374
std::vector<std::string> imgInfo;
8475
imgInfo.reserve(batchSize);
8576
float total_time = 0;
86-
cuda_preprocess_init(kMaxInputImageSize);
77+
cuda_preprocess_init(maxImageSize);
8778
for (const std::string &image_name : image_list) {
8879
index++;
8980
// TODO: figure out why double free.
@@ -147,19 +138,26 @@ void Detection::Visualize(const std::vector<Detections> &detections,
147138
const cv::String save_name,
148139
int fps, cv::Size size) {
149140
auto fourcc = cv::VideoWriter::fourcc('m','p','4','v');
141+
int font_face = cv::FONT_HERSHEY_SIMPLEX;
142+
double font_scale = 0.5f;
143+
float thickness = 0.5;
150144
cv::VideoWriter writer(save_name, fourcc, fps, size, true);
151145
for (int i = 0; i < (int)frames.size(); i++){
152146
auto frame = frames[i];
153-
if (!frame.data)
154-
continue;
147+
if (!frame.data) continue;
155148
auto bboxes = detections[i].dets;
156149
for(const auto &bbox : bboxes) {
157150
auto score = cv::format("%.3f", bbox.score);
158151
std::string text = class_labels[bbox.label] + "|" + score;
159-
cv::putText(frame, text, cv::Point(bbox.x - bbox.w / 2, bbox.y - bbox.h / 2 - 5),
160-
cv::FONT_HERSHEY_SIMPLEX, 0.7, class_colors[bbox.label], 2);
161-
cv::Rect rect(bbox.x - bbox.w / 2, bbox.y - bbox.h / 2, bbox.w, bbox.h);
162-
cv::rectangle(frame, rect, class_colors[bbox.label], 2, cv::LINE_8, 0);
152+
cv::Size text_size = cv::getTextSize(text, font_face, font_scale, thickness, nullptr);
153+
cv::Point org;
154+
org.x = bbox.x;
155+
org.y = bbox.y + text_size.height + 2;
156+
cv::Rect text_back = cv::Rect(org.x, org.y - text_size.height, text_size.width, text_size.height + 5);
157+
cv::rectangle(frame, text_back, class_colors[bbox.label], -1);
158+
cv::putText(frame, text, org, font_face, font_scale, cv::Scalar(255, 255, 255), thickness);
159+
cv::Rect rect(bbox.x, bbox.y, bbox.w, bbox.h);
160+
cv::rectangle(frame, rect, class_colors[bbox.label], 2, cv::LINE_8, 0);
163161
}
164162
writer.write(frame);
165163
}

src/instance_segmentation.cpp

Lines changed: 40 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,6 @@ InstanceSegmentation::InstanceSegmentation(const YAML::Node &config) : Model(con
1818
}
1919

2020
void InstanceSegmentation::Inference(const std::string &input_path, const cv::String &save_path, const bool video) {
21-
2221
cv::VideoCapture capture;
2322
capture.open(input_path);
2423
cv::Size size = cv::Size((int)capture.get(cv::CAP_PROP_FRAME_WIDTH), (int)capture.get(cv::CAP_PROP_FRAME_HEIGHT));
@@ -35,33 +34,26 @@ void InstanceSegmentation::Inference(const std::string &input_path, const cv::St
3534
int index = 0;
3635
float total_time = 0;
3736
cv::Mat frame;
38-
39-
while (capture.isOpened())
40-
{
37+
cuda_preprocess_init(maxImageSize);
38+
while (capture.isOpened()) {
4139
index++;
42-
if (imgBatch.size() < batchSize) // get input
43-
{
40+
if (imgBatch.size() < batchSize) {
4441
capture.read(frame);
45-
46-
if (frame.empty())
47-
{
48-
sample::gLogWarning << "no more video or camera frame" << std::endl;
42+
if (frame.empty()) {
43+
std::cout << "no more video or camera frame" << std::endl;
4944
auto start_time = std::chrono::high_resolution_clock::now();
5045
std::vector<Segmentations> seg_results = InferenceImages(imgBatch);
5146
auto end_time = std::chrono::high_resolution_clock::now();
5247
segs.insert(segs.end(), seg_results.begin(), seg_results.end());
5348
imgs.insert(imgs.end(), imgBatch.begin(), imgBatch.end());
54-
imgBatch.clear(); // clear
49+
imgBatch.clear();
5550
total_time += std::chrono::duration<float, std::milli>(end_time - start_time).count();
5651
break;
57-
}
58-
else
59-
{
52+
} else {
6053
imgBatch.emplace_back(frame.clone());
6154
}
6255
}
63-
else // infer
64-
{
56+
else {
6557
auto start_time = std::chrono::high_resolution_clock::now();
6658
auto seg_results = InferenceImages(imgBatch);
6759
auto end_time = std::chrono::high_resolution_clock::now();
@@ -84,7 +76,7 @@ void InstanceSegmentation::Inference(const std::string &input_path, const std::s
8476
std::vector<std::string> imgInfo;
8577
imgInfo.reserve(batchSize);
8678
float total_time = 0;
87-
cuda_preprocess_init(kMaxInputImageSize);
79+
cuda_preprocess_init(maxImageSize);
8880
// cuda_postprocess_init(39, imageWidth, imageHeight);
8981
for (const std::string &image_name : image_list) {
9082
index++;
@@ -176,11 +168,39 @@ void InstanceSegmentation::Visualize(const std::vector<Segmentations> &segmentat
176168
continue;
177169
auto instances = segmentations[i].segs;
178170
for(const auto &ins : instances) {
171+
cv::Mat mask = ins.mask;
172+
cv::Mat img_mask = scale_mask(mask, frame);
173+
cv::Mat reg_img = cv::Mat::zeros(frame.rows, frame.cols, CV_8UC1);
174+
for (int row = ins.y; row < ins.y + ins.h; row++) {
175+
if (row < 0 || row >= frame.rows) continue;
176+
cv::Vec<uint8_t, 1> *data_Ptr = reg_img.ptr<cv::Vec<uint8_t, 1>> (row);
177+
for (int col = ins.x; col < ins.x + ins.w; col++)
178+
{
179+
if (col < 0 || col >= frame.cols) continue;
180+
data_Ptr[col][0] = 1;
181+
}
182+
}
183+
cv::bitwise_and(img_mask, reg_img, img_mask);
184+
185+
std::vector<cv::Mat> contours;
186+
cv::Mat hierarchy;
187+
cv::Mat colored_img = frame.clone();
188+
cv::findContours(img_mask, contours, hierarchy,
189+
cv::RETR_CCOMP, cv::CHAIN_APPROX_SIMPLE);
190+
cv::drawContours(colored_img, contours, -1, class_colors[ins.label], -1, cv::LINE_8,
191+
hierarchy, 100);
192+
frame = 0.4 * colored_img + 0.6 * frame;
193+
179194
auto score = cv::format("%.3f", ins.score);
180195
std::string text = class_labels[ins.label] + "|" + score;
181-
cv::putText(frame, text, cv::Point(ins.x - ins.w / 2, ins.y - ins.h / 2 - 5),
182-
cv::FONT_HERSHEY_SIMPLEX, 0.7, class_colors[ins.label], 2);
183-
cv::Rect rect(ins.x - ins.w / 2, ins.y - ins.h / 2, ins.w, ins.h);
196+
cv::Size text_size = cv::getTextSize(text, font_face, font_scale, thickness, nullptr);
197+
cv::Point org;
198+
org.x = ins.x ;
199+
org.y = ins.y + text_size.height + 2;
200+
cv::Rect text_back = cv::Rect(org.x, org.y - text_size.height, text_size.width, text_size.height + 5);
201+
cv::rectangle(frame, text_back, class_colors[ins.label], -1);
202+
cv::putText(frame, text, org, font_face, font_scale, cv::Scalar(255, 255, 255), thickness);
203+
cv::Rect rect(ins.x, ins.y, ins.w, ins.h);
184204
cv::rectangle(frame, rect, class_colors[ins.label], 2, cv::LINE_8, 0);
185205
}
186206
writer.write(frame);

0 commit comments

Comments
 (0)