Skip to content

Commit bc66236

Browse files
authored
Merge pull request #16 from Koldim2001/new_visualize_and_converter
New visualize and converter
2 parents 04267c4 + 8fe0a85 commit bc66236

6 files changed

Lines changed: 79 additions & 15 deletions

File tree

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,8 @@ Possible arguments of the ```visualize_results``` function:
172172
| random_object_colors | bool | False | If true, colors for each object are selected randomly. |
173173
| show_confidences | bool | False | If true and show_class=True, confidences near class are visualized. |
174174
| axis_off | bool | True | If true, axis is turned off in the final visualization. |
175-
| show_classes_list | list | [] | If empty, visualize all classes. Otherwise, visualize only classes in the list. |
175+
| show_classes_list | list | [] | If empty, visualize all classes. Otherwise, visualize only classes in the list. |
176+
| list_of_class_colors | list | None | A list of tuples representing the colors for each class in BGR format. If provided, these colors will be used for displaying the classes instead of random colors. |
176177
| return_image_array | bool | False | If True, the function returns the image (BGR np.array) instead of displaying it. |
177178

178179

patched_yolo_infer/README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,12 @@ The library also provides a sleek customization of the visualization of the infe
77
**Model Support**: The library offers support for multiple ultralytics deep learning [models](https://docs.ultralytics.com/models/), such as YOLOv8, YOLOv8-seg, YOLOv9, YOLOv9-seg, YOLOv10, FastSAM, and RTDETR. Users can select from pre-trained options or utilize custom-trained models to best meet their task requirements.
88

99

10+
__Explanation of how Patch-Based-Inference works:__
11+
12+
<p align="center">
13+
<img width="600" alt="patched_inf_explanation" src="https://github.com/Koldim2001/YOLO-Patch-Based-Inference/blob/main/readme_content/patched_inf_explanation.gif?raw=true">
14+
</p>
15+
1016
## Installation
1117
You can install the library via pip:
1218

@@ -141,6 +147,7 @@ Visualizes custom results of object detection or segmentation on an image.
141147
- **show_confidences** (*bool*): If true and show_class=True, confidences near class are visualized. Default is False.
142148
- **axis_off** (*bool*): If true, axis is turned off in the final visualization. Default is True.
143149
- **show_classes_list** (*list*): If empty, visualize all classes. Otherwise, visualize only classes in the list.
150+
- **list_of_class_colors** (*list*): A list of tuples representing the colors for each class in BGR format. If provided, these colors will be used for displaying the classes instead of random colors. Default is None.
144151
- **return_image_array** (*bool*): If True, the function returns the image (BGR np.array) instead of displaying it. Default is False.
145152

146153

patched_yolo_infer/__init__.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
1-
from .functions_extra import visualize_results_usual_yolo_inference, get_crops, visualize_results
1+
from .functions_extra import (
2+
visualize_results_usual_yolo_inference,
3+
get_crops,
4+
visualize_results,
5+
create_masks_from_polygons,
6+
)
27

38
from .nodes.MakeCropsDetectThem import MakeCropsDetectThem
49
from .nodes.CombineDetections import CombineDetections
5-
from .elements.CropElement import CropElement
10+
from .elements.CropElement import CropElement

patched_yolo_infer/elements/CropElement.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ def calculate_inference(self, model, imgsz=640, conf=0.35, iou=0.7, segment=Fals
5555
self.polygons = [mask.astype(np.uint16) for mask in pred.masks.xy]
5656
else:
5757
# Get the masks
58-
self.detected_masks = pred.masks.data.cpu().numpy()
58+
self.detected_masks = pred.masks.data.cpu().numpy().astype(np.uint8)
5959

6060

6161
def calculate_real_values(self):
@@ -88,7 +88,7 @@ def calculate_real_values(self):
8888
x_start_global:x_start_global+self.crop.shape[1]] = mask_resized
8989

9090
# Append the masked image to the list of detected_masks_real
91-
self.detected_masks_real.append(black_image)
91+
self.detected_masks_real.append(black_image.astype(np.uint8))
9292

9393
if self.polygons is not None:
9494
# Adjust the mask coordinates
@@ -116,7 +116,7 @@ def resize_results(self):
116116
# Resize mask
117117
mask_resized = cv2.resize(mask, (self.source_image.shape[1], self.source_image.shape[0]),
118118
interpolation=cv2.INTER_NEAREST)
119-
resized_masks.append(mask_resized)
119+
resized_masks.append(mask_resized.astype(np.uint8))
120120

121121

122122
for polygon in self.detected_polygons_real:

patched_yolo_infer/functions_extra.py

Lines changed: 59 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ def visualize_results_usual_yolo_inference(
2727
show_confidences=False,
2828
axis_off=True,
2929
show_classes_list=[],
30+
list_of_class_colors=None,
3031
return_image_array=False,
3132
inference_extra_args=None,
3233
):
@@ -44,8 +45,8 @@ def visualize_results_usual_yolo_inference(
4445
show_class (bool): Whether to show class labels. Default is True.
4546
fill_mask (bool): Whether to fill the segmented regions with color. Default is False.
4647
alpha (float): The transparency of filled masks. Default is 0.3.
47-
color_class_background (tuple): The background bgr color for class labels. Default is (0, 0, 255) (red).
48-
color_class_text (tuple): The text color for class labels. Default is (255, 255, 255) (white).
48+
color_class_background (tuple): The background BGR color for class labels. Default is (0, 0, 255) (red).
49+
color_class_text (tuple): The text BGR color for class labels. Default is (255, 255, 255) (white).
4950
thickness (int): The thickness of bounding box and text. Default is 4.
5051
font: The font type for class labels. Default is cv2.FONT_HERSHEY_SIMPLEX.
5152
font_scale (float): The scale factor for font size. Default is 1.5.
@@ -56,6 +57,9 @@ def visualize_results_usual_yolo_inference(
5657
axis_off (bool): If True, axis is turned off in the final visualization.
5758
show_classes_list (list): If empty, visualize all classes. Otherwise, visualize only classes in the list.
5859
inference_extra_args (dict/None): Dictionary with extra ultralytics inference parameters.
60+
list_of_class_colors (list/None): A list of tuples representing the colors for each class in BGR format. If provided,
61+
these colors will be used for displaying the classes instead of random colors. The number of tuples
62+
in the list must match the number of possible classes in the network.
5963
return_image_array (bool): If True, the function returns the image bgr array instead of displaying it.
6064
Default is False.
6165
@@ -106,10 +110,12 @@ def visualize_results_usual_yolo_inference(
106110

107111
if random_object_colors:
108112
color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
109-
else:
113+
elif list_of_class_colors is None:
110114
# Assign color according to class
111115
random.seed(int(classes[i] + delta_colors))
112116
color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
117+
else:
118+
color = list_of_class_colors[classes[i]]
113119

114120
box = boxes[i]
115121
x_min, y_min, x_max, y_max = box
@@ -278,6 +284,7 @@ def visualize_results(
278284
show_confidences=False,
279285
axis_off=True,
280286
show_classes_list=[],
287+
list_of_class_colors=None,
281288
return_image_array=False
282289
):
283290
"""
@@ -295,8 +302,8 @@ def visualize_results(
295302
show_class (bool): Whether to show class labels. Default is True.
296303
fill_mask (bool): Whether to fill the segmented regions with color. Default is False.
297304
alpha (float): The transparency of filled masks. Default is 0.3.
298-
color_class_background (tuple): The background bgr color for class labels. Default is (0, 0, 255) (red).
299-
color_class_text (tuple): The text color for class labels. Default is (255, 255, 255) (white).
305+
color_class_background (tuple): The background BGR color for class labels. Default is (0, 0, 255) (red).
306+
color_class_text (tuple): The text BGR color for class labels. Default is (255, 255, 255) (white).
300307
thickness (int): The thickness of bounding box and text. Default is 4.
301308
font: The font type for class labels. Default is cv2.FONT_HERSHEY_SIMPLEX.
302309
font_scale (float): The scale factor for font size. Default is 1.5.
@@ -306,8 +313,10 @@ def visualize_results(
306313
show_confidences (bool): If true and show_class=True, confidences near class are visualized. Default is False.
307314
axis_off (bool): If true, axis is turned off in the final visualization. Default is True.
308315
show_classes_list (list): If empty, visualize all classes. Otherwise, visualize only classes in the list.
309-
return_image_array (bool): If True, the function returns the image bgr array instead of displaying it.
310-
Default is False.
316+
list_of_class_colors (list/None): A list of tuples representing the colors for each class in BGR format. If provided,
317+
these colors will be used for displaying the classes instead of random colors. The number of tuples
318+
in the list must match the number of possible classes in the network.
319+
return_image_array (bool): If True, the function returns the image bgr array instead of displaying it. Default is False.
311320
312321
Returns:
313322
None/np.array
@@ -332,10 +341,12 @@ def visualize_results(
332341

333342
if random_object_colors:
334343
color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
335-
else:
344+
elif list_of_class_colors is None:
336345
# Assign color according to class
337346
random.seed(int(classes_ids[i] + delta_colors))
338347
color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
348+
else:
349+
color = list_of_class_colors[classes_ids[i]]
339350

340351
box = boxes[i]
341352
x_min, y_min, x_max, y_max = box
@@ -409,3 +420,43 @@ def visualize_results(
409420
if axis_off:
410421
plt.axis('off')
411422
plt.show()
423+
424+
425+
def create_masks_from_polygons(polygons, image):
426+
"""
427+
Create binary masks from a list of polygons.
428+
429+
This function takes a list of polygons and an image, and generates binary masks
430+
where each mask corresponds to one polygon. The masks are uint8 arrays with
431+
the same height and width as the input image, where the regions covered by the polygons
432+
are set to 1 and all other pixels are 0.
433+
434+
Parameters:
435+
polygons (list of numpy.ndarray): A list of polygons, where each polygon is
436+
represented as a numpy array of shape (N, 2) containing N (x, y) coordinates.
437+
image (numpy.ndarray): The input image, used to determine the dimensions of the masks.
438+
439+
Returns:
440+
list of numpy.ndarray: A list of binary masks, where each mask is a uint8
441+
numpy array (values 0 or 1) with the same height and width as the input image.
442+
"""
443+
# Get the dimensions of the image
444+
height, width = image.shape[:2]
445+
446+
# Create empty masks
447+
masks = []
448+
449+
for polygon in polygons:
450+
if len(polygon) > 0:
451+
points = np.array(polygon.reshape((-1, 1, 2)), dtype=np.int32)
452+
453+
# Create an empty mask with the same size as the image
454+
mask = np.zeros((height, width), dtype=np.uint8)
455+
456+
# Draw the polygon on the mask
457+
cv2.fillPoly(mask, [points], 1)
458+
459+
# Add the mask to the list
460+
masks.append(mask)
461+
462+
return masks

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
long_description = "\n" + fh.read()
99

1010

11-
VERSION = '1.2.8'
11+
VERSION = '1.2.9'
1212
DESCRIPTION = '''Patch-Based-Inference for detection/segmentation of small objects in images.'''
1313

1414
setup(

0 commit comments

Comments
 (0)