Commit 561ec16

Merge pull request #25 from Koldim2001/feature/multiple_crop_elements
Feature/multiple crop elements
2 parents 13e0aeb + 65dfb55 commit 561ec16

6 files changed: 328 additions & 282 deletions

README.md

Lines changed: 10 additions & 1 deletion
@@ -134,7 +134,7 @@ Class implementing combining masks/boxes from multiple crops + NMS (Non-Maximum
 
 | **Argument** | **Type** | **Default** | **Description** |
 |----------------------|-------------------|-------------|-------------------------------------------------------------------------------------------------------------------------|
-| element_crops |MakeCropsDetectThem| | Object containing crop information. |
+| element_crops |MakeCropsDetectThem| | Object containing crop information. This can be either a single MakeCropsDetectThem object or a list of such objects. |
 | nms_threshold | float | 0.3 | IoU/IoS threshold for non-maximum suppression. The lower the value, the fewer objects remain after suppression. |
 | match_metric | str | IOS | Matching metric, either 'IOU' or 'IOS'. |
 | class_agnostic_nms | bool | True | Determines the NMS mode in object detection. When set to True, NMS operates across all classes, ignoring class distinctions and suppressing less confident bounding boxes globally. Otherwise, NMS is applied separately for each class. |
@@ -286,6 +286,14 @@ shape_x, shape_y, overlap_x, overlap_y = auto_calculate_crop_values(
 
 An example of working with `auto_calculate_crop_values` is presented in a Google Colab notebook - [![Open In Colab][colab_badge]][colab_ex1_auto_calculate_crop_values]
 
+---
+
+
+## __Implementing Patching at Different Resolutions__
+
+The image can be cropped into patches at several different resolutions: smaller patches make it easier to detect small objects, while larger patches make it easier to detect large ones, so the algorithm covers a wider range of object sizes in the frame. To achieve this, process the image several times through MakeCropsDetectThem with different patch parameters, then pass the resulting list of element_crops objects to CombineDetections.
+
+An example of using this approach can be seen in this Google Colab notebook - [![Open In Colab][colab_badge]][colab_ex1_different_resolutions]
 
 
 [nb_example1]: https://nbviewer.org/github/Koldim2001/YOLO-Patch-Based-Inference/blob/main/examples/example_patch_based_inference.ipynb
@@ -297,3 +305,4 @@ An example of working with `auto_calculate_crop_values` is presented in a Google Colab
 [yt_link2]: https://www.youtube.com/watch?v=nBQuWa63188
 [colab_ex1_memory_optimize]: https://colab.research.google.com/drive/1XCpIYLMFEmGSO0XCOkSD7CcD9SFHSJPA?usp=sharing#scrollTo=DM_eCc3yXzXW
 [colab_ex1_auto_calculate_crop_values]: https://colab.research.google.com/drive/1XCpIYLMFEmGSO0XCOkSD7CcD9SFHSJPA?usp=sharing#scrollTo=Wkt1FkAkhCwQ
+[colab_ex1_different_resolutions]: !!!
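
The new README section describes the multi-resolution flow in prose only. A minimal sketch of what that flow could look like in code, assuming the package-level imports shown in the repo's examples; the file name, weights, and patch sizes here are illustrative, not part of this commit:

```python
import cv2
from patched_yolo_infer import MakeCropsDetectThem, CombineDetections

img = cv2.imread("image.jpg")  # hypothetical input image

# Pass 1 -- small patches, better at picking up small objects
crops_small = MakeCropsDetectThem(
    image=img,
    model_path="yolo11m.pt",
    segment=False,
    show_crops=False,
    shape_x=400,
    shape_y=400,
    overlap_x=50,
    overlap_y=50,
)

# Pass 2 -- large patches, better at picking up large objects
crops_large = MakeCropsDetectThem(
    image=img,
    model_path="yolo11m.pt",
    segment=False,
    show_crops=False,
    shape_x=800,
    shape_y=800,
    overlap_x=50,
    overlap_y=50,
)

# NMS inside CombineDetections merges detections from both passes
result = CombineDetections([crops_small, crops_large], nms_threshold=0.3)
print(result.filtered_classes_names)
```

Note that, per the new validation added in patched_yolo_infer/nodes/CombineDetections.py below, every element of the list must share the same source image, keep resize_initial_size=True, and use matching segment/memory_optimize flags.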

examples/example_extra_functions.ipynb

Lines changed: 258 additions & 257 deletions
Large diffs are not rendered by default.

examples/example_patch_based_inference.ipynb

Lines changed: 13 additions & 13 deletions
@@ -344,7 +344,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"### YOLOv9: "
+"### YOLO11: "
 ]
 },
 {
@@ -390,7 +390,7 @@
 "source": [
 "element_crops = MakeCropsDetectThem(\n",
 "    image=img,\n",
-"    model_path=\"yolov9c.pt\",\n",
+"    model_path=\"yolo11m.pt\",\n",
 "    segment=False,\n",
 "    show_crops=False,\n",
 "    shape_x=600,\n",
@@ -406,7 +406,7 @@
 "print('Basic yolo inference:')\n",
 "visualize_results_usual_yolo_inference(\n",
 "    img,\n",
-"    model=YOLO(\"yolov9c.pt\"),\n",
+"    model=YOLO(\"yolo11m.pt\"),\n",
 "    imgsz=640,\n",
 "    conf=0.5,\n",
 "    iou=0.7,\n",
@@ -568,7 +568,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"### YOLOv9-seg:"
+"### YOLO11-seg:"
 ]
 },
 {
@@ -579,7 +579,7 @@
 "source": [
 "element_crops = MakeCropsDetectThem(\n",
 "    image=img,\n",
-"    model_path=\"yolov9e-seg.pt\",\n",
+"    model_path=\"yolo11m-seg.pt\",\n",
 "    segment=True,\n",
 "    show_crops=False,\n",
 "    shape_x=600,\n",
@@ -637,7 +637,7 @@
 "print('Basic yolo inference:')\n",
 "visualize_results_usual_yolo_inference(\n",
 "    img,\n",
-"    model=YOLO(\"yolov9e-seg.pt\"),\n",
+"    model=YOLO(\"yolo11m-seg.pt\"),\n",
 "    imgsz=640,\n",
 "    conf=0.5,\n",
 "    iou=0.7,\n",
@@ -808,7 +808,7 @@
 }
 ],
 "source": [
-"from ultralytics import FastSAM\n",
+"from ultralytics import YOLO\n",
 "import matplotlib.pyplot as plt\n",
 "\n",
 "# Load the image \n",
@@ -872,8 +872,7 @@
 "source": [
 "element_crops = MakeCropsDetectThem(\n",
 "    image=img,\n",
-"    model=FastSAM('FastSAM-x.pt'),\n",
-"    model_path=\"yolov8m.pt\",\n",
+"    model=YOLO('FastSAM-x.pt'),\n",
 "    segment=True,\n",
 "    show_crops=True,\n",
 "    shape_x=400,\n",
@@ -882,14 +881,15 @@
 "    overlap_y=50,\n",
 "    conf=0.3,\n",
 "    iou=0.8,\n",
+"    imgsz=1024,\n",
 ")\n",
-"result = CombineDetections(element_crops, nms_threshold=0.40)\n",
+"result = CombineDetections(element_crops, nms_threshold=0.4)\n",
 "\n",
 "print('Basic FastSAM inference:')\n",
 "visualize_results_usual_yolo_inference(\n",
 "    img,\n",
-"    model=FastSAM('FastSAM-x.pt'),\n",
-"    imgsz=640,\n",
+"    model=YOLO('FastSAM-x.pt'),\n",
+"    imgsz=1024,\n",
 "    conf=0.3,\n",
 "    iou=0.8,\n",
 "    segment=True,\n",
@@ -1392,7 +1392,7 @@
 ],
 "metadata": {
 "kernelspec": {
-"display_name": "WORK",
+"display_name": "patched_yolo_infer",
 "language": "python",
 "name": "python3"
 },
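
Beyond the YOLOv9 → YOLO11 swap, the notebook now loads the FastSAM weights through the generic YOLO class and raises imgsz to 1024. A minimal standalone sketch of the updated call; only the weights file name and inference parameters come from the diff, the input image is an assumption:

```python
from ultralytics import YOLO

# FastSAM-x uses a YOLOv8-seg-style architecture, so the generic YOLO
# loader can resolve it from the weights file without the FastSAM class
model = YOLO("FastSAM-x.pt")
results = model("image.jpg", imgsz=1024, conf=0.3, iou=0.8)
```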

patched_yolo_infer/README.md

Lines changed: 1 addition & 1 deletion
@@ -105,7 +105,7 @@ Class implementing cropping and passing crops through a neural network for detec
 **CombineDetections**
 Class implementing combining masks/boxes from multiple crops + NMS (Non-Maximum Suppression).\
 **Args:**
-- **element_crops** (*MakeCropsDetectThem*): Object containing crop information.
+- **element_crops** (*MakeCropsDetectThem*): Object containing crop information. This can be either a single MakeCropsDetectThem object or a list of such objects.
 - **nms_threshold** (*float*): IoU/IoS threshold for non-maximum suppression.
 - **match_metric** (*str*): Matching metric, either 'IOU' or 'IOS'.
 - **class_agnostic_nms** (*bool*): Determines the NMS mode in object detection. When set to True, NMS operates across all classes, ignoring class distinctions and suppressing less confident bounding boxes globally. Otherwise, NMS is applied separately for each class. (Default is True)

patched_yolo_infer/nodes/CombineDetections.py

Lines changed: 45 additions & 9 deletions
@@ -1,3 +1,4 @@
+from typing import Union, List
 import torch
 import numpy as np
 from .MakeCropsDetectThem import MakeCropsDetectThem
@@ -46,26 +47,61 @@ class CombineDetections:
 
     def __init__(
         self,
-        element_crops: MakeCropsDetectThem,
+        element_crops: Union[MakeCropsDetectThem, List[MakeCropsDetectThem]],
         nms_threshold=0.3,
         match_metric='IOS',
         intelligent_sorter=True,
         sorter_bins=5,
         class_agnostic_nms=True
     ) -> None:
-        self.class_names = element_crops.class_names_dict
-        self.crops = element_crops.crops  # List to store the CropElement objects
-        if element_crops.resize_initial_size:
-            self.image = element_crops.crops[0].source_image
-        else:
-            self.image = element_crops.crops[0].source_image_resized
 
         self.nms_threshold = nms_threshold  # IoU or IoS threshold for NMS
         self.match_metric = match_metric
         self.intelligent_sorter = intelligent_sorter  # enable sorting by area and confidence parameter
         self.sorter_bins = sorter_bins
         self.class_agnostic_nms = class_agnostic_nms
 
+        # Check if element_crops is a list
+        if isinstance(element_crops, list):
+            # Ensure all elements in the list share the same source_image and other params
+            first_image = element_crops[0].crops[0].source_image
+            first_element_segment_status = element_crops[0].segment
+            first_element_memory_optimize_status = element_crops[0].memory_optimize
+            for element in element_crops:
+                if not np.array_equal(element.crops[0].source_image, first_image):
+                    raise ValueError(
+                        "The source images in element_crops differ, "
+                        "so combining results from these objects is not possible."
+                    )
+                if not element.resize_initial_size:
+                    raise ValueError(
+                        "When working with a list of element_crops, "
+                        "resize_initial_size should be True everywhere."
+                    )
+                if (
+                    first_element_segment_status != element.segment
+                    or first_element_memory_optimize_status != element.memory_optimize
+                ):
+                    raise ValueError(
+                        "The segment or memory_optimize attributes of element_crops differ, "
+                        "so processing cannot be performed."
+                    )
+
+            self.class_names = element_crops[0].class_names_dict
+            self.crops = [crop for element in element_crops for crop in element.crops]
+            self.image = element_crops[0].crops[0].source_image
+            self.segment = element_crops[0].segment
+            self.memory_optimize = element_crops[0].memory_optimize
+        else:
+            self.class_names = element_crops.class_names_dict
+            self.crops = element_crops.crops  # List to store the CropElement objects
+            if element_crops.resize_initial_size:
+                self.image = element_crops.crops[0].source_image
+            else:
+                self.image = element_crops.crops[0].source_image_resized
+            self.segment = element_crops.segment
+            self.memory_optimize = element_crops.memory_optimize
+
         # Combine detections of all patches
         (
             self.detected_conf_list_full,
@@ -108,13 +144,13 @@ def __init__(
         self.filtered_classes_names = [self.detected_cls_names_list_full[i] for i in self.filtered_indices]
 
         # Masks filtering:
-        if element_crops.segment and not element_crops.memory_optimize:
+        if self.segment and not self.memory_optimize:
             self.filtered_masks = [self.detected_masks_list_full[i] for i in self.filtered_indices]
         else:
             self.filtered_masks = []
 
         # Polygons filtering:
-        if element_crops.segment and element_crops.memory_optimize:
+        if self.segment and self.memory_optimize:
             self.filtered_polygons = [self.detected_polygons_list_full[i] for i in self.filtered_indices]
         else:
             self.filtered_polygons = []
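
The new guards are easiest to read from the caller's side. A hypothetical sketch of what they enforce, assuming an image img and the constructor arguments used elsewhere in this commit (variable and weights names are illustrative):

```python
# Both passes see the same image, but with mismatched segment flags
det_crops = MakeCropsDetectThem(image=img, model_path="yolo11m.pt", segment=False)
seg_crops = MakeCropsDetectThem(image=img, model_path="yolo11m-seg.pt", segment=True)

try:
    CombineDetections([det_crops, seg_crops], nms_threshold=0.3)
except ValueError as err:
    # Raised because the segment attributes differ between list elements;
    # differing source images or resize_initial_size=False fail the same way
    print(err)
```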

setup.py

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@
     long_description = "\n" + fh.read()
 
 
-VERSION = '1.3.1'
+VERSION = '1.3.2'
 DESCRIPTION = '''Patch-Based-Inference for detection/segmentation of small objects in images.'''
 
 setup(
