Commit 561ec16

Merge pull request #25 from Koldim2001/feature/multiple_crop_elements
Feature/multiple crop elements
2 parents 13e0aeb + 65dfb55 commit 561ec16

6 files changed: 328 additions & 282 deletions

README.md

Lines changed: 10 additions & 1 deletion
@@ -134,7 +134,7 @@ Class implementing combining masks/boxes from multiple crops + NMS (Non-Maximum
 
 | **Argument** | **Type** | **Default** | **Description** |
 |----------------------|-------------------|-------------|-------------------------------------------------------------------------------------------------------------------------|
-| element_crops |MakeCropsDetectThem| | Object containing crop information. |
+| element_crops |MakeCropsDetectThem| | Object containing crop information. This can be either a single MakeCropsDetectThem object or a list of such objects. |
 | nms_threshold | float | 0.3 | IoU/IoS threshold for non-maximum suppression. The lower the value, the fewer objects remain after suppression. |
 | match_metric | str | IOS | Matching metric, either 'IOU' or 'IOS'. |
 | class_agnostic_nms | bool | True | Determines the NMS mode in object detection. When set to True, NMS operates across all classes, ignoring class distinctions and suppressing less confident bounding boxes globally. Otherwise, NMS is applied separately for each class. |
@@ -286,6 +286,14 @@ shape_x, shape_y, overlap_x, overlap_y = auto_calculate_crop_values(
 
 An example of working with `auto_calculate_crop_values` is presented in a Google Colab notebook - [![Open In Colab][colab_badge]][colab_ex1_auto_calculate_crop_values]
 
+---
+
+
+## __Implementing Patching at Different Resolutions__
+
+The image can be cropped into patches at several different resolutions: smaller patches make it easier to detect small objects, while larger patches make it easier to detect large ones, so the algorithm covers a wider range of object sizes in the frame. To achieve this, process the image several times through MakeCropsDetectThem with different patch parameters, then pass the resulting list of element_crops objects to CombineDetections.
+
+An example of using this approach can be seen in this Google Colab notebook - [![Open In Colab][colab_badge]][colab_ex1_different_resolutions]
 
 
 [nb_example1]: https://nbviewer.org/github/Koldim2001/YOLO-Patch-Based-Inference/blob/main/examples/example_patch_based_inference.ipynb
@@ -297,3 +305,4 @@ An example of working with `auto_calculate_crop_values` is presented in a Google Colab
 [yt_link2]: https://www.youtube.com/watch?v=nBQuWa63188
 [colab_ex1_memory_optimize]: https://colab.research.google.com/drive/1XCpIYLMFEmGSO0XCOkSD7CcD9SFHSJPA?usp=sharing#scrollTo=DM_eCc3yXzXW
 [colab_ex1_auto_calculate_crop_values]: https://colab.research.google.com/drive/1XCpIYLMFEmGSO0XCOkSD7CcD9SFHSJPA?usp=sharing#scrollTo=Wkt1FkAkhCwQ
+[colab_ex1_different_resolutions]: !!!
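
The new README section describes the multi-resolution flow in prose only. A minimal sketch of what that flow could look like in code, assuming the package-level imports shown in the repo's examples; the file name, weights, and patch sizes here are illustrative, not part of this commit:

```python
import cv2
from patched_yolo_infer import MakeCropsDetectThem, CombineDetections

img = cv2.imread("image.jpg")  # hypothetical input image

# Pass 1 -- small patches, better at picking up small objects
crops_small = MakeCropsDetectThem(
    image=img,
    model_path="yolo11m.pt",
    segment=False,
    show_crops=False,
    shape_x=400,
    shape_y=400,
    overlap_x=50,
    overlap_y=50,
)

# Pass 2 -- large patches, better at picking up large objects
crops_large = MakeCropsDetectThem(
    image=img,
    model_path="yolo11m.pt",
    segment=False,
    show_crops=False,
    shape_x=800,
    shape_y=800,
    overlap_x=50,
    overlap_y=50,
)

# NMS inside CombineDetections merges detections from both passes
result = CombineDetections([crops_small, crops_large], nms_threshold=0.3)
print(result.filtered_classes_names)
```

Note that, per the new validation added in patched_yolo_infer/nodes/CombineDetections.py below, every element of the list must share the same source image, keep resize_initial_size=True, and use matching segment/memory_optimize flags.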

examples/example_extra_functions.ipynb

Lines changed: 258 additions & 257 deletions
Large diffs are not rendered by default.

examples/example_patch_based_inference.ipynb

Lines changed: 13 additions & 13 deletions
@@ -344,7 +344,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"### YOLOv9: "
+"### YOLO11: "
 ]
 },
 {
@@ -390,7 +390,7 @@
 "source": [
 "element_crops = MakeCropsDetectThem(\n",
 "    image=img,\n",
-"    model_path=\"yolov9c.pt\",\n",
+"    model_path=\"yolo11m.pt\",\n",
 "    segment=False,\n",
 "    show_crops=False,\n",
 "    shape_x=600,\n",
@@ -406,7 +406,7 @@
 "print('Basic yolo inference:')\n",
 "visualize_results_usual_yolo_inference(\n",
 "    img,\n",
-"    model=YOLO(\"yolov9c.pt\"),\n",
+"    model=YOLO(\"yolo11m.pt\"),\n",
 "    imgsz=640,\n",
 "    conf=0.5,\n",
 "    iou=0.7,\n",
@@ -568,7 +568,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"### YOLOv9-seg:"
+"### YOLO11-seg:"
 ]
 },
 {
@@ -579,7 +579,7 @@
 "source": [
 "element_crops = MakeCropsDetectThem(\n",
 "    image=img,\n",
-"    model_path=\"yolov9e-seg.pt\",\n",
+"    model_path=\"yolo11m-seg.pt\",\n",
 "    segment=True,\n",
 "    show_crops=False,\n",
 "    shape_x=600,\n",
@@ -637,7 +637,7 @@
 "print('Basic yolo inference:')\n",
 "visualize_results_usual_yolo_inference(\n",
 "    img,\n",
-"    model=YOLO(\"yolov9e-seg.pt\"),\n",
+"    model=YOLO(\"yolo11m-seg.pt\"),\n",
 "    imgsz=640,\n",
 "    conf=0.5,\n",
 "    iou=0.7,\n",
@@ -808,7 +808,7 @@
 }
 ],
 "source": [
-"from ultralytics import FastSAM\n",
+"from ultralytics import YOLO\n",
 "import matplotlib.pyplot as plt\n",
 "\n",
 "# Load the image \n",
@@ -872,8 +872,7 @@
 "source": [
 "element_crops = MakeCropsDetectThem(\n",
 "    image=img,\n",
-"    model=FastSAM('FastSAM-x.pt'),\n",
-"    model_path=\"yolov8m.pt\",\n",
+"    model=YOLO('FastSAM-x.pt'),\n",
 "    segment=True,\n",
 "    show_crops=True,\n",
 "    shape_x=400,\n",
@@ -882,14 +881,15 @@
 "    overlap_y=50,\n",
 "    conf=0.3,\n",
 "    iou=0.8,\n",
+"    imgsz=1024,\n",
 ")\n",
-"result = CombineDetections(element_crops, nms_threshold=0.40)\n",
+"result = CombineDetections(element_crops, nms_threshold=0.4)\n",
 "\n",
 "print('Basic FastSAM inference:')\n",
 "visualize_results_usual_yolo_inference(\n",
 "    img,\n",
-"    model=FastSAM('FastSAM-x.pt'),\n",
-"    imgsz=640,\n",
+"    model=YOLO('FastSAM-x.pt'),\n",
+"    imgsz=1024,\n",
 "    conf=0.3,\n",
 "    iou=0.8,\n",
 "    segment=True,\n",
@@ -1392,7 +1392,7 @@
 ],
 "metadata": {
 "kernelspec": {
-"display_name": "WORK",
+"display_name": "patched_yolo_infer",
 "language": "python",
 "name": "python3"
 },
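
Beyond the YOLOv9 → YOLO11 swap, the notebook now loads the FastSAM weights through the generic YOLO class and raises imgsz to 1024. A minimal standalone sketch of the updated call; only the weights file name and inference parameters come from the diff, the input image is an assumption:

```python
from ultralytics import YOLO

# FastSAM-x uses a YOLOv8-seg-style architecture, so the generic YOLO
# loader can resolve it from the weights file without the FastSAM class
model = YOLO("FastSAM-x.pt")
results = model("image.jpg", imgsz=1024, conf=0.3, iou=0.8)
```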

patched_yolo_infer/README.md

Lines changed: 1 addition & 1 deletion
@@ -105,7 +105,7 @@ Class implementing cropping and passing crops through a neural network for detec
 **CombineDetections**
 Class implementing combining masks/boxes from multiple crops + NMS (Non-Maximum Suppression).\
 **Args:**
-- **element_crops** (*MakeCropsDetectThem*): Object containing crop information.
+- **element_crops** (*MakeCropsDetectThem*): Object containing crop information. This can be either a single MakeCropsDetectThem object or a list of such objects.
 - **nms_threshold** (*float*): IoU/IoS threshold for non-maximum suppression.
 - **match_metric** (*str*): Matching metric, either 'IOU' or 'IOS'.
 - **class_agnostic_nms** (*bool*): Determines the NMS mode in object detection. When set to True, NMS operates across all classes, ignoring class distinctions and suppressing less confident bounding boxes globally. Otherwise, NMS is applied separately for each class. (Default is True)

patched_yolo_infer/nodes/CombineDetections.py

Lines changed: 45 additions & 9 deletions
@@ -1,3 +1,4 @@
+from typing import Union, List
 import torch
 import numpy as np
 from .MakeCropsDetectThem import MakeCropsDetectThem
@@ -46,26 +47,61 @@ class CombineDetections:
 
     def __init__(
         self,
-        element_crops: MakeCropsDetectThem,
+        element_crops: Union[MakeCropsDetectThem, List[MakeCropsDetectThem]],
         nms_threshold=0.3,
         match_metric='IOS',
         intelligent_sorter=True,
         sorter_bins=5,
         class_agnostic_nms=True
     ) -> None:
-        self.class_names = element_crops.class_names_dict
-        self.crops = element_crops.crops  # List to store the CropElement objects
-        if element_crops.resize_initial_size:
-            self.image = element_crops.crops[0].source_image
-        else:
-            self.image = element_crops.crops[0].source_image_resized
 
         self.nms_threshold = nms_threshold  # IoU or IoS threshold for NMS
         self.match_metric = match_metric
         self.intelligent_sorter = intelligent_sorter  # enable sorting by area and confidence parameter
         self.sorter_bins = sorter_bins
         self.class_agnostic_nms = class_agnostic_nms
 
+        # Check if element_crops is a list
+        if isinstance(element_crops, list):
+            # Ensure all elements in the list share the same source_image and other params
+            first_image = element_crops[0].crops[0].source_image
+            first_element_segment_status = element_crops[0].segment
+            first_element_memory_optimize_status = element_crops[0].memory_optimize
+            for element in element_crops:
+                if not np.array_equal(element.crops[0].source_image, first_image):
+                    raise ValueError(
+                        "The source images in element_crops differ, "
+                        "so combining results from these objects is not possible."
+                    )
+                if not element.resize_initial_size:
+                    raise ValueError(
+                        "When working with a list of element_crops, "
+                        "resize_initial_size should be True everywhere."
+                    )
+                if (
+                    first_element_segment_status != element.segment
+                    or first_element_memory_optimize_status != element.memory_optimize
+                ):
+                    raise ValueError(
+                        "The segment or memory_optimize attributes of element_crops differ, "
+                        "so processing cannot be performed."
+                    )
+
+            self.class_names = element_crops[0].class_names_dict
+            self.crops = [crop for element in element_crops for crop in element.crops]
+            self.image = element_crops[0].crops[0].source_image
+            self.segment = element_crops[0].segment
+            self.memory_optimize = element_crops[0].memory_optimize
+        else:
+            self.class_names = element_crops.class_names_dict
+            self.crops = element_crops.crops  # List to store the CropElement objects
+            if element_crops.resize_initial_size:
+                self.image = element_crops.crops[0].source_image
+            else:
+                self.image = element_crops.crops[0].source_image_resized
+            self.segment = element_crops.segment
+            self.memory_optimize = element_crops.memory_optimize
+
         # Combine detections of all patches
         (
             self.detected_conf_list_full,
@@ -108,13 +144,13 @@ def __init__(
         self.filtered_classes_names = [self.detected_cls_names_list_full[i] for i in self.filtered_indices]
 
         # Masks filtering:
-        if element_crops.segment and not element_crops.memory_optimize:
+        if self.segment and not self.memory_optimize:
             self.filtered_masks = [self.detected_masks_list_full[i] for i in self.filtered_indices]
         else:
             self.filtered_masks = []
 
         # Polygons filtering:
-        if element_crops.segment and element_crops.memory_optimize:
+        if self.segment and self.memory_optimize:
             self.filtered_polygons = [self.detected_polygons_list_full[i] for i in self.filtered_indices]
         else:
             self.filtered_polygons = []
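
The new guards are easiest to read from the caller's side. A hypothetical sketch of what they enforce, assuming an image img and the constructor arguments used elsewhere in this commit (variable and weights names are illustrative):

```python
# Both passes see the same image, but with mismatched segment flags
det_crops = MakeCropsDetectThem(image=img, model_path="yolo11m.pt", segment=False)
seg_crops = MakeCropsDetectThem(image=img, model_path="yolo11m-seg.pt", segment=True)

try:
    CombineDetections([det_crops, seg_crops], nms_threshold=0.3)
except ValueError as err:
    # Raised because the segment attributes differ between list elements;
    # differing source images or resize_initial_size=False fail the same way
    print(err)
```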

setup.py

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@
     long_description = "\n" + fh.read()
 
 
-VERSION = '1.3.1'
+VERSION = '1.3.2'
 DESCRIPTION = '''Patch-Based-Inference for detection/segmentation of small objects in images.'''
 
 setup(
