Commit 83e5311

Merge pull request #7 from Koldim2001/fix_memory
Fix memory
2 parents 961d5b6 + 6351511 commit 83e5311

9 files changed

Lines changed: 564 additions & 281 deletions

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -18,4 +18,5 @@ setup.cfg
 .pypirc
 build
 info_how_pip_upload.txt
+examples/patched_yolo_infer
 **.ipynb

README.md

Lines changed: 96 additions & 55 deletions
Large diffs are not rendered by default.

examples/example_patch_based_inference.ipynb

Lines changed: 355 additions & 207 deletions
Large diffs are not rendered by default.

patched_yolo_infer/README.md

Lines changed: 41 additions & 4 deletions
@@ -23,10 +23,11 @@ Interactive notebooks are provided to showcase the functionality of the library.
 
 __Check this Colab examples:__
 
-YOLO-Patch-Based-Inference Example - [Open in Colab](https://colab.research.google.com/drive/1FUao91GyB-ojGRN_okUxYyfagTT9tdsP?usp=sharing)
+Patch-Based-Inference Example - [Open in Colab](https://colab.research.google.com/drive/1XCpIYLMFEmGSO0XCOkSD7CcD9SFHSJPA?usp=sharing)
 
 Example of using various functions for visualizing basic YOLOv8/v9 inference results and handling overlapping crops - [Open in Colab](https://colab.research.google.com/drive/1eM4o1e0AUQrS1mLDpcgK9HKInWEvnaMn?usp=sharing)
 
+
 ## Usage
 
 ### 1. Patch-Based-Inference
@@ -40,7 +41,7 @@ The output obtained from the process includes several attributes that can be lev
 
 3. boxes: These bounding boxes are represented as a list of lists, where each list contains four values: [x_min, y_min, x_max, y_max]. These values correspond to the coordinates of the top-left and bottom-right corners of each bounding box.
 
-4. masks: If available, this attribute provides segmentation masks corresponding to the detected objects. These masks can be used to precisely delineate object boundaries.
+4. polygons: If available, this attribute provides a list containing NumPy arrays of polygon coordinates that represent segmentation masks corresponding to the detected objects. These polygons can be utilized to accurately outline the boundaries of each object.
 
 5. classes_ids: This attribute contains the class IDs assigned to each detected object. These IDs correspond to specific object classes defined during the model training phase.
 
@@ -72,7 +73,7 @@ result = CombineDetections(element_crops, nms_threshold=0.25, match_metric='IOS'
 img=result.image
 confidences=result.filtered_confidences
 boxes=result.filtered_boxes
-masks=result.filtered_masks
+polygons=result.filtered_polygons
 classes_ids=result.filtered_classes_id
 classes_names=result.filtered_classes_names
 ```
@@ -96,6 +97,7 @@ Class implementing cropping and passing crops through a neural network for detec
 - **overlap_y** (*float*): Percentage of overlap along the y-axis.
 - **show_crops** (*bool*): Whether to visualize the cropping.
 - **resize_initial_size** (*bool*): Whether to resize the results to the original image size (ps: slow operation).
+- **memory_optimize** (*bool*): Memory optimization option for segmentation (less accurate results when enabled).
 
 **CombineDetections**
 Class implementing combining masks/boxes from multiple crops + NMS (Non-Maximum Suppression).\
@@ -105,6 +107,8 @@ Class implementing combining masks/boxes from multiple crops + NMS (Non-Maximum
 - **match_metric** (*str*): Matching metric, either 'IOU' or 'IOS'.
 - **intelligent_sorter** (*bool*): Enable sorting by area and rounded confidence parameter. If False, sorting will be done only by confidence (usual nms). (Dafault is True)
 
+
+
 ---
 ### 2. Custom inference visualization:
 Visualizes custom results of object detection or segmentation on an image.
@@ -115,6 +119,7 @@ Visualizes custom results of object detection or segmentation on an image.
 - **classes_ids** (*list*): A list of class IDs for each detection.
 - **confidences** (*list*): A list of confidence scores corresponding to each bounding box. Default is an empty list.
 - **classes_names** (*list*): A list of class names corresponding to the class IDs. Default is an empty list.
+- **polygons** (*list*): A list containing NumPy arrays of polygon coordinates that represent segmentation masks.
 - **masks** (*list*): A list of masks. Default is an empty list.
 - **segment** (*bool*): Whether to perform instance segmentation. Default is False.
 - **show_boxes** (*bool*): Whether to show bounding boxes. Default is True.
@@ -147,9 +152,41 @@ visualize_results(
     img=result.image,
     confidences=result.filtered_confidences,
     boxes=result.filtered_boxes,
-    masks=result.filtered_masks,
+    polygons=result.filtered_polygons,
     classes_ids=result.filtered_classes_id,
     classes_names=result.filtered_classes_names,
     segment=False,
 )
+```
+
+---
+
+## __HOW TO IMPROVE THE QUALITY OF THE ALGORITHM FOR THE TASK OF INSTANCE SEGMENTATION:__
+
+In this approach, all operations under the hood are performed on binary masks of recognized objects. Storing these masks consumes a lot of memory, so this method requires more RAM and slightly more processing time. However, the accuracy of recognition significantly improves, which is especially noticeable in cases where there are many objects of different sizes and they are densely packed. Therefore, we recommend using this approach in production if accuracy is important and not speed, and if your computational resources allow storing hundreds of binary masks in RAM.
+
+The difference in the approach to using the function lies in specifying the parameter ```memory_optimize=False``` in the ```MakeCropsDetectThem``` class.
+In such a case, the informative values after processing will be the following:
+
+1. img: This attribute contains the original image on which the inference was performed. It provides context for the detected objects.
+
+2. confidences: This attribute holds the confidence scores associated with each detected object. These scores indicate the model's confidence level in the accuracy of its predictions.
+
+3. boxes: These bounding boxes are represented as a list of lists, where each list contains four values: [x_min, y_min, x_max, y_max]. These values correspond to the coordinates of the top-left and bottom-right corners of each bounding box.
+
+4. masks: This attribute provides segmentation binary masks corresponding to the detected objects. These masks can be used to precisely delineate object boundaries.
+
+5. classes_ids: This attribute contains the class IDs assigned to each detected object. These IDs correspond to specific object classes defined during the model training phase.
+
+6. classes_names: These are the human-readable names corresponding to the class IDs. They provide semantic labels for the detected objects, making the results easier to interpret.
+
+
+Here's how you can obtain them:
+```python
+img=result.image
+confidences=result.filtered_confidences
+boxes=result.filtered_boxes
+masks=result.filtered_masks
+classes_ids=result.filtered_classes_id
+classes_names=result.filtered_classes_names
+```
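
The RAM trade-off described in the new README section can be made concrete: a full-frame binary mask costs height × width bytes per object, while a polygon outline costs only a few bytes per boundary point. A minimal sketch of the difference (the 640×640 frame and 100-point polygon are illustrative assumptions, not values used by the library):

```python
import numpy as np

# One full-frame binary mask (memory_optimize=False): H * W bytes per object.
mask = np.zeros((640, 640), dtype=np.uint8)

# The same object stored as a polygon outline (memory_optimize=True):
# N boundary points, two uint16 coordinates each.
polygon = np.zeros((100, 2), dtype=np.uint16)

print(mask.nbytes)     # 409600 bytes per object
print(polygon.nbytes)  # 400 bytes per object
print(mask.nbytes // polygon.nbytes)  # 1024
```

With hundreds of detections per image, this three-orders-of-magnitude gap is why polygons are the default and full masks are reserved for accuracy-critical use.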

patched_yolo_infer/elements/CropElement.py

Lines changed: 27 additions & 4 deletions
@@ -25,14 +25,16 @@ def __init__(
         self.detected_cls = None  # List of classes of detected objects
         self.detected_xyxy = None  # List of lists containing xyxy box coordinates
         self.detected_masks = None  # List of np arrays containing masks in case of yolo-seg
+        self.polygons = None  # List of polygons points in case of using memory optimaze
 
         # Refined coordinates according to crop position information
         self.detected_xyxy_real = None  # List of lists containing xyxy box coordinates in values from source_image_resized or source_image
         self.detected_masks_real = None  # List of np arrays containing masks in case of yolo-seg with the size of source_image_resized or source_image
+        self.detected_polygons_real = None  # List of polygons points in case of using memory optimaze in values from source_image_resized or source_image
 
-    def calculate_inference(self, model, imgsz=640, conf=0.35, iou=0.7, segment=False, classes_list=None):
-        # Perform inference
+    def calculate_inference(self, model, imgsz=640, conf=0.35, iou=0.7, segment=False, classes_list=None, memory_optimize=False):
 
+        # Perform inference
         predictions = model.predict(self.crop, imgsz=imgsz, conf=conf, iou=iou, classes=classes_list, verbose=False)
 
         pred = predictions[0]
@@ -47,8 +49,13 @@ def calculate_inference(self, model, imgsz=640, conf=0.35, iou=0.7, segment=Fals
         self.detected_conf = pred.boxes.conf.cpu().numpy()
 
         if segment and len(self.detected_cls) != 0:
-            # Get the masks
-            self.detected_masks = pred.masks.data.cpu().numpy()
+            if memory_optimize:
+                # Get the polygons
+                self.polygons = [mask.astype(np.uint16) for mask in pred.masks.xy]
+            else:
+                # Get the masks
+                self.detected_masks = pred.masks.data.cpu().numpy()
+
 
     def calculate_real_values(self):
         # Calculate real values of bboxes and masks in source_image_resized
@@ -57,6 +64,7 @@ def calculate_real_values(self):
 
         self.detected_xyxy_real = []  # List of lists with xyxy box coordinates in the values of the source_image_resized
         self.detected_masks_real = []  # List of np arrays with masks in case of yolo-seg sized as source_image_resized
+        self.detected_polygons_real = []  # List of polygons in case of yolo-seg sized as source_image_resized
 
         for bbox in self.detected_xyxy:
             # Calculate real box coordinates based on the position information of the crop
@@ -81,10 +89,18 @@ def calculate_real_values(self):
                 # Append the masked image to the list of detected_masks_real
                 self.detected_masks_real.append(black_image)
 
+        if self.polygons is not None:
+            # Adjust the mask coordinates
+            for mask in self.polygons:
+                mask[:, 0] += x_start_global  # Add x_start_global to all x coordinates
+                mask[:, 1] += y_start_global  # Add y_start_global to all y coordinates
+                self.detected_polygons_real.append(mask.astype(np.uint16))
+
     def resize_results(self):
         # from source_image_resized to source_image sizes transformation
         resized_xyxy = []
         resized_masks = []
+        resized_polygons = []
 
         for bbox in self.detected_xyxy_real:
             # Resize bbox coordinates
@@ -101,5 +117,12 @@ def resize_results(self):
                                        interpolation=cv2.INTER_NEAREST)
             resized_masks.append(mask_resized)
 
+
+        for polygon in self.detected_polygons_real:
+            polygon[:, 0] = (polygon[:, 0] * (self.source_image.shape[1] / self.source_image_resized.shape[1])).astype(np.uint16)
+            polygon[:, 1] = (polygon[:, 1] * (self.source_image.shape[0] / self.source_image_resized.shape[0])).astype(np.uint16)
+            resized_polygons.append(polygon)
+
         self.detected_xyxy_real = resized_xyxy
         self.detected_masks_real = resized_masks
+        self.detected_polygons_real = resized_polygons
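
The polygon handling added to `CropElement` amounts to two affine steps: shift every point by the crop's top-left corner (crop-local coordinates to resized-source coordinates), then scale from the resized source image back to the original size. A standalone NumPy sketch of those two steps (the crop offset and image sizes are made-up values, not anything from the diff):

```python
import numpy as np

# A hypothetical polygon predicted inside a crop, as (N, 2) [x, y] points.
polygon = np.array([[10, 20], [50, 20], [50, 60]], dtype=np.uint16)

# Step 1 (as in calculate_real_values): shift by the crop's top-left corner.
x_start_global, y_start_global = 300, 100  # assumed crop position
polygon[:, 0] += x_start_global
polygon[:, 1] += y_start_global

# Step 2 (as in resize_results): scale from the resized source image
# back to the original image size.
source_shape = (2160, 3840)   # assumed original (H, W)
resized_shape = (1080, 1920)  # assumed resized (H, W)
polygon[:, 0] = (polygon[:, 0] * (source_shape[1] / resized_shape[1])).astype(np.uint16)
polygon[:, 1] = (polygon[:, 1] * (source_shape[0] / resized_shape[0])).astype(np.uint16)

print(polygon.tolist())  # [[620, 240], [700, 240], [700, 320]]
```

Shifting a handful of points this way is far cheaper than re-rendering a full-resolution binary mask per object, which is the whole point of the `memory_optimize` path.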

patched_yolo_infer/functions_extra.py

Lines changed: 20 additions & 4 deletions
@@ -264,6 +264,7 @@ def visualize_results(
     confidences=[],
     classes_names=[],
     masks=[],
+    polygons=[],
     segment=False,
     show_boxes=True,
     show_class=True,
@@ -342,7 +343,7 @@ def visualize_results(
         box = boxes[i]
         x_min, y_min, x_max, y_max = box
 
-        if segment:
+        if segment and len(masks) > 0:
             mask = masks[i]
             # Resize mask to the size of the original image using nearest neighbor interpolation
             mask_resized = cv2.resize(
@@ -354,11 +355,26 @@ def visualize_results(
             )
 
             if fill_mask:
-                color_mask = np.zeros_like(img)
-                color_mask[mask_resized > 0] = color
-                labeled_image = cv2.addWeighted(labeled_image, 1, color_mask, alpha, 0)
+                if alpha == 1:
+                    cv2.fillPoly(labeled_image, pts=mask_contours, color=color)
+                else:
+                    color_mask = np.zeros_like(img)
+                    color_mask[mask_resized > 0] = color
+                    labeled_image = cv2.addWeighted(labeled_image, 1, color_mask, alpha, 0)
 
             cv2.drawContours(labeled_image, mask_contours, -1, color, thickness)
+
+        elif segment and len(polygons) > 0:
+            if len(polygons[i]) > 0:
+                points = np.array(polygons[i].reshape((-1, 1, 2)), dtype=np.int32)
+                cv2.drawContours(labeled_image, [points], -1, color, thickness)
+                if fill_mask:
+                    if alpha == 1:
+                        cv2.fillPoly(labeled_image, pts=[points], color=color)
+                    else:
+                        mask_from_poly = np.zeros_like(img)
+                        color_mask_from_poly = cv2.fillPoly(mask_from_poly, pts=[points], color=color)
+                        labeled_image = cv2.addWeighted(labeled_image, 1, color_mask_from_poly, alpha, 0)
 
         # Write class label
         if show_boxes:
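
The new polygon branch in `visualize_results` first converts each stored `(N, 2)` polygon into the `(N, 1, 2)` `int32` layout that OpenCV contour functions such as `cv2.drawContours` and `cv2.fillPoly` expect. That conversion can be checked on its own without OpenCV (the sample polygon is hypothetical):

```python
import numpy as np

# A hypothetical filtered polygon as the library stores it: (N, 2) uint16 points.
polygon = np.array([[10, 20], [50, 20], [30, 60]], dtype=np.uint16)

# Mirror the reshape from the diff: OpenCV contours are (N, 1, 2) int32 arrays,
# i.e. a column of single-point rows.
points = np.array(polygon.reshape((-1, 1, 2)), dtype=np.int32)

print(points.shape)  # (3, 1, 2)
print(points.dtype)  # int32
```

The `int32` cast matters: the polygons are stored as `uint16` to save memory, but OpenCV's drawing API rejects unsigned 16-bit point arrays.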

patched_yolo_infer/nodes/CombineDetections.py

Lines changed: 17 additions & 5 deletions
@@ -25,6 +25,7 @@ class CombineDetections:
         detected_conf_list_full (list): List of detected confidences.
         detected_xyxy_list_full (list): List of detected bounding boxes.
         detected_masks_list_full (list): List of detected masks.
+        detected_polygons_list_full (list): List of detected polygons when memory optimization is enabled.
         detected_cls_id_list_full (list): List of detected class IDs.
         detected_cls_names_list_full (list): List of detected class names.
         filtered_indices (list): List of indices after non-maximum suppression.
@@ -33,6 +34,7 @@ class CombineDetections:
         filtered_classes_id (list): List of class IDs after non-maximum suppression.
         filtered_classes_names (list): List of class names after non-maximum suppression.
         filtered_masks (list): List of filtered (after nms) masks if segmentation is enabled.
+        filtered_polygons (list): List of filtered (after nms) polygons if segmentation and memory optimization are enabled.
     """
 
     def __init__(
@@ -54,20 +56,21 @@ def __init__(
         self.match_metric = match_metric
         self.intelligent_sorter = intelligent_sorter  # enable sorting by area and confidence parameter
 
-        # combinate detections of all patches
+        # Combinate detections of all patches
         (
             self.detected_conf_list_full,
             self.detected_xyxy_list_full,
             self.detected_masks_list_full,
-            self.detected_cls_id_list_full
+            self.detected_cls_id_list_full,
+            self.detected_polygons_list_full
         ) = self.combinate_detections(crops=self.crops)
 
         self.detected_cls_names_list_full = [
             self.class_names[value] for value in self.detected_cls_id_list_full
         ]  # make str list
 
         # Invoke the NMS for segmentation masks method for filtering predictions
-        if len(self.detected_masks_list_full)>0:
+        if len(self.detected_masks_list_full) > 0:
 
             self.filtered_indices = self.nms(
                 self.detected_conf_list_full,
@@ -93,10 +96,17 @@ def __init__(
         self.filtered_classes_id = [self.detected_cls_id_list_full[i] for i in self.filtered_indices]
         self.filtered_classes_names = [self.detected_cls_names_list_full[i] for i in self.filtered_indices]
 
-        if element_crops.segment:
+        # Masks filtering:
+        if element_crops.segment and not element_crops.memory_optimize:
             self.filtered_masks = [self.detected_masks_list_full[i] for i in self.filtered_indices]
         else:
             self.filtered_masks = []
+
+        # Polygons filtering:
+        if element_crops.segment and element_crops.memory_optimize:
+            self.filtered_polygons = [self.detected_polygons_list_full[i] for i in self.filtered_indices]
+        else:
+            self.filtered_polygons = []
 
     def combinate_detections(self, crops):
         """
@@ -113,14 +123,16 @@ def combinate_detections(self, crops):
         detected_xyxy = []
         detected_masks = []
         detected_cls = []
+        detected_polygons = []
 
         for crop in crops:
             detected_conf.extend(crop.detected_conf)
             detected_xyxy.extend(crop.detected_xyxy_real)
             detected_masks.extend(crop.detected_masks_real)
             detected_cls.extend(crop.detected_cls)
+            detected_polygons.extend(crop.detected_polygons_real)
 
-        return detected_conf, detected_xyxy, detected_masks, detected_cls
+        return detected_conf, detected_xyxy, detected_masks, detected_cls, detected_polygons
 
     @staticmethod
     def intersect_over_union(mask, masks_list):
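
`CombineDetections` matches overlapping detections with either `'IOU'` or `'IOS'`. A minimal sketch of the two metrics on boolean masks (the helper functions below are illustrative, not the class's internals): IoS divides the intersection by the smaller area, so a detection fully contained in another scores 1.0 and gets suppressed, even when plain IoU would be far below the NMS threshold:

```python
import numpy as np

def iou(mask_a, mask_b):
    # Intersection over union of two boolean masks.
    inter = np.logical_and(mask_a, mask_b).sum()
    union = np.logical_or(mask_a, mask_b).sum()
    return inter / union

def ios(mask_a, mask_b):
    # Intersection over the smaller mask's area.
    inter = np.logical_and(mask_a, mask_b).sum()
    smaller = min(mask_a.sum(), mask_b.sum())
    return inter / smaller

big = np.zeros((100, 100), dtype=bool)
big[10:90, 10:90] = True    # 80x80 object
small = np.zeros((100, 100), dtype=bool)
small[20:40, 20:40] = True  # 20x20 duplicate fully inside the big one

print(iou(big, small))  # 0.0625 -> survives IoU-based NMS at threshold 0.25
print(ios(big, small))  # 1.0    -> suppressed with match_metric='IOS'
```

This is why the README examples pass `match_metric='IOS'` when merging crops: duplicates of one object across overlapping patches are often nested rather than merely overlapping.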

patched_yolo_infer/nodes/MakeCropsDetectThem.py

Lines changed: 6 additions & 1 deletion
@@ -29,6 +29,7 @@ class MakeCropsDetectThem:
             image size (ps: slow operation).
         model: Pre-initialized model object. If provided, the model will be used directly
             instead of loading from model_path.
+        memory_optimize (bool): Memory optimization option for segmentation (less accurate results)
 
     Attributes:
         model: YOLOv8 model loaded from the specified path.
@@ -48,6 +49,7 @@ class MakeCropsDetectThem:
         resize_initial_size (bool): Whether to resize the results to the original
             image size (ps: slow operation).
         class_names_dict (dict): Dictionary containing class names of the YOLO model.
+        memory_optimize (bool): Memory optimization option for segmentation (less accurate results)
     """
 
     def __init__(
@@ -60,12 +62,13 @@ def __init__(
         classes_list=None,
         segment=False,
         shape_x=700,
-        shape_y=700,
+        shape_y=600,
         overlap_x=25,
         overlap_y=25,
         show_crops=False,
         resize_initial_size=False,
         model=None,
+        memory_optimize=True
     ) -> None:
         if model is None:
             self.model = YOLO(model_path)  # Load the model from the specified path
@@ -84,6 +87,7 @@ def __init__(
        self.crops = []  # List to store the CropElement objects
         self.show_crops = show_crops  # Whether to visualize the cropping
         self.resize_initial_size = resize_initial_size  # slow operation !
+        self.memory_optimize = memory_optimize  # memory opimization option for segmentation
         self.class_names_dict = self.model.names
 
         self.crops = self.get_crops_xy(
@@ -195,6 +199,7 @@ def _detect_objects(self):
                 iou=self.iou,
                 segment=self.segment,
                 classes_list=self.classes_list,
+                memory_optimize=self.memory_optimize
             )
             crop.calculate_real_values()
             if self.resize_initial_size:
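
The `shape_x`/`shape_y`/`overlap_x`/`overlap_y` parameters define an overlapping crop grid: the stride between neighboring crops is the crop size times `(1 - overlap / 100)`. The sketch below is an illustrative reimplementation of that grid logic under that assumption, not the library's actual `get_crops_xy` (which, for instance, would also handle crops that extend past the image border):

```python
def crop_boxes(img_w, img_h, shape_x=700, shape_y=600, overlap_x=25, overlap_y=25):
    """Illustrative sketch: [x_min, y_min, x_max, y_max] of overlapping crops."""
    # 25% overlap -> each crop starts 75% of a crop-width after the previous one.
    step_x = int(shape_x * (1 - overlap_x / 100))
    step_y = int(shape_y * (1 - overlap_y / 100))
    boxes = []
    for y in range(0, max(img_h - shape_y, 0) + step_y, step_y):
        for x in range(0, max(img_w - shape_x, 0) + step_x, step_x):
            boxes.append((x, y, x + shape_x, y + shape_y))
    return boxes

boxes = crop_boxes(1920, 1080)
print(len(boxes))  # 12 crops for a 1920x1080 image with the new defaults
print(boxes[0])    # (0, 0, 700, 600)
```

Larger overlaps give objects near crop borders more chances to be fully contained in some crop, at the cost of more crops per image and more duplicates for `CombineDetections` to suppress.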

setup.py

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@
     long_description = "\n" + fh.read()
 
 
-VERSION = '1.1.2'
+VERSION = '1.2.1'
 DESCRIPTION = '''YOLO-Patch-Based-Inference for detection/segmentation of small objects in images.'''
 
 setup(
