fix params

Koldim2001 · Koldim2001 · commit 7bbeea025f38 · 2025-01-09T12:31:34.000+03:00
diff --git a/README.md b/README.md
@@ -111,10 +111,10 @@ Class implementing cropping and passing crops through a neural network for detec
 | **Argument**          | **Type**               | **Default**  | **Description**                                                                                                |
 |-----------------------|------------------------|--------------|----------------------------------------------------------------------------------------------------------------|
 | image                 | np.ndarray             |              | 	The input image in BGR format.                                                                                               |
-| model_path            | str                    | "yolov8m.pt" | Path to the YOLO model.                                                                                        |
+| model_path            | str                    | "yolo11m.pt" | Path to the YOLO model.                                                                                        |
 | model                 | ultralytics model      | None         | Pre-initialized model object. If provided, the model will be used directly instead of loading from model_path. |
 | imgsz                 | int                    | 640          | Size of the input image for inference YOLO.                                                                    |
-| conf                  | float                  | 0.5          | Confidence threshold for detections YOLO.                                                                      |
+| conf                  | float                  | 0.25         | Confidence threshold for detections YOLO.                                                                      |
 | iou                   | float                  | 0.7          | IoU threshold for non-maximum suppression YOLOv8 of single  crop.                                              |
 | classes_list          | List[int] or None      | None         | List of classes to filter detections. If None, all classes are considered.                                     |
 | segment               | bool                   | False        | Whether to perform segmentation (if the model supports it).                                                    |
@@ -167,7 +167,7 @@ Possible arguments of the ```visualize_results``` function:
 | thickness               | int             | 4             | The thickness of bounding box and text.                                                       |
 | font                    | cv2.font        |cv2.FONT_HERSHEY_SIMPLEX | The font type for class labels.                                                     |
 | font_scale              | float           | 1.5           | The scale factor for font size.                                                               |
-| delta_colors            | int             | seed=0        | The random seed offset for color variation.                                                   |
+| delta_colors            | int             | seed=3        | The random seed offset for color variation.                                                   |
 | dpi                     | int             | 150           | Final visualization size (plot is bigger when dpi is higher).                                 |
 | random_object_colors    | bool            | False         | If true, colors for each object are selected randomly.                                        |
 | show_confidences        | bool            | False         | If true and show_class=True, confidences near class are visualized.                           |
diff --git a/patched_yolo_infer/functions_extra.py b/patched_yolo_infer/functions_extra.py
@@ -10,7 +10,7 @@ def visualize_results_usual_yolo_inference(
     img,
     model,
     imgsz=640,
-    conf=0.5,
+    conf=0.25,
     iou=0.7,
     segment=False,
     show_boxes=True,
@@ -22,7 +22,7 @@ def visualize_results_usual_yolo_inference(
     thickness=4,
     font=cv2.FONT_HERSHEY_SIMPLEX,
     font_scale=1.5,
-    delta_colors=0,
+    delta_colors=3,
     dpi=150,
     random_object_colors=False,
     show_confidences=False,
@@ -33,13 +33,13 @@ def visualize_results_usual_yolo_inference(
     inference_extra_args=None,
 ):
     """
-    Visualizes the results of usual YOLOv8 or YOLOv8-seg inference on an image
+    Visualizes the results of usual YOLO or YOLO-seg inference on an image
 
     Args:
         img (numpy.ndarray): The input image in BGR format.
         model: The object detection or segmentation model (yolov8).
         imgsz (int): The input image size for the model. Default is 640.
-        conf (float): The confidence threshold for detection. Default is 0.5.
+        conf (float): The confidence threshold for detection. Default is 0.25.
         iou (float): The intersection over union threshold for detection. Default is 0.7.
         segment (bool): Whether to perform instance segmentation. Default is False.
         show_boxes (bool): Whether to show bounding boxes. Default is True.
@@ -51,7 +51,7 @@ def visualize_results_usual_yolo_inference(
         thickness (int): The thickness of bounding box and text. Default is 4.
         font: The font type for class labels. Default is cv2.FONT_HERSHEY_SIMPLEX.
         font_scale (float): The scale factor for font size. Default is 1.5.
-        delta_colors (int): The random seed offset for color variation. Default is 0.
+        delta_colors (int): The random seed offset for color variation. Default is 3.
         dpi (int): Final visualization size (plot is bigger when dpi is higher).
         random_object_colors (bool): If True, colors for each object are selected randomly.
         show_confidences (bool): If True and show_class=True, confidences near class are visualized.
@@ -177,6 +177,160 @@ def visualize_results_usual_yolo_inference(
         plt.show()
 
 
+def visualize_results_yolo_pose_inference(
+    img,
+    model,
+    imgsz=640,
+    conf=0.25,
+    iou=0.7,
+    show_boxes=True,
+    show_class=True,
+    color_class_background=(0, 0, 255),
+    color_class_text=(255, 255, 255),
+    thickness=4,
+    point_radius=4,
+    connection_schema=None,
+    min_landmark_visibility=0.25,
+    font=cv2.FONT_HERSHEY_SIMPLEX,
+    font_scale=1.5,
+    delta_colors=3,
+    dpi=150,
+    random_object_colors=False,
+    show_confidences=False,
+    axis_off=True,
+    show_classes_list=[],
+    list_of_class_colors=None,
+    return_image_array=False,
+    inference_extra_args=None,
+):
+    """
+    Visualizes the results of usual YOLO-pose inference on an image
+
+    Args:
+        img (numpy.ndarray): The input image in BGR format.
+        model: The YOLO pose model used for inference (called via `model.predict`; class names are read from `model.names`).
+        imgsz (int): The input image size for the model. Default is 640.
+        conf (float): The confidence threshold for detection. Default is 0.25.
+        iou (float): The intersection over union threshold for detection. Default is 0.7.
+        show_boxes (bool): Whether to show bounding boxes. Default is True.
+        show_class (bool): Whether to show class labels. Default is True.
+        color_class_background (tuple / list of tuple): The background BGR color for class labels. Default is (0, 0, 255) (red).
+        color_class_text (tuple): The text BGR color for class labels. Default is (255, 255, 255) (white).
+        thickness (int): The thickness of bounding box and text. Default is 4.
+        point_radius (int): The radius of the landmark points to be drawn on the image. Default is 4.
+        connection_schema (list/None): A list of tuples defining how landmarks should be connected to form a skeleton.
+            Each tuple contains two indices representing the landmarks to be connected.
+            If None or empty, only landmarks will be drawn without any connections. Default is None.
+        min_landmark_visibility (float): The minimum confidence threshold for a landmark's visibility to be drawn. Default is 0.25.
+        font: The font type for class labels. Default is cv2.FONT_HERSHEY_SIMPLEX.
+        font_scale (float): The scale factor for font size. Default is 1.5.
+        delta_colors (int): The random seed offset for color variation. Default is 3.
+        dpi (int): Final visualization size (plot is bigger when dpi is higher).
+        random_object_colors (bool): If True, colors for each object are selected randomly.
+        show_confidences (bool): If True and show_class=True, confidences near class are visualized.
+        axis_off (bool): If True, axis is turned off in the final visualization.
+        show_classes_list (list): If empty, visualize all classes. Otherwise, visualize only classes in the list.
+        inference_extra_args (dict/None): Dictionary with extra ultralytics inference parameters.
+        list_of_class_colors (list/None): A list of tuples representing the colors for each class in BGR format.  
+                    If provided, these colors will be used for displaying the classes instead of random colors. 
+                    The number of tuples in the list must match the number of possible classes in the network.
+        return_image_array (bool): If True, the function returns the image BGR array instead of displaying it.
+                                   Default is False.
+
+    Returns:
+        None/np.array
+    """
+
+    # Perform inference
+    extra_args = {} if inference_extra_args is None else inference_extra_args
+    predictions = model.predict(img, imgsz=imgsz, conf=conf, iou=iou, verbose=False, **extra_args)
+
+    labeled_image = img.copy()
+
+    if random_object_colors:
+        random.seed(int(delta_colors))
+
+    class_names = model.names
+
+    # Process each prediction
+    for pred in predictions:
+
+        # Get the bounding boxes and convert them to a list of lists
+        boxes = pred.boxes.xyxy.cpu().int().tolist()
+
+        # Get the classes and convert them to a list
+        classes = pred.boxes.cls.cpu().int().tolist()
+
+        # Get the bounding box confidence scores
+        confidences = pred.boxes.conf.cpu().numpy()
+
+        num_objects = len(classes)
+
+        # Visualization
+        for i in range(num_objects):
+            # Get the class for the current detection
+            class_index = int(classes[i])
+            class_name = class_names[class_index]
+
+            if show_classes_list and class_index not in show_classes_list:
+                continue
+
+            if random_object_colors:
+                color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
+            elif list_of_class_colors is None:
+                # Assign color according to class
+                random.seed(int(classes[i] + delta_colors))
+                color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
+            else:
+                color = list_of_class_colors[class_index]
+
+            box = boxes[i]
+            x_min, y_min, x_max, y_max = box
+
+            # Draw the bounding box
+            if show_boxes:
+                cv2.rectangle(labeled_image, (x_min, y_min), (x_max, y_max), color, thickness)
+
+            if show_class:
+                if show_confidences:
+                    label = f'{str(class_name)} {confidences[i]:.2}'
+                else:
+                    label = str(class_name)
+                (text_width, text_height), _ = cv2.getTextSize(label, font, font_scale, thickness)
+                background_color = (
+                    color_class_background[class_index]
+                    if isinstance(color_class_background, list)
+                    else color_class_background
+                )
+                cv2.rectangle(
+                    labeled_image,
+                    (x_min, y_min),
+                    (x_min + text_width + 5, y_min + text_height + 5),
+                    background_color,
+                    -1,
+                )
+                cv2.putText(
+                    labeled_image,
+                    label,
+                    (x_min + 5, y_min + text_height),
+                    font,
+                    font_scale,
+                    color_class_text,
+                    thickness=thickness,
+                )
+
+    if return_image_array:
+        return labeled_image
+    else:
+        # Display the final image with overlaid boxes and labels
+        plt.figure(figsize=(8, 8), dpi=dpi)
+        labeled_image = cv2.cvtColor(labeled_image, cv2.COLOR_BGR2RGB)
+        plt.imshow(labeled_image)
+        if axis_off:
+            plt.axis('off')
+        plt.show()
+
+
 def get_crops(
     image_full,
     shape_x: int,
@@ -284,7 +438,7 @@ def visualize_results(
     thickness=4,
     font=cv2.FONT_HERSHEY_SIMPLEX,
     font_scale=1.5,
-    delta_colors=0,
+    delta_colors=3,
     dpi=150,
     random_object_colors=False,
     show_confidences=False,
@@ -313,7 +467,7 @@ def visualize_results(
         thickness (int): The thickness of bounding box and text. Default is 4.
         font: The font type for class labels. Default is cv2.FONT_HERSHEY_SIMPLEX.
         font_scale (float): The scale factor for font size. Default is 1.5.
-        delta_colors (int): The random seed offset for color variation. Default is 0.
+        delta_colors (int): The random seed offset for color variation. Default is 3.
         dpi (int): Final visualization size (plot is bigger when dpi is higher). Default is 150.
         random_object_colors (bool): If true, colors for each object are selected randomly. Default is False.
         show_confidences (bool): If true and show_class=True, confidences near class are visualized. Default is False.
diff --git a/patched_yolo_infer/nodes/MakeCropsDetectThem.py b/patched_yolo_infer/nodes/MakeCropsDetectThem.py
@@ -61,9 +61,9 @@ class MakeCropsDetectThem:
     def __init__(
         self,
         image: np.ndarray,
-        model_path="yolov8m.pt",
+        model_path="yolo11m.pt",
         imgsz=640,
-        conf=0.5,
+        conf=0.25,
         iou=0.7,
         classes_list=None,
         segment=False,