diff --git a/.gitignore b/.gitignore index 52232f0..62e821b 100644 --- a/.gitignore +++ b/.gitignore @@ -134,3 +134,5 @@ logs/ .vscode COLCON_IGNORE + +.DS_Store \ No newline at end of file diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 2fec4d0..0000000 --- a/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "tensor_detector/yolov5-tensorrt"] - path = tensor_detector/yolov5-tensorrt - url = https://github.com/noahmr/yolov5-tensorrt.git diff --git a/riptide_mapping/config/binary_classifier.yaml b/riptide_mapping/config/binary_classifier.yaml new file mode 100644 index 0000000..b03900b --- /dev/null +++ b/riptide_mapping/config/binary_classifier.yaml @@ -0,0 +1,13 @@ +/**: + ros__parameters: + binary_classifier: + buffer_size: 500 # max detections held in the rolling buffer used for clustering + buffer_ttl_sec: 8.0 # detections older than this are dropped, so a target we've left behind stops contributing + min_cluster_detections: 6 # how many nearby detections it takes to lock an instance + cluster_radius_m: 0.15 # neighbors within this radius count as the same cluster when finding an instance + assignment_gate_m: 0.20 # once an instance is locked, a new detection farther than this from its centroid is rejected as not belonging to it + exclusion_radius_m: 0.30 # while hunting for the second instance, detections within this radius of the first are thrown out so the first can't bleed into the second + min_instance_separation_m: 0.40 # a candidate second instance must be at least this far from the first to be accepted + max_cluster_variance_m2: 0.02 # reject a cluster whose spread exceeds this. 0 disables the check + centroid_ema_alpha: 0.15 # Exponential Moving Average (EMA) rate for a locked centroid chasing new detections. lower = steadier & slower, higher = faster & noisier + use_3d_distance: false # cluster/measure distance in 3D (x,y,z) vs 2D (x,y). Keep False for top-down flat targets where z is the noisy axis \ No newline at end of file diff --git a/riptide_mapping/config/config.yaml b/riptide_mapping/config/config.yaml index 75982cf..15eeab4 100644 --- a/riptide_mapping/config/config.yaml +++ b/riptide_mapping/config/config.yaml @@ -37,7 +37,7 @@ y: 0.0 z: -0.8 yaw: 0.0 - gate_shark: + gate_rescue: parent: gate_frame covar: x: 1.0 @@ -49,7 +49,7 @@ y: 0.5 z: 0.0 yaw: 180.0 - gate_saw: + gate_repair: parent: gate_frame covar: x: 1.0 @@ -63,6 +63,7 @@ yaw: 180.0 slalom_parent: parent: map + lock_orientation_to_config: true covar: x: 1.0 y: 1.0 @@ -75,6 +76,7 @@ yaw: 180.0 slalom_front: parent: slalom_parent_frame + lock_orientation_to_config: true covar: x: 1.0 y: 1.0 @@ -87,6 +89,7 @@ yaw: 0.0 slalom_middle: parent: slalom_parent_frame + lock_orientation_to_config: true covar: x: 1.0 y: 1.0 @@ -99,6 +102,7 @@ yaw: 0.0 slalom_back: parent: slalom_parent_frame + lock_orientation_to_config: true covar: x: 1.0 y: 1.0 @@ -121,7 +125,7 @@ y: 4.0 z: -3.0 yaw: 180.0 - torpedo_shark_hole: + fire_hole_large: parent: torpedo_frame covar: x: 0.5 @@ -133,7 +137,7 @@ y: -0.029972 z: 0.217424 yaw: 0.0 - torpedo_sawfish_hole: + fire_hole_small: parent: torpedo_frame covar: x: 0.001 @@ -145,7 +149,31 @@ y: 0.11176 z: -0.0889 yaw: 0.0 - bin_target: + blood_hole_large: + parent: torpedo_frame + covar: + x: 0.5 + y: 0.5 + z: 0.5 + yaw: 0.5 + pose: + x: 0.0 + y: -0.029972 + z: 0.217424 + yaw: 0.0 + blood_hole_small: + parent: torpedo_frame + covar: + x: 0.001 + y: 0.001 + z: 0.001 + yaw: 0.001 + pose: + x: 0.0 + y: 0.11176 + z: -0.0889 + yaw: 0.0 + bin: parent: map covar: x: 0.001 @@ -153,11 +181,34 @@ z: 0.001 yaw: 0.001 pose: - x: 11.0 - y: -6.0 - z: -3.25 + x: 1.0 + y: 0.0 + z: -1.2 + yaw: 0.0 + bin_target1: + parent: bin_frame + covar: + x: 0.001 + y: 0.001 + z: 0.001 + yaw: 0.001 + pose: + x: 0.0 + y: 0.0 + z: 0.0 + yaw: 0.0 + bin_target2: + parent: bin_frame + covar: + x: 0.001 + y: 0.001 + z: 0.001 + yaw: 0.001 + pose: + x: 0.0 + y: 0.0 + z: 0.0 yaw: 0.0 - # table locations table: parent: map @@ -171,7 +222,7 @@ y: -4.0 z: -3.25 yaw: 180.0 - table_reefshark: + pill: parent: table_frame covar: x: 0.001 @@ -179,11 +230,11 @@ z: 0.001 yaw: 0.001 pose: - x: 1.0 - y: 0.0 - z: 1.0 + x: -0.25 + y: 0.25 + z: 0.0 yaw: 180.0 - table_sawfish: + bandage: parent: table_frame covar: x: 0.001 @@ -191,11 +242,11 @@ z: 0.001 yaw: 0.001 pose: - x: -1.0 - y: 0.0 - z: 1.0 - yaw: 180.0 - table_basket_pink: + x: -0.25 + y: -0.25 + z: 0.0 + yaw: 180.0 + nut_and_bolt: parent: table_frame covar: x: 0.001 @@ -203,11 +254,11 @@ z: 0.001 yaw: 0.001 pose: - x: 0.305 - y: 0.0 + x: 0.25 + y: 0.25 z: 0.0 yaw: 0.0 - table_basket_yellow: + plug: parent: table_frame covar: x: 0.001 @@ -215,11 +266,11 @@ z: 0.001 yaw: 0.001 pose: - x: -0.305 - y: 0.0 + x: 0.25 + y: -0.25 z: 0.0 - yaw: 0.0 - table_spoon_pink: + yaw: 0.0 + helmet: parent: table_frame covar: x: 0.001 @@ -227,11 +278,11 @@ z: 0.001 yaw: 0.001 pose: - x: 0.1 - y: -0.036 + x: 0.305 + y: 0.0 z: 0.0 yaw: 0.0 - table_bottle_yellow: + warning: parent: table_frame covar: x: 0.001 @@ -239,11 +290,59 @@ z: 0.001 yaw: 0.001 pose: - x: 0.25 - y: 0.25 + x: -0.305 + y: 0.0 z: 0.0 yaw: 0.0 - + compass: + parent: table_frame + covar: + x: 1.0 + y: 1.0 + z: 1.0 + yaw: 1.0 + pose: + x: 1.2 + y: 0.0 + z: 1.0 + yaw: 180.0 + hammer_and_wrench: + parent: table_frame + covar: + x: 1.0 + y: 1.0 + z: 1.0 + yaw: 1.0 + pose: + x: -1.2 + y: 0.0 + z: 1.0 + yaw: 0.0 + buoy: + parent: table_frame + covar: + x: 1.0 + y: 1.0 + z: 1.0 + yaw: 1.0 + pose: + x: 0.0 + y: 1.2 + z: 1.0 + yaw: 270.0 + sos: + parent: table_frame + covar: + x: 1.0 + y: 1.0 + z: 1.0 + yaw: 1.0 + pose: + x: 0.0 + y: -1.2 + z: 1.0 + yaw: 90.0 + prequal_gate: parent: map covar: diff --git a/riptide_mapping/config/dummy_detections.yaml b/riptide_mapping/config/dummy_detections.yaml index 659c41b..cb3427d 100644 --- a/riptide_mapping/config/dummy_detections.yaml +++ b/riptide_mapping/config/dummy_detections.yaml @@ -1,6 +1,26 @@ /**/dummydetections: ros__parameters: timer_period: 0.05 + objects: + - gate + - gate_shark + - gate_saw + - slalom_front + - slalom_middle + - slalom_back + - torpedo + - torpedo_shark_hole + - torpedo_sawfish_hole + - bin_target + - blood_1 + - blood_2 + - table + - table_reefshark + - table_sawfish + - table_basket_pink + - table_basket_yellow + - table_spoon_pink + - table_bottle_yellow detection_data: gate: pose: [2.0, 0.0, -0.8, 0.0, 0.0, 0.0] @@ -31,7 +51,7 @@ noise: 0.1 score: 0.8 downward: false - pub_invalid_orientation: false + pub_invalid_orientation: true min_dist: 0.5 max_dist: 3.0 slalom_middle: @@ -82,6 +102,24 @@ pub_invalid_orientation: false min_dist: 0.5 max_dist: 3.0 + blood_1: + class_id: blood + pose: [4.0, -3.0, -3.25, 0.0, 0.0, 0.0] + noise: 0.05 + score: 0.8 + downward: true + pub_invalid_orientation: false + min_dist: 0.5 + max_dist: 3.0 + blood_2: + class_id: blood + pose: [4.7, -3.5, -3.25, 0.0, 0.0, 0.0] + noise: 0.05 + score: 0.8 + downward: true + pub_invalid_orientation: false + min_dist: 0.5 + max_dist: 3.0 table: pose: [4.0, 8.0, -3.25, 0.0, 0.0, 0.0] noise: 0.05 diff --git a/riptide_mapping/launch/mapping.launch.py b/riptide_mapping/launch/mapping.launch.py index 28052cb..307aa73 100644 --- a/riptide_mapping/launch/mapping.launch.py +++ b/riptide_mapping/launch/mapping.launch.py @@ -2,18 +2,9 @@ import launch from ament_index_python.packages import get_package_share_directory from launch.actions import DeclareLaunchArgument, GroupAction -from launch_ros.actions import Node, PushRosNamespace, ComposableNodeContainer -from launch_ros.descriptions import ComposableNode +from launch_ros.actions import Node, PushRosNamespace from launch.substitutions import PathJoinSubstitution, LaunchConfiguration as LC -# cfg_36h11 = { -# "image_transport": "raw", -# "family": "36h11", -# "size": 0.508, -# "max_hamming": 0, -# "z_up": True -# } - config_dir = os.path.join( get_package_share_directory('riptide_mapping2'), 'config' @@ -51,6 +42,11 @@ def generate_launch_description(): default_value=[LC('config'), ".yaml"] ), + DeclareLaunchArgument( + "binary_classifier_yaml", + default_value="binary_classifier.yaml" + ), + GroupAction([ PushRosNamespace( LC("robot") @@ -69,6 +65,10 @@ def generate_launch_description(): PathJoinSubstitution([ config_dir, LC("config_yaml") + ]), + PathJoinSubstitution([ + config_dir, + LC("binary_classifier_yaml") ]) ] ), @@ -104,36 +104,5 @@ def generate_launch_description(): ] ), - # ComposableNodeContainer( - # name='tag_container', - # namespace="apriltag", - # package='rclcpp_components', - # executable='component_container', - # composable_node_descriptions=[ - # ComposableNode( - # name='apriltag_36h11', - # package='apriltag_ros', plugin='AprilTagNode', - # remappings=[ - # # This maps the 'raw' images for simplicity of demonstration. - # # In practice, this will have to be the rectified 'rect' images. - # ("image_rect", - # "ffc/zed_node/left/image_rect_color"), - # ("camera_info", - # "ffc/zed_node/left/camera_info"), - # ], - # parameters=[cfg_36h11], - # extra_arguments=[{'use_intra_process_comms': True}], - # ) - # ], - # output='screen' - # ), - - # Node( - # package='tf2_ros', - # executable='static_transform_publisher', - # name='surface_frame_node', - # arguments=["0", "0.4572", "0", "0", "-1.5707", "-1.5707", - # "tag36h11:0", "estimated_origin_frame"] - # ) ], scoped=True) ]) diff --git a/riptide_mapping/riptide_mapping2/binary_classifier.py b/riptide_mapping/riptide_mapping2/binary_classifier.py new file mode 100644 index 0000000..5d9a8c3 --- /dev/null +++ b/riptide_mapping/riptide_mapping2/binary_classifier.py @@ -0,0 +1,445 @@ +from collections import deque +from dataclasses import dataclass, field +import numpy + +# fraction of farthest cluster points dropped before averaging the centroid +# not a param because we really shouldn't need to tweak this (would likely do more harm than good) +CLUSTER_TRIM_QUANTILE = 0.90 + +# State machine for binary classifier +# stopped -> aquire_first -> track_first -> [frozen] -> aquire_second -> track_second -> stopped +# frozen is optional and can be skipped to go from track_first -> aquire_second directly +# stop() returns to stopped from any running state +class BinaryClassifierState: + STOPPED = "stopped" # not running + ACQUIRE_FIRST = "acquire_first" # trying to find the first cluster to track + TRACK_FIRST = "track_first" # tracking the first cluster + FROZEN = "frozen" # buffer is frozen, first is still tracking + ACQUIRE_SECOND = "acquire_second" # trying to find the second cluster + TRACK_SECOND = "track_second" # tracking the second cluster + +# Defaults, overridden by the launch yaml +DEFAULTS = { + "buffer_size": 500, # max detections held in the rolling buffer used for clustering + "buffer_ttl_sec": 15.0, # detections older than this are dropped, so a target we've left behind stops contributing + "min_cluster_detections": 6, # how many nearby detections it takes to lock an instance + "cluster_radius_m": 0.15, # neighbors within this radius count as the same cluster when finding an instance + "assignment_gate_m": 0.20, # once an instance is locked, a new detection farther than this from its centroid is rejected as not belonging to it + "exclusion_radius_m": 0.30, # while hunting for the second instance, detections within this radius of the first are thrown out so the first can't bleed into the second + "min_instance_separation_m": 0.40, # a candidate second instance must be at least this far from the first to be accepted + "max_cluster_variance_m2": 0.02, # reject a cluster whose spread exceeds this. 0 disables the check + "centroid_ema_alpha": 0.15, # Exponential Moving Average (EMA) rate for a locked centroid chasing new detections. lower = steadier & slower, higher = faster & noisier + "use_3d_distance": False, # cluster/measure distance in 3D (x,y,z) vs 2D (x,y). Keep False for top-down flat targets where z is the noisy axis +} + +@dataclass +class BinaryClassifierParams: + buffer_size: int = DEFAULTS["buffer_size"] + buffer_ttl_sec: float = DEFAULTS["buffer_ttl_sec"] + min_cluster_detections: int = DEFAULTS["min_cluster_detections"] + cluster_radius_m: float = DEFAULTS["cluster_radius_m"] + assignment_gate_m: float = DEFAULTS["assignment_gate_m"] + exclusion_radius_m: float = DEFAULTS["exclusion_radius_m"] + min_instance_separation_m: float = DEFAULTS["min_instance_separation_m"] + max_cluster_variance_m2: float = DEFAULTS["max_cluster_variance_m2"] + centroid_ema_alpha: float = DEFAULTS["centroid_ema_alpha"] + use_3d_distance: bool = DEFAULTS["use_3d_distance"] + +@dataclass +class DetectionSample: + x: float + y: float + z: float + score: float + stamp_sec: float + + def point(self): + return numpy.array([self.x, self.y, self.z], dtype=numpy.float64) + +@dataclass +class AssignmentResult: + accepted: bool = False + target_name: str = "" + reason: str = "" + distance_m: float = None + +class BinaryClassifier: + def __init__(self, node=None, param_namespace="binary_classifier"): + self.node = node + self.param_namespace = param_namespace + + self.params = BinaryClassifierParams() + self.samples = deque(maxlen=max(1, self.params.buffer_size)) + + self.state = BinaryClassifierState.STOPPED + self.class_name = "" + self.instance1_name = "" + self.instance2_name = "" + + self.instance1_centroid = None + self.instance2_centroid = None + + self.declare_params() + self.update_params() + + @property + def running(self): + return self.state != BinaryClassifierState.STOPPED + + @property + def first_locked(self): + return self.instance1_centroid is not None + + @property + def second_locked(self): + return self.instance2_centroid is not None + + @property + def buffer_frozen(self): + return self.state == BinaryClassifierState.FROZEN + + def param_name(self, name): + return "{}.{}".format(self.param_namespace, name) + + def declare_params(self): + if self.node is None: + return + # defaults come from the shared DEFAULTS dict; the launch-provided yaml overrides them + self.node.declare_parameters( + namespace="", + parameters=[(self.param_name(name), value) for name, value in DEFAULTS.items()], + ) + + def update_params(self): + if self.node is None: + return + + self.params = BinaryClassifierParams( + buffer_size=int(self.node.get_parameter(self.param_name("buffer_size")).value), + buffer_ttl_sec=float(self.node.get_parameter(self.param_name("buffer_ttl_sec")).value), + min_cluster_detections=int(self.node.get_parameter(self.param_name("min_cluster_detections")).value), + cluster_radius_m=float(self.node.get_parameter(self.param_name("cluster_radius_m")).value), + assignment_gate_m=float(self.node.get_parameter(self.param_name("assignment_gate_m")).value), + exclusion_radius_m=float(self.node.get_parameter(self.param_name("exclusion_radius_m")).value), + min_instance_separation_m=float(self.node.get_parameter(self.param_name("min_instance_separation_m")).value), + max_cluster_variance_m2=float(self.node.get_parameter(self.param_name("max_cluster_variance_m2")).value), + centroid_ema_alpha=float(self.node.get_parameter(self.param_name("centroid_ema_alpha")).value), + use_3d_distance=bool(self.node.get_parameter(self.param_name("use_3d_distance")).value), + ) + + # re-wrap the existing samples in case buffer_size changed + self.samples = deque(self.samples, maxlen=max(1, self.params.buffer_size)) + + def start(self, class_name, instance1_name, instance2_name): + self.update_params() + + class_name = class_name.strip() + instance1_name = instance1_name.strip() + instance2_name = instance2_name.strip() + + if self.running: + return False, "BinaryClassifier is already running" + + if class_name == "": + return False, "class_name cannot be empty" + + if instance1_name == "" or instance2_name == "": + return False, "instance names cannot be empty" + + if instance1_name == instance2_name: + return False, "instance names must be different" + + self.samples.clear() + + self.class_name = class_name + self.instance1_name = instance1_name + self.instance2_name = instance2_name + + self.instance1_centroid = None + self.instance2_centroid = None + self.state = BinaryClassifierState.ACQUIRE_FIRST + + return True, "BinaryClassifier started for class {}".format(class_name) + + def freeze_buffer(self): + # Freeze the buffer while autonomy goes off to do the first task + # Nothing is added or pruned, so the second instance detections we already gathered survive even with a short TTL + # Only valid once the first instance is locked + if not self.running: + return False, "BinaryClassifier is not running" + + if self.state == BinaryClassifierState.FROZEN: + return True, "BinaryClassifier buffer already frozen" + + if self.state != BinaryClassifierState.TRACK_FIRST: + return False, "Can only lock the buffer while tracking the first instance" + + self.state = BinaryClassifierState.FROZEN + return True, "BinaryClassifier buffer frozen" + + def start_second(self): + self.update_params() + + if not self.running: + return False, "BinaryClassifier is not running" + + # autonomy decides when to go looking for the second instance, but the first has to be locked first + # (i.e. instance1_centroid has to exist) + if not self.first_locked: + return False, "Cannot start second instance before first instance is locked" + + if self.state == BinaryClassifierState.ACQUIRE_SECOND or self.state == BinaryClassifierState.TRACK_SECOND: + return True, "BinaryClassifier is already working on second instance" + + # leaving FROZEN (or TRACK_FIRST) unfreezes the buffer. Dump what we preserved to + # try to lock instance 2 from history, don't wait for a fresh detection + self.state = BinaryClassifierState.ACQUIRE_SECOND + + candidate = self.find_best_cluster(self.samples_outside_first_instance()) + if candidate is not None: + separation = self.distance(candidate["centroid"], self.instance1_centroid) + if separation >= self.params.min_instance_separation_m: + self.instance2_centroid = candidate["centroid"] + self.state = BinaryClassifierState.TRACK_SECOND + + return True, "BinaryClassifier switched to second instance" + + def stop(self): + if not self.running: + return False, "BinaryClassifier is not running" + + self.samples.clear() + + self.state = BinaryClassifierState.STOPPED + self.class_name = "" + self.instance1_name = "" + self.instance2_name = "" + + self.instance1_centroid = None + self.instance2_centroid = None + + return True, "BinaryClassifier stopped" + + def observe(self, sample, now_sec): + if not self.running: + return AssignmentResult(False, "", "classifier stopped") + + # If FROZEN, don't append/prune, so the detections gathered before autonomy executes the first task survive regardless of TTL. + # Instance 1 still tracks because its centroid is a field, not a buffer entry + if self.state == BinaryClassifierState.FROZEN: + return self.observe_track_first(sample) + + # prune on both sides of the append so a fresh sample can't keep stale ones alive, and the new sample itself is held to the same TTL + self.prune(now_sec) + self.samples.append(sample) + self.prune(now_sec) + + if self.state == BinaryClassifierState.ACQUIRE_FIRST: + return self.observe_acquire_first(sample) + + if self.state == BinaryClassifierState.TRACK_FIRST: + return self.observe_track_first(sample) + + if self.state == BinaryClassifierState.ACQUIRE_SECOND: + return self.observe_acquire_second(sample) + + if self.state == BinaryClassifierState.TRACK_SECOND: + return self.observe_track_second(sample) + + return AssignmentResult(False, "", "invalid classifier state") + + def observe_acquire_first(self, sample): + # try to lock instance 1 from the buffer, once locked we fall through and start assigning + if self.instance1_centroid is None: + cluster = self.find_best_cluster(list(self.samples)) + if cluster is not None: + self.instance1_centroid = cluster["centroid"] + self.state = BinaryClassifierState.TRACK_FIRST + + if self.instance1_centroid is None: + return AssignmentResult(False, "", "acquiring first instance") + + return self.assign_to_locked_instance( + sample, + self.instance1_centroid, + self.instance1_name, + 1, + "first instance locked, sample outside gate", + ) + + def observe_track_first(self, sample): + return self.assign_to_locked_instance( + sample, + self.instance1_centroid, + self.instance1_name, + 1, + "tracking first instance, sample outside gate", + ) + + def observe_acquire_second(self, sample): + # reject anything sitting on top of instance 1 so it can't leak into instance 2 + if self.instance1_centroid is not None: + dist_to_first = self.distance(sample.point(), self.instance1_centroid) + if dist_to_first < self.params.exclusion_radius_m: + return AssignmentResult(False, "", "sample inside first instance exclusion radius", dist_to_first) + + if self.instance2_centroid is None: + candidate_samples = self.samples_outside_first_instance() + cluster = self.find_best_cluster(candidate_samples) + + if cluster is not None: + # a valid second instance has to be physically far enough from the first + separation = self.distance(cluster["centroid"], self.instance1_centroid) + if separation < self.params.min_instance_separation_m: + return AssignmentResult(False, "", "second candidate too close to first instance", separation) + + self.instance2_centroid = cluster["centroid"] + self.state = BinaryClassifierState.TRACK_SECOND + + if self.instance2_centroid is None: + return AssignmentResult(False, "", "acquiring second instance") + + return self.assign_to_locked_instance( + sample, + self.instance2_centroid, + self.instance2_name, + 2, + "second instance locked, sample outside gate", + ) + + def observe_track_second(self, sample): + # keep excluding instance 1's neighborhood even while tracking the second + if self.instance1_centroid is not None: + dist_to_first = self.distance(sample.point(), self.instance1_centroid) + if dist_to_first < self.params.exclusion_radius_m: + return AssignmentResult(False, "", "sample inside first instance exclusion radius", dist_to_first) + + return self.assign_to_locked_instance( + sample, + self.instance2_centroid, + self.instance2_name, + 2, + "tracking second instance, sample outside gate", + ) + + def assign_to_locked_instance(self, sample, centroid, target_name, instance_num, reject_reason): + if centroid is None: + return AssignmentResult(False, "", "instance is not locked") + + dist = self.distance(sample.point(), centroid) + + # too far from the locked centroid -> not this instance + if dist > self.params.assignment_gate_m: + return AssignmentResult(False, "", reject_reason, dist) + + self.update_centroid(instance_num, sample.point()) + return AssignmentResult(True, target_name, "assigned", dist) + + def update_centroid(self, instance_num, point): + # slow EMA so a single noisy detection can't yank the lock around + alpha = self.params.centroid_ema_alpha + + if instance_num == 1 and self.instance1_centroid is not None: + self.instance1_centroid = (1.0 - alpha) * self.instance1_centroid + alpha * point + + if instance_num == 2 and self.instance2_centroid is not None: + self.instance2_centroid = (1.0 - alpha) * self.instance2_centroid + alpha * point + + def samples_outside_first_instance(self): + if self.instance1_centroid is None: + return list(self.samples) + + out = [] + + for sample in self.samples: + dist = self.distance(sample.point(), self.instance1_centroid) + + # only keep samples clearly away from instance 1 as candidates for instance 2 + if dist >= self.params.exclusion_radius_m and dist >= self.params.min_instance_separation_m: + out.append(sample) + + return out + + def find_best_cluster(self, samples): + min_count = max(1, self.params.min_cluster_detections) + + if len(samples) < min_count: + return None + + # cluster in the metric space (2D by default), but keep full 3D points for the final centroid + metric_points = numpy.array([self.metric_point(sample.point()) for sample in samples], dtype=numpy.float64) + full_points = numpy.array([sample.point() for sample in samples], dtype=numpy.float64) + + # greedy mode-seek: whichever sample has the most neighbors within cluster_radius wins + best_indices = None + best_count = 0 + + for i in range(metric_points.shape[0]): + dists = numpy.linalg.norm(metric_points - metric_points[i], axis=1) + indices = numpy.where(dists <= self.params.cluster_radius_m)[0] + + if len(indices) > best_count: + best_count = len(indices) + best_indices = indices + + if best_indices is None or best_count < min_count: + return None + + cluster_points = full_points[best_indices] + + # drop the farthest CLUSTER_TRIM_QUANTILE tail (vs the median center) before averaging so stragglers don't pull the centroid + center = numpy.nanmedian(cluster_points, axis=0) + dists_to_center = numpy.array([self.distance(point, center) for point in cluster_points], dtype=numpy.float64) + + # only trim if the cluster has a couple samples to spare above the minimum + if len(dists_to_center) >= min_count + 2: + cutoff = numpy.nanquantile(dists_to_center, CLUSTER_TRIM_QUANTILE) + trimmed = cluster_points[dists_to_center <= cutoff] + + if len(trimmed) >= min_count: + cluster_points = trimmed + + centroid = numpy.nanmean(cluster_points, axis=0) + + metric_cluster_points = numpy.array([self.metric_point(point) for point in cluster_points], dtype=numpy.float64) + metric_centroid = self.metric_point(centroid) + + # reject a cluster that's too spread out to be a single object + variance_m2 = float(numpy.nanmean(numpy.sum((metric_cluster_points - metric_centroid) ** 2, axis=1))) + + if self.params.max_cluster_variance_m2 > 0.0 and variance_m2 > self.params.max_cluster_variance_m2: + return None + + return { + "centroid": centroid, + "count": len(cluster_points), + "variance_m2": variance_m2, + } + + def age_buffer(self, now_sec): + # Time-driven TTL eviction + if not self.running or self.state == BinaryClassifierState.FROZEN: + return + self.prune(now_sec) + + def prune(self, now_sec): + # never called while FROZEN (age_buffer/observe gates it) + + if self.params.buffer_ttl_sec <= 0.0: + return + + # drop samples older than the TTL so old views of a bin don't linger after we've moved on + self.samples = deque( + [sample for sample in self.samples if now_sec - sample.stamp_sec <= self.params.buffer_ttl_sec], + maxlen=max(1, self.params.buffer_size), + ) + + def distance(self, point_a, point_b): + return float(numpy.linalg.norm(self.metric_point(point_a) - self.metric_point(point_b))) + + def metric_point(self, point): + # default to 2D: for top-down flat targets the z/depth axis is the noisiest, so leave it out of distances + if self.params.use_3d_distance: + return point[:3] + + return point[:2] \ No newline at end of file diff --git a/riptide_mapping/riptide_mapping2/dummydetections.py b/riptide_mapping/riptide_mapping2/dummydetections.py index cae06d1..dc20f2d 100644 --- a/riptide_mapping/riptide_mapping2/dummydetections.py +++ b/riptide_mapping/riptide_mapping2/dummydetections.py @@ -13,6 +13,7 @@ from rcl_interfaces.msg import SetParametersResult from geometry_msgs.msg import PoseWithCovariance, PoseWithCovarianceStamped, Pose, Quaternion from rclpy.node import Node +from rclpy.parameter import Parameter from std_msgs.msg import Header from std_srvs.srv import SetBool from tf2_ros import Buffer, TransformException, TransformListener @@ -25,18 +26,6 @@ TOPIC_NAME = "detected_objects" CAMERA_ROTATION = tf3d.euler.euler2quat(-1.5707, 0, -1.5707) # makes orientations agree with camera -objects = [ - "gate", - "gate_shark", - "gate_saw", - "slalom_front", - "slalom_middle", - "slalom_back", - "torpedo", - "torpedo_shark_hole", - "torpedo_sawfish_hole", - "bin_target", -] config = {} @@ -63,7 +52,7 @@ def __init__(self): self.pubs = [ ] self.srv = self.create_service(SetBool, 'set_camera_is_dfc', self.setActiveCameraCb) - for object in objects: + for object in self.objects: self.pubs.append(self.create_publisher(PoseWithCovarianceStamped, f"dummydetections/{object}", 10)) self.smoothed_slalom_dist = 3.0 @@ -73,6 +62,9 @@ def __init__(self): def declareParams(self): + self.declare_parameter("objects", Parameter.Type.STRING_ARRAY) + self.objects = list(self.get_parameter("objects").value) + self.declare_parameter("timer_period", 0.0) self.declare_parameter("simulate_pool", False) self.declare_parameter("forward_camera_hfov", 60) @@ -84,16 +76,18 @@ def declareParams(self): self.declare_parameter("downward_camera_frame", "downward_link") self.declare_parameter("downward_camera_pub_frame", "downward_link") - for object in objects: + for object in self.objects: + self.declare_parameter(f"detection_data.{object}.class_id", object) self.declare_parameter(f"detection_data.{object}.pose", [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]) self.declare_parameter(f"detection_data.{object}.noise", 0.0) self.declare_parameter(f"detection_data.{object}.score", 0.0) self.declare_parameter(f"detection_data.{object}.downward", False) self.declare_parameter(f"detection_data.{object}.publish_invalid_orientation", False) + self.declare_parameter(f"detection_data.{object}.pub_invalid_orientation", False) self.declare_parameter(f"detection_data.{object}.min_dist", 0.0) self.declare_parameter(f"detection_data.{object}.max_dist", 0.0) - - + + #TODO: UPDATE ALL THE OTHER NON OBJECT PARAMS LIKE SIMULATE_POOL def updateParams(self, params): for param in params: #set timer to new rate if there is a new rate to set @@ -214,14 +208,17 @@ def timerCB(self): forwardsDetectArray.header = fwdHeader downwardsDetectArray.header = dwdHeader - for i in range(0, len(objects)): - objectName = objects[i] + for i in range(0, len(self.objects)): + objectName = self.objects[i] self.get_logger().debug(f"Processing dummy detection for {objectName}") poseArr = self.get_parameter(f"detection_data.{objectName}.pose").value # this pose is in map frame noise = self.get_parameter(f"detection_data.{objectName}.noise").value score = self.get_parameter(f"detection_data.{objectName}.score").value - publishInvalid = self.get_parameter(f"detection_data.{objectName}.publish_invalid_orientation").value + # This is funny so I kept it + publishInvalid = self.get_parameter(f"detection_data.{objectName}.publish_invalid_orientation").value or \ + self.get_parameter(f"detection_data.{objectName}.pub_invalid_orientation").value + classId = self.get_parameter(f"detection_data.{objectName}.class_id").value self.get_logger().debug(f"Object name: {objectName}") @@ -303,7 +300,7 @@ def timerCB(self): # hypothesis = ObjectHypothesisWithPose() - hypothesis.hypothesis.class_id = objectName + hypothesis.hypothesis.class_id = classId hypothesis.hypothesis.score = score hypothesis.pose.pose = framePose diff --git a/riptide_mapping/riptide_mapping2/mapping.py b/riptide_mapping/riptide_mapping2/mapping.py index 9af0194..ad841ce 100755 --- a/riptide_mapping/riptide_mapping2/mapping.py +++ b/riptide_mapping/riptide_mapping2/mapping.py @@ -7,10 +7,11 @@ from rclpy.time import Time from std_msgs.msg import Header, Int8 -from geometry_msgs.msg import PoseWithCovariance, PoseWithCovarianceStamped, Pose, Vector3, Point +from std_srvs.srv import Trigger +from geometry_msgs.msg import PoseWithCovariance, PoseWithCovarianceStamped, Pose, Vector3, Point, PoseStamped from vision_msgs.msg import Detection3DArray, ObjectHypothesisWithPose from tf2_geometry_msgs import do_transform_pose_stamped -from riptide_msgs2.srv import MappingTarget +from riptide_msgs2.srv import MappingTarget, StartBinaryClassifier from riptide_msgs2.msg import MappingTargetInfo, LedCommand import tf2_ros @@ -19,6 +20,7 @@ from transforms3d.euler import euler2quat from location import Location +from binary_classifier import BinaryClassifier, DetectionSample from tf2_msgs.msg import TFMessage import math @@ -37,10 +39,11 @@ def callback(self, data: TFMessage) -> None: class OutstandingDetectionInfo: - def __init__(self, det_result: ObjectHypothesisWithPose, det_header: Header, closest_object: str): + def __init__(self, det_result: ObjectHypothesisWithPose, det_header: Header, closest_object: str, binary_classifier_detection=False): self.det_result = det_result self.det_header = det_header self.closest_object = closest_object + self.binary_classifier_detection = binary_classifier_detection # Instead of updating the location for individual objects we apply a global offset to account for robot drift as we @@ -53,33 +56,64 @@ def __init__(self): super().__init__('riptide_mapping2') self.objects = { + # Gate "gate": dict(), - "gate_shark": dict(), - "gate_saw": dict(), + "gate_repair": dict(), + "gate_rescue": dict(), + + # Slalom (left the same for autonomy compatability) "slalom_parent": dict(), "slalom_front": dict(), "slalom_middle": dict(), "slalom_back": dict(), + + # Torpedo "torpedo": dict(), - "torpedo_shark_hole": dict(), - "torpedo_sawfish_hole": dict(), - "bin_target": dict(), + "fire_hole_large": dict(), + "fire_hole_small": dict(), + "blood_hole_large": dict(), + "blood_hole_small": dict(), + + # Bin + "fire": dict(), # only here to match the class name for autonomy safety + "blood": dict(), # only here to match the class name for autonomy safety + "bin": dict(), + "bin_target1": dict(), + "bin_target2": dict(), + + # Octagon + "compass": dict(), + "hammer_and_wrench": dict(), + "buoy": dict(), + "sos": dict(), + + # Table "table": dict(), - "table_reefshark": dict(), - "table_sawfish": dict(), - "table_basket_pink": dict(), - "table_basket_yellow": dict(), - "table_spoon_pink": dict(), - "table_bottle_yellow": dict(), + "pill": dict(), + "plug": dict(), + "nut_and_bolt": dict(), + "bandage": dict(), + "helmet": dict(), + "warning": dict(), + + # Prequal "prequal_gate": dict(), "prequal_pole": dict(), } - + self.downwards_objects = { + # Bin + "bin": dict(), + "bin_target1": dict(), + "bin_target2": dict(), + + # Table + "pill": dict(), + "plug": dict(), "nut_and_bolt": dict(), "bandage": dict(), - "fire": dict(), - "blood": dict(), + "helmet": dict(), + "warning": dict(), } self.outstanding_detections: list[OutstandingDetectionInfo] = [] @@ -97,7 +131,8 @@ def __init__(self): ('init_data.{}.covar.x'.format(object), 1.0), ('init_data.{}.covar.y'.format(object), 1.0), ('init_data.{}.covar.z'.format(object), 1.0), - ('init_data.{}.covar.yaw'.format(object), 1.0) + ('init_data.{}.covar.yaw'.format(object), 1.0), + ('init_data.{}.lock_orientation_to_config'.format(object), False), ] ) @@ -122,18 +157,82 @@ def __init__(self): self.target_object = "" self.lock_map = False self.offset = Location(Point(), Vector3(), int(self.get_parameter("buffer_size").value), tuple(self.get_parameter("quantile").value)) + self.binary_classifier = BinaryClassifier(self) + + # Store seeds (which move tf and reset cov once we enter tracking) + self.instance1_seeded = False + self.instance2_seeded = False self.add_on_set_parameters_callback(self.param_callback) self.create_subscription(Detection3DArray, "detected_objects".format(self.get_namespace()), self.vision_callback, qos_profile_system_default) self.status_pub = self.create_publisher(MappingTargetInfo, "state/mapping", qos_profile_system_default) - self.create_service(MappingTarget, "mapping_target", self.target_callback) + self.create_service(MappingTarget, "mapping_target", self.target_callback) # Should prob be mapping ns but not changing for compatability for now + self.create_service(Trigger, "mapping/reset_mapping", self.reset_mapping_callback) + + # binary classifier services (resolve under this nodes ns) + self.create_service(StartBinaryClassifier, "mapping/start_binary_classifier", self.start_binary_classifier_callback) + self.create_service(Trigger, "mapping/freeze_binary_classifier_buffer", self.freeze_binary_classifier_buffer_callback) + self.create_service(Trigger, "mapping/start_binary_classifier_second", self.start_binary_classifier_second_callback) + self.create_service(Trigger, "mapping/stop_binary_classifier", self.stop_binary_classifier_callback) + self.led_pulse_pub = self.create_publisher(LedCommand, "command/led", qos_profile_system_default) self.last_pub_time = Time() self.publish_pose() self.publish_timer = self.create_timer(0.125, self.update_oustanding_items) - + def reset_runtime_state(self): + # Reset all configured map objects back to init_data params. + # This also clears each Location's internal sample buffer because create_location() constructs a new Location object + for object_name in self.objects.keys(): + self.create_location(object_name) + + # Reset global map drift/offset buffer + self.offset = Location( + Point(), + Vector3(), + int(self.get_parameter("buffer_size").value), + tuple(self.get_parameter("quantile").value), + ) + + # Drop queued detections waiting on TF + self.outstanding_detections.clear() + + # Stop/clear binary classifier state + try: + self.binary_classifier.stop() + except Exception as ex: + self.get_logger().warning(f"Binary classifier stop during reset failed: {ex}") + + # Reset mapping mode + self.target_object = "" + self.lock_map = False + self.instance1_seeded = False + self.instance2_seeded = False + + # Immediately publish reset topics/TF/status + self.publish_pose() + + def reset_mapping_callback(self, request: Trigger.Request, response: Trigger.Response): + self.get_logger().info("Resetting mapping runtime state to init_data") + self.reset_runtime_state() + + response.success = True + response.message = "Mapping reset to init_data" + return response + + def pulse_detection_led(self, red=0, green=255, blue=0): + # Pulse LEDs to indicate a detection was received + ledPulse = LedCommand() + ledPulse.target = LedCommand.TARGET_ALU + ledPulse.mode = LedCommand.SINGLETON_FLASH + + ledPulse.red = red + ledPulse.green = green + ledPulse.blue = blue + + self.led_pulse_pub.publish(ledPulse) + def create_location(self, object: str): #create the Location object using two vector3s describing coordinates and euler rotation xyz = Point() @@ -181,14 +280,163 @@ def target_callback(self, request: MappingTarget.Request, response: MappingTarge return response + def start_binary_classifier_callback(self, request: StartBinaryClassifier.Request, response: StartBinaryClassifier.Response): + class_name = str(request.class_name).strip() + target1 = str(request.object1_name).strip() + target2 = str(request.object2_name).strip() + + if self.binary_classifier.running: + response.success = False + response.message = "BinaryClassifier is already running" + return response + + if class_name == "": + response.success = False + response.message = "class_name cannot be empty" + return response + + if class_name not in self.objects.keys() and class_name not in self.downwards_objects.keys(): + response.success = False + response.message = f"Unknown class_name {class_name}" + return response + + # both instance targets are caller-supplied; they must be real mapping objects we can publish/seed + if target1 not in self.objects.keys() or target2 not in self.objects.keys(): + response.success = False + response.message = f"instance targets must exist in mapping objects (got '{target1}', '{target2}')" + return response + + success, message = self.binary_classifier.start( + class_name, + target1, + target2, + ) + + if not success: + response.success = False + response.message = message + return response + + self.objects[target1]["location"].reset() + self.objects[target2]["location"].reset() + + self.instance1_seeded = False + self.instance2_seeded = False + + self.target_object = target1 + self.lock_map = False + self.offset.cool_buffer() + self.outstanding_detections.clear() + + response.success = True + response.message = message + self.get_logger().info(message) + return response + + def freeze_binary_classifier_buffer_callback(self, request: Trigger.Request, response: Trigger.Response): + # Freeze the classifier buffer while autonomy goes to do the first task, so the + # second-instance detections already gathered survive the short TTL. Unfrozen by start_second. + success, message = self.binary_classifier.freeze_buffer() + + if success: + self.get_logger().info(message) + else: + self.get_logger().warning(message) + + response.success = success + response.message = message + return response + + def start_binary_classifier_second_callback(self, request: Trigger.Request, response: Trigger.Response): + success, message = self.binary_classifier.start_second() + + if success: + # If start_second locked instance 2 from the (preserved) buffer, publish it now as a TF for autonomy to navigate to, + # but high covariance so it isn't treated as confirmed. + if self.binary_classifier.second_locked: + self.seed_object_estimate(self.binary_classifier.instance2_name, self.binary_classifier.instance2_centroid) + self.instance2_seeded = True + self.publish_pose() + else: + self.get_logger().info("start_second: no buffered cluster yet, will lock on live detection") + + self.get_logger().info(message) + else: + self.get_logger().warning(message) + + response.success = success + response.message = message + return response + + def stop_binary_classifier_callback(self, request: Trigger.Request, response: Trigger.Response): + success, message = self.binary_classifier.stop() + + if success: + self.target_object = "" + self.outstanding_detections.clear() + self.get_logger().info(message) + else: + self.get_logger().warning(message) + + response.success = success + response.message = message + return response + + def maybe_seed_binary_instances(self): + # Fire once on each aquire -> track transition + # Rebuild the target's Location centered on the classifier centroid so the published pose snaps to the cluster instead of crawling + # from the reckoned init pose (needs to only fire once, or cov would never drop) + bc = self.binary_classifier + + if bc.first_locked and not self.instance1_seeded: + self.seed_object_estimate(bc.instance1_name, bc.instance1_centroid) + self.instance1_seeded = True + + if bc.second_locked and not self.instance2_seeded: + self.seed_object_estimate(bc.instance2_name, bc.instance2_centroid) + self.instance2_seeded = True + + def seed_object_estimate(self, child: str, centroid_map): + # Seed a map-parented object's pose so both its Location and TF frame land on the same centroid. + # Rebuilt Location stays soft/unwarmed (cov=1.0) until its buffer fills. + if child not in self.objects.keys(): + self.get_logger().error(f"seed_object_estimate: unknown object {child}") + return + + offset_pos = self.offset.get_pose().pose.position + + cx = float(centroid_map[0]) + cy = float(centroid_map[1]) + cz = float(centroid_map[2]) + + # rebuild Location centered on the centroid -> topic position = centroid, cov = soft 1.0 + xyz = Point(x=cx, y=cy, z=cz) + rpy = Vector3() # no orientation from the classifier, leave identity + self.objects[child]["location"] = Location( + xyz, rpy, + int(self.get_parameter("buffer_size").value), + tuple(self.get_parameter("quantile").value), + ) + + # TF init_pose cancels the shared offset so map -> offset -> child lands on the centroid + init_pose = Pose() + init_pose.position.x = cx - offset_pos.x + init_pose.position.y = cy - offset_pos.y + init_pose.position.z = cz - offset_pos.z + init_pose.orientation.w = 1.0 + self.objects[child]["init_pose"] = init_pose + def vision_callback(self, detections: Detection3DArray): if self.lock_map: return - closest_object = self.closest_object(detections) + # Bypass normal mapping behavior for binary classifier + if self.binary_classifier.running: + self.handle_binary_classifier_detections(detections) + self.publish_pose() + return - # for det in detections.detections: - # self.get_logger().info(f"slalomi: {det}") + closest_object = self.closest_object(detections) # if no target object set, use closest closest_or_target = closest_object if self.target_object == "" else self.target_object @@ -215,12 +463,7 @@ def vision_callback(self, detections: Detection3DArray): self.get_logger().info(f"Rejecting detection of {result.hypothesis.class_id} because confidence {result.hypothesis.score} is too low") continue - # Pulse LEDs to indicate a detection was received - ledPulse = LedCommand() - ledPulse.target = LedCommand.TARGET_ALU - ledPulse.mode = LedCommand.SINGLETON_FLASH - ledPulse.green = 255 - self.led_pulse_pub.publish(ledPulse) + self.pulse_detection_led() # update pose of object in map update_success, _ = self.try_update_pose(result, detections.header, closest_object) @@ -228,8 +471,31 @@ def vision_callback(self, detections: Detection3DArray): self.outstanding_detections.append(OutstandingDetectionInfo(result, detections.header, closest_object)) self.publish_pose() - - + + def handle_binary_classifier_detections(self, detections: Detection3DArray): + for detection in detections.detections: + result_ids = [r.hypothesis.class_id for r in detection.results] + + try: + result_idx = result_ids.index(self.binary_classifier.class_name) + except ValueError: + continue + + result = detection.results[result_idx] + + if result.hypothesis.score < float(self.get_parameter("confidence_cutoff").value): + self.get_logger().info(f"Rejecting detection of {result.hypothesis.class_id} because confidence {result.hypothesis.score} is too low") + continue + + self.pulse_detection_led() + + update_success, _ = self.try_update_binary_classifier_pose(result, detections.header) + + if not update_success: + self.outstanding_detections.append(OutstandingDetectionInfo(result, detections.header, "", True)) + + self.maybe_seed_binary_instances() + def update_outstanding_detections(self): current_time = self.get_clock().now() oustanding_detections_remaining: list[OutstandingDetectionInfo] = [] @@ -237,7 +503,18 @@ def update_outstanding_detections(self): elapsed_nanoseconds = current_time.nanoseconds - (outstanding.det_header.stamp.sec * 1e9) - outstanding.det_header.stamp.nanosec elapsed_seconds = elapsed_nanoseconds / float(1e9) - update_success, error_msg = self.try_update_pose(outstanding.det_result, outstanding.det_header, outstanding.closest_object) + if outstanding.binary_classifier_detection: + if self.binary_classifier.running: + update_success, error_msg = self.try_update_binary_classifier_pose(outstanding.det_result, outstanding.det_header) + now_sec = float(self.get_clock().now().nanoseconds) / 1e9 + self.binary_classifier.age_buffer(now_sec) + self.maybe_seed_binary_instances() + else: + update_success = True + error_msg = "BinaryClassifier stopped" + else: + update_success, error_msg = self.try_update_pose(outstanding.det_result, outstanding.det_header, outstanding.closest_object) + if not update_success and not elapsed_seconds > STALE_TIME: oustanding_detections_remaining.append(outstanding) @@ -247,12 +524,10 @@ def update_outstanding_detections(self): self.outstanding_detections = oustanding_detections_remaining - - def try_update_pose(self, result: ObjectHypothesisWithPose, detection_header: Header, closest_object: str): + def transform_detection_to_object_parent(self, result: ObjectHypothesisWithPose, detection_header: Header, child: str): # We have a transform from camera to child we need to transform so # that we have a transform from parrent to child - parent: str = str(self.get_parameter("init_data.{}.parent".format(result.hypothesis.class_id)).value) - child: str = result.hypothesis.class_id + parent: str = str(self.get_parameter("init_data.{}.parent".format(child)).value) # Get the pose that is a transform from camera to child pose: PoseWithCovariance = result.pose @@ -264,23 +539,26 @@ def try_update_pose(self, result: ObjectHypothesisWithPose, detection_header: He detection_header.stamp ) except TransformException as ex: - return False, str(ex) - - # If the current object isnt the closest object and its parent is map we - # aren't going to track its location in favor of offsetting the entire map - update_position = True - update_orientation = True + return False, None, None, str(ex) trans_pose = do_transform_pose_stamped(pose, transform) - - - if result.hypothesis.class_id in self.downwards_objects.keys(): # and parent == "map": + if result.hypothesis.class_id in self.downwards_objects.keys(): trans_pose.pose.orientation.x = 0.0 trans_pose.pose.orientation.y = 0.0 trans_pose.pose.orientation.z = 0.0 trans_pose.pose.orientation.w = 1.0 - if "slalom" in result.hypothesis.class_id: + return True, trans_pose, parent, "" + + def update_object_with_pose(self, trans_pose, parent: str, child: str, closest_object: str): + if not child in self.objects.keys(): + return False, f"Unknown mapping object {child}" + + update_position = True + update_orientation = True + + # These objects keep their orientation from config (detection yaw is unreliable) + if bool(self.get_parameter("init_data.{}.lock_orientation_to_config".format(child)).value): update_orientation = False object_location: Location = self.objects[child]["location"] @@ -301,8 +579,80 @@ def try_update_pose(self, result: ObjectHypothesisWithPose, detection_header: He self.offset.add_pose(offset_pose, True, False) return True, "" - - + + def try_update_pose(self, result: ObjectHypothesisWithPose, detection_header: Header, closest_object: str): + child: str = result.hypothesis.class_id + + update_success, trans_pose, parent, error_msg = self.transform_detection_to_object_parent( + result, + detection_header, + child, + ) + + if not update_success: + return False, error_msg + + return self.update_object_with_pose( + trans_pose, + parent, + child, + closest_object, + ) + + def try_update_binary_classifier_pose(self, result: ObjectHypothesisWithPose, detection_header: Header): + common_target = self.binary_classifier.instance1_name + + update_success, trans_pose, parent, error_msg = self.transform_detection_to_object_parent( + result, + detection_header, + common_target, + ) + + if not update_success: + return False, error_msg + + stamp_sec = float(detection_header.stamp.sec) + float(detection_header.stamp.nanosec) / float(1e9) + now_sec = float(self.get_clock().now().nanoseconds) / float(1e9) + + sample = DetectionSample( + trans_pose.pose.position.x, + trans_pose.pose.position.y, + trans_pose.pose.position.z, + result.hypothesis.score, + stamp_sec, + ) + + assignment = self.binary_classifier.observe(sample, now_sec) + + if not assignment.accepted: + self.get_logger().debug(f"Binary classifier rejected sample: {assignment.reason}") + return True, assignment.reason + + child: str = assignment.target_name + + if not child in self.objects.keys(): + return False, f"BinaryClassifier assigned unknown target {child}" + + child_parent: str = str(self.get_parameter("init_data.{}.parent".format(child)).value) + + # If both binary targets share the same parent, reuse the already-transformed pose + if child_parent != parent: + update_success, trans_pose, parent, error_msg = self.transform_detection_to_object_parent( + result, + detection_header, + child, + ) + + if not update_success: + return False, error_msg + + return self.update_object_with_pose( + trans_pose, + parent, + child, + "", + ) + def closest_object(self, detections: Detection3DArray) -> str: object = "" closest_dist: float = 1000 diff --git a/tensor_detector/CMakeLists.txt b/tensor_detector/CMakeLists.txt index 5d7825d..7413da5 100644 --- a/tensor_detector/CMakeLists.txt +++ b/tensor_detector/CMakeLists.txt @@ -37,11 +37,10 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") endif() endif() -# Install the Python node -install(PROGRAMS - src/yolo_orientation.py - DESTINATION lib/${PROJECT_NAME} -) +# yoink python programs from src directory +file(GLOB src_py_files RELATIVE ${PROJECT_SOURCE_DIR} src/*.py) +install(PROGRAMS ${src_py_files} + DESTINATION lib/${PROJECT_NAME}) #install launch install( diff --git a/tensor_detector/config/yolo_orientation.yaml b/tensor_detector/config/yolo_orientation.yaml index 258edf4..8cc8b6b 100644 --- a/tensor_detector/config/yolo_orientation.yaml +++ b/tensor_detector/config/yolo_orientation.yaml @@ -2,53 +2,17 @@ ros__parameters: active_camera: "ffc" # Change to "dfc" or "ffc" to switch cameras - torp_top: "shark" - bin_target: "bin_saw" - # ---------- FFC configuration ---------- - # ffc_model: "robosub_2024_v3.pt" - # ffc_class_id_map: "{ - # 0: 'buoy', - # 1: 'gate_sawfish', - # 2: 'mapping_hole', - # 3: 'gate_reefshark', - # 4: 'gate_cold', - # 5: 'bin_temperature', - # 6: 'bin' - # }" - - # ffc_model: "rs25.pt" - # ffc_model: "rs25_8_10_1.pt" - # ffc_class_id_map: "{ - # 0: 'buoy', - # 1: 'mapping_map', - # 2: 'mapping_hole', - # 3: 'gate_reefshark', - # 4: 'gate_cold', - # 5: 'bin_temperature', - # 6: 'bin' - # }" - - #ffc_model: "rs25.pt" - # ffc_model: "rs26_gen2.pt" - ffc_model: "rs25_3_4_26.pt" + ffc_model: "ffc_rs_26.pt" ffc_class_id_map: "{ - 0: 'ambulance', - 1: 'bandage', - 2: 'blood', - 3: 'buoy', - 4: 'compass', - 5: 'circle' - 6: 'fire', - 7: 'fire_engine', - 8: 'hammer_and_wrench', - 9: 'helmet', - 10: 'nut_and_bolt', - 11: 'pill', - 12: 'plug', - 13: 'slalom' - 14: 'sos', - 15: 'warning' + 0: 'blood', + 1: 'buoy', + 2: 'compass', + 3: 'circle', + 4: 'fire', + 5: 'hammer_and_wrench', + 6: 'slalom', + 7: 'sos' }" # ffc_class_id_map: "{ # 0: 'ambulance', @@ -70,33 +34,37 @@ ffc_iou: 0.7 # ---------- DFC configuration ---------- - dfc_model: "rs26_gen2.pt" - # dfc_model: "rs25_3_4_26.pt" # new rotationally invariant model - # dfc_model: "rs25_8_15_1_BIN.pt" # New rotationally invariant version with dropout + dfc_model: "dfc_rs_26.pt" dfc_class_id_map: "{ - 1: 'bandage', - 2: 'blood', - 6: 'fire', - 9: 'helmet', - 10: 'nut_and_bolt', - 11: 'pill', - 12: 'plug', - 15: 'warning' + 0: 'bandage', + 1: 'blood', + 2: 'fire', + 3: 'helmet', + 4: 'nut_and_bolt', + 5: 'pill', + 6: 'plug', + 7: 'warning' }" - # dfc_class_id_map: "{ - # 0: 'bin', - # 1: 'bin_saw', - # 2: 'bin_shark', - # 3: 'torpedo_shark_top', - # 4: 'torpedo_saw_top', - # 5: 'bin_pink', - # 6: 'bin_yellow', - # 7: 'bottle_yellow', - # 8: 'gate_saw', - # 9: 'gate_shark', - # 10: 'slalom_red', - # 11: 'spoon_pink', - # 12: 'torpedo_hole' - # }" + dfc_threshold: 0.75 dfc_iou: 0.9 + +# Full map from CVAT for reference + # dfc_class_id_map: "{ + # 0: 'ambulance', + # 1: 'bandage', + # 2: 'blood', + # 3: 'buoy', + # 4: 'compass', + # 5: 'circle', + # 6: 'fire', + # 7: 'fire_engine', + # 8: 'hammer_and_wrench', + # 9: 'helmet', + # 10: 'nut_and_bolt', + # 11: 'pill', + # 12: 'plug', + # 13: 'slalom', + # 14: 'sos', + # 15: 'warning' + # }" \ No newline at end of file diff --git a/tensor_detector/launch/tensorrt.launch.py b/tensor_detector/launch/tensorrt.launch.py index f2fd92c..8120afd 100644 --- a/tensor_detector/launch/tensorrt.launch.py +++ b/tensor_detector/launch/tensorrt.launch.py @@ -16,6 +16,12 @@ def generate_launch_description(): description="name of the robot" ), + DeclareLaunchArgument( + 'use_sim_time', + default_value='false', + description='Use simulation (Gazebo/Clock) time' + ), + GroupAction([ PushRosNamespace(LC("robot")), @@ -26,7 +32,8 @@ def generate_launch_description(): output='screen', parameters=[ params_path, - {"robot_namespace": LC("robot")} + {"robot_namespace": LC("robot")}, + {"use_sim_time": LC("use_sim_time")} ] ) ], scoped=True) diff --git a/tensor_detector/src/colors.py b/tensor_detector/src/colors.py new file mode 100644 index 0000000..873d0f3 --- /dev/null +++ b/tensor_detector/src/colors.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python3 + +# Colors used for rviz markers, keyed by the published class name. +# Will move to yaml eventually +COLOR_MAP = { + # Slaloms (slalom_name set by service) + 'slalom_close': (1.0, 0.0, 0.0), + 'slalom_middle': (1.0, 1.0, 0.0), + 'slalom_far': (0.0, 1.0, 0.0), + + # Slalom publishes under slalom_name, this is the default (not sure if we are using the service anymore) + 'slalom_front': (0.0, 1.0, 0.4), + + # Gate combinations + 'gate_repair': (0.0, 1.0, 0.0), + 'gate_rescue': (0.0, 0.5, 1.0), + + # Torpedo stuff + 'torpedo': (0.0, 1.0, 1.0), + 'fire_hole_large': (1.0, 0.4, 0.0), + 'fire_hole_small': (1.0, 0.7, 0.0), + 'blood_hole_large': (0.6, 0.0, 0.0), + 'blood_hole_small': (1.0, 0.0, 0.4), + + # FFC generic planar classes (when not merged into a gate pair) + 'buoy': (0.2, 0.8, 1.0), + 'compass': (0.9, 0.6, 0.1), + 'hammer_and_wrench': (0.4, 0.4, 0.45), + 'sos': (1.0, 0.1, 0.3), + + # DFC generic planar classes + # fire/blood also fall here on dfc, as the torpedo task is disabled + 'bandage': (0.95, 0.85, 0.70), + 'blood': (0.80, 0.0, 0.0), + 'fire': (1.0, 0.5, 0.0), + 'helmet': (1.0, 1.0, 0.0), + 'nut_and_bolt': (0.5, 0.5, 0.5), + 'pill': (0.7, 0.2, 0.9), + 'plug': (0.0, 0.4, 1.0), + 'warning': (1.0, 0.0, 1.0), +} diff --git a/tensor_detector/src/detection.py b/tensor_detector/src/detection.py new file mode 100644 index 0000000..a2d779f --- /dev/null +++ b/tensor_detector/src/detection.py @@ -0,0 +1,600 @@ +#!/usr/bin/env python3 +"""Detection processing +Turns YOLO results into Detection3D messages and markers +""" + +# TODO: Make this completely class agnostic, pass class specific logic to a delegator, then to yolo class specific python classes +# Most of the refactor outside of this file is pretty much done, but this is harder to separate +# And move stuff to config files as well + +import math +from dataclasses import dataclass +from typing import Any + +import cv2 +import numpy as np + +from vision_msgs.msg import Detection3DArray +from tf_transformations import quaternion_from_euler, quaternion_multiply +from rclpy.time import Time + +import geometry +import outputs +from outputs import MarkerBuilder +from pointcloud import PointCloudBuilder, CloudColorMode +from colors import COLOR_MAP + + +# Published-name relabels (no class is currently relabeled on publish but good to have) +RELABEL_MAP = {} + +# Classes intercepted by the Torpedo task when it is enabled (ffc) +TORPEDO_CLASSES = {'fire', 'blood', 'circle'} + +# Pairs that, when both are seen in a frame, are merged into one combined detection (union bbox -> centroid) (Gate) +# When only one member is present, it runs through the normal planar path (Octagon) +GATE_PAIRS = [ + (('compass', 'hammer_and_wrench'), 'gate_repair'), + (('buoy', 'sos'), 'gate_rescue'), +] +GATE_PAIR_CLASSES = {c for pair, _ in GATE_PAIRS for c in pair} # This way we dont have to type them twice (python moment) + +SLALOM_CLASS = "slalom" #magic 🪄 + +@dataclass +class SurfaceFit: + """Result of fitting a plane to one detection's surface patch.""" + points: Any # Nx3 inlier points + normal: Any # unit normal, flipped to face the camera + centroid: Any # 3D centroid (bbox center x/y at the fitted plane depth) + quat: Any # orientation from the normal + center2d: tuple # (px, py) bbox center in image coords + conf: Any # detection confidence of the source box + + +@dataclass +class ProcessorConfig: + class_detect_shrink: float = 0.15 + min_points: int = 5 + map_min_area: int = 50 + slalom_history_size: int = 10 + use_incoming_timestamp: bool = True + publish_interval: float = 0.1 # marker publish interval + marker_lifetime: float = 5.0 # seconds; 0 = persist until replaced/deleted + grid_step: int = 8 # px spacing for the surface grid sample + max_sample_points: int = 50 # cap points per patch fed to the SVD/cloud (0 = uncapped) + cloud_color_mode: CloudColorMode = CloudColorMode.CLASS + publish_box_markers: bool = True + +@dataclass +class Frame: + """Everything the processor needs for a single image. Built by the node.""" + image: Any # cv_image (bgr8); feature points get overlaid on it + depth: Any # depth image (passthrough) + fx: float # camera intrinsics (same values as K below, isolated for easy access) + fy: float + cx: float + cy: float + K: Any # 3x3 intrinsic matrix [[fx,0,cx],[0,fy,cy],[0,0,1]] (useful for numpy lin alg math) + frame_id: str # Tf frame + timestamp: Any # incoming header stamp + class_id_map: dict + conf: float + want_markers: bool = True + want_cloud: bool = True + want_overlay_points: bool = True + + +class DetectionProcessor: + def __init__(self, config, logger, now_stamp, tf_buffer): + self.cfg = config + self.log = logger + self._now_stamp = now_stamp # callable lambda (ƛ🍾) -> builtin_interfaces/Time msg + self.tf_buffer = tf_buffer + self._default_normal = geometry.DEFAULT_NORMAL + + # Output things + self.markers = MarkerBuilder(config) + self.cloud = PointCloudBuilder(config.min_points, logger, color_mode=config.cloud_color_mode) + + # Persistant states (stuff that lives across frames) + self.plane_normal = None # last fitted plane normal (debug/use) + self.slalom_history = [] + self.slalom_name = 'slalom_front' + self._markers = [] + + # Per-frame states + self._frame = None + self._mask = None + self.slalom_red_detections = [] + + # Torpedo stuff + self.torpedo_fire_box = None + self.torpedo_blood_box = None + self.torpedo_circles = [] + + # When enabled, the torpedo classes are intercepted and + # resolved by process_torpedo_task instead of the default plane-fit path. + # (So blood/fire work for bins) + self.torpedo_enabled = False + + # Gate pairs: class_name -> list of boxes seen this frame + self.gate_pair_boxes = {} + + + ### API + def set_slalom_name(self, name): + self.slalom_name = name + + def set_torpedo_enabled(self, enabled): + """Turn the Task04 fire/blood torpedo logic on (ffc) or off (dfc).""" + self.torpedo_enabled = bool(enabled) + + def process(self, results, frame): + """Run one image through the pipeline. + + Returns (detections, markers_to_publish). The node handles marker + throttling, the point cloud, the annotated image, and publishing. + """ + self._frame = frame + self._reset_frame_state() + self.markers.reset() + + detections = Detection3DArray() + detections.header.frame_id = frame.frame_id + detections.header.stamp = self._stamp(frame) + + if self._mask is None or self._mask.shape[:2] != frame.image.shape[:2]: + self._mask = np.zeros(frame.image.shape[:2], dtype=np.uint8) + + # Build the full-frame segmentation mask + self._mask.fill(0) + + for result in results: + if result is not None and result.masks is not None: + for contour in result.masks.xy: + contour = np.array(contour, dtype=np.int32) + cv2.fillPoly(self._mask, [contour], 255) + + # Route each detection + # Class specifics are handled, otherwise generic plane/fit + for result in results: + if result is None: + continue + if result.boxes is not None: + for box in result.boxes.cpu().numpy(): + if box.conf[0] <= frame.conf: # Technically redundant but extra safety check for now + continue + # int-cast here so routing keys match create_detection3d_message + # and the int-keyed YAML class map (was a raw numpy float). + class_id = int(box.cls[0]) + if class_id not in frame.class_id_map: + continue + + conf = box.conf[0] + name = frame.class_id_map[class_id] + + if self.torpedo_enabled and name in TORPEDO_CLASSES: + self._torpedo_add(name, box) + elif name in GATE_PAIR_CLASSES: + self.gate_pair_boxes.setdefault(name, []).append(box) + elif name == SLALOM_CLASS: + detection_temp = self.create_detection3d_message(box, frame, conf) + if detection_temp and detection_temp.results: + self._stash_slalom(box, detection_temp) + else: + detection = self.create_detection3d_message(box, frame, conf) + if detection: + detections.detections.append(detection) + + # Resolve grouped detections that need the whole frame first + self._resolve_gate_pairs(frame, detections) + + # Torpedo fire/blood (ffc) + if self.torpedo_enabled: + self.process_torpedo_task(frame, detections) + + self._process_slalom(frame, detections) + + # Flush all markers built this frame (including slalom) + markers_out = self._markers + self._markers = [] + + return detections, markers_out + + def take_point_cloud(self, frame_id, stamp): + """Build and clear the accumulated point cloud (None if empty).""" + return self.cloud.take_cloud(frame_id, stamp) + + ### Helpers + def _reset_frame_state(self): + self.slalom_red_detections = [] + self.torpedo_fire_box = None + self.torpedo_blood_box = None + self.torpedo_circles = [] + self.gate_pair_boxes = {} + + def _stamp(self, frame): + if self.cfg.use_incoming_timestamp: + return frame.timestamp + return self._now_stamp() + + def _stash_slalom(self, box, detection_temp): + result_temp = detection_temp.results[0] + centroid = [ + result_temp.pose.pose.position.x, + result_temp.pose.pose.position.y, + result_temp.pose.pose.position.z, + ] + quat = [ + result_temp.pose.pose.orientation.x, + result_temp.pose.pose.orientation.y, + result_temp.pose.pose.orientation.z, + result_temp.pose.pose.orientation.w, + ] + x_min, y_min, x_max, y_max = map(int, box.xyxy[0]) + self.slalom_red_detections.append({ + 'centroid': centroid, + 'quat': quat, + 'conf': box.conf[0], + 'bbox_width': x_max - x_min, + 'bbox_height': y_max - y_min, + }) + + ### Core dispatch + def create_detection3d_message(self, box, frame, conf): + """Default per-detection path: slalom (depth-sampled centroid) or the + generic planar surface fit. Returns a Detection3D or None.""" + x_min, y_min, x_max, y_max = map(int, box.xyxy[0]) + bbox_width = x_max - x_min + bbox_height = y_max - y_min + + class_id = int(box.cls[0]) + bbox_center_x = (x_min + x_max) / 2 + bbox_center_y = (y_min + y_max) / 2 + class_name = frame.class_id_map.get(class_id, "Unknown") + + # Slalom: depth-sampled centroid, default-facing orientation (mapping overrides it now) + if class_name == SLALOM_CLASS: + depth_value = frame.depth[int(bbox_center_y), int(bbox_center_x)] + if (np.isnan(depth_value) or math.isinf(bbox_center_x) + or math.isinf(bbox_center_y) or math.isinf(depth_value)): + return None + centroid = geometry.pixel_to_3d(bbox_center_x, bbox_center_y, + float(depth_value), + frame.fx, frame.fy, frame.cx, frame.cy) + quat, _ = geometry.normal_to_quaternion(-self._default_normal, + self._default_normal) + detection = self._new_detection(frame) + detection.results.append(self._make_hypothesis(class_name, centroid, quat, conf)) + return detection + + # Generic planar surface fit (everything else) + fit = self._fit_bbox((x_min, y_min, x_max, y_max), frame, conf, class_name=class_name) + if fit is None: + return None + + self.plane_normal = fit.normal + self._build_marker(frame, fit.quat, fit.centroid, class_name, + bbox_width, bbox_height) + detection = self._new_detection(frame) + detection.results.append(self._make_hypothesis(class_name, fit.centroid, fit.quat, conf)) + return detection + + ### Shared surface fitting + def _extract_bbox_points(self, bbox, frame, class_name=None): + """Shrink the bbox, grid-sample the masked region, back-project to 3D.""" + x_min, y_min, x_max, y_max = map(int, bbox) + shrink_x = (x_max - x_min) * self.cfg.class_detect_shrink + shrink_y = (y_max - y_min) * self.cfg.class_detect_shrink + x0 = int(x_min + shrink_x) + x1 = int(x_max - shrink_x) + y0 = int(y_min + shrink_y) + y1 = int(y_max - shrink_y) + + mask_roi = self._mask[y0:y1, x0:x1] + feature_points = self._grid_sample(mask_roi, x0, y0) + if not feature_points: + return None + + return self._get_3d_points(frame, feature_points, class_name=class_name) + + def _grid_sample(self, mask_roi, x0, y0): + """Regular pixel grid over the in-mask region of an ROI -> [(px, py), ...].""" + if mask_roi.size == 0: + return [] + step = max(1, int(self.cfg.grid_step)) + ys, xs = np.where(mask_roi[::step, ::step] == 255) + + cap = int(self.cfg.max_sample_points) + if cap > 0 and len(xs) > cap: + sel = np.linspace(0, len(xs) - 1, cap).astype(int) + xs, ys = xs[sel], ys[sel] + + # Map back to full-image coords (account for the stride and ROI offset). + return [(float(x * step + x0), float(y * step + y0)) for y, x in zip(ys, xs)] + + def _plane_from_points(self, points_3d, frame, center_bbox, conf): + """Pooled 3D points -> SVD plane -> camera-facing surface""" + if points_3d is None or len(points_3d) < self.cfg.min_points: + return None + + normal, _, centroid3 = geometry.fit_plane(points_3d) + if normal is None: + return None + + bbox_center_x = (center_bbox[0] + center_bbox[2]) / 2 + bbox_center_y = (center_bbox[1] + center_bbox[3]) / 2 + centroid = geometry.pixel_to_3d(bbox_center_x, bbox_center_y, centroid3[2], + frame.fx, frame.fy, frame.cx, frame.cy) + if normal[2] > 0: + normal = -normal + quat, _ = geometry.normal_to_quaternion(normal, self._default_normal) + + return SurfaceFit(points=points_3d, normal=normal, centroid=centroid, + quat=quat, center2d=(bbox_center_x, bbox_center_y), conf=conf) + + def _fit_bbox(self, bbox, frame, conf, class_name=None): + """Generic planar surface fit over a single bbox: sample -> SVD.""" + points_3d = self._extract_bbox_points(bbox, frame, class_name=class_name) + return self._plane_from_points(points_3d, frame, bbox, conf) + + def _fit_boxes(self, boxes, frame, conf, center_bbox, class_name=None): + """Sample each box independently, pool the points, then one SVD fit""" + pooled = [] + for box in boxes: + pts = self._extract_bbox_points(box.xyxy[0], frame, class_name=class_name) + if pts is not None and len(pts) > 0: + pooled.append(np.asarray(pts)) + if not pooled: + return None + return self._plane_from_points(np.vstack(pooled), frame, center_bbox, conf) + + def _fit_symbol(self, box, frame, class_name=None): + """Run the generic surface pipeline on one symbol box (fire/blood).""" + return self._fit_bbox(box.xyxy[0], frame, box.conf[0], class_name=class_name) + + ### Torpedo + def _torpedo_add(self, name, box): + if name == "fire": + self.torpedo_fire_box = box + elif name == "blood": + self.torpedo_blood_box = box + elif name == "circle": + self.torpedo_circles.append(box) + + def process_torpedo_task(self, frame, detections): + """Resolve the fire/blood torpedo board: publish the torpedo center and, + when fully visible, the four labeled openings""" + fire = self._fit_symbol(self.torpedo_fire_box, frame, class_name='fire') if self.torpedo_fire_box is not None else None + blood = self._fit_symbol(self.torpedo_blood_box, frame, class_name='blood') if self.torpedo_blood_box is not None else None + + present = [f for f in (fire, blood) if f is not None] + if not present: + return # no reliable symbol plane this frame; publish nothing + + # Board plane normal (pool both patches when available) + if fire is not None and blood is not None: + pooled = np.vstack([fire.points, blood.points]) + normal, _, _ = geometry.fit_plane(pooled) + if normal is None: + normal = present[0].normal + else: + normal = present[0].normal + normal = np.asarray(normal, dtype=float) + if normal[2] > 0: + normal = -normal + self.plane_normal = normal + board_quat, _ = geometry.normal_to_quaternion(normal, self._default_normal) + + # Plane anchor: midpoint of visible symbol centroids (any on-plane point). + anchor = np.mean(np.array([f.centroid for f in present]), axis=0) + + # Torpedo center via in-plane extent of all visible shapes + centers_2d = [f.center2d for f in present] + for cbox in self.torpedo_circles: + centers_2d.append(self._box_center(cbox)) + + projected = [] + for (px, py) in centers_2d: + pt = self._project_center(px, py, frame, normal, anchor) + if pt is not None: + projected.append(pt) + if not projected: + return + + u, v = geometry.inplane_basis(normal) + origin = projected[0] + us = [float(np.dot(p - origin, u)) for p in projected] + vs = [float(np.dot(p - origin, v)) for p in projected] + torpedo_center = (origin + + ((min(us) + max(us)) / 2.0) * u + + ((min(vs) + max(vs)) / 2.0) * v) + + torpedo_conf = min(self._box_conf(b) for b in + (self.torpedo_fire_box, self.torpedo_blood_box) if b is not None) + self._emit_task(frame, detections, "torpedo", torpedo_center, board_quat, + torpedo_conf, 0.3, 0.3) + + # Hole instance separation (needs both symbols + 4 circles) + if fire is None or blood is None or len(self.torpedo_circles) != 4: + return + + fpx, fpy = fire.center2d + bpx, bpy = blood.center2d + fire_pt = self._project_center(fpx, fpy, frame, normal, anchor) + blood_pt = self._project_center(bpx, bpy, frame, normal, anchor) + if fire_pt is None or blood_pt is None: + return + fire_pt = np.asarray(fire_pt) + blood_pt = np.asarray(blood_pt) + + # Project the holes onto the fire->blood axis and sort + # The two nearest fire go to the fire side, the two nearest blood to the blood side + # This always splits them 2-2 and (should) work at any roll + sep = blood_pt - fire_pt + norm_sep = float(np.linalg.norm(sep)) + if norm_sep < 1e-6: + return + sep = sep / norm_sep + + holes = [] + for cbox in self.torpedo_circles: + cx, cy = self._box_center(cbox) + pt = self._project_center(cx, cy, frame, normal, anchor) + if pt is None: + return # can't resolve all four then publish no holes + pt = np.asarray(pt) + holes.append({'box': cbox, 'pt': pt, + 's': float(np.dot(pt - fire_pt, sep))}) + + holes.sort(key=lambda h: h['s']) + fire_pair, blood_pair = holes[:2], holes[2:] + + self._emit_hole_pair(frame, detections, "fire", fire, fire_pair, board_quat) + self._emit_hole_pair(frame, detections, "blood", blood, blood_pair, board_quat) + + def _emit_hole_pair(self, frame, detections, symbol_name, symbol_fit, pair, quat): + # The opening closest (in 3D) to its symbol is the large one + d0 = float(np.linalg.norm(np.asarray(pair[0]['pt']) - np.asarray(symbol_fit.centroid))) + d1 = float(np.linalg.norm(np.asarray(pair[1]['pt']) - np.asarray(symbol_fit.centroid))) + large, small = (pair[0], pair[1]) if d0 <= d1 else (pair[1], pair[0]) + for hole, size_name in ((large, "large"), (small, "small")): + cw, ch = self._box_size(hole['box']) + self._emit_task(frame, detections, f"{symbol_name}_hole_{size_name}", + hole['pt'], quat, self._box_conf(hole['box']), + cw / 150.0, ch / 150.0) + + def _emit_task(self, frame, detections, class_id, centroid, quat, conf, scale_x, scale_y): + if frame.want_markers: + color = COLOR_MAP.get(class_id, (1.0, 1.0, 1.0)) + self._markers.extend(self.markers.build( + frame.frame_id, self._stamp(frame), quat, centroid, color, scale_x, scale_y)) + detection = self._new_detection(frame) + detection.results.append(self._make_hypothesis(class_id, centroid, quat, conf)) + detections.detections.append(detection) + + def _project_center(self, px, py, frame, normal, anchor): + pt, _ = geometry.ray_plane_intersection(px, py, frame.K, normal, anchor) + return pt + + def _box_center(self, box): + x_min, y_min, x_max, y_max = map(int, box.xyxy[0]) + return (x_min + x_max) / 2, (y_min + y_max) / 2 + + def _box_size(self, box): + x_min, y_min, x_max, y_max = map(int, box.xyxy[0]) + return (x_max - x_min), (y_max - y_min) + + def _box_conf(self, box): + return box.conf[0] + + ### Gate pairs + def _union_bbox(self, boxes): + """Axis-aligned union of several boxes""" + xs0, ys0, xs1, ys1 = [], [], [], [] + for box in boxes: + x_min, y_min, x_max, y_max = map(int, box.xyxy[0]) + xs0.append(x_min); ys0.append(y_min); xs1.append(x_max); ys1.append(y_max) + return (min(xs0), min(ys0), max(xs1), max(ys1)) + + def _resolve_gate_pairs(self, frame, detections): + """If both were seen this frame, publish ONE combined detection + If only one is present, publish each present box + normally (its own class, standard planar path).""" + for (a, b), gate_name in GATE_PAIRS: + boxes_a = self.gate_pair_boxes.get(a, []) + boxes_b = self.gate_pair_boxes.get(b, []) + if boxes_a and boxes_b: + self._emit_combined_gate(frame, detections, boxes_a + boxes_b, gate_name) + else: + for box in boxes_a + boxes_b: + det = self.create_detection3d_message(box, frame, box.conf[0]) + if det: + detections.detections.append(det) + + def _emit_combined_gate(self, frame, detections, boxes, gate_name): + """Union the member bboxes, fit one plane over the combined region, and + publish it under gate_name (conf = min of the members)""" + union = self._union_bbox(boxes) + conf = min(self._box_conf(b) for b in boxes) + + fit = self._fit_boxes(boxes, frame, conf, center_bbox=union, class_name=gate_name) + if fit is None: + return + self.plane_normal = fit.normal # Stored for debug stuff + bbox_width = union[2] - union[0] + bbox_height = union[3] - union[1] + self._build_marker(frame, fit.quat, fit.centroid, gate_name, bbox_width, bbox_height) + detection = self._new_detection(frame) + detection.results.append(self._make_hypothesis(gate_name, fit.centroid, fit.quat, conf)) + detections.detections.append(detection) + + ### Slalom + def _process_slalom(self, frame, detections_array): + """Report the closest slalom_red from history, oriented by tf parent""" + if len(self.slalom_red_detections) > 0: + closest_detection = min(self.slalom_red_detections, key=lambda x: x['centroid'][2]) + self.slalom_history.append({ + 'centroid': closest_detection['centroid'], + 'quat': closest_detection['quat'], + 'conf': closest_detection['conf'], + }) + if len(self.slalom_history) > self.cfg.slalom_history_size: + self.slalom_history.pop(0) + + if self.slalom_history: + closest_in_history = min(self.slalom_history, key=lambda x: x['centroid'][2]) + + parent_frame = "slalom_parent_frame" + try: + parent_quat_tf = self.tf_buffer.lookup_transform( + frame.frame_id, parent_frame, Time()).transform.rotation + detection_quat = [parent_quat_tf.x, parent_quat_tf.y, + parent_quat_tf.z, parent_quat_tf.w] + except Exception: + self.log.warning(f'Pubbing slalom detection with rotation since {parent_frame} not found') + detection_quat = closest_in_history['quat'] + + z_to_x_quat = quaternion_from_euler(0.0, -1.57079632679, 0.0) + corrected_quat = quaternion_multiply(detection_quat, z_to_x_quat) + + detection = self._new_detection(frame) + detection.results.append(self._make_hypothesis( + self.slalom_name, + closest_in_history['centroid'], + corrected_quat, + closest_in_history['conf'])) + + self._build_marker( + frame, corrected_quat, closest_in_history['centroid'], + self.slalom_name, + closest_detection['bbox_width'], + closest_detection['bbox_height']) + detections_array.detections.append(detection) + + self.slalom_red_detections = [] + else: + self.slalom_red_detections = [] + + ### 3D / cloud + def _get_3d_points(self, frame, feature_points, class_name=None): + return self.cloud.extract(frame, feature_points, self._mask, class_name=class_name) + + ### Messages + def _new_detection(self, frame): + return outputs.new_detection(frame.frame_id, self._stamp(frame)) + + def _make_hypothesis(self, class_name, centroid, quat, conf): + # Apply the publish-name relabel (anchors publish under a different name) + published = RELABEL_MAP.get(class_name, class_name) + return outputs.make_hypothesis(published, centroid, quat, conf) + + def _build_marker(self, frame, quat, centroid, class_name, bbox_width, bbox_height): + if not frame.want_markers: + return + color = COLOR_MAP.get(class_name, (1.0, 1.0, 1.0)) + flat = (class_name == "buoy") + self._markers.extend(self.markers.build( + frame.frame_id, self._stamp(frame), quat, centroid, color, + float(bbox_width) / 150.0, float(bbox_height) / 150.0, flat)) \ No newline at end of file diff --git a/tensor_detector/src/geometry.py b/tensor_detector/src/geometry.py new file mode 100644 index 0000000..8f1cdf0 --- /dev/null +++ b/tensor_detector/src/geometry.py @@ -0,0 +1,124 @@ +#!/usr/bin/env python3 +"""Pure geometry helpers for yolo_orientation.""" +import numpy as np +from scipy.spatial.transform import Rotation as R +from scipy.spatial import cKDTree + + +# The camera-frame normal a flat, front-facing surface would have +# Don't touch this unless you know what you're doing +DEFAULT_NORMAL = np.array([0.0, 0.0, 1.0]) + + +def pixel_to_3d(u, v, z, fx, fy, cx, cy): + """Back-project a pixel (u, v) at depth z into a 3D camera-frame point.""" + x = (u - cx) * z / fx + y = (v - cy) * z / fy + return [x, y, z] + + +def ray_plane_intersection(u, v, K, normal, p0): + """Intersect the camera ray through pixel (u, v) with the plane (normal, p0). + + Returns (point_3d, t). point_3d is None when the ray is parallel to the + plane (denominator == 0). Scaling of the ray direction does not affect the + intersection point, so the result is independent of normalization. + """ + d = np.linalg.inv(K) @ np.array([u, v, 1.0]) + d = d / np.linalg.norm(d) + denominator = np.dot(normal, d) + if denominator == 0: + return None, None + t = np.dot(normal, p0) / denominator + return t * d, t + + +def fit_plane(points_3d): + """Least-squares plane fit via SVD. + + Returns (normal, d, centroid); (None, None, None) when there are no points. + """ + if len(points_3d) == 0: + return None, None, None + centroid = np.mean(points_3d, axis=0) + _, _, vh = np.linalg.svd(points_3d - centroid, full_matrices=False) + normal = vh[-1] + normal = normal / np.linalg.norm(normal) + d = -np.dot(normal, centroid) + return normal, d, centroid + + +def rotation_from_normal(normal, default_normal=DEFAULT_NORMAL): + """Rotation that maps default_normal onto normal (scipy Rotation).""" + axis = np.cross(default_normal, normal) + axis_length = np.linalg.norm(axis) + if axis_length == 0: + # Normal is parallel/anti-parallel, rotate 180 deg about an arbitrary axis (goofy ahh rotation) + axis = np.array([1, 0, 0]) + angle = np.pi + else: + axis = axis / axis_length + angle = np.arccos(np.dot(default_normal, normal)) + return R.from_rotvec(axis * angle) + + +def normal_to_quaternion(normal, default_normal=DEFAULT_NORMAL): + """(quat, euler_xyz_degrees) for a plane with the given normal. + + euler is None when no rotation is needed (normal == default_normal). + """ + if np.allclose(normal, default_normal): + return [0.0, 0.0, 0.0, 1.0], None + rotation = rotation_from_normal(normal, default_normal) + return rotation.as_quat(), rotation.as_euler('xyz', degrees=True) + + +def inplane_basis(normal): + """Two orthonormal vectors (u, v) spanning the plane with the given normal. + + The absolute orientation/sign of u and v is arbitrary; callers that need a + consistent 'up' must resolve it themselves. Stable for any normal direction. + """ + normal = np.asarray(normal, dtype=float) + normal = normal / np.linalg.norm(normal) + # Reference axis least aligned with the normal, to avoid a degenerate cross. + ref = np.array([1.0, 0.0, 0.0]) if abs(normal[0]) < 0.9 else np.array([0.0, 1.0, 0.0]) + u = np.cross(normal, ref) + u = u / np.linalg.norm(u) + v = np.cross(normal, u) + v = v / np.linalg.norm(v) + return u, v + + +def is_inside_bbox(inner_bbox, outer_bbox): + inner_x_min, inner_y_min, inner_x_max, inner_y_max = inner_bbox + outer_x_min, outer_y_min, outer_x_max, outer_y_max = outer_bbox + return (inner_x_min >= outer_x_min and inner_x_max <= outer_x_max and + inner_y_min >= outer_y_min and inner_y_max <= outer_y_max) + + +def radius_outlier_mask(points_3d, radius=1.0, min_neighbors=10): + """Boolean mask: True for points with more than min_neighbors others within radius.""" + n = len(points_3d) + if n == 0: + return np.zeros(0, dtype=bool) + tree = cKDTree(points_3d) + # count includes the point itself, matching the original > min_neighbors semantics + counts = tree.query_ball_point(points_3d, r=radius, return_length=True) + return counts > min_neighbors + + +def statistical_outlier_mask(points_3d, k=10, std_ratio=1.0): + """Boolean mask: True for inliers, based on mean distance to k nearest neighbors.""" + n = len(points_3d) + if n == 0: + return np.zeros(0, dtype=bool) + k_eff = min(k, n - 1) + if k_eff < 1: + return np.ones(n, dtype=bool) + tree = cKDTree(points_3d) + # k_eff + 1 because the nearest neighbor is the point itself (distance 0) + dists, _ = tree.query(points_3d, k=k_eff + 1) + mean_distances = dists[:, 1:].mean(axis=1) # drop the self column + threshold = mean_distances.mean() + std_ratio * mean_distances.std() + return mean_distances < threshold \ No newline at end of file diff --git a/tensor_detector/src/outputs.py b/tensor_detector/src/outputs.py new file mode 100644 index 0000000..2b532f6 --- /dev/null +++ b/tensor_detector/src/outputs.py @@ -0,0 +1,111 @@ +#!/usr/bin/env python3 +"""ROS output data stuf: detections, hypotheses, markers""" +from scipy.spatial.transform import Rotation as R + +from vision_msgs.msg import (Detection3D, ObjectHypothesisWithPose, + ObjectHypothesis) +from visualization_msgs.msg import Marker + + +def new_detection(frame_id, stamp): + detection = Detection3D() + detection.header.frame_id = frame_id + detection.header.stamp = stamp + return detection + + +def make_hypothesis(class_id, centroid, quat, conf): + """Build an ObjectHypothesisWithPose. class_id is the final published name.""" + hypothesis_with_pose = ObjectHypothesisWithPose() + hypothesis = ObjectHypothesis() + hypothesis.class_id = class_id + # conf may be a numpy scalar (from a YOLO box) or a plain float (synthesized). + hypothesis.score = float(conf.item()) if hasattr(conf, "item") else float(conf) + + hypothesis_with_pose.hypothesis = hypothesis + hypothesis_with_pose.pose.pose.position.x = float(centroid[0]) + hypothesis_with_pose.pose.pose.position.y = float(centroid[1]) + hypothesis_with_pose.pose.pose.position.z = float(centroid[2]) + hypothesis_with_pose.pose.pose.orientation.x = float(quat[0]) + hypothesis_with_pose.pose.pose.orientation.y = float(quat[1]) + hypothesis_with_pose.pose.pose.orientation.z = float(quat[2]) + hypothesis_with_pose.pose.pose.orientation.w = float(quat[3]) + return hypothesis_with_pose + + +class MarkerBuilder: + """Builds the (cube + arrow) marker pair for a pose""" + + def __init__(self, config): + self.cfg = config + self._counter = 0 + + def reset(self): + self._counter = 0 + + def _next_id(self): + self._counter += 1 + return self._counter + + def build(self, frame_id, stamp, quat, centroid, color, scale_x, scale_y, flat=False): + """Return [plane_marker, arrow_marker]. Caller appends to its buffer.""" + lifetime_s = max(0.0, float(self.cfg.marker_lifetime)) + lt_sec = int(lifetime_s) + lt_nsec = int((lifetime_s - lt_sec) * 1e9) + markers = [] + + if getattr(self.cfg, 'publish_box_markers', True): + plane_marker = Marker() + plane_marker.header.frame_id = frame_id + plane_marker.header.stamp = stamp + plane_marker.ns = "detection_markers" + plane_marker.id = self._next_id() + plane_marker.type = Marker.CUBE + plane_marker.action = Marker.ADD + plane_marker.lifetime.sec = lt_sec + plane_marker.lifetime.nanosec = lt_nsec + plane_marker.pose.position.x = float(centroid[0]) + plane_marker.pose.position.y = float(centroid[1]) + plane_marker.pose.position.z = float(centroid[2]) + plane_marker.pose.orientation.x = float(quat[0]) + plane_marker.pose.orientation.y = float(quat[1]) + plane_marker.pose.orientation.z = float(quat[2]) + plane_marker.pose.orientation.w = float(quat[3]) + plane_marker.scale.x = float(scale_x) + plane_marker.scale.y = float(scale_y) + plane_marker.scale.z = 0.01 if flat else 0.05 + plane_marker.color.r = color[0] + plane_marker.color.g = color[1] + plane_marker.color.b = color[2] + plane_marker.color.a = 0.8 + markers.append(plane_marker) + + arrow_marker = Marker() + arrow_marker.header.frame_id = frame_id + arrow_marker.header.stamp = stamp + arrow_marker.ns = "orientation_markers" + arrow_marker.id = self._next_id() + arrow_marker.type = Marker.ARROW + arrow_marker.action = Marker.ADD + arrow_marker.lifetime.sec = lt_sec + arrow_marker.lifetime.nanosec = lt_nsec + arrow_marker.pose.position.x = float(centroid[0]) + arrow_marker.pose.position.y = float(centroid[1]) + arrow_marker.pose.position.z = float(centroid[2]) + + additional_rotation = R.from_euler('y', -90, degrees=True).as_quat() + arrow_quat = (R.from_quat(quat) * R.from_quat(additional_rotation)).as_quat() + arrow_marker.pose.orientation.x = arrow_quat[0] + arrow_marker.pose.orientation.y = arrow_quat[1] + arrow_marker.pose.orientation.z = arrow_quat[2] + arrow_marker.pose.orientation.w = arrow_quat[3] + arrow_marker.scale.x = 1.0 + arrow_marker.scale.y = 0.05 + arrow_marker.scale.z = 0.05 + arrow_marker.color.r = color[0] + arrow_marker.color.g = color[1] + arrow_marker.color.b = color[2] + arrow_marker.color.a = 0.8 + markers.append(arrow_marker) + + return markers diff --git a/tensor_detector/src/pointcloud.py b/tensor_detector/src/pointcloud.py new file mode 100644 index 0000000..dbf700c --- /dev/null +++ b/tensor_detector/src/pointcloud.py @@ -0,0 +1,181 @@ +#!/usr/bin/env python3 +"""Point Clouds: back-project feature pixels to 3D, filter outliers, +accumulate, and drain into a PointCloud2 message with per-point RGB.""" +import struct +from enum import Enum + +import cv2 +import numpy as np + +from sensor_msgs.msg import PointCloud2, PointField + +import geometry +from colors import COLOR_MAP + +MAX_POINTS = 10000 +DEFAULT_COLOR = (1.0, 1.0, 1.0) + + +class CloudColorMode(Enum): + CLASS = "class" # Flat color from COLOR_MAP keyed by class name + PIXEL = "pixel" # Sample the RGB image at each feature pixel + + +def _pack_rgb(r, g, b): + """Pack three 0-1 floats into the uint32 that RViz expects for rgb fields.""" + ri = int(np.clip(r * 255, 0, 255)) + gi = int(np.clip(g * 255, 0, 255)) + bi = int(np.clip(b * 255, 0, 255)) + packed = (ri << 16) | (gi << 8) | bi + return struct.unpack('f', struct.pack('I', packed))[0] + + +class PointCloudBuilder: + def __init__(self, min_points, logger, color_mode=CloudColorMode.CLASS): + self.min_points = min_points + self.log = logger + self.color_mode = color_mode + # Each entry is (x, y, z, rgb_packed_float) + self.accumulated_points = [] + + def extract(self, frame, feature_points, mask, class_name=None): + """Back-project masked feature pixels to 3D, filter, accumulate, return. + + Color per point is determined by self.color_mode: + CLASS – flat color from COLOR_MAP for class_name + PIXEL – sampled from frame.image at the source pixel (bgr8 -> rgb) + + Returns the filtered Nx3 array, or None if too few points survive. + """ + class_color = COLOR_MAP.get(class_name, DEFAULT_COLOR) + class_rgb_packed = _pack_rgb(*class_color) + + # Build (x, y, z, rgb) together so outlier removal keeps them in sync + xyzrgb = [] + for x, y in feature_points: + xi = int(x) + yi = int(y) + if yi >= frame.depth.shape[0] or xi >= frame.depth.shape[1]: + continue + if mask[yi, xi] != 255: + continue + z = frame.depth[yi, xi] + if np.isnan(z) or np.isinf(z) or z == 0: + continue + + pt = geometry.pixel_to_3d(xi, yi, z, frame.fx, frame.fy, frame.cx, frame.cy) + + if self.color_mode == CloudColorMode.PIXEL: + b, g, r = frame.image[yi, xi] # frame.image is bgr8 + rgb = _pack_rgb(r / 255.0, g / 255.0, b / 255.0) + else: + rgb = class_rgb_packed + + xyzrgb.append((*pt, rgb)) + + points_3d = np.array([p[:3] for p in xyzrgb]) if xyzrgb else np.array([]) + + if getattr(frame, "want_overlay_points", True): + self._overlay(frame, points_3d, class_color) + + if len(points_3d) == 0: + return None + + # Outlier removal operates on xyz; apply the same index mask to rgb + rgb_arr = np.array([p[3] for p in xyzrgb]) + + # Drop any NaN/infinite points so they don't poison the distance math + finite = np.isfinite(points_3d).all(axis=1) + points_3d = points_3d[finite] + rgb_arr = rgb_arr[finite] + + if len(points_3d) == 0: + return None + + indices = self._outlier_indices(points_3d) + points_3d = points_3d[indices] + rgb_arr = rgb_arr[indices] + + # Only accumulate for the viz cloud when someone actually wants it + want_cloud = getattr(frame, "want_cloud", True) + if want_cloud and len(points_3d) > 0: + self.accumulated_points.extend( + (p[0], p[1], p[2], float(c)) for p, c in zip(points_3d, rgb_arr)) + if len(self.accumulated_points) > MAX_POINTS: + self.accumulated_points = self.accumulated_points[-MAX_POINTS:] + + if len(points_3d) < self.min_points: + return None + + return points_3d + + def take_cloud(self, frame_id, stamp): + """Build and clear the accumulated cloud as PointCloud2 (None if empty).""" + if not self.accumulated_points: + return None + + fields = [ + PointField(name='x', offset=0, datatype=PointField.FLOAT32, count=1), + PointField(name='y', offset=4, datatype=PointField.FLOAT32, count=1), + PointField(name='z', offset=8, datatype=PointField.FLOAT32, count=1), + PointField(name='rgb', offset=12, datatype=PointField.FLOAT32, count=1), + ] + point_step = 16 # 4 floats x 4 bytes + + arr = np.asarray(self.accumulated_points, dtype=np.float32) + data = arr.tobytes() + + cloud = PointCloud2() + cloud.header.frame_id = frame_id + cloud.header.stamp = stamp + cloud.height = 1 + cloud.width = len(self.accumulated_points) + cloud.fields = fields + cloud.is_bigendian = False + cloud.point_step = point_step + cloud.row_step = point_step * cloud.width + cloud.data = data + cloud.is_dense = True + + self.accumulated_points.clear() + return cloud + + # --- internals --- + + def _outlier_indices(self, points_3d): + """Return the surviving index array after both outlier passes.""" + n = len(points_3d) + if n == 0: + return np.array([], dtype=int) + + # radius pass + k = min(10, max(1, int(n * 0.8))) + radius_idx = np.where(geometry.radius_outlier_mask(points_3d, min_neighbors=k))[0] + if len(radius_idx) == 0: + return radius_idx + + # statistical pass on the survivors, then map back through radius_idx + survivors = points_3d[radius_idx] + k2 = min(10, max(1, int(len(survivors) * 0.8))) + stat_mask = geometry.statistical_outlier_mask(survivors, k=k2) + return radius_idx[stat_mask] + + def _overlay(self, frame, points, color): + if len(points) == 0: + return + bgr = (int(color[2] * 255), int(color[1] * 255), int(color[0] * 255)) + for point in points: + try: + if point[2] <= 0 or np.isnan(point[2]) or np.isinf(point[2]): + continue + if np.isnan(point[0]) or np.isinf(point[0]) or np.isnan(point[1]) or np.isinf(point[1]): + continue + x2d = int(point[0] * frame.fx / point[2] + frame.cx) + y2d = int(point[1] * frame.fy / point[2] + frame.cy) + if 0 <= x2d < frame.image.shape[1] and 0 <= y2d < frame.image.shape[0]: + cv2.circle(frame.image, (x2d, y2d), radius=3, color=bgr, thickness=-1) + except (ZeroDivisionError, OverflowError, ValueError): + continue + except Exception as e: + self.log.warning(f"Unexpected error in overlay_points_on_image: {e}") + continue \ No newline at end of file diff --git a/tensor_detector/src/yolo_model.py b/tensor_detector/src/yolo_model.py new file mode 100644 index 0000000..d212c86 --- /dev/null +++ b/tensor_detector/src/yolo_model.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python3 +"""Thin wrapper around the Ultralytics YOLO model""" +import os +from ultralytics import YOLO + + +class YoloModel: + def __init__(self, model_path, export=False): + self.model = None + self._export = export + self._initialize(model_path) + + def _initialize(self, model_path): + # Prefer a prebuilt .engine next to the .pt if one exists. + engine_model_path = os.path.splitext(model_path)[0] + '.engine' + if model_path.endswith(".pt") and os.path.exists(engine_model_path): + model_path = engine_model_path + + self.model = YOLO(model_path, task="segment") + + # Optionally export to TensorRT, then reload the freshly built engine. + if self._export and model_path.endswith(".pt"): + self.model.export(format="engine") + self._initialize(engine_model_path) + + def infer(self, image, conf, iou): + return self.model(image, verbose=False, iou=iou, conf=conf) diff --git a/tensor_detector/src/yolo_orientation.py b/tensor_detector/src/yolo_orientation.py index 953118a..8855d1d 100644 --- a/tensor_detector/src/yolo_orientation.py +++ b/tensor_detector/src/yolo_orientation.py @@ -1,1696 +1,353 @@ #!/usr/bin/env python3 +"""yolo_orientation node. + +ROS node +parameters, publishers/subscribers, services, the camera lifecycle, and the image/depth/camera_info callbacks + +Per frame it builds a Frame bundle and hands it to a DetectionProcessor +The processor returns detections + markers, which this node publishes. +""" +import os +import time import rclpy from rclpy.node import Node -from sensor_msgs.msg import Image, CameraInfo, PointCloud +from sensor_msgs.msg import CompressedImage, CameraInfo, PointCloud2, Image +from visualization_msgs.msg import MarkerArray, Marker +from vision_msgs.msg import Detection3DArray from cv_bridge import CvBridge -import cv2 -from ultralytics import YOLO import numpy as np -from visualization_msgs.msg import Marker, MarkerArray -from vision_msgs.msg import Detection3DArray, Detection3D, ObjectHypothesisWithPose, ObjectHypothesis -from geometry_msgs.msg import Point32 -from scipy.spatial.transform import Rotation as R -from ament_index_python.packages import get_package_share_directory -import time -import os import yaml -import math +from ament_index_python.packages import get_package_share_directory from std_srvs.srv import SetBool from riptide_msgs2.srv import SetString from tf2_ros.transform_listener import TransformListener from tf2_ros import Buffer -from rclpy.time import Time -from tf_transformations import quaternion_from_euler, quaternion_multiply - -class YOLONode(Node): - def __init__(self): - super().__init__('yolo_orientation') - self.declare_parameters( - namespace='', - parameters=[ - ('active_camera', 'ffc'), # Default camera - ('ffc_model', ''), - ('dfc_model', ''), - ('ffc_class_id_map', ''), - ('dfc_class_id_map', ''), - ('ffc_threshold', 0.9), - ('dfc_threshold', 0.9), - ('ffc_iou', 0.9), - ('dfc_iou', 0.9), - ('robot_namespace', 'talos'), - ('torp_top', 'saw'), - ('bin_target', 'bin_saw') - ] - ) - - self.robot_ns = self.get_parameter(f'robot_namespace').get_parameter_value().string_value - - ########################## - # USER DEFINED PARAMS # - ########################## - self.log_processing_time = False - self.use_incoming_timestamp = True - self.export = False # Whether or not to export .pt file to engine - self.print_camera_info = False # Print the camera info recieved - self.class_detect_shrink = 0.15 # Shrink the detection area around the class (% Between 0 and 1, 1 being full shrink) - self.min_points = 5 # Minimum number of points for SVD - self.publish_interval = 0.05 # 100 milliseconds - self.history_size = 10 # Window size for rolling average smoothing - self.default_normal = np.array([0.0, 0.0, 1.0]) # Default normal for quaternion calculation - self.map_min_area = 50 # 130 - - # Color map for classes published to markers - self.color_map = { - 'bin_target': (1.0, 0.0, 0.0), - 'mapping_map': (0.0, 1.0, 0.0), - 'mapping_hole': (0.0, 0.0, 1.0), - 'mapping_largest_hole': (0.0, 0.0, 1.0), - 'mapping_smallest_hole': (1.0, 0.0, 0.0), - 'gate_hot': (1.0, 1.0, 1.0), - 'slalom_close': (1.0, 0.0, 0.0), - 'slalom_middle': (1.0, 1.0, 0.0), - 'slalom_far': (0.0, 1.0, 0.0), - 'torpedo_sawfish_hole': (1.0, 0.0, 0.0), - 'torpedo_shark_hole': (0.0, 1.0, 0.0), - 'gate_saw': (0.0, 0.0, 1.0), - 'gate_shark': (1.0, 0.0, 0.0) - } - - self.create_publishers() - - # CV and bridge init - self.bridge = CvBridge() - - # Init global vars - self.depth_image = None - self.camera_info_gathered = False - self.depth_info_gathered = False - self.gray_image = None - self.mask = None - self.accumulated_points = [] - self.detection_id_counter = 0 - self.centroid_history = {} - self.orientation_history = {} - self.temp_markers = [] - self.last_publish_time = time.time() - self.open_torpedo_centroid = None - self.open_torpedo_quat = None - self.closed_torpedo_centroid = None - self.closed_torpedo_quat = None - self.holes = [] - self.latest_bbox_class_7 = None - self.latest_bbox_class_8 = None - self.detection_timestamp = None - self.detection_time = None - self.mapping_holes = [] - self.mapping_map_centroid = None - self.mapping_map_quat = None - self.largest_hole = None - self.smallest_hole = None - self.latest_buoy = None - self.plane_normal = None - self.slalom_red_detections = [] - self.active_camera = self.get_parameter('active_camera').get_parameter_value().string_value - self.torp_top = self.get_parameter('torp_top').get_parameter_value().string_value - self.slalom_history = [] - self.slalom_history_size = 10 - self.torpedo_type = None # Will be "shark" or "saw" - self.torpedo_holes = [] - self.torpedo_centroid = None - self.torpedo_quat = None - self.torpedo_top_hole = None - self.torpedo_bottom_hole = None - self.slalom_name = 'slalom_front' - self.bin_target = self.get_parameter('bin_target').get_parameter_value().string_value - - # tf stuff - self.tf_buffer = Buffer() - self.tf_listener = TransformListener(self.tf_buffer, self) - - self.create_switch_service() - - self.create_slalom_switch_service() - - # Set up the camera based on the active_camera parameter - self.setup_camera() - - def create_publishers(self): - # Creating publishers - self.marker_array_publisher = self.create_publisher(MarkerArray, 'visualization_marker_array', 10) - self.publisher = self.create_publisher(Image, 'yolo', 10) - self.point_cloud_publisher = self.create_publisher(PointCloud, 'point_cloud', 10) - self.detection_publisher = self.create_publisher(Detection3DArray, 'detected_objects', 10) - - def delayed_setup(self): - try: - self.setup_camera() - finally: - self.camera_switch_in_progress = False - self.delayed_timer.cancel() - - def create_switch_service(self): - # Create the service for camera switching - self.srv = self.create_service(SetBool, 'set_camera_is_dfc', self.switch_camera_callback) - self.get_logger().info("Camera switch service created. Call to toggle between ffc and dfc cameras") - - def create_slalom_switch_service(self): - self.slalom_srv = self.create_service(SetString, 'set_slalom_type', self.switch_slalom_callback) - self.get_logger().info("Slalom switch service created. Call to change name of pubbed slalom det") - - - def shift_toward_blue(self, img_bgr, blue_boost=0.20, rg_reduce=0.05): - out = img_bgr.astype(np.float32) - out[:, :, 0] *= (1.0 + blue_boost) - out[:, :, 1] *= (1.0 - rg_reduce) - out[:, :, 2] *= (1.0 - rg_reduce) - np.clip(out, 0, 255, out) - return out.astype(np.uint8) - - - def switch_camera_callback(self, request, response): - if getattr(self, 'camera_switch_in_progress', False): - response.success = False - response.message = "Camera switch already in progress." - return response - - self.camera_switch_in_progress = True - - new_camera = 'dfc' if request.data else 'ffc' - old_camera = self.active_camera - - if new_camera == old_camera: - response.success = True - response.message = f"Camera already set to {new_camera}, no change needed" - self.camera_switch_in_progress = False - return response - - self.get_logger().info(f"Switching from {self.active_camera} to {new_camera}") - old_camera = self.active_camera - self.active_camera = new_camera - - # Schedule reconfiguration after a short delay (e.g., 0.1 seconds) - self.delayed_timer = self.create_timer(0.1, self.delayed_setup) - - response.success = True - response.message = f"Successfully switched from {old_camera} to {new_camera}" - return response - - def switch_slalom_callback(self, request, response): - self.slalom_name = request.data - - response.success = True - response.message = f"Successfully set slalom type to {self.slalom_name}" - return response - - def setup_camera(self): - self.get_logger().info(f"Active camera: {self.active_camera}") - - # Set the camera prefix - self.camera_prefix = self.active_camera - - # Set frame ID - self.frame_id = f'{self.robot_ns}/{self.camera_prefix}_left_camera_optical_frame' - - # Get camera-specific parameters - yolo_model = self.get_parameter(f'{self.active_camera}_model').get_parameter_value().string_value - class_id_map_str = self.get_parameter(f'{self.active_camera}_class_id_map').get_parameter_value().string_value - self.conf = self.get_parameter(f'{self.active_camera}_threshold').get_parameter_value().double_value - self.iou = self.get_parameter(f'{self.active_camera}_iou').get_parameter_value().double_value - - self.get_logger().info(f"Yolo Model: {yolo_model}") - self.get_logger().info(f"Class id map str: {class_id_map_str}") - self.get_logger().info(f"Confidence Threshold: {self.conf}") - self.get_logger().info(f"IOU: {self.iou}") - - self.load_class_id_map(class_id_map_str) - self.load_model(yolo_model) - self.reset_collection_variables() - self.destroy_subscriptions() - self.create_subscriptions() - - def load_class_id_map(self, class_id_map_str): - # Load class ID map - self.class_id_map = yaml.safe_load(class_id_map_str) if class_id_map_str else {} - - # Add default class ID map if none provided - if not self.class_id_map: - self.get_logger().info(f"No class id map found, defaulting to:") - if self.active_camera == 'ffc': - self.class_id_map = { - 0: 'bin_target', - 1: 'mapping_map', - 2: 'mapping_hole', - 3: 'gate_hot', - 4: 'gate_cold', - 5: 'bin_temperature', - 6: 'bin' - } - else: # dfc - self.class_id_map = { - 0: 'bin_target' - } - else: - self.get_logger().info(f"Class id map found:") - - if self.active_camera == 'ffc': - # Update internal class_id_map - self.class_id_map.update({ - 21: "mapping_largest_hole", - 22: "mapping_smallest_hole" - }) - - self.get_logger().info(f"{self.class_id_map}") - - def reset_collection_variables(self): - # Reset camera-related variables - self.depth_image = None - self.camera_info_gathered = False - self.depth_info_gathered = False - - def destroy_subscriptions(self): - # Unsubscribe from old topics if subscriptions exist - if hasattr(self, 'zed_info_subscription'): - try: - topic = self.zed_info_subscription.topic_name - except Exception: - topic = "unknown" - self.destroy_subscription(self.zed_info_subscription) - self.get_logger().info(f"Destroying camera info subscription: {topic}") - if hasattr(self, 'depth_info_subscription'): - try: - topic = self.depth_info_subscription.topic_name - except Exception: - topic = "unknown" - self.destroy_subscription(self.depth_info_subscription) - self.get_logger().info(f"Destroying depth info subscription: {topic}") - if hasattr(self, 'image_subscription'): - try: - topic = self.image_subscription.topic_name - except Exception: - topic = "unknown" - self.destroy_subscription(self.image_subscription) - self.get_logger().info(f"Destroying image subscription: {topic}") - if hasattr(self, 'depth_subscription'): - try: - topic = self.depth_subscription.topic_name - except Exception: - topic = "unknown" - self.destroy_subscription(self.depth_subscription) - self.get_logger().info(f"Destroying depth subscription: {topic}") - - def create_subscriptions(self): - # Create new subscriptions - self.zed_info_subscription = self.create_subscription( - CameraInfo, - f'/{self.robot_ns}/{self.camera_prefix}/zed_node/left/camera_info', - self.camera_info_callback, - 1 - ) - self.get_logger().info(f"Creating camera info subcription: /{self.robot_ns}/{self.camera_prefix}/zed_node/left/camera_info") - - self.depth_info_subscription = self.create_subscription( - CameraInfo, - f'/{self.robot_ns}/{self.camera_prefix}/zed_node/depth/camera_info', - self.depth_info_callback, - 1 - ) - self.get_logger().info(f"Creating depth info subcription: /{self.robot_ns}/{self.camera_prefix}/zed_node/depth/camera_info") - - self.image_subscription = self.create_subscription( - Image, - f'/{self.robot_ns}/{self.camera_prefix}/zed_node/left/image_rect_color', - self.image_callback, - 10 - ) - self.get_logger().info(f"Creating image subcription: /{self.robot_ns}/{self.camera_prefix}/zed_node/left/image_rect_color") - - self.depth_subscription = self.create_subscription( - Image, - f'/{self.robot_ns}/{self.camera_prefix}/zed_node/depth/depth_registered', - self.depth_callback, - 10 - ) - self.get_logger().info(f"Creating depth subcription: /{self.robot_ns}/{self.camera_prefix}/zed_node/depth/depth_registered") - - def load_model(self, yolo_model): - # Load model - tensorrt_wrapper_dir = get_package_share_directory("tensor_detector") - yolo_model_path = os.path.join(tensorrt_wrapper_dir, 'weights', yolo_model) - self.get_logger().info(f"Loading model path: {yolo_model_path}") - self.initialize_yolo(yolo_model_path) - - - def initialize_yolo(self, yolo_model_path): - # Check if the .engine version of the model exists - engine_model_path = yolo_model_path.replace('.pt', '.engine') - if yolo_model_path.endswith(".pt") and os.path.exists(engine_model_path): - # If the .engine file exists, use it instead - yolo_model_path = engine_model_path - - self.model = YOLO(yolo_model_path, task="segment") - - # Check if the model needs to be exported - if self.export and yolo_model_path.endswith(".pt"): - self.model.export(format="engine") - # Update the model path to use the .engine file - # Note: This will create a new .engine file if it didn't exist before - self.initialize_yolo(engine_model_path) # Recursive call with the new .engine path - - def is_inside_bbox(self, inner_bbox, outer_bbox): - inner_x_min, inner_y_min, inner_x_max, inner_y_max = inner_bbox - outer_x_min, outer_y_min, outer_x_max, outer_y_max = outer_bbox - - return (inner_x_min >= outer_x_min and inner_x_max <= outer_x_max and - inner_y_min >= outer_y_min and inner_y_max <= outer_y_max) - - def has_subscribers(self, publisher): - return publisher.get_subscription_count() > 0 - def camera_info_callback(self, msg): - if not self.camera_info_gathered: - if self.print_camera_info: - self.get_logger().info(f"Camera info: {msg}") - - self.intrinsic_matrix = np.array(msg.k).reshape((3, 3)) - self.fx = msg.k[0] - self.cx = msg.k[2] - self.fy = msg.k[4] - self.cy = msg.k[5] - - self.distortion_matrix = np.array(msg.d) - - self.camera_info_gathered = True - # self.zed_info_subscription.destroy() - - def depth_info_callback(self, msg): - if not self.depth_info_gathered: - self.depth_intrinsic_matrix = np.array(msg.k).reshape((3, 3)) - self.depth_distortion_matrix = np.array(msg.d) - self.depth_info_gathered = True - # self.depth_info_subscription.destroy() - - def depth_callback(self, msg): - self.depth_image = self.bridge.imgmsg_to_cv2(msg, desired_encoding='passthrough') - - def process_slalom_red_detections(self, detections_array): - """ - Process all slalom_red detections to determine closest, middle, and farthest. - If 3 slaloms are found with proper spacing and aspect ratio, report all. - If 1 or 2 are found, report only the closest (no aspect ratio check). - """ - num_detections = len(self.slalom_red_detections) - - # if num_detections == 3: - # # Validate aspect ratios (height/width >= 2.5) - # valid_detections = [] - # for detection_data in self.slalom_red_detections: - # height = detection_data['bbox_height'] - # width = detection_data['bbox_width'] - # aspect_ratio = height / width if width > 0 else 0 - - # if aspect_ratio >= 2.5: - # valid_detections.append(detection_data) - # else: - # # self.get_logger().warn(f"Slalom rejected: aspect ratio {aspect_ratio:.2f} < 2.5") - # return - - # # Check if we still have 3 valid detections after aspect ratio filtering - # if len(valid_detections) != 3: - # # self.get_logger().warn(f"Only {len(valid_detections)}/3 slaloms meet aspect ratio requirement") - # self.slalom_red_detections = [] - # return - - # # Sort by distance (Z coordinate - depth) - # sorted_detections = sorted(valid_detections, key=lambda x: x['centroid'][2]) - - # # Check minimum spacing between consecutive slaloms (at least 1m apart) - # spacing_valid = True - # for i in range(len(sorted_detections) - 1): - # current_pos = sorted_detections[i]['centroid'] - # next_pos = sorted_detections[i + 1]['centroid'] - - # # Calculate 3D distance between centroids - # distance = ((next_pos[0] - current_pos[0])**2 + - # (next_pos[1] - current_pos[1])**2 + - # (next_pos[2] - current_pos[2])**2)**0.5 - - # if distance < 1.0: # 1 meter minimum spacing - # # self.get_logger().warn(f"Slaloms too close: {distance:.2f}m < 1.0m between slalom {i} and {i+1}") - # spacing_valid = False - # break - - # # Only proceed if spacing is valid - # if not spacing_valid: - # # self.get_logger().warn("Slalom spacing validation failed") - # self.slalom_red_detections = [] - # return - - # # All validations passed - proceed with classification - # # self.get_logger().info("3 valid slaloms found with proper spacing and aspect ratio") - - # # Assign new class names based on distance - # class_names = ['slalom_close', 'slalom_middle', 'slalom_far'] - # for i, detection_data in enumerate(sorted_detections): - # # Create new detection with updated class name - # detection = Detection3D() - # detection.header.frame_id = self.frame_id - # if self.use_incoming_timestamp: - # detection.header.stamp = self.detection_timestamp - # else: - # detection.header.stamp = self.get_clock().now().to_msg() - - # # Create object hypothesis with new class name - # detection.results.append(self.create_object_hypothesis_with_pose( - # class_names[i], - # detection_data['centroid'], - # detection_data['quat'], - # detection_data['conf'] - # )) - - # # Publish marker with new class name - # self.publish_marker( - # detection_data['quat'], - # detection_data['centroid'], - # class_names[i], - # detection_data['bbox_width'], - # detection_data['bbox_height'] - # ) - # detections_array.detections.append(detection) - - # # Clear the list for next frame - # self.slalom_red_detections = [] - - if num_detections > 0: - # Report only the closest one - closest_detection = min(self.slalom_red_detections, key=lambda x: x['centroid'][2]) - # Store closest detection in history (keep last slalom_history_size) - closest_data = { - 'centroid': closest_detection['centroid'], - 'quat': closest_detection['quat'], - 'conf': closest_detection['conf'] - } - self.slalom_history.append(closest_data) - if len(self.slalom_history) > self.slalom_history_size: - self.slalom_history.pop(0) +from detection import DetectionProcessor, ProcessorConfig, Frame +from pointcloud import CloudColorMode +from yolo_model import YoloModel - # Find the closest one from history based on Z coordinate - if self.slalom_history: - closest_in_history = min(self.slalom_history, key=lambda x: x['centroid'][2]) - #self.get_logger().info(f"Closest slalom in history: [{closest_in_history['centroid'][0]:.2f}, {closest_in_history['centroid'][1]:.2f}, {closest_in_history['centroid'][2]:.2f}]") - # self.get_logger().info(f"Frames: parent {parent_frame}, self.frame_id ") - # Always use slalom_parent's yaw for detection - parent_frame = "slalom_parent_frame" - detection_quat = None - try: - # Detections need to be in camera frame - parent_quat_tf = self.tf_buffer.lookup_transform(self.frame_id, parent_frame, Time()).transform.rotation - detection_quat = [ - parent_quat_tf.x, - parent_quat_tf.y, - parent_quat_tf.z, - parent_quat_tf.w - ] - except: - self.get_logger().warning(f'Pubbing slalom detection with rotation since {parent_frame} not found') - detection_quat = closest_in_history['quat'] - # Fun unknown offset for 90 deg - z_to_x_quat = quaternion_from_euler(0.0, -1.57079632679, 0.0) - corrected_quat = quaternion_multiply(detection_quat, z_to_x_quat) - # self.get_logger().info(f'Pubbing slalom with quat {corrected_quat}') - - - # Create detection using closest from history - detection = Detection3D() - detection.header.frame_id = self.frame_id - if self.use_incoming_timestamp: - detection.header.stamp = self.detection_timestamp - else: - detection.header.stamp = self.get_clock().now().to_msg() - - detection.results.append(self.create_object_hypothesis_with_pose( - self.slalom_name, - closest_in_history['centroid'], - corrected_quat, - closest_in_history['conf'] - )) - - self.publish_marker( - corrected_quat, - closest_in_history['centroid'], - self.slalom_name, - closest_detection['bbox_width'], # Use current frame's bbox dimensions - closest_detection['bbox_height'] - ) - detections_array.detections.append(detection) - - # # Repeat publication of the closest slalom to maintain heading through course - # detection = Detection3D() - # detection.header.frame_id = self.frame_id - # if self.use_incoming_timestamp: - # detection.header.stamp = self.detection_timestamp - # else: - # detection.header.stamp = self.get_clock().now().to_msg() - - # detection.results.append(self.create_object_hypothesis_with_pose( - # 'slalom_front', - # closest_in_history['centroid'], - # closest_in_history['quat'], - # closest_in_history['conf'] - # )) - - # self.publish_marker( - # closest_in_history['quat'], - # closest_in_history['centroid'], - # 'slalom_front', - # closest_detection['bbox_width'], # Use current frame's bbox dimensions - # closest_detection['bbox_height'] - # ) - # detections_array.detections.append(detection) - - self.slalom_red_detections = [] - - else: - # No valid detections - self.slalom_red_detections = [] - - def image_callback(self, msg: Image): - - if self.log_processing_time: - self.detection_time = time.time() - - if self.depth_image is None or not self.camera_info_gathered: - self.get_logger().warning("Skipping image because either no depth image or camera info is available.", throttle_duration_sec=1) - return - - cv_image = self.bridge.imgmsg_to_cv2(msg, "bgr8") - # cv_image = self.shift_toward_blue(cv_image, blue_boost=0.2, rg_reduce=0.05) - self.gray_image = cv2.cvtColor(cv_image, cv2.COLOR_BGR2GRAY) - if cv_image is None: - return - results = self.model(cv_image, verbose=False, iou=self.iou, conf=self.conf) - - detections = Detection3DArray() - detections.header.frame_id = self.frame_id - self.detection_timestamp = msg.header.stamp - if self.use_incoming_timestamp: - detections.header.stamp = msg.header.stamp - else: - detections.header.stamp = self.get_clock().now().to_msg() - - if self.mask is None or self.mask.shape[:2] != cv_image.shape[:2]: - self.mask = np.zeros(cv_image.shape[:2], dtype=np.uint8) - - # Reset mapping holes each image - self.mapping_holes = [] - self.torpedo_holes = [] - self.slalom_red_detections = [] - self.torpedo_top_hole = None - self.torpedo_bottom_hole = None - self.largest_hole = None - self.smallest_hole = None - - for result in results: - for box in result.boxes.cpu().numpy(): - if box.conf[0] <= self.conf: - continue - class_id = box.cls[0] - - if class_id in self.class_id_map: - conf = box.conf[0] - #self.get_logger().info(f"class id: {class_id}") - # If its a hole, store it, otherwise make the detection message - if self.class_id_map[class_id] == "mapping_hole": - - #self.get_logger().info(f"class id: {class_id}") - x_min, y_min, x_max, y_max = map(int, box.xyxy[0]) - if self.use_incoming_timestamp: - self.holes.append(((x_min, y_min, x_max, y_max), self.detection_timestamp)) - else: - self.holes.append(((x_min, y_min, x_max, y_max), self.get_clock().now().to_msg())) - self.mapping_holes.append(box) - #self.get_logger().info(f"Holes after adding: {len(self.holes)}") - #self.get_logger().info(f"holes: {len(self.mapping_holes)}") - if self.class_id_map[class_id] == "torpedo_hole": - - #self.get_logger().info(f"class id: {class_id}") - x_min, y_min, x_max, y_max = map(int, box.xyxy[0]) - if self.use_incoming_timestamp: - self.holes.append(((x_min, y_min, x_max, y_max), self.detection_timestamp)) - else: - self.holes.append(((x_min, y_min, x_max, y_max), self.get_clock().now().to_msg())) - self.torpedo_holes.append(box) - #self.get_logger().info(f"Holes after adding: {len(self.holes)}") - #self.get_logger().info(f"holes: {len(self.mapping_holes)}") - elif class_id in self.class_id_map and self.class_id_map[class_id] == "slalom_red": - # Don't create detection immediately, store for later processing - detection_temp = self.create_detection3d_message(box, cv_image, conf) - if detection_temp and detection_temp.results: - # Extract the centroid and quaternion from the detection - result_temp= detection_temp.results[0] - centroid = [ - result_temp.pose.pose.position.x, - result_temp.pose.pose.position.y, - result_temp.pose.pose.position.z - ] - quat = [ - result_temp.pose.pose.orientation.x, - result_temp.pose.pose.orientation.y, - result_temp.pose.pose.orientation.z, - result_temp.pose.pose.orientation.w - ] - - # Store detection data for sorting - x_min, y_min, x_max, y_max = map(int, box.xyxy[0]) - bbox_width = x_max - x_min - bbox_height = y_max - y_min - - self.slalom_red_detections.append({ - 'centroid': centroid, - 'quat': quat, - 'conf': box.conf[0], - 'bbox_width': bbox_width, - 'bbox_height': bbox_height - }) - else: - detection = self.create_detection3d_message(box, cv_image, conf) - - if detection: - detections.detections.append(detection) - - self.mask.fill(0) - for contour in result.masks.xy: - contour = np.array(contour, dtype=np.int32) - cv2.fillPoly(self.mask, [contour], 255) - mask_msg = self.bridge.cv2_to_imgmsg(self.mask,encoding="mono8") - #self.mask_publisher.publish(mask_msg) - - - # Create detection3d for the holes if there are 4 - if len(self.mapping_holes) == 4: - #self.get_logger().info(f"holes: {len(self.mapping_holes)}") - self.find_smallest_and_largest_holes() - - if self.smallest_hole is not None: - class_id = self.smallest_hole.cls[0] - conf = self.smallest_hole.conf[0] - detection = self.create_detection3d_message(self.smallest_hole, cv_image, conf, "smallest") - if detection: - detections.detections.append(detection) - else: - self.get_logger().warning("No smallest hole found.") - - if self.largest_hole is not None: - class_id = self.largest_hole.cls[0] - conf = self.largest_hole.conf[0] - detection = self.create_detection3d_message(self.largest_hole, cv_image, conf, "largest") - if detection: - detections.detections.append(detection) - else: - self.get_logger().warning("No largest hole found.") - - if len(self.torpedo_holes) == 2: - #self.get_logger().info(f"holes: {len(self.mapping_holes)}") - self.find_top_and_bottom_holes() - - - if self.torpedo_top_hole is not None: - class_id = self.torpedo_top_hole.cls[0] - conf = self.torpedo_top_hole.conf[0] - detection = self.create_detection3d_message(self.torpedo_top_hole, cv_image, conf, "smallest") - if detection: - detections.detections.append(detection) - else: - self.get_logger().warning("No top hole found.") - - if self.torpedo_bottom_hole is not None: - class_id = self.torpedo_bottom_hole.cls[0] - conf = self.torpedo_bottom_hole.conf[0] - detection = self.create_detection3d_message(self.torpedo_bottom_hole, cv_image, conf, "largest") - if detection: - detections.detections.append(detection) - else: - self.get_logger().warning("No bottom hole found.") - - self.publish_markers(self.temp_markers) - self.temp_markers = [] # Clear the list for the next frame - - annotated_frame = results[0].plot() - - self.publish_accumulated_point_cloud() - - if self.has_subscribers(self.publisher): - annotated_msg = self.bridge.cv2_to_imgmsg(annotated_frame, encoding="bgr8") - self.publisher.publish(annotated_msg) - - self.process_slalom_red_detections(detections) - - # if not self.torpedo_seen and len(self.holes) > 1 and self.torpedo_centroid is not None and self.torpedo_quat is not None: - # detections.detections.append(self.spoof_torpedo()) - self.torpedo_seen = False - self.cleanup_old_holes(age_threshold=2.0) - if self.log_processing_time: - self.detection_time = time.time() - self.detection_time - self.get_logger().info(f"Total time (ms): {self.detection_time * 1000}") - self.get_logger().info(f"FPS: {1/self.detection_time}") - # self.get_logger().info(f"detections: {detections}") - if self.has_subscribers(self.detection_publisher): - self.detection_publisher.publish(detections) - self.detection_id_counter = 0 - - def get_hole_size(self, hole): - x_min, y_min, x_max, y_max = map(int, hole.xyxy[0]) - hole_width = x_max - x_min - hole_height = y_max - y_min - hole_size = hole_height*hole_width - return hole_size - - # def find_smallest_and_largest_holes(self): - # hole_sizes = [] - # for hole in self.mapping_holes: - # hole_size = self.get_hole_size(hole) - - # if self.largest_hole is None: - # self.largest_hole = hole - # else: - # largest_hole_size = self.get_hole_size(self.largest_hole) - # if hole_size > largest_hole_size: - # self.largest_hole = hole - - # if self.smallest_hole is None: - # self.smallest_hole = hole - # else: - # smallest_hole_size = self.get_hole_size(self.smallest_hole) - # if hole_size < smallest_hole_size: - # self.smallest_hole = hole - - def find_top_and_bottom_holes(self): - if self.plane_normal is None: - self.get_logger().warning("Plane normal not defined. Cannot compute hole positions.") - return - if self.torpedo_centroid is None: - self.get_logger().warning("Centroid not defined. Cannot compute hole positions.") - return - - hole_positions = [] - for hole in self.torpedo_holes: - x_min, y_min, x_max, y_max = map(int, hole.xyxy[0]) - bbox_center_x = (x_min + x_max) / 2 - bbox_center_y = (y_min + y_max) / 2 - - # Calculate 3D position using plane intersection - d = np.linalg.inv(self.intrinsic_matrix) @ np.array([bbox_center_x, bbox_center_y, 1.0]) - d = d / np.linalg.norm(d) - - n = self.plane_normal - p0 = self.torpedo_centroid - - numerator = np.dot(n, p0) - denominator = np.dot(n, d) - if denominator == 0: - self.get_logger().warning(f"Denominator zero for hole center ({bbox_center_x}, {bbox_center_y}). Skipping this hole.") - continue - t = numerator / denominator - if t <= 0: - self.get_logger().warning(f"Intersection behind the camera for hole center ({bbox_center_x}, {bbox_center_y}). Skipping this hole.") - continue - point_3d = t * d - - # Store hole with its 3D position and Y coordinate for sorting - hole_positions.append((hole, point_3d, point_3d[1])) - #self.get_logger().info(f"Hole position computed: Y={point_3d[1]}") - - if not hole_positions: - self.get_logger().warning("No valid hole positions computed.") - return - - # Sort holes based on Y coordinate (height) - hole_positions.sort(key=lambda x: x[2]) - - self.torpedo_bottom_hole = hole_positions[-1][0] # Lower Y = bottom - self.torpedo_top_hole = hole_positions[0][0] # Higher Y = top - - #self.get_logger().info(f"Bottom hole Y: {hole_positions[0][2]}") - #self.get_logger().info(f"Top hole Y: {hole_positions[-1][2]}") - - def find_smallest_and_largest_holes(self): - if self.plane_normal is None or self.mapping_map_centroid is None: - self.get_logger().warning("Plane normal or centroid not defined. Cannot compute hole sizes.") - return - - hole_sizes = [] - for hole in self.mapping_holes: - x_min, y_min, x_max, y_max = map(int, hole.xyxy[0]) - corners_2d = [ - (x_min, y_min), - (x_max, y_min), - (x_max, y_max), - (x_min, y_max) - ] - corners_3d = [] - for (u, v) in corners_2d: - d = np.linalg.inv(self.intrinsic_matrix) @ np.array([u, v, 1.0]) - n = self.plane_normal - p0 = self.mapping_map_centroid - - numerator = np.dot(n, p0) - denominator = np.dot(n, d) - if denominator == 0: - self.get_logger().warning(f"Denominator zero for point ({u}, {v}). Skipping this corner.") - continue - t = numerator / denominator - if t <= 0: - self.get_logger().warning(f"Intersection behind the camera for point ({u}, {v}). Skipping this corner.") - continue - point_3d = t * d - corners_3d.append(point_3d) - - if len(corners_3d) == 4: - width_vector = corners_3d[1] - corners_3d[0] - height_vector = corners_3d[3] - corners_3d[0] - width = np.linalg.norm(width_vector) - height = np.linalg.norm(height_vector) - hole_size = width * height - hole_sizes.append((hole, hole_size)) - #self.get_logger().info(f"Hole size computed: {hole_size}") - else: - self.get_logger().warning(f"Not enough valid corners for hole. Expected 4, got {len(corners_3d)}") - continue - - if not hole_sizes: - self.get_logger().warning("No valid hole sizes computed.") - return - - # Sort holes based on hole_size - hole_sizes.sort(key=lambda x: x[1]) - - self.smallest_hole = hole_sizes[0][0] - self.largest_hole = hole_sizes[-1][0] - - #self.get_logger().info(f"Smallest hole size: {hole_sizes[0][1]}") - #self.get_logger().info(f"Largest hole size: {hole_sizes[-1][1]}") - - def cleanup_old_holes(self, age_threshold=2.0): - # Get the current time as a builtin_interfaces.msg.Time object - current_time = self.get_clock().now().to_msg() - - # for bbox, timestamp in self.holes: - # self.get_logger().info(f"Time for cleanup: {((current_time.sec - timestamp.sec) + (current_time.nanosec - timestamp.nanosec) * 1e-9)}") - # Filter out holes older than the specified age threshold - self.holes = [(bbox, timestamp) for bbox, timestamp in self.holes - if ((current_time.sec - timestamp.sec) + (current_time.nanosec - timestamp.nanosec) * 1e-9) < age_threshold] - - def generate_unique_detection_id(self): - # Increment and return the counter to get a unique ID for each detection - self.detection_id_counter += 1 - return self.detection_id_counter - - def create_detection3d_message(self, box, cv_image, conf, hole_scale=None): - x_min, y_min, x_max, y_max = map(int, box.xyxy[0]) - bbox = (x_min, y_min, x_max, y_max) - bbox_width = x_max - x_min - bbox_height = y_max - y_min - - class_id = int(box.cls[0]) - - bbox_center_x = (x_min + x_max) / 2 - bbox_center_y = (y_min + y_max) / 2 - - class_name = self.class_id_map.get(class_id, "Unknown") - #self.get_logger().info(f"class name: {class_name}") - if class_name == "mapping_map": - - map_width = x_max - x_min - map_height = y_max - y_min - map_area = max(map_width,map_height) - #self.get_logger().info(f"map max: {map_area}") - if map_area < self.map_min_area: - self.get_logger().info(f"Not Publishing: map area {map_area} < {self.map_min_area}") - return None - self.get_logger().info(f"Publishing: map area {map_area} >= {self.map_min_area}") - #self.get_logger().info(f"publishing map") - self.latest_bbox_class_1 = (x_min, y_min, x_max, y_max) - elif class_name in ["torpedo_saw_top", "torpedo_shark_top"]: - - torpedo_width = x_max - x_min - torpedo_height = y_max - y_min - torpedo_area = max(torpedo_width,torpedo_height) - #self.get_logger().info(f"map max: {map_area}") - if torpedo_area < self.map_min_area: - self.get_logger().info(f"Not Publishing: map area {torpedo_area} < {self.map_min_area}") - return None - self.get_logger().info(f"Publishing: map area {torpedo_area} >= {self.map_min_area}") - #self.get_logger().info(f"publishing map") - self.latest_bbox_class_1 = (x_min, y_min, x_max, y_max) - # Replace the torpedo_hole detection logic in create_detection3d_message: - - elif class_name == "torpedo_hole": - if self.torpedo_centroid is not None and self.torpedo_quat is not None and self.latest_bbox_class_1 and self.is_inside_bbox(bbox, self.latest_bbox_class_1): - if self.plane_normal is None: - return None - d = np.linalg.inv(self.intrinsic_matrix) @ np.array([bbox_center_x, bbox_center_y, 1.0]) - d = d / np.linalg.norm(d) - - # Plane normal and point - n = self.plane_normal # From SVD - p0 = self.torpedo_centroid # Centroid of the plane - - # Compute t - numerator = np.dot(n, p0) - denominator = np.dot(n, d) - if denominator == 0: - return None # Avoid division by zero - - t = numerator / denominator - hole_position = t * d - - # Update centroid and orientation - hole_centroid = hole_position - hole_quat = self.torpedo_quat - - # Determine hole class name based on torpedo type and hole scale - if self.torp_top is None: - return None - self.get_logger().info(f"torpedo top: {self.torp_top}") - self.get_logger().info(f"hole scale: {hole_scale}") - if hole_scale == "smallest": # Top hole lol - if self.torp_top == "shark": - class_name = "torpedo_shark_hole" # Shark top -> shark hole - else: # torpedo_type == "saw" - class_name = "torpedo_sawfish_hole" # Saw top -> saw hole - elif hole_scale == "largest": # Bottom hole robosub moment the spaghet is 🤌 - if self.torp_top == "shark": - class_name = "torpedo_sawfish_hole" # Shark top -> saw hole (bottom) - else: # torpedo_type == "saw" - class_name = "torpedo_shark_hole" # Saw top -> shark hole (bottom) - else: - return None - - self.publish_marker(hole_quat, hole_centroid, class_name, bbox_width, bbox_height) - - # Create Detection3D message - detection = Detection3D() - detection.header.frame_id = self.frame_id - if self.use_incoming_timestamp: - detection.header.stamp = self.detection_timestamp - else: - detection.header.stamp = self.get_clock().now().to_msg() - detection.results.append(self.create_object_hypothesis_with_pose(class_name, hole_centroid, hole_quat, conf)) - return detection - elif class_name == "mapping_hole": - if self.mapping_map_centroid is not None and self.mapping_map_quat is not None and self.latest_bbox_class_1 and self.is_inside_bbox(bbox, self.latest_bbox_class_1): - #hole_quat = self.mapping_map_quat - #hole_centroid = self.calculate_centroid(bbox_center_x, bbox_center_y, self.mapping_map_centroid[2]) - - # if self.mapping_map_centroid[2] > 5: - # return None - if self.plane_normal is None: - return None - d = np.linalg.inv(self.intrinsic_matrix) @ np.array([bbox_center_x, bbox_center_y, 1.0]) - d = d / np.linalg.norm(d) - - # Plane normal and point - n = self.plane_normal # From SVD - p0 = self.mapping_map_centroid # Centroid of the plane - - # Compute t - numerator = np.dot(n, p0) - denominator = np.dot(n, d) - if denominator == 0: - return None # Avoid division by zero - - t = numerator / denominator - hole_position = t * d - - # Update centroid and orientation - hole_centroid = hole_position - hole_quat = self.mapping_map_quat - - if hole_scale == "smallest": - class_name = "torpedo_shark_hole" - elif hole_scale == "largest": - class_name = "torpedo_sawfish_hole" - else: - return None - - self.publish_marker(hole_quat, hole_centroid, class_name, bbox_width, bbox_height) - - # Create Detection3D message - detection = Detection3D() - detection.header.frame_id = self.frame_id - if self.use_incoming_timestamp: - detection.header.stamp = self.detection_timestamp - else: - detection.header.stamp = self.get_clock().now().to_msg() - detection.results.append(self.create_object_hypothesis_with_pose(class_name, hole_centroid, hole_quat, conf)) - return detection - - # elif class_name == "torpedo_open": - # self.latest_bbox_class_7 = (x_min, y_min, x_max, y_max) - # elif class_name == "torpedo_closed": - # self.latest_bbox_class_8 = (x_min, y_min, x_max, y_max) - # elif class_name == "torpedo_hole": - # if self.open_torpedo_centroid is not None and self.open_torpedo_quat is not None and self.latest_bbox_class_7 and self.is_inside_bbox(bbox, self.latest_bbox_class_7): - # class_name = "torpedo_open_hole" - # hole_quat = self.open_torpedo_quat - # hole_centroid = self.calculate_centroid(bbox_center_x, bbox_center_y, self.open_torpedo_centroid[2]) - # elif self.closed_torpedo_centroid is not None and self.closed_torpedo_quat is not None and self.latest_bbox_class_8 and self.is_inside_bbox(bbox, self.latest_bbox_class_8): - # class_name = "torpedo_closed_hole" - # hole_quat = self.closed_torpedo_quat - # hole_centroid = self.calculate_centroid(bbox_center_x, bbox_center_y, self.closed_torpedo_centroid[2]) - # else: - # return None - - # if self.use_incoming_timestamp: - # self.holes.append(((x_min, y_min, x_max, y_max), self.detection_timestamp)) - # else: - # self.holes.append(((x_min, y_min, x_max, y_max), self.get_clock().now().to_msg())) - - - # self.publish_marker(hole_quat, hole_centroid, class_name, bbox_width, bbox_height) - - # # Create Detection3D message - # detection = Detection3D() - # detection.header.frame_id = self.frame_id - # if self.use_incoming_timestamp: - # detection.header.stamp = self.detection_timestamp - # else: - # detection.header.stamp = self.get_clock().now().to_msg() - # detection.results.append(self.create_object_hypothesis_with_pose(class_name, hole_centroid, hole_quat, conf)) - # return detection - - if class_name == "slalom_red": - shrink_x = (x_max - x_min) * 0.3 - shrink_y = (y_max - y_min) * self.class_detect_shrink - else: - # Calculate the shrink size based on the class_detect_shrink percentage - shrink_x = (x_max - x_min) * self.class_detect_shrink - shrink_y = (y_max - y_min) * self.class_detect_shrink - - # Adjust the bounding box coordinates to exclude the edges - x_min = int(x_min + shrink_x) - x_max = int(x_max - shrink_x) - y_min = int(y_min + shrink_y) - y_max = int(y_max - shrink_y) - - # Extract the region of interest based on the bounding box - mask_roi = self.mask[y_min:y_max, x_min:x_max] - cropped_gray_image = self.gray_image[y_min:y_max, x_min:x_max] - masked_gray_image = cv2.bitwise_and(cropped_gray_image, cropped_gray_image, mask=mask_roi) - - if class_name == "mapping_map": - # Prepare the ROI mask, excluding the holes - mask_roi = self.mask[y_min:y_max, x_min:x_max].copy() # Work on a copy to avoid modifying the original - - # Dynamic padding calculation based on bounding box size - padding_x = int((x_max - x_min) * 0.1) # 10% of the bounding box width - padding_y = int((y_max - y_min) * 0.1) # 10% of the bounding box height - #self.get_logger().info(f"holes for exclusion count: {len(self.holes)}") - - self.get_logger().info(f"Holes: {len(self.holes)}") - for hole_bbox, _ in self.holes: - hole_x_min, hole_y_min, hole_x_max, hole_y_max = hole_bbox - adjusted_hole_x_min = max(hole_x_min - x_min - padding_x, 0) - adjusted_hole_y_min = max(hole_y_min - y_min - padding_y, 0) - adjusted_hole_x_max = min(hole_x_max - x_min + padding_x, mask_roi.shape[1]) - adjusted_hole_y_max = min(hole_y_max - y_min + padding_y, mask_roi.shape[0]) - - # Set the hole region in mask_roi to 0 to exclude it from feature detection - mask_roi[adjusted_hole_y_min:adjusted_hole_y_max, adjusted_hole_x_min:adjusted_hole_x_max] = 0 - - # Apply morphological operations to refine the exclusion zones - kernel = np.ones((5, 5), np.uint8) - mask_roi = cv2.dilate(mask_roi, kernel, iterations=1) - mask_roi = cv2.erode(mask_roi, kernel, iterations=1) - - # Continue with feature detection using the adjusted mask_roi - masked_gray_image = cv2.bitwise_and(cropped_gray_image, cropped_gray_image, mask=mask_roi) - - if class_name in ["torpedo_shark_top", "torpedo_saw_top"]: - # Prepare the ROI mask, excluding the holes - mask_roi = self.mask[y_min:y_max, x_min:x_max].copy() # Work on a copy to avoid modifying the original - - # Dynamic padding calculation based on bounding box size - padding_x = int((x_max - x_min) * 0.1) # 10% of the bounding box width - padding_y = int((y_max - y_min) * 0.1) # 10% of the bounding box height - #self.get_logger().info(f"holes for exclusion count: {len(self.holes)}") - - self.get_logger().info(f"Holes: {len(self.holes)}") - for hole_bbox, _ in self.holes: - hole_x_min, hole_y_min, hole_x_max, hole_y_max = hole_bbox - adjusted_hole_x_min = max(hole_x_min - x_min - padding_x, 0) - adjusted_hole_y_min = max(hole_y_min - y_min - padding_y, 0) - adjusted_hole_x_max = min(hole_x_max - x_min + padding_x, mask_roi.shape[1]) - adjusted_hole_y_max = min(hole_y_max - y_min + padding_y, mask_roi.shape[0]) - - # Set the hole region in mask_roi to 0 to exclude it from feature detection - mask_roi[adjusted_hole_y_min:adjusted_hole_y_max, adjusted_hole_x_min:adjusted_hole_x_max] = 0 - - # Apply morphological operations to refine the exclusion zones - kernel = np.ones((5, 5), np.uint8) - mask_roi = cv2.dilate(mask_roi, kernel, iterations=1) - mask_roi = cv2.erode(mask_roi, kernel, iterations=1) - - # Continue with feature detection using the adjusted mask_roi - masked_gray_image = cv2.bitwise_and(cropped_gray_image, cropped_gray_image, mask=mask_roi) - - # elif class_name == "slalom_close": - # depth_value = self.depth_image[int(bbox_center_y), int(bbox_center_x)] - # centroid = self.calculate_centroid(bbox_center_x, bbox_center_y, float(depth_value)) - # quat, _ = self.calculate_quaternion_and_euler_angles(-self.default_normal) - - # self.publish_marker(quat, centroid, class_name, bbox_width, bbox_height) - - # # Create Detection3D message - # detection = Detection3D() - # detection.header.frame_id = self.frame_id - # if self.use_incoming_timestamp: - # detection.header.stamp = self.detection_timestamp - # else: - # detection.header.stamp = self.get_clock().now().to_msg() - - # # Set the pose - # class_name = "slalom_close" - # detection.results.append(self.create_object_hypothesis_with_pose(class_name, centroid, quat, conf)) - - # return detection - - - - # elif class_name == "slalom_red": - # depth_value = self.depth_image[int(bbox_center_y), int(bbox_center_x)] - # centroid = self.calculate_centroid(bbox_center_x, bbox_center_y, float(depth_value)) - # quat, _ = self.calculate_quaternion_and_euler_angles(-self.default_normal) - - # self.publish_marker(quat, centroid, class_name, bbox_width, bbox_height) - - - # # Create Detection3D message - # detection = Detection3D() - # detection.header.frame_id = self.frame_id - # if self.use_incoming_timestamp: - # detection.header.stamp = self.detection_timestamp - # else: - # detection.header.stamp = self.get_clock().now().to_msg() - - # # Set the pose - # class_name = "slalom_close" - # detection.results.append(self.create_object_hypothesis_with_pose(class_name, centroid, quat, conf)) - - # return detection - - elif class_name == "slalom_red": - # Sample the depth value at the center of the bounding box - depth_value = self.depth_image[int(bbox_center_y), int(bbox_center_x)] - # self.get_logger().info(f"bbox_center_x: {bbox_center_x}") - # self.get_logger().info(f"bbox_center_y: {bbox_center_y}") - # self.get_logger().info(f"depth: {depth_value}") - if np.isnan(depth_value) or math.isinf(bbox_center_x) or math.isinf(bbox_center_y) or math.isinf(depth_value): - #self.get_logger().info("rejecting slalom_red") - return None - centroid = self.calculate_centroid(bbox_center_x, bbox_center_y, float(depth_value)) - quat, _ = self.calculate_quaternion_and_euler_angles(-self.default_normal) - # self.publish_marker(quat, centroid, class_name, bbox_width, bbox_height) - - # Create Detection3D message - detection = Detection3D() - detection.header.frame_id = self.frame_id - if self.use_incoming_timestamp: - detection.header.stamp = self.detection_timestamp - else: - detection.header.stamp = self.get_clock().now().to_msg() - - # Set the pose - #class_name = "bin_target" - detection.results.append(self.create_object_hypothesis_with_pose(class_name, centroid, quat, conf)) - - return detection - elif class_name in ["torpedo_open", "torpedo_closed"]: - # Prepare the ROI mask, excluding the holes - mask_roi = self.mask[y_min:y_max, x_min:x_max].copy() # Work on a copy to avoid modifying the original - - # Padding for exclusion zone - padding = 10 - self.get_logger().info(f"Holes: {len(self.holes)}") - for hole_bbox, _ in self.holes: - # For simplicity, let's assume hole_bbox is a tuple of (hole_x_min, hole_y_min, hole_x_max, hole_y_max) - # You might need to adjust the coordinates based on the ROI's position - hole_x_min, hole_y_min, hole_x_max, hole_y_max = hole_bbox - adjusted_hole_x_min = max(hole_x_min - x_min - padding, 0) - adjusted_hole_y_min = max(hole_y_min - y_min - padding, 0) - adjusted_hole_x_max = min(hole_x_max - x_min + padding, mask_roi.shape[1]) - adjusted_hole_y_max = min(hole_y_max - y_min + padding, mask_roi.shape[0]) - - # Set the hole region in mask_roi to 0 to exclude it from feature detection - mask_roi[adjusted_hole_y_min:adjusted_hole_y_max, adjusted_hole_x_min:adjusted_hole_x_max] = 0 - - # Continue with feature detection using the adjusted mask_roi - masked_gray_image = cv2.bitwise_and(cropped_gray_image, cropped_gray_image, mask=mask_roi) - - if class_name == "Bin": - class_name = "bin_target" - - #self.get_logger().info(f"class det3d: {class_name}") - # Detect features within the object's bounding box - good_features = cv2.goodFeaturesToTrack(masked_gray_image, maxCorners=0, qualityLevel=0.02, minDistance=1) - - if good_features is not None: - #self.get_logger().info(f"good features: {class_name}") - good_features[:, 0, 0] += x_min # Adjust X coordinates - good_features[:, 0, 1] += y_min # Adjust Y coordinates - - # Convert features to a list of (x, y) points - feature_points = [pt[0] for pt in good_features] +class YOLONode(Node): + def __init__(self): + super().__init__('yolo_orientation') + self.declare_parameters( + namespace='', + parameters=[ + ### YAML Params + ('active_camera', 'ffc'), + ('ffc_model', ''), + ('dfc_model', ''), + ('ffc_class_id_map', ''), + ('dfc_class_id_map', ''), + ('ffc_threshold', 0.9), + ('dfc_threshold', 0.9), + ('ffc_iou', 0.9), + ('dfc_iou', 0.9), + ('robot_namespace', 'talos'), + + ### Tuning (can be in YAML, but don't need to be) + ('class_detect_shrink', 0.0), # Shrinks the mask around the class + ('min_points', 5), # Minimum points required for SVD + ('publish_interval', 0.1), # For visualization markers (also drives lifetime) + ('marker_lifetime', 5.0), # Marker lifetime in seconds (0 = persist until replaced/deleted) + ('slalom_history_size', 10), # The closest slalom from history is published + ('use_incoming_timestamp', True), # Timestamp for detection comes from image callback msg + ('log_processing_time', False), + ('export', True), # Export model + ('print_camera_info', False), + ('torpedo_task_camera', 'ffc'), # Determines which camera will do weird stuff with blood/fire for now (should only be ffc) + ('grid_step', 8), # Pixel spacing for the surface grid sample (smaller = denser) + ('max_sample_points', 500), # Cap points per patch fed to SVD/cloud (0 = uncapped) + ('cloud_color_mode', 'class'), # Point cloud coloring: 'class' (flat COLOR_MAP color) or 'pixel' (sampled from image) + ('publish_box_markers', False), + ] + ) + + self.robot_ns = self.get_parameter('robot_namespace').get_parameter_value().string_value + self.active_camera = self.get_parameter('active_camera').get_parameter_value().string_value + + # Node-level tunables + self.log_processing_time = self.get_parameter('log_processing_time').get_parameter_value().bool_value + self.use_incoming_timestamp = self.get_parameter('use_incoming_timestamp').get_parameter_value().bool_value + self.export = self.get_parameter('export').get_parameter_value().bool_value + self.print_camera_info = self.get_parameter('print_camera_info').get_parameter_value().bool_value + self.publish_interval = self.get_parameter('publish_interval').get_parameter_value().double_value + self.torpedo_task_camera = self.get_parameter('torpedo_task_camera').get_parameter_value().string_value + + self.create_publishers() + + self.bridge = CvBridge() + self.depth_image = None + self.camera_info_gathered = False + + # tf + self.tf_buffer = Buffer() + self.tf_listener = TransformListener(self.tf_buffer, self) + + # The detection logic class + self.detector = DetectionProcessor( + config=self._build_processor_config(), + logger=self.get_logger(), + now_stamp=lambda: self.get_clock().now().to_msg(), + tf_buffer=self.tf_buffer, + ) + + self.create_switch_service() + self.create_slalom_switch_service() + + self.setup_camera() + + def _build_processor_config(self): + mode_str = self.get_parameter('cloud_color_mode').get_parameter_value().string_value + try: + color_mode = CloudColorMode(mode_str) + except ValueError: + self.get_logger().warning( + f"Unknown cloud_color_mode '{mode_str}', falling back to 'class'") + color_mode = CloudColorMode.CLASS + return ProcessorConfig( + class_detect_shrink=self.get_parameter('class_detect_shrink').get_parameter_value().double_value, + min_points=self.get_parameter('min_points').get_parameter_value().integer_value, + slalom_history_size=self.get_parameter('slalom_history_size').get_parameter_value().integer_value, + use_incoming_timestamp=self.use_incoming_timestamp, + publish_interval=self.publish_interval, + marker_lifetime=self.get_parameter('marker_lifetime').get_parameter_value().double_value, + grid_step=self.get_parameter('grid_step').get_parameter_value().integer_value, + max_sample_points=self.get_parameter('max_sample_points').get_parameter_value().integer_value, + cloud_color_mode=color_mode, + publish_box_markers=self.get_parameter('publish_box_markers').get_parameter_value().bool_value, + ) + + def create_publishers(self): + self.marker_array_publisher = self.create_publisher(MarkerArray, '~/visualization_marker_array', 10) + self.annotated_image_publisher = self.create_publisher(CompressedImage, '~/annotated/compressed', 10) + self.point_cloud_publisher = self.create_publisher(PointCloud2, '~/point_cloud', 10) + self.detection_publisher = self.create_publisher(Detection3DArray, 'detected_objects', 10) + + ### Services + def create_switch_service(self): + self.srv = self.create_service(SetBool, 'set_camera_is_dfc', self.switch_camera_callback) + self.get_logger().info("Camera switch service created. Call to toggle between ffc and dfc cameras") + + def create_slalom_switch_service(self): + self.slalom_srv = self.create_service(SetString, 'set_slalom_type', self.switch_slalom_callback) + self.get_logger().info("Slalom switch service created. Call to change name of pubbed slalom det") + + def switch_camera_callback(self, request, response): + if getattr(self, 'camera_switch_in_progress', False): + response.success = False + response.message = "Camera switch already in progress." + return response + + self.camera_switch_in_progress = True + + new_camera = 'dfc' if request.data else 'ffc' + old_camera = self.active_camera + + if new_camera == old_camera: + response.success = True + response.message = f"Camera already set to {new_camera}, no change needed" + self.camera_switch_in_progress = False + return response + + self.get_logger().info(f"Switching from {self.active_camera} to {new_camera}") + old_camera = self.active_camera + self.active_camera = new_camera + + # Reconfigure after a short delay. + self.delayed_timer = self.create_timer(0.1, self.delayed_setup) + + response.success = True + response.message = f"Successfully switched from {old_camera} to {new_camera}" + return response + + def switch_slalom_callback(self, request, response): + self.detector.set_slalom_name(request.data) + response.success = True + response.message = f"Successfully set slalom type to {request.data}" + return response + + def delayed_setup(self): + try: + self.setup_camera() + finally: + self.camera_switch_in_progress = False + self.delayed_timer.cancel() + + ### Cammera lifecycle + def setup_camera(self): + self.get_logger().info(f"Active camera: {self.active_camera}") + self.camera_prefix = self.active_camera + self.frame_id = f'{self.robot_ns}/{self.camera_prefix}_left_camera_optical_frame' + + yolo_model = self.get_parameter(f'{self.active_camera}_model').get_parameter_value().string_value + class_id_map_str = self.get_parameter(f'{self.active_camera}_class_id_map').get_parameter_value().string_value + self.conf = self.get_parameter(f'{self.active_camera}_threshold').get_parameter_value().double_value + self.iou = self.get_parameter(f'{self.active_camera}_iou').get_parameter_value().double_value + + self.get_logger().info(f"Yolo Model: {yolo_model}") + self.get_logger().info(f"Class id map str: {class_id_map_str}") + self.get_logger().info(f"Confidence Threshold: {self.conf}") + self.get_logger().info(f"IOU: {self.iou}") + + self.load_class_id_map(class_id_map_str) + + # Task profile: torpedo (fire/blood) logic runs only on the chosen camera (ffc really, but generic here cause why not) + # On the other camera those classes fall through to the standard path (so bins works like normal) + self.detector.set_torpedo_enabled(self.active_camera == self.torpedo_task_camera) + + weights_dir = os.path.join(get_package_share_directory("tensor_detector"), 'weights') + model_path = os.path.join(weights_dir, yolo_model) + self.get_logger().info(f"Loading model path: {model_path}") + self.model = YoloModel(model_path=model_path, export=self.export) + + self.reset_collection_variables() + self.destroy_subscriptions() + self.create_subscriptions() + + def load_class_id_map(self, class_id_map_str): + # YAML is the single source of truth for the class map + self.class_id_map = yaml.safe_load(class_id_map_str) if class_id_map_str else {} + if not self.class_id_map: + # Not crashing out here because there's if the other camera config is empty after switch the node would crash + self.get_logger().warning("No class_id_map provided in params; no detections will be produced.") + + self.get_logger().info(f"Class id map: {self.class_id_map}") + + def reset_collection_variables(self): + self.depth_image = None + self.camera_info_gathered = False + + def destroy_subscriptions(self): + for attr in ('zed_info_subscription', 'image_subscription', 'depth_subscription'): + if hasattr(self, attr): + sub = getattr(self, attr) + try: + topic = sub.topic_name + except Exception: + topic = "unknown" + self.destroy_subscription(sub) + self.get_logger().info(f"Destroying subscription: {topic}") + + def create_subscriptions(self): + #TODO: Move these to config file, but it's type dependent + base = f'/{self.robot_ns}/{self.camera_prefix}/zed_node' + + info_topic = f'{base}/left/camera_info' + self.zed_info_subscription = self.create_subscription( + CameraInfo, info_topic, self.camera_info_callback, 1) + self.get_logger().info(f"Creating camera info subcription: {info_topic}") + + image_topic = f'{base}/left/image_rect_color/compressed' + self.image_subscription = self.create_subscription( + CompressedImage, image_topic, self.image_callback, 1) + self.get_logger().info(f"Creating image subcription: {image_topic}") + + depth_topic = f'{base}/depth/depth_registered' + self.depth_subscription = self.create_subscription( + Image, depth_topic, self.depth_callback, 1) + self.get_logger().info(f"Creating depth subcription: {depth_topic}") + + ### Callbacks + def has_subscribers(self, publisher): + return publisher.get_subscription_count() > 0 + + def camera_info_callback(self, msg): + if not self.camera_info_gathered: + if self.print_camera_info: + self.get_logger().info(f"Camera info: {msg}") + self.intrinsic_matrix = np.array(msg.k).reshape((3, 3)) + self.fx = msg.k[0] + self.cx = msg.k[2] + self.fy = msg.k[4] + self.cy = msg.k[5] + self.camera_info_gathered = True + + def depth_callback(self, msg): + self.depth_image = self.bridge.imgmsg_to_cv2(msg, desired_encoding='passthrough') + + def _stamp(self, msg): + if self.use_incoming_timestamp: + return msg.header.stamp + return self.get_clock().now().to_msg() + + ### THE MEAT + def image_callback(self, msg: CompressedImage): + start_time = time.perf_counter() if self.log_processing_time else None + + if self.depth_image is None or not self.camera_info_gathered: + self.get_logger().warning( + "Skipping image because either no depth image or camera info is available.", + throttle_duration_sec=1) + return + + # Get the image from the msg + cv_image = self.bridge.compressed_imgmsg_to_cv2(msg, "bgr8") + if cv_image is None: + return + + # Run inference + results = self.model.infer(cv_image, conf=self.conf, iou=self.iou) + + # Store everything we need in the frame dataclass 🤌 + frame = Frame( + image=cv_image, + depth=self.depth_image, + fx=self.fx, fy=self.fy, cx=self.cx, cy=self.cy, + K=self.intrinsic_matrix, + frame_id=self.frame_id, + timestamp=self._stamp(msg), + class_id_map=self.class_id_map, + conf=self.conf, + want_markers=self.has_subscribers(self.marker_array_publisher), + want_cloud=self.has_subscribers(self.point_cloud_publisher), + want_overlay_points=self.has_subscribers(self.annotated_image_publisher) + ) + + # The generic planar and task specific detection logic + detections, markers = self.detector.process(results, frame) + + # Publish markers: clear the previous set, then add the current one + if self.has_subscribers(self.marker_array_publisher): + marker_array = MarkerArray() + clear = Marker() + clear.action = Marker.DELETEALL + marker_array.markers.append(clear) + if markers: + marker_array.markers.extend(markers) + self.marker_array_publisher.publish(marker_array) + + # Publish point cloud for visualization + if self.has_subscribers(self.point_cloud_publisher): + cloud = self.detector.take_point_cloud(self.frame_id, self._stamp(msg)) + if cloud is not None: + self.point_cloud_publisher.publish(cloud) + + # Publish annotated frame for visualization + if self.has_subscribers(self.annotated_image_publisher): + annotated_frame = results[0].plot() + self.annotated_image_publisher.publish(self.bridge.cv2_to_compressed_imgmsg(annotated_frame)) + + # Publish detections for mapping + if self.has_subscribers(self.detection_publisher): + self.detection_publisher.publish(detections) + + # Debug log + if self.log_processing_time: + elapsed = time.perf_counter() - start_time + self.get_logger().info(f"total={elapsed * 1000:.0f}ms ({1 / elapsed:.1f} fps)") - # min_depth_point = [] - # for point in feature_points: - # if self.depth_image[point[0],point[1]] < self.depth_image[min_depth_point[0], min_depth_point[1]]: - # min_depth_point = [point[0],point[1]] - - - - # Get 3D points from feature points - points_3d = self.get_3d_points(feature_points, cv_image) - - if points_3d is not None and len(points_3d) >= self.min_points: - normal, _, centroid = self.fit_plane_to_points(points_3d) - - centroid = self.calculate_centroid(bbox_center_x, bbox_center_y, centroid[2]) - - if normal[2] > 0: - normal = -normal - self.plane_normal = normal - quat, _ = self.calculate_quaternion_and_euler_angles(normal) - - self.get_logger().info(f"Class name: {class_name}") - if class_name == "torpedo_open": - self.open_torpedo_centroid = centroid - self.open_torpedo_quat = quat - elif class_name == "torpedo_closed": - self.closed_torpedo_centroid = centroid - self.closed_torpedo_quat = quat - elif class_name == "mapping_map": - self.mapping_map_centroid = centroid - self.mapping_map_quat = quat - #class_name == "gate_sawfish" - class_name = "torpedo" - elif class_name == self.bin_target: - class_name = "bin_target" - elif class_name in ["torpedo_shark_top", "torpedo_saw_top"]: - self.torpedo_centroid = centroid - self.torpedo_quat = quat - if class_name == "torpedo_shark_top": - self.torpedo_type = "shark" - else: - self.torpedo_type = "saw" - - # When calling publish_marker, pass these dimensions along with other required information - self.publish_marker(quat, centroid, class_name, bbox_width, bbox_height) - - # Create Detection3D message - detection = Detection3D() - detection.header.frame_id = self.frame_id - - if self.use_incoming_timestamp: - detection.header.stamp = self.detection_timestamp - else: - detection.header.stamp = self.get_clock().now().to_msg() - - # Set the pose - detection.results.append(self.create_object_hypothesis_with_pose(class_name, centroid, quat, conf)) - - return detection - - # else: - # self.get_logger().info(f"wtf: {class_name}") - # if self.latest_buoy is not None and class_name == "buoy": - # centroid = self.calculate_centroid(bbox_center_x, bbox_center_y, self.latest_buoy[2]) - # quat, _ = self.calculate_quaternion_and_euler_angles(-self.default_normal) - # self.publish_marker(quat, centroid, class_name, bbox_width, bbox_height) - # detection = Detection3D() - # detection.header.frame_id = self.frame_id - # if self.use_incoming_timestamp: - # detection.header.stamp = self.detection_timestamp - # else: - # detection.header.stamp = self.get_clock().now().to_msg() - - # # Set the pose - # detection.results.append(self.create_object_hypothesis_with_pose(class_name, centroid, quat, conf)) - - # return detection - - # return None - - def calculate_centroid(self, center_x, center_y, z): - center_3d_x = (center_x - self.cx) * z / self.fx - center_3d_y = (center_y - self.cy) * z / self.fy - return [center_3d_x, center_3d_y, z] - - def create_object_hypothesis_with_pose(self, class_name, centroid, quat, conf): - hypothesis_with_pose = ObjectHypothesisWithPose() - hypothesis = ObjectHypothesis() - - hypothesis.class_id = class_name +def main(args=None): + rclpy.init(args=args) + yolo_node = YOLONode() + rclpy.spin(yolo_node) + yolo_node.destroy_node() + rclpy.shutdown() - #temp - if class_name == "mapping_map": - hypothesis.class_id = "gate_cold" # Gaslight the robot - if class_name in ["torpedo_shark_top", "torpedo_saw_top"]: - hypothesis.class_id = "torpedo" - - - hypothesis.score = conf.item() # Convert from numpy float to float - - hypothesis_with_pose.hypothesis = hypothesis - hypothesis_with_pose.pose.pose.position.x = centroid[0] - hypothesis_with_pose.pose.pose.position.y = centroid[1] - hypothesis_with_pose.pose.pose.position.z = centroid[2] - hypothesis_with_pose.pose.pose.orientation.x = quat[0] - hypothesis_with_pose.pose.pose.orientation.y = quat[1] - hypothesis_with_pose.pose.pose.orientation.z = quat[2] - hypothesis_with_pose.pose.pose.orientation.w = quat[3] - - return hypothesis_with_pose - - def publish_accumulated_point_cloud(self): - if not self.has_subscribers(self.point_cloud_publisher): - return # Skip if no subscribers - - if not self.accumulated_points: - return # Skip if there are no points - - # Prepare the PointCloud message - cloud = PointCloud() - cloud.header.frame_id = self.frame_id - if self.use_incoming_timestamp: - cloud.header.stamp = self.detection_timestamp - else: - cloud.header.stamp = self.get_clock().now().to_msg() - - - # Convert accumulated 3D points to Point32 messages and add to the PointCloud - for point in self.accumulated_points: - cloud.points.append(Point32(x=float(point[0]), y=float(point[1]), z=float(point[2]))) - - # Publish the accumulated point cloud - self.point_cloud_publisher.publish(cloud) - - # Clear the accumulated points after publishing - self.accumulated_points.clear() - - def overlay_points_on_image(self, image, points): - """Draw circles on the image for each point with safety checks""" - if len(points) == 0: - return - - for point in points: - try: - # Check for valid point structure - if len(point) < 3: - continue - - # Check for valid z-coordinate to avoid division by zero - if point[2] <= 0 or np.isnan(point[2]) or np.isinf(point[2]): - continue - - # Check for valid x and y coordinates - if np.isnan(point[0]) or np.isinf(point[0]) or np.isnan(point[1]) or np.isinf(point[1]): - continue - - # Transform the 3D point back to 2D - x2d = int(point[0] * self.fx / point[2] + self.cx) - y2d = int(point[1] * self.fy / point[2] + self.cy) - - # Check if the projected point is within image bounds - if (0 <= x2d < image.shape[1] and 0 <= y2d < image.shape[0]): - cv2.circle(image, (x2d, y2d), radius=3, color=(0, 255, 0), thickness=-1) - - except (ZeroDivisionError, OverflowError, ValueError): - # Silently continue on mathematical errors - continue - except Exception as e: - # Log unexpected errors but continue processing - self.get_logger().warning(f"Unexpected error in overlay_points_on_image: {e}") - continue - - def get_3d_points(self, feature_points, cv_image): - points_3d = [] - - - for x, y in feature_points: - xi = int(x) - yi = int(y) - - # Make sure the point is on the image - if yi >= self.depth_image.shape[0] or xi >= self.depth_image.shape[1]: - continue - - # Make sure the point is on the mask - if self.mask[yi, xi] != 255: - continue - - z = self.depth_image[yi, xi] - if np.isnan(z) or z == 0: - continue - - point_3d = self.calculate_centroid(xi, yi, z) - points_3d.append(point_3d) - - # Now, overlay points on the image - self.overlay_points_on_image(cv_image, points_3d) - - # Prepare PointCloud message - cloud = PointCloud() - cloud.header.frame_id = self.frame_id # Adjust the frame ID as necessary - - if self.use_incoming_timestamp: - cloud.header.stamp = self.detection_timestamp - else: - cloud.header.stamp = self.get_clock().now().to_msg() - - # Convert points to a numpy array - points_3d = np.array(points_3d) - - # Filter outlier points - points_3d = self.radius_outlier_removal(points_3d, min_neighbors=min(10,int(len(points_3d)*0.8))) - points_3d = self.statistical_outlier_removal(points_3d, k=min(10,int(len(points_3d) * 0.8))) - # self.get_logger().info(f"points3d: {len(points_3d)}") - if points_3d is not None: - self.accumulated_points.extend(points_3d) # Add the new points to the accumulated list - - # Optionally, limit the size of the accumulated points to prevent unbounded growth - max_points = 10000 # Example limit, adjust based on your needs - if len(self.accumulated_points) > max_points: - self.accumulated_points = self.accumulated_points[-max_points:] - - if len(points_3d) < self.min_points: - return None - - return points_3d - - def statistical_outlier_removal(self, points_3d, k=10, std_ratio=1.0): - """ - Remove statistical outliers from the point cloud. - - :param points_3d: Numpy array of 3D points - :param k: Number of nearest neighbors to use for mean distance calculation - :param std_ratio: Standard deviation ratio threshold - :return: Filtered array of 3D points - """ - mean_distances = np.zeros(len(points_3d)) - for i, point in enumerate(points_3d): - distances = np.linalg.norm(points_3d - point, axis=1) - sorted_distances = np.sort(distances) - mean_distances[i] = np.mean(sorted_distances[1:k+1]) - - mean_dist_global = np.mean(mean_distances) - std_dev = np.std(mean_distances) - - threshold = mean_dist_global + std_ratio * std_dev - filtered_indices = np.where(mean_distances < threshold)[0] - return points_3d[filtered_indices] - - def radius_outlier_removal(self, points_3d, radius=1.0, min_neighbors=10): - """ - Remove radius outliers from the point cloud. - - :param points_3d: Numpy array of 3D points - :param radius: The radius within which to count neighbors - :param min_neighbors: Minimum number of neighbors within the radius for the point to be kept - :return: Filtered array of 3D points - """ - filtered_indices = [] - for i, point in enumerate(points_3d): - distances = np.linalg.norm(points_3d - point, axis=1) - if len(np.where(distances <= radius)[0]) > min_neighbors: - filtered_indices.append(i) - - return points_3d[filtered_indices] - - def fit_plane_to_points(self, points_3d): - try: - if len(points_3d) == 0: - self.get_logger().warning("No 3D points available for plane fitting.") - return None, None, None - centroid = np.mean(points_3d, axis=0) - u, s, vh = np.linalg.svd(points_3d - centroid) - normal = vh[-1] - normal = normal / np.linalg.norm(normal) - d = -np.dot(normal, centroid) - return normal, d, centroid - except: - pass - - def calculate_quaternion_and_euler_angles(self, normal): - - if np.allclose(normal, self.default_normal): - quat = [0.0, 0.0, 0.0, 1.0] # No rotation needed - euler_angles = None - else: - rotation = self.calculate_rotation(normal) - quat = rotation.as_quat() - euler_angles = rotation.as_euler('xyz',degrees=True) - - return quat, euler_angles - - def calculate_rotation(self, normal): - - # Compute rotation axis (cross product) and angle (dot product) - axis = np.cross(self.default_normal, normal) - axis_length = np.linalg.norm(axis) - if axis_length == 0: - # Normal is in the opposite direction - axis = np.array([1, 0, 0]) - angle = np.pi - else: - axis /= axis_length # Normalize the rotation axis - angle = np.arccos(np.dot(self.default_normal, normal)) - - # Convert axis-angle to quaternion - rotation = R.from_rotvec(axis * angle) - - return rotation - - def publish_marker(self, quat, centroid, class_name, bbox_width, bbox_height): - if not self.has_subscribers(self.marker_array_publisher): - return - - # Create a plane marker - plane_marker = Marker() - plane_marker.header.frame_id = self.frame_id - if self.use_incoming_timestamp: - plane_marker.header.stamp = self.detection_timestamp - else: - plane_marker.header.stamp = self.get_clock().now().to_msg() - plane_marker.ns = "detection_markers" - plane_marker.id = self.generate_unique_detection_id() - plane_marker.type = Marker.CUBE - plane_marker.action = Marker.ADD - - # Set marker lifetime (auto-delete after this duration) - plane_marker.lifetime.nanosec = int(self.publish_interval * 4.0 * 1e9) - - # Set the position of the plane marker - plane_marker.pose.position.x = centroid[0] - plane_marker.pose.position.y = centroid[1] - plane_marker.pose.position.z = centroid[2] - - # Set the plane marker's orientation - plane_marker.pose.orientation.x = quat[0] - plane_marker.pose.orientation.y = quat[1] - plane_marker.pose.orientation.z = quat[2] - plane_marker.pose.orientation.w = quat[3] - - # Set the scale of the plane marker based on the bounding box size - plane_marker.scale.x = float(bbox_width) / 150.0 - plane_marker.scale.y = float(bbox_height) / 150.0 - if class_name == "buoy": - plane_marker.scale.z = 0.01 - else: - plane_marker.scale.z = 0.05 - - # Set the color and transparency (alpha) of the plane marker - color = self.get_color_for_class(class_name) - plane_marker.color.r = color[0] - plane_marker.color.g = color[1] - plane_marker.color.b = color[2] - plane_marker.color.a = 0.8 - - # Append the plane marker to publish all at once - # self.temp_markers.append(plane_marker) - - # Create an arrow marker - arrow_marker = Marker() - arrow_marker.header.frame_id = self.frame_id - if self.use_incoming_timestamp: - arrow_marker.header.stamp = self.detection_timestamp - else: - arrow_marker.header.stamp = self.get_clock().now().to_msg() - arrow_marker.ns = "orientation_markers" - arrow_marker.id = self.generate_unique_detection_id() - arrow_marker.type = Marker.ARROW - arrow_marker.action = Marker.ADD - - # Set marker lifetime (auto-delete after this duration) - arrow_marker.lifetime.nanosec = int(self.publish_interval * 4.0 * 1e9) - - # Set the position of the arrow marker - arrow_marker.pose.position.x = centroid[0] - arrow_marker.pose.position.y = centroid[1] - arrow_marker.pose.position.z = centroid[2] - - # Create a rotation for -90 degrees around the y-axis - additional_rotation = R.from_euler('y', -90, degrees=True).as_quat() - - # Apply the additional rotation to the plane's quaternion - arrow_quat = R.from_quat(quat) * R.from_quat(additional_rotation) - arrow_quat = arrow_quat.as_quat() - - # Set the arrow marker's orientation - arrow_marker.pose.orientation.x = arrow_quat[0] - arrow_marker.pose.orientation.y = arrow_quat[1] - arrow_marker.pose.orientation.z = arrow_quat[2] - arrow_marker.pose.orientation.w = arrow_quat[3] - - # Set the scale for the arrow marker - arrow_marker.scale.x = 1.0 # Length of the arrow - arrow_marker.scale.y = 0.05 # Width of the arrow - arrow_marker.scale.z = 0.05 # Height of the arrow - - # Set the color and transparency (alpha) of the arrow marker - arrow_marker.color.r = color[0] - arrow_marker.color.g = color[1] - arrow_marker.color.b = color[2] - arrow_marker.color.a = 0.8 - - # Append the arrow marker to publish all at once - self.temp_markers.append(arrow_marker) - def publish_markers(self, markers): - if not self.has_subscribers(self.marker_array_publisher): - return - - current_time = time.time() - if current_time - self.last_publish_time > self.publish_interval: - marker_array = MarkerArray() - marker_array.markers = markers - self.last_publish_time = current_time - self.marker_array_publisher.publish(marker_array) - # Clear the markers after publishing - markers.clear() - - def get_color_for_class(self, class_name): - return self.color_map.get(class_name, (1.0, 1.0, 1.0)) # Default to white - -def main(args=None): - rclpy.init(args=args) - yolo_node = YOLONode() - rclpy.spin(yolo_node) - yolo_node.destroy_node() - rclpy.shutdown() - if __name__ == '__main__': - main() + main() \ No newline at end of file diff --git a/tensor_detector/weights/dfc_rs_26.pt b/tensor_detector/weights/dfc_rs_26.pt new file mode 100644 index 0000000..f28eda3 Binary files /dev/null and b/tensor_detector/weights/dfc_rs_26.pt differ diff --git a/tensor_detector/weights/ffc_rs_26.pt b/tensor_detector/weights/ffc_rs_26.pt new file mode 100644 index 0000000..ad4b77f Binary files /dev/null and b/tensor_detector/weights/ffc_rs_26.pt differ