From be30f21b309a9305618d592f2971da3b62288707 Mon Sep 17 00:00:00 2001
From: Shaurya Dewan <shauryad@nvidia.com>
Date: Thu, 5 Feb 2026 14:15:37 -0800
Subject: [PATCH 01/87] Code for Flexiv Rizon 4s support for gear insertion.

---
 .../isaaclab_assets/robots/flexiv.py          |  91 +++-
 .../gear_assembly/config/rizon_4s/__init__.py |  44 ++
 .../config/rizon_4s/agents/__init__.py        |   4 +
 .../config/rizon_4s/agents/rsl_rl_ppo_cfg.py  |  49 +++
 .../config/rizon_4s/joint_pos_env_cfg.py      | 398 ++++++++++++++++++
 .../config/rizon_4s/ros_inference_env_cfg.py  | 105 +++++
 6 files changed, 690 insertions(+), 1 deletion(-)
 create mode 100644 source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/__init__.py
 create mode 100644 source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/agents/__init__.py
 create mode 100644 source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/agents/rsl_rl_ppo_cfg.py
 create mode 100644 source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
 create mode 100644 source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/ros_inference_env_cfg.py

diff --git a/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py b/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
index 2e4b14347da8..d0fa786c0728 100644
--- a/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
+++ b/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
@@ -9,10 +9,13 @@
 The following configurations are available:
 
 * :obj:`FLEXIV_RIZON4S_CFG`: The Flexiv Rizon 4s arm without a gripper.
+* :obj:`FLEXIV_RIZON4S_GRAV_GRIPPER_CFG`: The Flexiv Rizon 4s arm with Grav gripper.
 
 Reference: https://www.flexiv.com/product/rizon
 """
 
+import math
+
 import isaaclab.sim as sim_utils
 from isaaclab.actuators import ImplicitActuatorCfg
 from isaaclab.assets.articulation import ArticulationCfg
@@ -79,5 +82,91 @@
         ),
     },
 )
-
 """Configuration of Flexiv Rizon 4s arm using implicit actuator models."""
+
+
+FLEXIV_RIZON4S_GRAV_GRIPPER_CFG = ArticulationCfg(
+    spawn=sim_utils.UsdFileCfg(
+        usd_path="/home/shauryad/workspaces/rl_policy/isaac_sim_ws/exts/isaacsim.robot.manipulators.examples/data/flexiv/Rizon4s_with_Grav.usd",
+        rigid_props=sim_utils.RigidBodyPropertiesCfg(
+            disable_gravity=False,
+            max_depenetration_velocity=5.0,
+        ),
+        articulation_props=sim_utils.ArticulationRootPropertiesCfg(
+            enabled_self_collisions=False,
+            solver_position_iteration_count=16,
+            solver_velocity_iteration_count=1,
+        ),
+        activate_contact_sensors=False,
+    ),
+    init_state=ArticulationCfg.InitialStateCfg(
+        joint_pos={
+            "joint1": 0.0,
+            "joint2": -0.785,
+            "joint3": 0.0,
+            "joint4": -1.571,
+            "joint5": 0.0,
+            "joint6": 0.785,
+            "joint7": 0.0,
+            "finger_joint": math.radians(45.0),
+            ".*_knuckle_joint": 0.0,
+        },
+        pos=(0.0, 0.0, 0.0),
+        rot=(1.0, 0.0, 0.0, 0.0),
+    ),
+    actuators={
+        "shoulder": ImplicitActuatorCfg(
+            joint_names_expr=["joint[1-2]"],
+            effort_limit_sim=123.0,
+            velocity_limit_sim=2.094,
+            stiffness=1320.0,
+            damping=72.0,
+            friction=0.0,
+            armature=0.0,
+        ),
+        "elbow": ImplicitActuatorCfg(
+            joint_names_expr=["joint[3-4]"],
+            effort_limit_sim=64.0,
+            velocity_limit_sim=2.443,
+            stiffness=600.0,
+            damping=35.0,
+            friction=0.0,
+            armature=0.0,
+        ),
+        "wrist": ImplicitActuatorCfg(
+            joint_names_expr=["joint[5-7]"],
+            effort_limit_sim=39.0,
+            velocity_limit_sim=4.887,
+            stiffness=216.0,
+            damping=29.0,
+            friction=0.0,
+            armature=0.0,
+        ),
+        "gripper_drive": ImplicitActuatorCfg(
+            joint_names_expr=["finger_joint"],
+            effort_limit_sim=10.0,
+            velocity_limit_sim=2.0,
+            stiffness=40.0,
+            damping=1.0,
+            friction=0.0,
+            armature=0.0,
+        ),
+        "gripper_passive": ImplicitActuatorCfg(
+            joint_names_expr=[".*_knuckle_joint"],
+            effort_limit_sim=1.0,
+            velocity_limit_sim=1.0,
+            stiffness=0.0,
+            damping=0.0,
+            friction=0.0,
+            armature=0.0,
+        ),
+    },
+)
+"""Configuration of Flexiv Rizon 4s arm with Grav gripper using implicit actuator models.
+
+The Grav gripper is a parallel gripper with the following joint configuration:
+- finger_joint: Main actuation joint (opened: 45 deg, closed: -8.88 deg)
+- *_knuckle_joint: Passive/mimic joints (not directly actuated)
+
+End effector body: right_finger_tip
+"""
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/__init__.py
new file mode 100644
index 000000000000..328a5ca09d56
--- /dev/null
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/__init__.py
@@ -0,0 +1,44 @@
+# Copyright (c) 2025-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+import gymnasium as gym
+
+from . import agents
+
+##
+# Register Gym environments.
+##
+
+
+# Flexiv Rizon 4s
+gym.register(
+    id="Isaac-Deploy-GearAssembly-Rizon4s-v0",
+    entry_point="isaaclab.envs:ManagerBasedRLEnv",
+    disable_env_checker=True,
+    kwargs={
+        "env_cfg_entry_point": f"{__name__}.joint_pos_env_cfg:Rizon4sGearAssemblyEnvCfg",
+        "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:Rizon4sGearAssemblyRNNPPORunnerCfg",
+    },
+)
+
+gym.register(
+    id="Isaac-Deploy-GearAssembly-Rizon4s-Play-v0",
+    entry_point="isaaclab.envs:ManagerBasedRLEnv",
+    disable_env_checker=True,
+    kwargs={
+        "env_cfg_entry_point": f"{__name__}.joint_pos_env_cfg:Rizon4sGearAssemblyEnvCfg_PLAY",
+    },
+)
+
+# Flexiv Rizon 4s - ROS Inference
+gym.register(
+    id="Isaac-Deploy-GearAssembly-Rizon4s-ROS-Inference-v0",
+    entry_point="isaaclab.envs:ManagerBasedRLEnv",
+    disable_env_checker=True,
+    kwargs={
+        "env_cfg_entry_point": f"{__name__}.ros_inference_env_cfg:Rizon4sGearAssemblyROSInferenceEnvCfg",
+        "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:Rizon4sGearAssemblyRNNPPORunnerCfg",
+    },
+)
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/agents/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/agents/__init__.py
new file mode 100644
index 000000000000..cf59b16a1e2e
--- /dev/null
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/agents/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (c) 2025-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/agents/rsl_rl_ppo_cfg.py
new file mode 100644
index 000000000000..06f64e731afc
--- /dev/null
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/agents/rsl_rl_ppo_cfg.py
@@ -0,0 +1,49 @@
+# Copyright (c) 2025-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+from isaaclab.utils import configclass
+
+from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticRecurrentCfg, RslRlPpoAlgorithmCfg
+
+
+@configclass
+class Rizon4sGearAssemblyRNNPPORunnerCfg(RslRlOnPolicyRunnerCfg):
+    num_steps_per_env = 512
+    max_iterations = 1500
+    save_interval = 50
+    experiment_name = "gear_assembly_rizon4s"
+    clip_actions = 1.0
+    resume = False
+    obs_groups = {
+        "policy": ["policy"],
+        "critic": ["critic"],
+    }
+    policy = RslRlPpoActorCriticRecurrentCfg(
+        state_dependent_std=True,
+        init_noise_std=1.0,
+        actor_obs_normalization=True,
+        critic_obs_normalization=True,
+        actor_hidden_dims=[256, 128, 64],
+        critic_hidden_dims=[256, 128, 64],
+        noise_std_type="log",
+        activation="elu",
+        rnn_type="lstm",
+        rnn_hidden_dim=256,
+        rnn_num_layers=2,
+    )
+    algorithm = RslRlPpoAlgorithmCfg(
+        value_loss_coef=1.0,
+        use_clipped_value_loss=True,
+        clip_param=0.2,
+        entropy_coef=0.0,
+        num_learning_epochs=8,
+        num_mini_batches=16,
+        learning_rate=5.0e-4,
+        schedule="adaptive",
+        gamma=0.99,
+        lam=0.95,
+        desired_kl=0.008,
+        max_grad_norm=1.0,
+    )
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
new file mode 100644
index 000000000000..11104f2968ec
--- /dev/null
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
@@ -0,0 +1,398 @@
+# Copyright (c) 2025-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+import math
+
+import torch
+
+import isaaclab.sim as sim_utils
+from isaaclab.actuators import ImplicitActuatorCfg
+from isaaclab.assets import ArticulationCfg
+from isaaclab.managers import EventTermCfg as EventTerm
+from isaaclab.managers import SceneEntityCfg
+from isaaclab.utils import configclass
+
+import isaaclab_tasks.manager_based.manipulation.deploy.mdp as mdp
+import isaaclab_tasks.manager_based.manipulation.deploy.mdp.events as gear_assembly_events
+from isaaclab_tasks.manager_based.manipulation.deploy.gear_assembly.gear_assembly_env_cfg import GearAssemblyEnvCfg
+
+##
+# Pre-defined configs
+##
+from isaaclab_assets import FLEXIV_RIZON4S_GRAV_GRIPPER_CFG  # isort: skip
+
+
+##
+# Gripper-specific helper functions
+##
+
+
+def set_finger_joint_pos_grav_gripper(
+    joint_pos: torch.Tensor,
+    reset_ind_joint_pos: list[int],
+    finger_joints: list[int],
+    finger_joint_position: float,
+):
+    """Set finger joint positions for Flexiv Grav gripper.
+
+    The Grav gripper has a single main actuation joint (finger_joint).
+    The finger_joint_position is expected to be in the range [0, 1] where:
+    - 0.0 corresponds to fully closed (-8.88 degrees = -0.155 rad)
+    - 1.0 corresponds to fully opened (45 degrees = 0.785 rad)
+
+    Args:
+        joint_pos: Joint positions tensor
+        reset_ind_joint_pos: Row indices into the sliced joint_pos tensor
+        finger_joints: List of finger joint indices (only first one is used for Grav)
+        finger_joint_position: Target position for finger joints (0-1 normalized)
+    """
+    # Grav gripper joint limits (in radians)
+    GRAV_CLOSED_POS = math.radians(-8.88)  # -0.155 rad
+    GRAV_OPENED_POS = math.radians(45.0)   # 0.785 rad
+
+    # Map normalized position to actual joint position
+    # finger_joint_position of 0 = closed, 1 = opened
+    actual_pos = GRAV_CLOSED_POS + finger_joint_position * (GRAV_OPENED_POS - GRAV_CLOSED_POS)
+
+    for idx in reset_ind_joint_pos:
+        if len(finger_joints) > 0:
+            # Set the main finger_joint
+            joint_pos[idx, finger_joints[0]] = actual_pos
+
+
+##
+# Environment configuration
+##
+
+
+@configclass
+class EventCfg:
+    """Configuration for events."""
+
+    robot_joint_stiffness_and_damping = EventTerm(
+        func=mdp.randomize_actuator_gains,
+        mode="reset",
+        params={
+            "asset_cfg": SceneEntityCfg(
+                "robot", joint_names=["joint[1-2]", "joint[3-4]", "joint[5-7]"]
+            ),  # only the arm joints are randomized
+            "stiffness_distribution_params": (0.75, 1.5),
+            "damping_distribution_params": (0.3, 3.0),
+            "operation": "scale",
+            "distribution": "log_uniform",
+        },
+    )
+
+    joint_friction = EventTerm(
+        func=mdp.randomize_joint_parameters,
+        mode="reset",
+        params={
+            "asset_cfg": SceneEntityCfg("robot", joint_names=["joint[1-2]", "joint[3-4]", "joint[5-7]"]),
+            "friction_distribution_params": (0.3, 0.7),
+            "operation": "add",
+            "distribution": "uniform",
+        },
+    )
+
+    small_gear_physics_material = EventTerm(
+        func=mdp.randomize_rigid_body_material,
+        mode="startup",
+        params={
+            "asset_cfg": SceneEntityCfg("factory_gear_small", body_names=".*"),
+            "static_friction_range": (0.75, 0.75),
+            "dynamic_friction_range": (0.75, 0.75),
+            "restitution_range": (0.0, 0.0),
+            "num_buckets": 16,
+        },
+    )
+
+    medium_gear_physics_material = EventTerm(
+        func=mdp.randomize_rigid_body_material,
+        mode="startup",
+        params={
+            "asset_cfg": SceneEntityCfg("factory_gear_medium", body_names=".*"),
+            "static_friction_range": (0.75, 0.75),
+            "dynamic_friction_range": (0.75, 0.75),
+            "restitution_range": (0.0, 0.0),
+            "num_buckets": 16,
+        },
+    )
+
+    large_gear_physics_material = EventTerm(
+        func=mdp.randomize_rigid_body_material,
+        mode="startup",
+        params={
+            "asset_cfg": SceneEntityCfg("factory_gear_large", body_names=".*"),
+            "static_friction_range": (0.75, 0.75),
+            "dynamic_friction_range": (0.75, 0.75),
+            "restitution_range": (0.0, 0.0),
+            "num_buckets": 16,
+        },
+    )
+
+    gear_base_physics_material = EventTerm(
+        func=mdp.randomize_rigid_body_material,
+        mode="startup",
+        params={
+            "asset_cfg": SceneEntityCfg("factory_gear_base", body_names=".*"),
+            "static_friction_range": (0.75, 0.75),
+            "dynamic_friction_range": (0.75, 0.75),
+            "restitution_range": (0.0, 0.0),
+            "num_buckets": 16,
+        },
+    )
+
+    robot_physics_material = EventTerm(
+        func=mdp.randomize_rigid_body_material,
+        mode="startup",
+        params={
+            "asset_cfg": SceneEntityCfg("robot", body_names=".*finger.*"),
+            "static_friction_range": (0.75, 0.75),
+            "dynamic_friction_range": (0.75, 0.75),
+            "restitution_range": (0.0, 0.0),
+            "num_buckets": 16,
+        },
+    )
+
+    randomize_gear_type = EventTerm(
+        func=gear_assembly_events.randomize_gear_type,
+        mode="reset",
+        params={"gear_types": ["gear_small", "gear_medium", "gear_large"]},
+    )
+
+    reset_all = EventTerm(func=mdp.reset_scene_to_default, mode="reset")
+
+    randomize_gears_and_base_pose = EventTerm(
+        func=gear_assembly_events.randomize_gears_and_base_pose,
+        mode="reset",
+        params={
+            "pose_range": {
+                "x": [-0.1, 0.1],
+                "y": [-0.25, 0.25],
+                "z": [-0.1, 0.1],
+                "roll": [-math.pi / 90, math.pi / 90],  # 2 degree
+                "pitch": [-math.pi / 90, math.pi / 90],  # 2 degree
+                "yaw": [-math.pi / 6, math.pi / 6],  # 30 degree
+            },
+            "gear_pos_range": {
+                "x": [-0.02, 0.02],
+                "y": [-0.02, 0.02],
+                "z": [0.0575, 0.0775],  # 0.045 + 0.0225
+            },
+            "velocity_range": {},
+        },
+    )
+
+    set_robot_to_grasp_pose = EventTerm(
+        func=gear_assembly_events.set_robot_to_grasp_pose,
+        mode="reset",
+        params={
+            "robot_asset_cfg": SceneEntityCfg("robot"),
+            "pos_randomization_range": {"x": [-0.0, 0.0], "y": [-0.005, 0.005], "z": [-0.003, 0.003]},
+        },
+    )
+
+
+@configclass
+class Rizon4sGearAssemblyEnvCfg(GearAssemblyEnvCfg):
+    """Configuration for Flexiv Rizon 4s with Grav Gripper Gear Assembly Environment.
+
+    The Flexiv Rizon 4s is a 7-DOF collaborative robot arm equipped with the
+    Flexiv Grav parallel gripper for gear manipulation tasks.
+    """
+
+    def __post_init__(self):
+        # post init of parent
+        super().__post_init__()
+
+        # Robot-specific parameters for Flexiv Rizon 4s with Grav gripper
+        # End effector body name - using the gripper finger tip for accurate grasp positioning
+        self.end_effector_body_name = "right_finger_tip"
+        self.num_arm_joints = 7  # Number of arm joints (Rizon 4s has 7 DOF)
+        # Rotation offset for grasp pose (quaternion [w, x, y, z])
+        # Rizon 4s end-effector is along z-direction, so we need to rotate for downward facing
+        self.grasp_rot_offset = [
+            0.0,
+            1.0,
+            0.0,
+            0.0,
+        ]  # 180 degrees around X axis for downward facing EE
+        self.gripper_joint_setter_func = set_finger_joint_pos_grav_gripper  # Grav gripper joint setter function
+
+        # Gear orientation termination thresholds (in degrees)
+        self.gear_orientation_roll_threshold_deg = 15.0  # Maximum allowed roll deviation
+        self.gear_orientation_pitch_threshold_deg = 15.0  # Maximum allowed pitch deviation
+        self.gear_orientation_yaw_threshold_deg = 180.0  # Maximum allowed yaw deviation
+
+        # Common observation configuration for Rizon 4s joints (arm only, not gripper)
+        self.observations.policy.joint_pos.params["asset_cfg"].joint_names = [
+            "joint1",
+            "joint2",
+            "joint3",
+            "joint4",
+            "joint5",
+            "joint6",
+            "joint7",
+        ]
+        self.observations.policy.joint_vel.params["asset_cfg"].joint_names = [
+            "joint1",
+            "joint2",
+            "joint3",
+            "joint4",
+            "joint5",
+            "joint6",
+            "joint7",
+        ]
+
+        # override events
+        self.events = EventCfg()
+
+        # Update termination thresholds from config
+        self.terminations.gear_orientation_exceeded.params["roll_threshold_deg"] = (
+            self.gear_orientation_roll_threshold_deg
+        )
+        self.terminations.gear_orientation_exceeded.params["pitch_threshold_deg"] = (
+            self.gear_orientation_pitch_threshold_deg
+        )
+        self.terminations.gear_orientation_exceeded.params["yaw_threshold_deg"] = (
+            self.gear_orientation_yaw_threshold_deg
+        )
+
+        # Action configuration for Rizon 4s arm
+        # Using smaller action scale similar to UR10e (0.025) for stability
+        self.joint_action_scale = 0.025
+        self.actions.arm_action = mdp.RelativeJointPositionActionCfg(
+            asset_name="robot",
+            joint_names=[
+                "joint1",
+                "joint2",
+                "joint3",
+                "joint4",
+                "joint5",
+                "joint6",
+                "joint7",
+            ],
+            scale=self.joint_action_scale,
+            use_zero_offset=True,
+        )
+
+        # Switch robot to Flexiv Rizon 4s with Grav gripper
+        self.scene.robot = FLEXIV_RIZON4S_GRAV_GRIPPER_CFG.replace(
+            prim_path="{ENV_REGEX_NS}/Robot",
+            spawn=FLEXIV_RIZON4S_GRAV_GRIPPER_CFG.spawn.replace(
+                rigid_props=sim_utils.RigidBodyPropertiesCfg(
+                    disable_gravity=True,
+                    max_depenetration_velocity=5.0,
+                    linear_damping=0.0,
+                    angular_damping=0.0,
+                    max_linear_velocity=1000.0,
+                    max_angular_velocity=3666.0,
+                    enable_gyroscopic_forces=True,
+                    solver_position_iteration_count=16,
+                    solver_velocity_iteration_count=1,
+                    max_contact_impulse=1e32,
+                ),
+                articulation_props=sim_utils.ArticulationRootPropertiesCfg(
+                    enabled_self_collisions=False, solver_position_iteration_count=16, solver_velocity_iteration_count=1
+                ),
+                collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0),
+            ),
+            # Initial joint positions for Rizon 4s with Grav gripper - configured for gear assembly task
+            # These positions place the arm in a suitable configuration for grasping gears
+            init_state=ArticulationCfg.InitialStateCfg(
+                joint_pos={
+                    "joint1": 0.0,
+                    "joint2": -0.785,  # -45 degrees
+                    "joint3": 0.0,
+                    "joint4": -1.571,  # -90 degrees
+                    "joint5": 0.0,
+                    "joint6": 0.785,  # 45 degrees
+                    "joint7": 0.0,
+                    # Grav gripper - start partially open for grasp
+                    "finger_joint": math.radians(30.0),
+                    # Initialize passive joints
+                    ".*_knuckle_joint": 0.0,
+                },
+                pos=(0.0, 0.0, 0.0),
+                rot=(1.0, 0.0, 0.0, 0.0),
+            ),
+        )
+
+        # Grav gripper main actuator configuration for gear manipulation
+        self.scene.robot.actuators["gripper_drive"] = ImplicitActuatorCfg(
+            joint_names_expr=["finger_joint"],
+            effort_limit_sim=10.0,
+            velocity_limit_sim=2.0,
+            stiffness=40.0,
+            damping=1.0,
+            friction=0.0,
+            armature=0.0,
+        )
+
+        # Passive/mimic joints in the gripper - set to zero stiffness/damping
+        self.scene.robot.actuators["gripper_passive"] = ImplicitActuatorCfg(
+            joint_names_expr=[".*_knuckle_joint"],
+            effort_limit_sim=1.0,
+            velocity_limit_sim=1.0,
+            stiffness=0.0,
+            damping=0.0,
+            friction=0.0,
+            armature=0.0,
+        )
+
+        # Gear offsets and grasp positions for Rizon 4s with Grav gripper
+        # These offsets are relative to the end effector frame (finger tip)
+        # Z offset accounts for the gripper length from flange to finger tip
+        self.gear_offsets_grasp = {
+            "gear_small": [0.0, self.gear_offsets["gear_small"][0], -0.12],
+            "gear_medium": [0.0, self.gear_offsets["gear_medium"][0], -0.12],
+            "gear_large": [0.0, self.gear_offsets["gear_large"][0], -0.12],
+        }
+
+        # Grasp widths for Grav gripper (normalized 0-1 values)
+        # These values determine the finger opening for different gear sizes
+        # Grav gripper range: -8.88° (closed) to 45° (open)
+        self.hand_grasp_width = {
+            "gear_small": 0.7,   # More open for small gear
+            "gear_medium": 0.5,  # Medium opening
+            "gear_large": 0.4,   # Less open for large gear
+        }
+
+        # Close widths for Grav gripper (normalized 0-1 values)
+        # Slightly tighter than grasp width for secure grip
+        self.hand_close_width = {
+            "gear_small": 0.75,
+            "gear_medium": 0.55,
+            "gear_large": 0.45,
+        }
+
+        # Populate event term parameters
+        self.events.set_robot_to_grasp_pose.params["gear_offsets_grasp"] = self.gear_offsets_grasp
+        self.events.set_robot_to_grasp_pose.params["end_effector_body_name"] = self.end_effector_body_name
+        self.events.set_robot_to_grasp_pose.params["num_arm_joints"] = self.num_arm_joints
+        self.events.set_robot_to_grasp_pose.params["grasp_rot_offset"] = self.grasp_rot_offset
+        self.events.set_robot_to_grasp_pose.params["gripper_joint_setter_func"] = self.gripper_joint_setter_func
+
+        # Populate termination term parameters
+        self.terminations.gear_dropped.params["gear_offsets_grasp"] = self.gear_offsets_grasp
+        self.terminations.gear_dropped.params["end_effector_body_name"] = self.end_effector_body_name
+        self.terminations.gear_dropped.params["grasp_rot_offset"] = self.grasp_rot_offset
+
+        self.terminations.gear_orientation_exceeded.params["end_effector_body_name"] = self.end_effector_body_name
+        self.terminations.gear_orientation_exceeded.params["grasp_rot_offset"] = self.grasp_rot_offset
+
+
+@configclass
+class Rizon4sGearAssemblyEnvCfg_PLAY(Rizon4sGearAssemblyEnvCfg):
+    """Play configuration for Flexiv Rizon 4s gear assembly."""
+
+    def __post_init__(self):
+        # post init of parent
+        super().__post_init__()
+        # make a smaller scene for play
+        self.scene.num_envs = 50
+        self.scene.env_spacing = 2.5
+        # disable randomization for play
+        self.observations.policy.enable_corruption = False
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/ros_inference_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/ros_inference_env_cfg.py
new file mode 100644
index 000000000000..67ff5f9e79dd
--- /dev/null
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/ros_inference_env_cfg.py
@@ -0,0 +1,105 @@
+# Copyright (c) 2025-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+import math
+
+from isaaclab.assets import RigidObjectCfg
+from isaaclab.utils import configclass
+
+from .joint_pos_env_cfg import Rizon4sGearAssemblyEnvCfg
+
+
+@configclass
+class Rizon4sGearAssemblyROSInferenceEnvCfg(Rizon4sGearAssemblyEnvCfg):
+    """Configuration for ROS inference with Flexiv Rizon 4s and Grav gripper.
+
+    This configuration:
+    - Exposes variables needed for ROS inference
+    - Overrides robot and gear initial poses for fixed/deterministic setup
+    """
+
+    def __post_init__(self):
+        # post init of parent
+        super().__post_init__()
+
+        # Variables used by Isaac Manipulator for on robot inference
+        # These parameters allow the ROS inference node to validate environment configuration,
+        # perform checks during inference, and correctly interpret observations and actions.
+        self.obs_order = ["arm_dof_pos", "arm_dof_vel", "shaft_pos", "shaft_quat"]
+        self.policy_action_space = "joint"
+        # Use inherited joint names from parent's observation configuration
+        self.arm_joint_names = self.observations.policy.joint_pos.params["asset_cfg"].joint_names
+        # Use inherited num_arm_joints from parent
+        self.action_space = self.num_arm_joints
+        # State space and observation space for Rizon 4s with Grav gripper (7 DOF arm + 1 gripper)
+        # State: 7 joint pos + 7 joint vel + 3 shaft pos + 4 shaft quat + 3 gear pos + 4 gear quat = 28
+        # For critic: additional gear observations
+        self.state_space = 28
+        # Observation: 7 joint pos + 7 joint vel + 3 shaft pos + 4 shaft quat = 21
+        self.observation_space = 21
+
+        # Set joint_action_scale from the existing arm_action.scale
+        self.joint_action_scale = self.actions.arm_action.scale
+
+        # Dynamically generate action_scale_joint_space based on action_space
+        self.action_scale_joint_space = [self.joint_action_scale] * self.action_space
+
+        # Override robot initial pose for ROS inference (fixed pose, no randomization)
+        # Joint positions and pos are inherited from parent, only override rotation to be deterministic
+        self.scene.robot.init_state.rot = (1.0, 0.0, 0.0, 0.0)
+
+        # Override gear base initial pose (fixed pose for ROS inference)
+        # Position configured for Rizon 4s workspace
+        self.scene.factory_gear_base.init_state = RigidObjectCfg.InitialStateCfg(
+            pos=(0.5, 0.0, -0.1),
+            rot=(-0.70711, 0.0, 0.0, 0.70711),
+        )
+
+        # Override gear initial poses (fixed poses for ROS inference)
+        # Small gear
+        self.scene.factory_gear_small.init_state = RigidObjectCfg.InitialStateCfg(
+            pos=(0.5, 0.0, -0.1),
+            rot=(-0.70711, 0.0, 0.0, 0.70711),
+        )
+
+        # Medium gear
+        self.scene.factory_gear_medium.init_state = RigidObjectCfg.InitialStateCfg(
+            pos=(0.5, 0.0, -0.1),
+            rot=(-0.70711, 0.0, 0.0, 0.70711),
+        )
+
+        # Large gear
+        self.scene.factory_gear_large.init_state = RigidObjectCfg.InitialStateCfg(
+            pos=(0.5, 0.0, -0.1),
+            rot=(-0.70711, 0.0, 0.0, 0.70711),
+        )
+
+        # Fixed asset parameters for ROS inference - derived from configuration
+        # These parameters are used by the ROS inference node to validate the environment setup
+        # and apply appropriate noise models for robust real-world deployment.
+        # Derive position center from gear base init state
+        self.fixed_asset_init_pos_center = list(self.scene.factory_gear_base.init_state.pos)
+        # Derive position range from parent's randomize_gears_and_base_pose event pose_range
+        pose_range = self.events.randomize_gears_and_base_pose.params["pose_range"]
+        self.fixed_asset_init_pos_range = [
+            pose_range["x"][1],  # max value
+            pose_range["y"][1],  # max value
+            pose_range["z"][1],  # max value
+        ]
+        # Orientation in degrees (quaternion (-0.70711, 0.0, 0.0, 0.70711) = -90° around Z)
+        self.fixed_asset_init_orn_deg = [0.0, 0.0, -90.0]
+        # Derive orientation range from parent's pose_range (radians to degrees)
+        self.fixed_asset_init_orn_deg_range = [
+            math.degrees(pose_range["roll"][1]),  # convert radians to degrees
+            math.degrees(pose_range["pitch"][1]),
+            math.degrees(pose_range["yaw"][1]),
+        ]
+        # Derive observation noise level from parent's gear_shaft_pos noise configuration
+        gear_shaft_pos_noise = self.observations.policy.gear_shaft_pos.noise.noise_cfg.n_max
+        self.fixed_asset_pos_obs_noise_level = [
+            gear_shaft_pos_noise,
+            gear_shaft_pos_noise,
+            gear_shaft_pos_noise,
+        ]

From 75fd33118d5e1f3480cd2f627f0d062b85d453eb Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Mon, 9 Feb 2026 12:27:33 -0800
Subject: [PATCH 02/87] Fixes for Flexiv gear insertion env.

---
 .../isaaclab_assets/robots/flexiv.py          |  33 +--
 .../config/rizon_4s/joint_pos_env_cfg.py      | 200 ++++++++++--------
 .../manipulation/deploy/mdp/events.py         |  21 +-
 3 files changed, 146 insertions(+), 108 deletions(-)

diff --git a/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py b/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
index d0fa786c0728..de3cb72b3818 100644
--- a/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
+++ b/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
@@ -102,25 +102,26 @@
     init_state=ArticulationCfg.InitialStateCfg(
         joint_pos={
             "joint1": 0.0,
-            "joint2": -0.785,
+            "joint2": -0.698,
             "joint3": 0.0,
-            "joint4": -1.571,
+            "joint4": 1.571,
             "joint5": 0.0,
-            "joint6": 0.785,
+            "joint6": 0.698,
             "joint7": 0.0,
-            "finger_joint": math.radians(45.0),
-            ".*_knuckle_joint": 0.0,
+            "finger_joint": 0.0,
+            "left_outer_finger_joint": 0.0,
+            "right_outer_finger_joint": 0.0,
         },
         pos=(0.0, 0.0, 0.0),
-        rot=(1.0, 0.0, 0.0, 0.0),
+        rot=(0.0, 0.0, 0.0, 1.0),
     ),
     actuators={
         "shoulder": ImplicitActuatorCfg(
             joint_names_expr=["joint[1-2]"],
             effort_limit_sim=123.0,
             velocity_limit_sim=2.094,
-            stiffness=1320.0,
-            damping=72.0,
+            stiffness=6000.0,
+            damping=108.5,
             friction=0.0,
             armature=0.0,
         ),
@@ -128,8 +129,8 @@
             joint_names_expr=["joint[3-4]"],
             effort_limit_sim=64.0,
             velocity_limit_sim=2.443,
-            stiffness=600.0,
-            damping=35.0,
+            stiffness=4200.0,
+            damping=90.7,
             friction=0.0,
             armature=0.0,
         ),
@@ -137,17 +138,17 @@
             joint_names_expr=["joint[5-7]"],
             effort_limit_sim=39.0,
             velocity_limit_sim=4.887,
-            stiffness=216.0,
-            damping=29.0,
+            stiffness=1500.0,
+            damping=54.2,
             friction=0.0,
             armature=0.0,
         ),
         "gripper_drive": ImplicitActuatorCfg(
             joint_names_expr=["finger_joint"],
-            effort_limit_sim=10.0,
-            velocity_limit_sim=2.0,
-            stiffness=40.0,
-            damping=1.0,
+            effort_limit_sim=200.0,
+            velocity_limit_sim=0.6,
+            stiffness=2e3,
+            damping=1e1,
             friction=0.0,
             armature=0.0,
         ),
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
index 11104f2968ec..505033f49c9d 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
@@ -9,7 +9,7 @@
 
 import isaaclab.sim as sim_utils
 from isaaclab.actuators import ImplicitActuatorCfg
-from isaaclab.assets import ArticulationCfg
+from isaaclab.assets import ArticulationCfg, RigidObjectCfg
 from isaaclab.managers import EventTermCfg as EventTerm
 from isaaclab.managers import SceneEntityCfg
 from isaaclab.utils import configclass
@@ -29,37 +29,44 @@
 ##
 
 
-def set_finger_joint_pos_grav_gripper(
+def set_finger_joint_pos_grav(
     joint_pos: torch.Tensor,
     reset_ind_joint_pos: list[int],
     finger_joints: list[int],
     finger_joint_position: float,
 ):
-    """Set finger joint positions for Flexiv Grav gripper.
-
-    The Grav gripper has a single main actuation joint (finger_joint).
-    The finger_joint_position is expected to be in the range [0, 1] where:
-    - 0.0 corresponds to fully closed (-8.88 degrees = -0.155 rad)
-    - 1.0 corresponds to fully opened (45 degrees = 0.785 rad)
+    """Set finger joint positions for Grav gripper.
 
     Args:
         joint_pos: Joint positions tensor
         reset_ind_joint_pos: Row indices into the sliced joint_pos tensor
-        finger_joints: List of finger joint indices (only first one is used for Grav)
-        finger_joint_position: Target position for finger joints (0-1 normalized)
+        finger_joints: List of all gripper joint indices (6 joints total)
+        finger_joint_position: Target position for main finger joint (in radians)
+
+    Note:
+        Grav gripper joint structure (indices from finger_joints list):
+        [0] finger_joint - main controllable joint
+        [1] left_inner_knuckle_joint - mimic with -1 gearing
+        [2] right_inner_knuckle_joint - mimic with -1 gearing
+        [3] right_outer_knuckle_joint - mimic with -1 gearing
+        [4] left_outer_finger_joint - mimic with +1 gearing
+        [5] right_outer_finger_joint - mimic with +1 gearing
     """
-    # Grav gripper joint limits (in radians)
-    GRAV_CLOSED_POS = math.radians(-8.88)  # -0.155 rad
-    GRAV_OPENED_POS = math.radians(45.0)   # 0.785 rad
+    for idx in reset_ind_joint_pos:
+        if len(finger_joints) < 6:
+            raise ValueError(f"Grav gripper requires at least 6 finger joints, got {len(finger_joints)}")
 
-    # Map normalized position to actual joint position
-    # finger_joint_position of 0 = closed, 1 = opened
-    actual_pos = GRAV_CLOSED_POS + finger_joint_position * (GRAV_OPENED_POS - GRAV_CLOSED_POS)
+        # Main controllable joint
+        joint_pos[idx, finger_joints[0]] = finger_joint_position
 
-    for idx in reset_ind_joint_pos:
-        if len(finger_joints) > 0:
-            # Set the main finger_joint
-            joint_pos[idx, finger_joints[0]] = actual_pos
+        # Mimic joints with -1 gearing
+        joint_pos[idx, finger_joints[1]] = finger_joint_position  # left_inner_knuckle_joint
+        joint_pos[idx, finger_joints[2]] = finger_joint_position  # right_inner_knuckle_joint
+        joint_pos[idx, finger_joints[3]] = finger_joint_position  # right_outer_knuckle_joint
+
+        # Mimic joints with +1 gearing
+        joint_pos[idx, finger_joints[4]] = -finger_joint_position  # left_outer_finger_joint
+        joint_pos[idx, finger_joints[5]] = -finger_joint_position  # right_outer_finger_joint
 
 
 ##
@@ -71,30 +78,32 @@ def set_finger_joint_pos_grav_gripper(
 class EventCfg:
     """Configuration for events."""
 
-    robot_joint_stiffness_and_damping = EventTerm(
-        func=mdp.randomize_actuator_gains,
-        mode="reset",
-        params={
-            "asset_cfg": SceneEntityCfg(
-                "robot", joint_names=["joint[1-2]", "joint[3-4]", "joint[5-7]"]
-            ),  # only the arm joints are randomized
-            "stiffness_distribution_params": (0.75, 1.5),
-            "damping_distribution_params": (0.3, 3.0),
-            "operation": "scale",
-            "distribution": "log_uniform",
-        },
-    )
-
-    joint_friction = EventTerm(
-        func=mdp.randomize_joint_parameters,
-        mode="reset",
-        params={
-            "asset_cfg": SceneEntityCfg("robot", joint_names=["joint[1-2]", "joint[3-4]", "joint[5-7]"]),
-            "friction_distribution_params": (0.3, 0.7),
-            "operation": "add",
-            "distribution": "uniform",
-        },
-    )
+    # NOTE: Domain randomization for actuator gains and joint friction disabled for stability.
+    # Re-enable once the base simulation is stable and working.
+    # robot_joint_stiffness_and_damping = EventTerm(
+    #     func=mdp.randomize_actuator_gains,
+    #     mode="reset",
+    #     params={
+    #         "asset_cfg": SceneEntityCfg(
+    #             "robot", joint_names=["joint[1-2]", "joint[3-4]", "joint[5-7]"]
+    #         ),
+    #         "stiffness_distribution_params": (0.75, 1.5),
+    #         "damping_distribution_params": (0.3, 3.0),
+    #         "operation": "scale",
+    #         "distribution": "log_uniform",
+    #     },
+    # )
+
+    # joint_friction = EventTerm(
+    #     func=mdp.randomize_joint_parameters,
+    #     mode="reset",
+    #     params={
+    #         "asset_cfg": SceneEntityCfg("robot", joint_names=["joint[1-2]", "joint[3-4]", "joint[5-7]"]),
+    #         "friction_distribution_params": (0.3, 0.7),
+    #         "operation": "add",
+    #         "distribution": "uniform",
+    #     },
+    # )
 
     small_gear_physics_material = EventTerm(
         func=mdp.randomize_rigid_body_material,
@@ -137,8 +146,8 @@ class EventCfg:
         mode="startup",
         params={
             "asset_cfg": SceneEntityCfg("factory_gear_base", body_names=".*"),
-            "static_friction_range": (0.75, 0.75),
-            "dynamic_friction_range": (0.75, 0.75),
+            "static_friction_range": (0.0, 0.0),
+            "dynamic_friction_range": (0.0, 0.0),
             "restitution_range": (0.0, 0.0),
             "num_buckets": 16,
         },
@@ -149,8 +158,8 @@ class EventCfg:
         mode="startup",
         params={
             "asset_cfg": SceneEntityCfg("robot", body_names=".*finger.*"),
-            "static_friction_range": (0.75, 0.75),
-            "dynamic_friction_range": (0.75, 0.75),
+            "static_friction_range": (3.0, 3.0),
+            "dynamic_friction_range": (3.0, 3.0),
             "restitution_range": (0.0, 0.0),
             "num_buckets": 16,
         },
@@ -208,18 +217,17 @@ def __post_init__(self):
         super().__post_init__()
 
         # Robot-specific parameters for Flexiv Rizon 4s with Grav gripper
-        # End effector body name - using the gripper finger tip for accurate grasp positioning
-        self.end_effector_body_name = "right_finger_tip"
+        self.end_effector_body_name = "link7"  # End effector body name for IK
         self.num_arm_joints = 7  # Number of arm joints (Rizon 4s has 7 DOF)
-        # Rotation offset for grasp pose (quaternion [w, x, y, z])
-        # Rizon 4s end-effector is along z-direction, so we need to rotate for downward facing
+        # Rotation offset for grasp pose (quaternion [x, y, z, w])
+        # Computed from IK convergence for downward-facing end effector
         self.grasp_rot_offset = [
-            0.0,
-            1.0,
+            -0.707,
+            0.707,
             0.0,
             0.0,
-        ]  # 180 degrees around X axis for downward facing EE
-        self.gripper_joint_setter_func = set_finger_joint_pos_grav_gripper  # Grav gripper joint setter function
+        ]
+        self.gripper_joint_setter_func = set_finger_joint_pos_grav  # Grav gripper joint setter function
 
         # Gear orientation termination thresholds (in degrees)
         self.gear_orientation_roll_threshold_deg = 15.0  # Maximum allowed roll deviation
@@ -261,8 +269,8 @@ def __post_init__(self):
         )
 
         # Action configuration for Rizon 4s arm
-        # Using smaller action scale similar to UR10e (0.025) for stability
-        self.joint_action_scale = 0.025
+        # Using smaller action scale for stability
+        self.joint_action_scale = 0.01
         self.actions.arm_action = mdp.RelativeJointPositionActionCfg(
             asset_name="robot",
             joint_names=[
@@ -290,43 +298,38 @@ def __post_init__(self):
                     max_linear_velocity=1000.0,
                     max_angular_velocity=3666.0,
                     enable_gyroscopic_forces=True,
-                    solver_position_iteration_count=16,
+                    solver_position_iteration_count=4,
                     solver_velocity_iteration_count=1,
                     max_contact_impulse=1e32,
                 ),
                 articulation_props=sim_utils.ArticulationRootPropertiesCfg(
-                    enabled_self_collisions=False, solver_position_iteration_count=16, solver_velocity_iteration_count=1
+                    enabled_self_collisions=False, solver_position_iteration_count=4, solver_velocity_iteration_count=1
                 ),
                 collision_props=sim_utils.CollisionPropertiesCfg(contact_offset=0.005, rest_offset=0.0),
             ),
-            # Initial joint positions for Rizon 4s with Grav gripper - configured for gear assembly task
-            # These positions place the arm in a suitable configuration for grasping gears
+            # Joint positions based on IK from center of distribution for randomized gear positions
             init_state=ArticulationCfg.InitialStateCfg(
                 joint_pos={
                     "joint1": 0.0,
-                    "joint2": -0.785,  # -45 degrees
+                    "joint2": -0.698,
                     "joint3": 0.0,
-                    "joint4": -1.571,  # -90 degrees
+                    "joint4": 1.571,
                     "joint5": 0.0,
-                    "joint6": 0.785,  # 45 degrees
+                    "joint6": 0.698,
                     "joint7": 0.0,
-                    # Grav gripper - start partially open for grasp
-                    "finger_joint": math.radians(30.0),
-                    # Initialize passive joints
-                    ".*_knuckle_joint": 0.0,
                 },
                 pos=(0.0, 0.0, 0.0),
-                rot=(1.0, 0.0, 0.0, 0.0),
+                rot=(0.0, 0.0, 0.0, 1.0),
             ),
         )
 
-        # Grav gripper main actuator configuration for gear manipulation
+        # Grav gripper actuator configuration for gear manipulation
         self.scene.robot.actuators["gripper_drive"] = ImplicitActuatorCfg(
             joint_names_expr=["finger_joint"],
-            effort_limit_sim=10.0,
-            velocity_limit_sim=2.0,
-            stiffness=40.0,
-            damping=1.0,
+            effort_limit_sim=2.0,
+            velocity_limit_sim=1.0,
+            stiffness=2e3,
+            damping=1e1,
             friction=0.0,
             armature=0.0,
         )
@@ -342,30 +345,45 @@ def __post_init__(self):
             armature=0.0,
         )
 
+        # Override gear initial states for Rizon (closer to robot, centered)
+        self.scene.factory_gear_base.init_state = RigidObjectCfg.InitialStateCfg(
+            pos=(-0.6, 0.0, -0.1),
+            rot=(0.0, 0.0, 0.70711, 0.70711),
+        )
+        self.scene.factory_gear_small.init_state = RigidObjectCfg.InitialStateCfg(
+            pos=(-0.6, 0.0, -0.1),
+            rot=(0.0, 0.0, 0.70711, 0.70711),
+        )
+        self.scene.factory_gear_medium.init_state = RigidObjectCfg.InitialStateCfg(
+            pos=(-0.6, 0.0, -0.1),
+            rot=(0.0, 0.0, 0.70711, 0.70711),
+        )
+        self.scene.factory_gear_large.init_state = RigidObjectCfg.InitialStateCfg(
+            pos=(-0.6, 0.0, -0.1),
+            rot=(0.0, 0.0, 0.70711, 0.70711),
+        )
+
         # Gear offsets and grasp positions for Rizon 4s with Grav gripper
-        # These offsets are relative to the end effector frame (finger tip)
-        # Z offset accounts for the gripper length from flange to finger tip
+        # These offsets are relative to the end effector frame (link7)
+        # Z offset accounts for the gripper length from link7 to finger tip
         self.gear_offsets_grasp = {
-            "gear_small": [0.0, self.gear_offsets["gear_small"][0], -0.12],
-            "gear_medium": [0.0, self.gear_offsets["gear_medium"][0], -0.12],
-            "gear_large": [0.0, self.gear_offsets["gear_large"][0], -0.12],
+            "gear_small": [0.0, -self.gear_offsets["gear_small"][0], -0.35],
+            "gear_medium": [0.0, -self.gear_offsets["gear_medium"][0], -0.35],
+            "gear_large": [0.0, -self.gear_offsets["gear_large"][0], -0.35],
         }
 
-        # Grasp widths for Grav gripper (normalized 0-1 values)
-        # These values determine the finger opening for different gear sizes
-        # Grav gripper range: -8.88° (closed) to 45° (open)
+        # Grasp widths for Grav gripper (raw radian values for finger_joint)
         self.hand_grasp_width = {
-            "gear_small": 0.7,   # More open for small gear
-            "gear_medium": 0.5,  # Medium opening
-            "gear_large": 0.4,   # Less open for large gear
+            "gear_small": 0.05,
+            "gear_medium": 0.2,
+            "gear_large": 0.28,
         }
 
-        # Close widths for Grav gripper (normalized 0-1 values)
-        # Slightly tighter than grasp width for secure grip
+        # Close widths for Grav gripper (raw radian values for finger_joint)
         self.hand_close_width = {
-            "gear_small": 0.75,
-            "gear_medium": 0.55,
-            "gear_large": 0.45,
+            "gear_small": 0.0,
+            "gear_medium": 0.139626,
+            "gear_large": 0.139626,
         }
 
         # Populate event term parameters
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/events.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/events.py
index 503fd3af75d8..6acf4489e8cb 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/events.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/events.py
@@ -216,7 +216,7 @@ def __call__(
         robot_asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
         pos_threshold: float = 1e-6,
         rot_threshold: float = 1e-6,
-        max_iterations: int = 10,
+        max_iterations: int = 50,
         pos_randomization_range: dict | None = None,
         gear_offsets_grasp: dict | None = None,
         end_effector_body_name: str | None = None,
@@ -340,6 +340,22 @@ def __call__(
 
             # Update joint positions
             joint_pos = joint_pos + delta_dof_pos
+
+            # Wrap arm joint positions to fall within robot's actual joint limits
+            joint_pos_limits = self.robot_asset.data.joint_pos_limits[env_ids, :self.num_arm_joints, :]
+            joint_min = joint_pos_limits[:, :, 0]
+            joint_max = joint_pos_limits[:, :, 1]
+            joint_range = joint_max - joint_min
+
+            # Wrap only the arm joint positions (not gripper joints)
+            arm_joint_pos = joint_pos[:, :self.num_arm_joints]
+            arm_joint_pos = torch.where(
+                joint_range > 0,
+                joint_min + torch.remainder(arm_joint_pos - joint_min, joint_range),
+                arm_joint_pos,
+            )
+            joint_pos[:, :self.num_arm_joints] = arm_joint_pos
+
             joint_vel = torch.zeros_like(joint_pos)
 
             # Write to sim
@@ -348,6 +364,9 @@ def __call__(
             self.robot_asset.write_joint_position_to_sim_index(position=joint_pos, env_ids=env_ids)
             self.robot_asset.write_joint_velocity_to_sim_index(velocity=joint_vel, env_ids=env_ids)
 
+        # Reset joint velocities to zero after IK convergence
+        joint_vel = torch.zeros_like(self.robot_asset.data.joint_vel[env_ids])
+
         # Set gripper to grasp position
         joint_pos = wp.to_torch(self.robot_asset.data.joint_pos)[env_ids].clone()
 

From c566bab38d5360a7fa7139615d050f45dd98582c Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Mon, 9 Feb 2026 12:46:34 -0800
Subject: [PATCH 03/87] Quaternion fix.

---
 .../isaaclab_tasks/direct/automate/factory_control.py           | 2 +-
 .../gear_assembly/config/rizon_4s/ros_inference_env_cfg.py      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/automate/factory_control.py b/source/isaaclab_tasks/isaaclab_tasks/direct/automate/factory_control.py
index 2d7d98e0fb0e..cea15cdf6946 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/direct/automate/factory_control.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/direct/automate/factory_control.py
@@ -116,7 +116,7 @@ def get_pose_error(
 
         fingertip_midpoint_quat_norm = quat_mul(fingertip_midpoint_quat, quat_conjugate(fingertip_midpoint_quat))[
             :, 3
-        ]  # W component is at index 3 in XYZW format
+        ]  # scalar component (w component in XYZW format)
         fingertip_midpoint_quat_inv = quat_conjugate(fingertip_midpoint_quat) / fingertip_midpoint_quat_norm.unsqueeze(
             -1
         )
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/ros_inference_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/ros_inference_env_cfg.py
index 67ff5f9e79dd..e66dbe07a8e5 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/ros_inference_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/ros_inference_env_cfg.py
@@ -48,7 +48,7 @@ def __post_init__(self):
 
         # Override robot initial pose for ROS inference (fixed pose, no randomization)
         # Joint positions and pos are inherited from parent, only override rotation to be deterministic
-        self.scene.robot.init_state.rot = (1.0, 0.0, 0.0, 0.0)
+        self.scene.robot.init_state.rot = (0.0, 0.0, 0.0, 1.0)  # Identity quaternion (x, y, z, w)
 
         # Override gear base initial pose (fixed pose for ROS inference)
         # Position configured for Rizon 4s workspace

From bf1655626fffa59ff48348a4883bf9738c7b3e0f Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Mon, 9 Feb 2026 12:56:03 -0800
Subject: [PATCH 04/87] Reverted stiffness and damping.

---
 .../isaaclab_assets/isaaclab_assets/robots/flexiv.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py b/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
index de3cb72b3818..994e1a311688 100644
--- a/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
+++ b/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
@@ -120,8 +120,8 @@
             joint_names_expr=["joint[1-2]"],
             effort_limit_sim=123.0,
             velocity_limit_sim=2.094,
-            stiffness=6000.0,
-            damping=108.5,
+            stiffness=1320.0,
+            damping=72.0,
             friction=0.0,
             armature=0.0,
         ),
@@ -129,8 +129,8 @@
             joint_names_expr=["joint[3-4]"],
             effort_limit_sim=64.0,
             velocity_limit_sim=2.443,
-            stiffness=4200.0,
-            damping=90.7,
+            stiffness=600.0,
+            damping=35.0,
             friction=0.0,
             armature=0.0,
         ),
@@ -138,8 +138,8 @@
             joint_names_expr=["joint[5-7]"],
             effort_limit_sim=39.0,
             velocity_limit_sim=4.887,
-            stiffness=1500.0,
-            damping=54.2,
+            stiffness=216.0,
+            damping=29.0,
             friction=0.0,
             armature=0.0,
         ),

From 7cfa46c02a730bac967ac03e32878cc8704fe708 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Tue, 24 Feb 2026 10:54:49 -0800
Subject: [PATCH 05/87] Minor fix for gear insertion as per warp changes.

---
 .../manager_based/manipulation/deploy/mdp/events.py           | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/events.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/events.py
index 6acf4489e8cb..a8cf6b306099 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/events.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/events.py
@@ -342,7 +342,7 @@ def __call__(
             joint_pos = joint_pos + delta_dof_pos
 
             # Wrap arm joint positions to fall within robot's actual joint limits
-            joint_pos_limits = self.robot_asset.data.joint_pos_limits[env_ids, :self.num_arm_joints, :]
+            joint_pos_limits = wp.to_torch(self.robot_asset.data.joint_pos_limits)[env_ids, :self.num_arm_joints, :]
             joint_min = joint_pos_limits[:, :, 0]
             joint_max = joint_pos_limits[:, :, 1]
             joint_range = joint_max - joint_min
@@ -365,7 +365,7 @@ def __call__(
             self.robot_asset.write_joint_velocity_to_sim_index(velocity=joint_vel, env_ids=env_ids)
 
         # Reset joint velocities to zero after IK convergence
-        joint_vel = torch.zeros_like(self.robot_asset.data.joint_vel[env_ids])
+        joint_vel = torch.zeros_like(wp.to_torch(self.robot_asset.data.joint_vel)[env_ids])
 
         # Set gripper to grasp position
         joint_pos = wp.to_torch(self.robot_asset.data.joint_pos)[env_ids].clone()

From c7bfbcf74ec6f995643d5fef8299553e8a5690d3 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Wed, 25 Feb 2026 13:16:02 -0800
Subject: [PATCH 06/87] Minor fix as per quaternion format change.

---
 .../manager_based/manipulation/deploy/mdp/observations.py     | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/observations.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/observations.py
index e70b77d7d8f9..92f933123f69 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/observations.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/observations.py
@@ -186,7 +186,7 @@ def __call__(
 
         # Ensure w component is positive (q and -q represent the same rotation)
         # Pick one canonical form to reduce observation variation seen by the policy
-        w_negative = base_quat[:, 0] < 0
+        w_negative = base_quat[:, 3] < 0
         positive_quat = base_quat.clone()
         positive_quat[w_negative] = -base_quat[w_negative]
 
@@ -335,7 +335,7 @@ def __call__(self, env: ManagerBasedRLEnv) -> torch.Tensor:
 
         # Ensure w component is positive (q and -q represent the same rotation)
         # Pick one canonical form to reduce observation variation seen by the policy
-        w_negative = gear_quat[:, 0] < 0
+        w_negative = gear_quat[:, 3] < 0
         gear_positive_quat = gear_quat.clone()
         gear_positive_quat[w_negative] = -gear_quat[w_negative]
 

From e6be50df480b63a2124dac8ead0636a5af54b415 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Wed, 25 Feb 2026 16:32:04 -0800
Subject: [PATCH 07/87] Minor fix as per quaternion format change.

---
 .../config/rizon_4s/ros_inference_env_cfg.py           | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/ros_inference_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/ros_inference_env_cfg.py
index e66dbe07a8e5..9517446f4fad 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/ros_inference_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/ros_inference_env_cfg.py
@@ -54,26 +54,26 @@ def __post_init__(self):
         # Position configured for Rizon 4s workspace
         self.scene.factory_gear_base.init_state = RigidObjectCfg.InitialStateCfg(
             pos=(0.5, 0.0, -0.1),
-            rot=(-0.70711, 0.0, 0.0, 0.70711),
+            rot=(0.0, 0.0, 0.70711, -0.70711),
         )
 
         # Override gear initial poses (fixed poses for ROS inference)
         # Small gear
         self.scene.factory_gear_small.init_state = RigidObjectCfg.InitialStateCfg(
             pos=(0.5, 0.0, -0.1),
-            rot=(-0.70711, 0.0, 0.0, 0.70711),
+            rot=(0.0, 0.0, 0.70711, -0.70711),
         )
 
         # Medium gear
         self.scene.factory_gear_medium.init_state = RigidObjectCfg.InitialStateCfg(
             pos=(0.5, 0.0, -0.1),
-            rot=(-0.70711, 0.0, 0.0, 0.70711),
+            rot=(0.0, 0.0, 0.70711, -0.70711),
         )
 
         # Large gear
         self.scene.factory_gear_large.init_state = RigidObjectCfg.InitialStateCfg(
             pos=(0.5, 0.0, -0.1),
-            rot=(-0.70711, 0.0, 0.0, 0.70711),
+            rot=(0.0, 0.0, 0.70711, -0.70711),
         )
 
         # Fixed asset parameters for ROS inference - derived from configuration
@@ -88,7 +88,7 @@ def __post_init__(self):
             pose_range["y"][1],  # max value
             pose_range["z"][1],  # max value
         ]
-        # Orientation in degrees (quaternion (-0.70711, 0.0, 0.0, 0.70711) = -90° around Z)
+        # Orientation in degrees (quaternion (0.0, 0.0, 0.70711, -0.70711) = -90° around Z)
         self.fixed_asset_init_orn_deg = [0.0, 0.0, -90.0]
         # Derive orientation range from parent's pose_range (radians to degrees)
         self.fixed_asset_init_orn_deg_range = [

From c7b3ae3a07b9835092d7595d74b6357470816fa7 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Wed, 25 Feb 2026 16:35:57 -0800
Subject: [PATCH 08/87] Removed comments.

---
 source/isaaclab_assets/isaaclab_assets/robots/flexiv.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py b/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
index 994e1a311688..c351ac3366e7 100644
--- a/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
+++ b/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
@@ -87,7 +87,7 @@
 
 FLEXIV_RIZON4S_GRAV_GRIPPER_CFG = ArticulationCfg(
     spawn=sim_utils.UsdFileCfg(
-        usd_path="/home/shauryad/workspaces/rl_policy/isaac_sim_ws/exts/isaacsim.robot.manipulators.examples/data/flexiv/Rizon4s_with_Grav.usd",
+        usd_path=f"{ISAACLAB_NUCLEUS_DIR}/Robots/Flexiv/Rizon4s/rizon4s_with_grav.usd",
         rigid_props=sim_utils.RigidBodyPropertiesCfg(
             disable_gravity=False,
             max_depenetration_velocity=5.0,

From 8a42c1093bfe99ec6977b86cf9ec0a7c411de73f Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Thu, 26 Feb 2026 17:02:12 -0800
Subject: [PATCH 09/87] Minor param fixes for Flexiv gear insertion.

---
 .../gear_assembly/config/rizon_4s/joint_pos_env_cfg.py    | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
index 505033f49c9d..62b952b35b9d 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
@@ -146,8 +146,8 @@ class EventCfg:
         mode="startup",
         params={
             "asset_cfg": SceneEntityCfg("factory_gear_base", body_names=".*"),
-            "static_friction_range": (0.0, 0.0),
-            "dynamic_friction_range": (0.0, 0.0),
+            "static_friction_range": (0.75, 0.75),
+            "dynamic_friction_range": (0.75, 0.75),
             "restitution_range": (0.0, 0.0),
             "num_buckets": 16,
         },
@@ -270,7 +270,7 @@ def __post_init__(self):
 
         # Action configuration for Rizon 4s arm
         # Using smaller action scale for stability
-        self.joint_action_scale = 0.01
+        self.joint_action_scale = 0.025
         self.actions.arm_action = mdp.RelativeJointPositionActionCfg(
             asset_name="robot",
             joint_names=[
@@ -326,7 +326,7 @@ def __post_init__(self):
         # Grav gripper actuator configuration for gear manipulation
         self.scene.robot.actuators["gripper_drive"] = ImplicitActuatorCfg(
             joint_names_expr=["finger_joint"],
-            effort_limit_sim=2.0,
+            effort_limit_sim=200.0,
             velocity_limit_sim=1.0,
             stiffness=2e3,
             damping=1e1,

From f10c7d393d240928c02a3d4cac7b28099c11cec1 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Fri, 27 Feb 2026 13:13:23 -0800
Subject: [PATCH 10/87] Fixed some params and reduced some randomizations.

---
 .../config/rizon_4s/joint_pos_env_cfg.py      | 21 ++++++++++++-------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
index 62b952b35b9d..e83bcc5b56ad 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
@@ -146,8 +146,8 @@ class EventCfg:
         mode="startup",
         params={
             "asset_cfg": SceneEntityCfg("factory_gear_base", body_names=".*"),
-            "static_friction_range": (0.75, 0.75),
-            "dynamic_friction_range": (0.75, 0.75),
+            "static_friction_range": (0.0, 0.0),
+            "dynamic_friction_range": (0.0, 0.0),
             "restitution_range": (0.0, 0.0),
             "num_buckets": 16,
         },
@@ -179,16 +179,20 @@ class EventCfg:
         params={
             "pose_range": {
                 "x": [-0.1, 0.1],
-                "y": [-0.25, 0.25],
+                # "y": [-0.25, 0.25],
+                "y": [-0.1, 0.1],
                 "z": [-0.1, 0.1],
                 "roll": [-math.pi / 90, math.pi / 90],  # 2 degree
                 "pitch": [-math.pi / 90, math.pi / 90],  # 2 degree
                 "yaw": [-math.pi / 6, math.pi / 6],  # 30 degree
             },
             "gear_pos_range": {
-                "x": [-0.02, 0.02],
-                "y": [-0.02, 0.02],
-                "z": [0.0575, 0.0775],  # 0.045 + 0.0225
+                # "x": [-0.02, 0.02],
+                # "y": [-0.02, 0.02],
+                # "z": [0.0575, 0.0775],  # 0.045 + 0.0225
+                "x": [-0.0, 0.0],
+                "y": [-0.0, 0.0],
+                "z": [0.0675, 0.0675],  # 0.045 + 0.0225
             },
             "velocity_range": {},
         },
@@ -199,7 +203,8 @@ class EventCfg:
         mode="reset",
         params={
             "robot_asset_cfg": SceneEntityCfg("robot"),
-            "pos_randomization_range": {"x": [-0.0, 0.0], "y": [-0.005, 0.005], "z": [-0.003, 0.003]},
+            # "pos_randomization_range": {"x": [-0.0, 0.0], "y": [-0.005, 0.005], "z": [-0.003, 0.003]},
+            "pos_randomization_range": {"x": [-0.0, 0.0], "y": [-0.00, 0.00], "z": [-0.00, 0.00]},
         },
     )
 
@@ -326,7 +331,7 @@ def __post_init__(self):
         # Grav gripper actuator configuration for gear manipulation
         self.scene.robot.actuators["gripper_drive"] = ImplicitActuatorCfg(
             joint_names_expr=["finger_joint"],
-            effort_limit_sim=200.0,
+            effort_limit_sim=2.0,
             velocity_limit_sim=1.0,
             stiffness=2e3,
             damping=1e1,

From 83bb7366f0e4468382574129f8bab21e063fb978 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Fri, 27 Feb 2026 13:41:23 -0800
Subject: [PATCH 11/87] Fixed init state of setup.

---
 .../config/rizon_4s/joint_pos_env_cfg.py             | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
index e83bcc5b56ad..c2e937f8661e 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
@@ -352,19 +352,23 @@ def __post_init__(self):
 
         # Override gear initial states for Rizon (closer to robot, centered)
         self.scene.factory_gear_base.init_state = RigidObjectCfg.InitialStateCfg(
-            pos=(-0.6, 0.0, -0.1),
+            # pos=(-0.6, 0.0, -0.1),
+            pos=(0.63, -0.083, -0.1),
             rot=(0.0, 0.0, 0.70711, 0.70711),
         )
         self.scene.factory_gear_small.init_state = RigidObjectCfg.InitialStateCfg(
-            pos=(-0.6, 0.0, -0.1),
+            # pos=(-0.6, 0.0, -0.1),
+            pos=(0.63, -0.083, -0.1),
             rot=(0.0, 0.0, 0.70711, 0.70711),
         )
         self.scene.factory_gear_medium.init_state = RigidObjectCfg.InitialStateCfg(
-            pos=(-0.6, 0.0, -0.1),
+            # pos=(-0.6, 0.0, -0.1),
+            pos=(0.63, -0.083, -0.1),
             rot=(0.0, 0.0, 0.70711, 0.70711),
         )
         self.scene.factory_gear_large.init_state = RigidObjectCfg.InitialStateCfg(
-            pos=(-0.6, 0.0, -0.1),
+            # pos=(-0.6, 0.0, -0.1),
+            pos=(0.63, -0.083, -0.1),
             rot=(0.0, 0.0, 0.70711, 0.70711),
         )
 

From bb894b6b276288aaf703a4f54f2fb36e5b818ea4 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Fri, 27 Feb 2026 13:42:01 -0800
Subject: [PATCH 12/87] Large gear only.

---
 .../deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py  | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
index c2e937f8661e..8e31e2255ba9 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
@@ -168,7 +168,8 @@ class EventCfg:
     randomize_gear_type = EventTerm(
         func=gear_assembly_events.randomize_gear_type,
         mode="reset",
-        params={"gear_types": ["gear_small", "gear_medium", "gear_large"]},
+        # params={"gear_types": ["gear_small", "gear_medium", "gear_large"]},
+        params={"gear_types": ["gear_large"]},
     )
 
     reset_all = EventTerm(func=mdp.reset_scene_to_default, mode="reset")

From 3af04a752017fde0c457f685635dedcac6eca3c0 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Mon, 2 Mar 2026 16:33:17 -0800
Subject: [PATCH 13/87] Added gear randomizations.

---
 .../config/rizon_4s/joint_pos_env_cfg.py         | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
index 8e31e2255ba9..27fe8e9d1e94 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
@@ -180,20 +180,20 @@ class EventCfg:
         params={
             "pose_range": {
                 "x": [-0.1, 0.1],
-                # "y": [-0.25, 0.25],
-                "y": [-0.1, 0.1],
+                "y": [-0.25, 0.25],
+                # "y": [-0.1, 0.1],
                 "z": [-0.1, 0.1],
                 "roll": [-math.pi / 90, math.pi / 90],  # 2 degree
                 "pitch": [-math.pi / 90, math.pi / 90],  # 2 degree
                 "yaw": [-math.pi / 6, math.pi / 6],  # 30 degree
             },
             "gear_pos_range": {
-                # "x": [-0.02, 0.02],
-                # "y": [-0.02, 0.02],
-                # "z": [0.0575, 0.0775],  # 0.045 + 0.0225
-                "x": [-0.0, 0.0],
-                "y": [-0.0, 0.0],
-                "z": [0.0675, 0.0675],  # 0.045 + 0.0225
+                "x": [-0.02, 0.02],
+                "y": [-0.02, 0.02],
+                "z": [0.0575, 0.0775],  # 0.045 + 0.0225
+                # "x": [-0.0, 0.0],
+                # "y": [-0.0, 0.0],
+                # "z": [0.0675, 0.0675],  # 0.045 + 0.0225
             },
             "velocity_range": {},
         },

From 7320117623b0be2021da16164285caa1983410a6 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Mon, 2 Mar 2026 16:34:48 -0800
Subject: [PATCH 14/87] Added stiffness, damping and friction randomization.

---
 .../config/rizon_4s/joint_pos_env_cfg.py      | 48 +++++++++----------
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
index 27fe8e9d1e94..8cb5f66eab51 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
@@ -80,30 +80,30 @@ class EventCfg:
 
     # NOTE: Domain randomization for actuator gains and joint friction disabled for stability.
     # Re-enable once the base simulation is stable and working.
-    # robot_joint_stiffness_and_damping = EventTerm(
-    #     func=mdp.randomize_actuator_gains,
-    #     mode="reset",
-    #     params={
-    #         "asset_cfg": SceneEntityCfg(
-    #             "robot", joint_names=["joint[1-2]", "joint[3-4]", "joint[5-7]"]
-    #         ),
-    #         "stiffness_distribution_params": (0.75, 1.5),
-    #         "damping_distribution_params": (0.3, 3.0),
-    #         "operation": "scale",
-    #         "distribution": "log_uniform",
-    #     },
-    # )
-
-    # joint_friction = EventTerm(
-    #     func=mdp.randomize_joint_parameters,
-    #     mode="reset",
-    #     params={
-    #         "asset_cfg": SceneEntityCfg("robot", joint_names=["joint[1-2]", "joint[3-4]", "joint[5-7]"]),
-    #         "friction_distribution_params": (0.3, 0.7),
-    #         "operation": "add",
-    #         "distribution": "uniform",
-    #     },
-    # )
+    robot_joint_stiffness_and_damping = EventTerm(
+        func=mdp.randomize_actuator_gains,
+        mode="reset",
+        params={
+            "asset_cfg": SceneEntityCfg(
+                "robot", joint_names=["joint[1-2]", "joint[3-4]", "joint[5-7]"]
+            ),
+            "stiffness_distribution_params": (0.75, 1.5),
+            "damping_distribution_params": (0.3, 3.0),
+            "operation": "scale",
+            "distribution": "log_uniform",
+        },
+    )
+
+    joint_friction = EventTerm(
+        func=mdp.randomize_joint_parameters,
+        mode="reset",
+        params={
+            "asset_cfg": SceneEntityCfg("robot", joint_names=["joint[1-2]", "joint[3-4]", "joint[5-7]"]),
+            "friction_distribution_params": (0.3, 0.7),
+            "operation": "add",
+            "distribution": "uniform",
+        },
+    )
 
     small_gear_physics_material = EventTerm(
         func=mdp.randomize_rigid_body_material,

From 11bff1d521a3516155a00e84670d5457de01d2fd Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Mon, 2 Mar 2026 16:38:48 -0800
Subject: [PATCH 15/87] All gears.

---
 .../deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
index 8cb5f66eab51..7ad19b348ab2 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
@@ -168,8 +168,8 @@ class EventCfg:
     randomize_gear_type = EventTerm(
         func=gear_assembly_events.randomize_gear_type,
         mode="reset",
-        # params={"gear_types": ["gear_small", "gear_medium", "gear_large"]},
-        params={"gear_types": ["gear_large"]},
+        params={"gear_types": ["gear_small", "gear_medium", "gear_large"]},
+        # params={"gear_types": ["gear_large"]},
     )
 
     reset_all = EventTerm(func=mdp.reset_scene_to_default, mode="reset")

From 4cb84e0dc59ecb82dee27fea14d8f49bb4075cd4 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Mon, 2 Mar 2026 16:41:48 -0800
Subject: [PATCH 16/87] Removed param randomization.

---
 .../config/rizon_4s/joint_pos_env_cfg.py      | 48 +++++++++----------
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
index 7ad19b348ab2..959cbd9dd89d 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
@@ -80,30 +80,30 @@ class EventCfg:
 
     # NOTE: Domain randomization for actuator gains and joint friction disabled for stability.
     # Re-enable once the base simulation is stable and working.
-    robot_joint_stiffness_and_damping = EventTerm(
-        func=mdp.randomize_actuator_gains,
-        mode="reset",
-        params={
-            "asset_cfg": SceneEntityCfg(
-                "robot", joint_names=["joint[1-2]", "joint[3-4]", "joint[5-7]"]
-            ),
-            "stiffness_distribution_params": (0.75, 1.5),
-            "damping_distribution_params": (0.3, 3.0),
-            "operation": "scale",
-            "distribution": "log_uniform",
-        },
-    )
-
-    joint_friction = EventTerm(
-        func=mdp.randomize_joint_parameters,
-        mode="reset",
-        params={
-            "asset_cfg": SceneEntityCfg("robot", joint_names=["joint[1-2]", "joint[3-4]", "joint[5-7]"]),
-            "friction_distribution_params": (0.3, 0.7),
-            "operation": "add",
-            "distribution": "uniform",
-        },
-    )
+    # robot_joint_stiffness_and_damping = EventTerm(
+    #     func=mdp.randomize_actuator_gains,
+    #     mode="reset",
+    #     params={
+    #         "asset_cfg": SceneEntityCfg(
+    #             "robot", joint_names=["joint[1-2]", "joint[3-4]", "joint[5-7]"]
+    #         ),
+    #         "stiffness_distribution_params": (0.75, 1.5),
+    #         "damping_distribution_params": (0.3, 3.0),
+    #         "operation": "scale",
+    #         "distribution": "log_uniform",
+    #     },
+    # )
+
+    # joint_friction = EventTerm(
+    #     func=mdp.randomize_joint_parameters,
+    #     mode="reset",
+    #     params={
+    #         "asset_cfg": SceneEntityCfg("robot", joint_names=["joint[1-2]", "joint[3-4]", "joint[5-7]"]),
+    #         "friction_distribution_params": (0.3, 0.7),
+    #         "operation": "add",
+    #         "distribution": "uniform",
+    #     },
+    # )
 
     small_gear_physics_material = EventTerm(
         func=mdp.randomize_rigid_body_material,

From a09520e7b8bd70e4e47d8f353309d68b5d99d84a Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Tue, 3 Mar 2026 13:07:44 -0800
Subject: [PATCH 17/87] Added gear randomization within gripper.

---
 .../deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
index 959cbd9dd89d..1d05272b3250 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
@@ -204,8 +204,8 @@ class EventCfg:
         mode="reset",
         params={
             "robot_asset_cfg": SceneEntityCfg("robot"),
-            # "pos_randomization_range": {"x": [-0.0, 0.0], "y": [-0.005, 0.005], "z": [-0.003, 0.003]},
-            "pos_randomization_range": {"x": [-0.0, 0.0], "y": [-0.00, 0.00], "z": [-0.00, 0.00]},
+            "pos_randomization_range": {"x": [-0.0, 0.0], "y": [-0.005, 0.005], "z": [-0.003, 0.003]},
+            # "pos_randomization_range": {"x": [-0.0, 0.0], "y": [-0.00, 0.00], "z": [-0.00, 0.00]},
         },
     )
 

From cee549dd3db97122977edd4ffe25bce562b505b4 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Tue, 3 Mar 2026 13:09:22 -0800
Subject: [PATCH 18/87] Large gear only.

---
 .../deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
index 1d05272b3250..ae69fd2e8a59 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
@@ -168,8 +168,8 @@ class EventCfg:
     randomize_gear_type = EventTerm(
         func=gear_assembly_events.randomize_gear_type,
         mode="reset",
-        params={"gear_types": ["gear_small", "gear_medium", "gear_large"]},
-        # params={"gear_types": ["gear_large"]},
+        # params={"gear_types": ["gear_small", "gear_medium", "gear_large"]},
+        params={"gear_types": ["gear_large"]},
     )
 
     reset_all = EventTerm(func=mdp.reset_scene_to_default, mode="reset")

From 2d4f306ce5b9cc04ac2a56b3c8049bf9d26a6980 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Tue, 3 Mar 2026 19:00:42 -0800
Subject: [PATCH 19/87] Changed gears and gear base mean position.

---
 .../config/rizon_4s/joint_pos_env_cfg.py      | 24 +++++++++++--------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
index ae69fd2e8a59..f1193b55181e 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
@@ -204,8 +204,8 @@ class EventCfg:
         mode="reset",
         params={
             "robot_asset_cfg": SceneEntityCfg("robot"),
-            "pos_randomization_range": {"x": [-0.0, 0.0], "y": [-0.005, 0.005], "z": [-0.003, 0.003]},
-            # "pos_randomization_range": {"x": [-0.0, 0.0], "y": [-0.00, 0.00], "z": [-0.00, 0.00]},
+            # "pos_randomization_range": {"x": [-0.0, 0.0], "y": [-0.005, 0.005], "z": [-0.003, 0.003]},
+            "pos_randomization_range": {"x": [-0.0, 0.0], "y": [-0.00, 0.00], "z": [-0.00, 0.00]},
         },
     )
 
@@ -354,23 +354,27 @@ def __post_init__(self):
         # Override gear initial states for Rizon (closer to robot, centered)
         self.scene.factory_gear_base.init_state = RigidObjectCfg.InitialStateCfg(
             # pos=(-0.6, 0.0, -0.1),
-            pos=(0.63, -0.083, -0.1),
-            rot=(0.0, 0.0, 0.70711, 0.70711),
+            # pos=(0.63, -0.083, -0.1),
+            pos=(0.481, -0.073, 0.071),
+            rot=(0.0, 0.0, 0.70711, -0.70711),
         )
         self.scene.factory_gear_small.init_state = RigidObjectCfg.InitialStateCfg(
             # pos=(-0.6, 0.0, -0.1),
-            pos=(0.63, -0.083, -0.1),
-            rot=(0.0, 0.0, 0.70711, 0.70711),
+            # pos=(0.63, -0.083, -0.1),
+            pos=(0.481, -0.073, 0.071),
+            rot=(0.0, 0.0, 0.70711, -0.70711),
         )
         self.scene.factory_gear_medium.init_state = RigidObjectCfg.InitialStateCfg(
             # pos=(-0.6, 0.0, -0.1),
-            pos=(0.63, -0.083, -0.1),
-            rot=(0.0, 0.0, 0.70711, 0.70711),
+            # pos=(0.63, -0.083, -0.1),
+            pos=(0.481, -0.073, 0.071),
+            rot=(0.0, 0.0, 0.70711, -0.70711),
         )
         self.scene.factory_gear_large.init_state = RigidObjectCfg.InitialStateCfg(
             # pos=(-0.6, 0.0, -0.1),
-            pos=(0.63, -0.083, -0.1),
-            rot=(0.0, 0.0, 0.70711, 0.70711),
+            # pos=(0.63, -0.083, -0.1),
+            pos=(0.481, -0.073, 0.071),
+            rot=(0.0, 0.0, 0.70711, -0.70711),
         )
 
         # Gear offsets and grasp positions for Rizon 4s with Grav gripper

From 77579a8156ef7757180441d4eef4f147afd8b4d1 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Tue, 3 Mar 2026 19:01:47 -0800
Subject: [PATCH 20/87] .

---
 .../deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
index f1193b55181e..afe58be7bf12 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
@@ -168,8 +168,8 @@ class EventCfg:
     randomize_gear_type = EventTerm(
         func=gear_assembly_events.randomize_gear_type,
         mode="reset",
-        # params={"gear_types": ["gear_small", "gear_medium", "gear_large"]},
-        params={"gear_types": ["gear_large"]},
+        params={"gear_types": ["gear_small", "gear_medium", "gear_large"]},
+        # params={"gear_types": ["gear_large"]},
     )
 
     reset_all = EventTerm(func=mdp.reset_scene_to_default, mode="reset")

From 8b71bd72bf0364fa4f4af753c6535b703bfb00cd Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Wed, 4 Mar 2026 16:46:45 -0800
Subject: [PATCH 21/87] Fixed gear base and gears init and increased
 randomization.

---
 .../gear_assembly/config/rizon_4s/joint_pos_env_cfg.py   | 9 ++++-----
 .../config/rizon_4s/ros_inference_env_cfg.py             | 8 ++++----
 2 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
index afe58be7bf12..0ff37b16f339 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
@@ -179,12 +179,11 @@ class EventCfg:
         mode="reset",
         params={
             "pose_range": {
-                "x": [-0.1, 0.1],
+                "x": [-0.2, 0.2],
                 "y": [-0.25, 0.25],
-                # "y": [-0.1, 0.1],
-                "z": [-0.1, 0.1],
-                "roll": [-math.pi / 90, math.pi / 90],  # 2 degree
-                "pitch": [-math.pi / 90, math.pi / 90],  # 2 degree
+                "z": [-0.2, 0.2],
+                "roll": [-math.pi / 60, math.pi / 60],  # 2 degree
+                "pitch": [-math.pi / 60, math.pi / 60],  # 2 degree
                 "yaw": [-math.pi / 6, math.pi / 6],  # 30 degree
             },
             "gear_pos_range": {
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/ros_inference_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/ros_inference_env_cfg.py
index 9517446f4fad..6ff7f2cb3d7d 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/ros_inference_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/ros_inference_env_cfg.py
@@ -53,26 +53,26 @@ def __post_init__(self):
         # Override gear base initial pose (fixed pose for ROS inference)
         # Position configured for Rizon 4s workspace
         self.scene.factory_gear_base.init_state = RigidObjectCfg.InitialStateCfg(
-            pos=(0.5, 0.0, -0.1),
+            pos=(0.481, -0.073, -0.005),
             rot=(0.0, 0.0, 0.70711, -0.70711),
         )
 
         # Override gear initial poses (fixed poses for ROS inference)
         # Small gear
         self.scene.factory_gear_small.init_state = RigidObjectCfg.InitialStateCfg(
-            pos=(0.5, 0.0, -0.1),
+            pos=(0.481, -0.073, -0.005),
             rot=(0.0, 0.0, 0.70711, -0.70711),
         )
 
         # Medium gear
         self.scene.factory_gear_medium.init_state = RigidObjectCfg.InitialStateCfg(
-            pos=(0.5, 0.0, -0.1),
+            pos=(0.481, -0.073, -0.005),
             rot=(0.0, 0.0, 0.70711, -0.70711),
         )
 
         # Large gear
         self.scene.factory_gear_large.init_state = RigidObjectCfg.InitialStateCfg(
-            pos=(0.5, 0.0, -0.1),
+            pos=(0.481, -0.073, -0.005),
             rot=(0.0, 0.0, 0.70711, -0.70711),
         )
 

From 6be409fd8e0bef34f1a975dd4b044e8da9b9ffc8 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Wed, 4 Mar 2026 16:47:44 -0800
Subject: [PATCH 22/87] Increased gripper gear randomization.

---
 .../config/rizon_4s/joint_pos_env_cfg.py             | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
index 0ff37b16f339..2c587d999873 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
@@ -179,11 +179,11 @@ class EventCfg:
         mode="reset",
         params={
             "pose_range": {
-                "x": [-0.2, 0.2],
+                "x": [-0.1, 0.1],
                 "y": [-0.25, 0.25],
-                "z": [-0.2, 0.2],
-                "roll": [-math.pi / 60, math.pi / 60],  # 2 degree
-                "pitch": [-math.pi / 60, math.pi / 60],  # 2 degree
+                "z": [-0.1, 0.1],
+                "roll": [-math.pi / 90, math.pi / 90],  # 2 degree
+                "pitch": [-math.pi / 90, math.pi / 90],  # 2 degree
                 "yaw": [-math.pi / 6, math.pi / 6],  # 30 degree
             },
             "gear_pos_range": {
@@ -203,8 +203,8 @@ class EventCfg:
         mode="reset",
         params={
             "robot_asset_cfg": SceneEntityCfg("robot"),
-            # "pos_randomization_range": {"x": [-0.0, 0.0], "y": [-0.005, 0.005], "z": [-0.003, 0.003]},
-            "pos_randomization_range": {"x": [-0.0, 0.0], "y": [-0.00, 0.00], "z": [-0.00, 0.00]},
+            "pos_randomization_range": {"x": [-0.005, 0.005], "y": [-0.005, 0.005], "z": [-0.005, 0.005]},
+            # "pos_randomization_range": {"x": [-0.0, 0.0], "y": [-0.00, 0.00], "z": [-0.00, 0.00]},
         },
     )
 

From e991802a0d7e90d0d3a5aa4c57b9bbb258889eb8 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Wed, 4 Mar 2026 16:50:58 -0800
Subject: [PATCH 23/87] Added friction randomization.

---
 .../config/rizon_4s/joint_pos_env_cfg.py      | 24 +++++++++----------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
index 2c587d999873..8ca6a6a67781 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
@@ -94,16 +94,16 @@ class EventCfg:
     #     },
     # )
 
-    # joint_friction = EventTerm(
-    #     func=mdp.randomize_joint_parameters,
-    #     mode="reset",
-    #     params={
-    #         "asset_cfg": SceneEntityCfg("robot", joint_names=["joint[1-2]", "joint[3-4]", "joint[5-7]"]),
-    #         "friction_distribution_params": (0.3, 0.7),
-    #         "operation": "add",
-    #         "distribution": "uniform",
-    #     },
-    # )
+    joint_friction = EventTerm(
+        func=mdp.randomize_joint_parameters,
+        mode="reset",
+        params={
+            "asset_cfg": SceneEntityCfg("robot", joint_names=["joint[1-2]", "joint[3-4]", "joint[5-7]"]),
+            "friction_distribution_params": (0.3, 0.7),
+            "operation": "add",
+            "distribution": "uniform",
+        },
+    )
 
     small_gear_physics_material = EventTerm(
         func=mdp.randomize_rigid_body_material,
@@ -203,8 +203,8 @@ class EventCfg:
         mode="reset",
         params={
             "robot_asset_cfg": SceneEntityCfg("robot"),
-            "pos_randomization_range": {"x": [-0.005, 0.005], "y": [-0.005, 0.005], "z": [-0.005, 0.005]},
-            # "pos_randomization_range": {"x": [-0.0, 0.0], "y": [-0.00, 0.00], "z": [-0.00, 0.00]},
+            # "pos_randomization_range": {"x": [-0.0, 0.0], "y": [-0.005, 0.005], "z": [-0.003, 0.003]},
+            "pos_randomization_range": {"x": [-0.0, 0.0], "y": [-0.00, 0.00], "z": [-0.00, 0.00]},
         },
     )
 

From 2f55109c592865b64c78525c1f42300347543563 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Wed, 4 Mar 2026 16:52:17 -0800
Subject: [PATCH 24/87] Increased observation randomization.

---
 .../config/rizon_4s/joint_pos_env_cfg.py      | 20 +++++++++----------
 .../gear_assembly/gear_assembly_env_cfg.py    |  2 +-
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
index 8ca6a6a67781..dc09405e27fb 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
@@ -94,16 +94,16 @@ class EventCfg:
     #     },
     # )
 
-    joint_friction = EventTerm(
-        func=mdp.randomize_joint_parameters,
-        mode="reset",
-        params={
-            "asset_cfg": SceneEntityCfg("robot", joint_names=["joint[1-2]", "joint[3-4]", "joint[5-7]"]),
-            "friction_distribution_params": (0.3, 0.7),
-            "operation": "add",
-            "distribution": "uniform",
-        },
-    )
+    # joint_friction = EventTerm(
+    #     func=mdp.randomize_joint_parameters,
+    #     mode="reset",
+    #     params={
+    #         "asset_cfg": SceneEntityCfg("robot", joint_names=["joint[1-2]", "joint[3-4]", "joint[5-7]"]),
+    #         "friction_distribution_params": (0.3, 0.7),
+    #         "operation": "add",
+    #         "distribution": "uniform",
+    #     },
+    # )
 
     small_gear_physics_material = EventTerm(
         func=mdp.randomize_rigid_body_material,
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index 6a3fc7f080f4..b5354c057b87 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -191,7 +191,7 @@ class PolicyCfg(ObsGroup):
             func=mdp.gear_shaft_pos_w,
             params={},  # Will be populated in __post_init__
             noise=ResetSampledConstantNoiseModelCfg(
-                noise_cfg=UniformNoiseCfg(n_min=-0.005, n_max=0.005, operation="add")
+                noise_cfg=UniformNoiseCfg(n_min=-0.01, n_max=0.01, operation="add")
             ),
         )
         gear_shaft_quat = ObsTerm(func=mdp.gear_shaft_quat_w)

From 4c298562419c6580cf1a25df49f9f7879d9e6c66 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Mon, 9 Mar 2026 16:07:07 -0700
Subject: [PATCH 25/87] Reduced obs noise, fixed params for better gear
 gripping without slippage.

---
 .../gear_assembly/config/rizon_4s/joint_pos_env_cfg.py    | 8 ++++----
 .../deploy/gear_assembly/gear_assembly_env_cfg.py         | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
index dc09405e27fb..3e2e80c9ed66 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
@@ -303,8 +303,8 @@ def __post_init__(self):
                     max_linear_velocity=1000.0,
                     max_angular_velocity=3666.0,
                     enable_gyroscopic_forces=True,
-                    solver_position_iteration_count=4,
-                    solver_velocity_iteration_count=1,
+                    solver_position_iteration_count=8,
+                    solver_velocity_iteration_count=2,
                     max_contact_impulse=1e32,
                 ),
                 articulation_props=sim_utils.ArticulationRootPropertiesCfg(
@@ -331,10 +331,10 @@ def __post_init__(self):
         # Grav gripper actuator configuration for gear manipulation
         self.scene.robot.actuators["gripper_drive"] = ImplicitActuatorCfg(
             joint_names_expr=["finger_joint"],
-            effort_limit_sim=2.0,
+            effort_limit_sim=50.0,
             velocity_limit_sim=1.0,
             stiffness=2e3,
-            damping=1e1,
+            damping=1e2,
             friction=0.0,
             armature=0.0,
         )
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index b5354c057b87..6a3fc7f080f4 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -191,7 +191,7 @@ class PolicyCfg(ObsGroup):
             func=mdp.gear_shaft_pos_w,
             params={},  # Will be populated in __post_init__
             noise=ResetSampledConstantNoiseModelCfg(
-                noise_cfg=UniformNoiseCfg(n_min=-0.01, n_max=0.01, operation="add")
+                noise_cfg=UniformNoiseCfg(n_min=-0.005, n_max=0.005, operation="add")
             ),
         )
         gear_shaft_quat = ObsTerm(func=mdp.gear_shaft_quat_w)

From 8f65f06c1d94b16abc5bc6ac8f67d2e0702e3a62 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Thu, 12 Mar 2026 13:04:21 -0700
Subject: [PATCH 26/87] Revert "Reduced obs noise, fixed params for better gear
 gripping without slippage."

This reverts commit c604e873db32e24a50d1358f672f3dda82dc84f0.
---
 .../gear_assembly/config/rizon_4s/joint_pos_env_cfg.py    | 8 ++++----
 .../deploy/gear_assembly/gear_assembly_env_cfg.py         | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
index 3e2e80c9ed66..dc09405e27fb 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
@@ -303,8 +303,8 @@ def __post_init__(self):
                     max_linear_velocity=1000.0,
                     max_angular_velocity=3666.0,
                     enable_gyroscopic_forces=True,
-                    solver_position_iteration_count=8,
-                    solver_velocity_iteration_count=2,
+                    solver_position_iteration_count=4,
+                    solver_velocity_iteration_count=1,
                     max_contact_impulse=1e32,
                 ),
                 articulation_props=sim_utils.ArticulationRootPropertiesCfg(
@@ -331,10 +331,10 @@ def __post_init__(self):
         # Grav gripper actuator configuration for gear manipulation
         self.scene.robot.actuators["gripper_drive"] = ImplicitActuatorCfg(
             joint_names_expr=["finger_joint"],
-            effort_limit_sim=50.0,
+            effort_limit_sim=2.0,
             velocity_limit_sim=1.0,
             stiffness=2e3,
-            damping=1e2,
+            damping=1e1,
             friction=0.0,
             armature=0.0,
         )
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index 6a3fc7f080f4..b5354c057b87 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -191,7 +191,7 @@ class PolicyCfg(ObsGroup):
             func=mdp.gear_shaft_pos_w,
             params={},  # Will be populated in __post_init__
             noise=ResetSampledConstantNoiseModelCfg(
-                noise_cfg=UniformNoiseCfg(n_min=-0.005, n_max=0.005, operation="add")
+                noise_cfg=UniformNoiseCfg(n_min=-0.01, n_max=0.01, operation="add")
             ),
         )
         gear_shaft_quat = ObsTerm(func=mdp.gear_shaft_quat_w)

From 01c999acea04f42c86bbed00f3eac209cab1e75b Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Thu, 12 Mar 2026 13:59:22 -0700
Subject: [PATCH 27/87] Added reward functions for EE vs gear keypoint
 distance.

---
 .../config/rizon_4s/joint_pos_env_cfg.py      |   9 +
 .../config/ur_10e/joint_pos_env_cfg.py        |  18 ++
 .../gear_assembly/gear_assembly_env_cfg.py    |  21 ++
 .../manipulation/deploy/mdp/rewards.py        | 253 +++++++++++++++++-
 4 files changed, 300 insertions(+), 1 deletion(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
index dc09405e27fb..5a8119c58fd0 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
@@ -406,6 +406,15 @@ def __post_init__(self):
         self.events.set_robot_to_grasp_pose.params["grasp_rot_offset"] = self.grasp_rot_offset
         self.events.set_robot_to_grasp_pose.params["gripper_joint_setter_func"] = self.gripper_joint_setter_func
 
+        # Populate reward term parameters for EE-gear keypoint tracking
+        self.rewards.ee_gear_keypoint_tracking.params["end_effector_body_name"] = self.end_effector_body_name
+        self.rewards.ee_gear_keypoint_tracking.params["grasp_rot_offset"] = self.grasp_rot_offset
+        self.rewards.ee_gear_keypoint_tracking.params["gear_offsets_grasp"] = self.gear_offsets_grasp
+
+        self.rewards.ee_gear_keypoint_tracking_exp.params["end_effector_body_name"] = self.end_effector_body_name
+        self.rewards.ee_gear_keypoint_tracking_exp.params["grasp_rot_offset"] = self.grasp_rot_offset
+        self.rewards.ee_gear_keypoint_tracking_exp.params["gear_offsets_grasp"] = self.gear_offsets_grasp
+
         # Populate termination term parameters
         self.terminations.gear_dropped.params["gear_offsets_grasp"] = self.gear_offsets_grasp
         self.terminations.gear_dropped.params["end_effector_body_name"] = self.end_effector_body_name
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py
index 3cb8eeae956e..d8de9818414a 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py
@@ -385,6 +385,15 @@ def __post_init__(self):
         self.events.set_robot_to_grasp_pose.params["grasp_rot_offset"] = self.grasp_rot_offset
         self.events.set_robot_to_grasp_pose.params["gripper_joint_setter_func"] = self.gripper_joint_setter_func
 
+        # Populate reward term parameters for EE-gear keypoint tracking
+        self.rewards.ee_gear_keypoint_tracking.params["end_effector_body_name"] = self.end_effector_body_name
+        self.rewards.ee_gear_keypoint_tracking.params["grasp_rot_offset"] = self.grasp_rot_offset
+        self.rewards.ee_gear_keypoint_tracking.params["gear_offsets_grasp"] = self.gear_offsets_grasp
+
+        self.rewards.ee_gear_keypoint_tracking_exp.params["end_effector_body_name"] = self.end_effector_body_name
+        self.rewards.ee_gear_keypoint_tracking_exp.params["grasp_rot_offset"] = self.grasp_rot_offset
+        self.rewards.ee_gear_keypoint_tracking_exp.params["gear_offsets_grasp"] = self.gear_offsets_grasp
+
         # Populate termination term parameters
         self.terminations.gear_dropped.params["gear_offsets_grasp"] = self.gear_offsets_grasp
         self.terminations.gear_dropped.params["end_effector_body_name"] = self.end_effector_body_name
@@ -483,6 +492,15 @@ def __post_init__(self):
         self.events.set_robot_to_grasp_pose.params["grasp_rot_offset"] = self.grasp_rot_offset
         self.events.set_robot_to_grasp_pose.params["gripper_joint_setter_func"] = self.gripper_joint_setter_func
 
+        # Populate reward term parameters for EE-gear keypoint tracking
+        self.rewards.ee_gear_keypoint_tracking.params["end_effector_body_name"] = self.end_effector_body_name
+        self.rewards.ee_gear_keypoint_tracking.params["grasp_rot_offset"] = self.grasp_rot_offset
+        self.rewards.ee_gear_keypoint_tracking.params["gear_offsets_grasp"] = self.gear_offsets_grasp
+
+        self.rewards.ee_gear_keypoint_tracking_exp.params["end_effector_body_name"] = self.end_effector_body_name
+        self.rewards.ee_gear_keypoint_tracking_exp.params["grasp_rot_offset"] = self.grasp_rot_offset
+        self.rewards.ee_gear_keypoint_tracking_exp.params["gear_offsets_grasp"] = self.gear_offsets_grasp
+
         # Populate termination term parameters
         self.terminations.gear_dropped.params["gear_offsets_grasp"] = self.gear_offsets_grasp
         self.terminations.gear_dropped.params["end_effector_body_name"] = self.end_effector_body_name
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index b5354c057b87..f41a4b789617 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -263,6 +263,27 @@ class RewardsCfg:
         },
     )
 
+    ee_gear_keypoint_tracking = RewTerm(
+        func=mdp.keypoint_ee_gear_error,
+        weight=-0.5,
+        params={
+            "robot_asset_cfg": SceneEntityCfg("robot"),
+            "keypoint_scale": 0.15,
+        },
+    )
+
+    ee_gear_keypoint_tracking_exp = RewTerm(
+        func=mdp.keypoint_ee_gear_error_exp,
+        weight=0.5,
+        params={
+            "robot_asset_cfg": SceneEntityCfg("robot"),
+            "kp_exp_coeffs": [(50, 0.0001), (300, 0.0001)],
+            "kp_use_sum_of_exps": False,
+            "keypoint_scale": 0.15,
+        },
+    )
+
+
     action_rate = RewTerm(func=mdp.action_rate_l2, weight=-5.0e-06)
 
 
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/rewards.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/rewards.py
index 2204a0186bc0..badc58b4ad52 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/rewards.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/rewards.py
@@ -13,9 +13,10 @@
 import warp as wp
 
 from isaaclab.managers import ManagerTermBase, RewardTermCfg, SceneEntityCfg
-from isaaclab.utils.math import combine_frame_transforms
+from isaaclab.utils.math import combine_frame_transforms, quat_apply, quat_mul
 
 if TYPE_CHECKING:
+    from isaaclab.assets import Articulation
     from isaaclab.envs import ManagerBasedRLEnv
     from isaaclab.sensors.frame_transformer.frame_transformer import FrameTransformer
 
@@ -397,6 +398,256 @@ def __call__(
         return keypoint_reward_exp
 
 
+class keypoint_ee_gear_error(ManagerTermBase):
+    """Compute keypoint distance between the robot end effector and the gear's grasp-corrected pose.
+
+    Transforms the gear's actual world pose into the expected EE position/orientation
+    using grasp offsets, so that the distance is ~0 when properly holding the gear
+    and increases when the gripper drifts away.
+    """
+
+    def __init__(self, cfg: RewardTermCfg, env: ManagerBasedRLEnv):
+        super().__init__(cfg, env)
+
+        self.robot_asset_cfg: SceneEntityCfg = cfg.params.get("robot_asset_cfg", SceneEntityCfg("robot"))
+        self.robot_asset: Articulation = env.scene[self.robot_asset_cfg.name]
+
+        self.end_effector_body_name: str = cfg.params["end_effector_body_name"]
+        grasp_rot_offset = cfg.params["grasp_rot_offset"]
+        self.grasp_rot_offset_tensor = (
+            torch.tensor(grasp_rot_offset, device=env.device, dtype=torch.float32).unsqueeze(0).repeat(env.num_envs, 1)
+        )
+
+        gear_offsets_grasp = cfg.params["gear_offsets_grasp"]
+        self.gear_grasp_offsets_stacked = torch.stack(
+            [
+                torch.tensor(gear_offsets_grasp["gear_small"], device=env.device, dtype=torch.float32),
+                torch.tensor(gear_offsets_grasp["gear_medium"], device=env.device, dtype=torch.float32),
+                torch.tensor(gear_offsets_grasp["gear_large"], device=env.device, dtype=torch.float32),
+            ],
+            dim=0,
+        )
+
+        self.gear_type_indices = torch.zeros(env.num_envs, device=env.device, dtype=torch.long)
+        self.env_indices = torch.arange(env.num_envs, device=env.device)
+        self.gear_assets = {
+            "gear_small": env.scene["factory_gear_small"],
+            "gear_medium": env.scene["factory_gear_medium"],
+            "gear_large": env.scene["factory_gear_large"],
+        }
+
+        eef_indices, _ = self.robot_asset.find_bodies([self.end_effector_body_name])
+        self.eef_idx = eef_indices[0] if len(eef_indices) > 0 else None
+
+        self.keypoint_computer = _compute_keypoint_distance(cfg, env)
+        self._step_count = 0
+
+    def __call__(
+        self,
+        env: ManagerBasedRLEnv,
+        robot_asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
+        end_effector_body_name: str = "",
+        grasp_rot_offset: list | None = None,
+        gear_offsets_grasp: dict | None = None,
+        keypoint_scale: float = 1.0,
+        add_cube_center_kp: bool = True,
+    ) -> torch.Tensor:
+        if self.eef_idx is None:
+            return torch.zeros(env.num_envs, device=env.device)
+
+        gear_type_manager: randomize_gear_type = env._gear_type_manager
+        self.gear_type_indices = gear_type_manager.get_all_gear_type_indices()
+
+        eef_pos = wp.to_torch(self.robot_asset.data.body_link_pos_w)[:, self.eef_idx]
+        eef_quat = wp.to_torch(self.robot_asset.data.body_link_quat_w)[:, self.eef_idx]
+
+        all_gear_pos = torch.stack(
+            [
+                wp.to_torch(self.gear_assets["gear_small"].data.root_link_pos_w),
+                wp.to_torch(self.gear_assets["gear_medium"].data.root_link_pos_w),
+                wp.to_torch(self.gear_assets["gear_large"].data.root_link_pos_w),
+            ],
+            dim=1,
+        )
+        all_gear_quat = torch.stack(
+            [
+                wp.to_torch(self.gear_assets["gear_small"].data.root_link_quat_w),
+                wp.to_torch(self.gear_assets["gear_medium"].data.root_link_quat_w),
+                wp.to_torch(self.gear_assets["gear_large"].data.root_link_quat_w),
+            ],
+            dim=1,
+        )
+
+        gear_pos = all_gear_pos[self.env_indices, self.gear_type_indices]
+        gear_quat = all_gear_quat[self.env_indices, self.gear_type_indices]
+
+        gear_quat_grasp = quat_mul(gear_quat, self.grasp_rot_offset_tensor)
+        grasp_offsets = self.gear_grasp_offsets_stacked[self.gear_type_indices]
+        gear_grasp_pos = gear_pos + quat_apply(gear_quat_grasp, grasp_offsets)
+
+        keypoint_dist_sep = self.keypoint_computer.compute(
+            current_pos=eef_pos,
+            current_quat=eef_quat,
+            target_pos=gear_grasp_pos,
+            target_quat=gear_quat_grasp,
+            keypoint_scale=keypoint_scale,
+        )
+
+        mean_kp_error = keypoint_dist_sep.mean(-1)
+        mean_error_scalar = mean_kp_error.mean().item()
+
+        if not hasattr(env, "extras"):
+            env.extras = {}
+        if "log" not in env.extras:
+            env.extras["log"] = {}
+        env.extras["log"]["ee_gear_kp_error/mean_keypoint_dist"] = mean_error_scalar
+
+        self._step_count += 1
+        import carb
+
+        carb.log_info(
+            f"[ee_gear_kp_error] step={self._step_count}"
+            f" | mean_kp_error={mean_error_scalar:.5f}"
+            f" | reward(unweighted)={-mean_error_scalar:.5f}"
+        )
+
+        return mean_kp_error
+
+
+class keypoint_ee_gear_error_exp(ManagerTermBase):
+    """Compute exponential keypoint reward between the robot end effector and the gear's grasp-corrected pose.
+
+    Transforms the gear's actual world pose into the expected EE position/orientation
+    using grasp offsets, so that the reward is high (~1) when properly holding the gear
+    and drops sharply when the gripper drifts away.
+    """
+
+    def __init__(self, cfg: RewardTermCfg, env: ManagerBasedRLEnv):
+        super().__init__(cfg, env)
+
+        self.robot_asset_cfg: SceneEntityCfg = cfg.params.get("robot_asset_cfg", SceneEntityCfg("robot"))
+        self.robot_asset: Articulation = env.scene[self.robot_asset_cfg.name]
+
+        self.end_effector_body_name: str = cfg.params["end_effector_body_name"]
+        grasp_rot_offset = cfg.params["grasp_rot_offset"]
+        self.grasp_rot_offset_tensor = (
+            torch.tensor(grasp_rot_offset, device=env.device, dtype=torch.float32).unsqueeze(0).repeat(env.num_envs, 1)
+        )
+
+        gear_offsets_grasp = cfg.params["gear_offsets_grasp"]
+        self.gear_grasp_offsets_stacked = torch.stack(
+            [
+                torch.tensor(gear_offsets_grasp["gear_small"], device=env.device, dtype=torch.float32),
+                torch.tensor(gear_offsets_grasp["gear_medium"], device=env.device, dtype=torch.float32),
+                torch.tensor(gear_offsets_grasp["gear_large"], device=env.device, dtype=torch.float32),
+            ],
+            dim=0,
+        )
+
+        self.gear_type_indices = torch.zeros(env.num_envs, device=env.device, dtype=torch.long)
+        self.env_indices = torch.arange(env.num_envs, device=env.device)
+        self.gear_assets = {
+            "gear_small": env.scene["factory_gear_small"],
+            "gear_medium": env.scene["factory_gear_medium"],
+            "gear_large": env.scene["factory_gear_large"],
+        }
+
+        eef_indices, _ = self.robot_asset.find_bodies([self.end_effector_body_name])
+        self.eef_idx = eef_indices[0] if len(eef_indices) > 0 else None
+
+        self.keypoint_computer = _compute_keypoint_distance(cfg, env)
+        self._step_count = 0
+
+    def __call__(
+        self,
+        env: ManagerBasedRLEnv,
+        robot_asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
+        end_effector_body_name: str = "",
+        grasp_rot_offset: list | None = None,
+        gear_offsets_grasp: dict | None = None,
+        kp_exp_coeffs: list[tuple[float, float]] = [(1.0, 0.1)],
+        kp_use_sum_of_exps: bool = True,
+        keypoint_scale: float = 1.0,
+        add_cube_center_kp: bool = True,
+    ) -> torch.Tensor:
+        if self.eef_idx is None:
+            return torch.zeros(env.num_envs, device=env.device)
+
+        gear_type_manager: randomize_gear_type = env._gear_type_manager
+        self.gear_type_indices = gear_type_manager.get_all_gear_type_indices()
+
+        eef_pos = wp.to_torch(self.robot_asset.data.body_link_pos_w)[:, self.eef_idx]
+        eef_quat = wp.to_torch(self.robot_asset.data.body_link_quat_w)[:, self.eef_idx]
+
+        all_gear_pos = torch.stack(
+            [
+                wp.to_torch(self.gear_assets["gear_small"].data.root_link_pos_w),
+                wp.to_torch(self.gear_assets["gear_medium"].data.root_link_pos_w),
+                wp.to_torch(self.gear_assets["gear_large"].data.root_link_pos_w),
+            ],
+            dim=1,
+        )
+        all_gear_quat = torch.stack(
+            [
+                wp.to_torch(self.gear_assets["gear_small"].data.root_link_quat_w),
+                wp.to_torch(self.gear_assets["gear_medium"].data.root_link_quat_w),
+                wp.to_torch(self.gear_assets["gear_large"].data.root_link_quat_w),
+            ],
+            dim=1,
+        )
+
+        gear_pos = all_gear_pos[self.env_indices, self.gear_type_indices]
+        gear_quat = all_gear_quat[self.env_indices, self.gear_type_indices]
+
+        gear_quat_grasp = quat_mul(gear_quat, self.grasp_rot_offset_tensor)
+        grasp_offsets = self.gear_grasp_offsets_stacked[self.gear_type_indices]
+        gear_grasp_pos = gear_pos + quat_apply(gear_quat_grasp, grasp_offsets)
+
+        keypoint_dist_sep = self.keypoint_computer.compute(
+            current_pos=eef_pos,
+            current_quat=eef_quat,
+            target_pos=gear_grasp_pos,
+            target_quat=gear_quat_grasp,
+            keypoint_scale=keypoint_scale,
+        )
+
+        mean_kp_error = keypoint_dist_sep.mean(-1)
+
+        keypoint_reward_exp = torch.zeros_like(keypoint_dist_sep[:, 0])
+        if kp_use_sum_of_exps:
+            for coeff in kp_exp_coeffs:
+                a, b = coeff
+                keypoint_reward_exp += (
+                    1.0 / (torch.exp(a * keypoint_dist_sep) + b + torch.exp(-a * keypoint_dist_sep))
+                ).mean(-1)
+        else:
+            keypoint_dist = keypoint_dist_sep.mean(-1)
+            for coeff in kp_exp_coeffs:
+                a, b = coeff
+                keypoint_reward_exp += 1.0 / (torch.exp(a * keypoint_dist) + b + torch.exp(-a * keypoint_dist))
+
+        mean_error_scalar = mean_kp_error.mean().item()
+        mean_reward_scalar = keypoint_reward_exp.mean().item()
+
+        if not hasattr(env, "extras"):
+            env.extras = {}
+        if "log" not in env.extras:
+            env.extras["log"] = {}
+        env.extras["log"]["ee_gear_kp_error_exp/mean_keypoint_dist"] = mean_error_scalar
+        env.extras["log"]["ee_gear_kp_error_exp/mean_exp_reward"] = mean_reward_scalar
+
+        self._step_count += 1
+        import carb
+
+        carb.log_info(
+            f"[ee_gear_kp_error_exp] step={self._step_count}"
+            f" | mean_kp_error={mean_error_scalar:.5f}"
+            f" | mean_exp_reward={mean_reward_scalar:.5f}"
+        )
+
+        return keypoint_reward_exp
+
+
 ##
 # Helper functions and classes
 ##

From ff8f48ac24a3fa8eab9cc34374f3b07744f7f883 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Thu, 12 Mar 2026 14:06:14 -0700
Subject: [PATCH 28/87] Weight scaling with training iters for EE vs gear
 rewards.

---
 .../gear_assembly/gear_assembly_env_cfg.py    |  4 ++
 .../manipulation/deploy/mdp/rewards.py        | 41 ++++++++++++++++++-
 2 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index f41a4b789617..175d35269152 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -269,6 +269,8 @@ class RewardsCfg:
         params={
             "robot_asset_cfg": SceneEntityCfg("robot"),
             "keypoint_scale": 0.15,
+            "weight_ramp_start": 0.0,
+            "weight_ramp_steps": 5000,
         },
     )
 
@@ -280,6 +282,8 @@ class RewardsCfg:
             "kp_exp_coeffs": [(50, 0.0001), (300, 0.0001)],
             "kp_use_sum_of_exps": False,
             "keypoint_scale": 0.15,
+            "weight_ramp_start": 0.0,
+            "weight_ramp_steps": 5000,
         },
     )
 
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/rewards.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/rewards.py
index badc58b4ad52..3efd8534742e 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/rewards.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/rewards.py
@@ -404,6 +404,10 @@ class keypoint_ee_gear_error(ManagerTermBase):
     Transforms the gear's actual world pose into the expected EE position/orientation
     using grasp offsets, so that the distance is ~0 when properly holding the gear
     and increases when the gripper drifts away.
+
+    Supports linear weight ramp-up: the returned reward is scaled by a factor that
+    linearly increases from ``weight_ramp_start`` to 1.0 over ``weight_ramp_steps``
+    env steps, allowing the reward to grow in importance as training progresses.
     """
 
     def __init__(self, cfg: RewardTermCfg, env: ManagerBasedRLEnv):
@@ -428,6 +432,9 @@ def __init__(self, cfg: RewardTermCfg, env: ManagerBasedRLEnv):
             dim=0,
         )
 
+        self.weight_ramp_start: float = cfg.params.get("weight_ramp_start", 0.0)
+        self.weight_ramp_steps: int = cfg.params.get("weight_ramp_steps", 1)
+
         self.gear_type_indices = torch.zeros(env.num_envs, device=env.device, dtype=torch.long)
         self.env_indices = torch.arange(env.num_envs, device=env.device)
         self.gear_assets = {
@@ -442,6 +449,10 @@ def __init__(self, cfg: RewardTermCfg, env: ManagerBasedRLEnv):
         self.keypoint_computer = _compute_keypoint_distance(cfg, env)
         self._step_count = 0
 
+    def _get_weight_scale(self, env: ManagerBasedRLEnv) -> float:
+        progress = min(env.common_step_counter / max(self.weight_ramp_steps, 1), 1.0)
+        return self.weight_ramp_start + (1.0 - self.weight_ramp_start) * progress
+
     def __call__(
         self,
         env: ManagerBasedRLEnv,
@@ -451,6 +462,8 @@ def __call__(
         gear_offsets_grasp: dict | None = None,
         keypoint_scale: float = 1.0,
         add_cube_center_kp: bool = True,
+        weight_ramp_start: float = 0.0,
+        weight_ramp_steps: int = 1,
     ) -> torch.Tensor:
         if self.eef_idx is None:
             return torch.zeros(env.num_envs, device=env.device)
@@ -494,6 +507,10 @@ def __call__(
         )
 
         mean_kp_error = keypoint_dist_sep.mean(-1)
+
+        weight_scale = self._get_weight_scale(env)
+        scaled_reward = mean_kp_error * weight_scale
+
         mean_error_scalar = mean_kp_error.mean().item()
 
         if not hasattr(env, "extras"):
@@ -501,6 +518,7 @@ def __call__(
         if "log" not in env.extras:
             env.extras["log"] = {}
         env.extras["log"]["ee_gear_kp_error/mean_keypoint_dist"] = mean_error_scalar
+        env.extras["log"]["ee_gear_kp_error/weight_scale"] = weight_scale
 
         self._step_count += 1
         import carb
@@ -508,10 +526,11 @@ def __call__(
         carb.log_info(
             f"[ee_gear_kp_error] step={self._step_count}"
             f" | mean_kp_error={mean_error_scalar:.5f}"
+            f" | weight_scale={weight_scale:.4f}"
             f" | reward(unweighted)={-mean_error_scalar:.5f}"
         )
 
-        return mean_kp_error
+        return scaled_reward
 
 
 class keypoint_ee_gear_error_exp(ManagerTermBase):
@@ -520,6 +539,10 @@ class keypoint_ee_gear_error_exp(ManagerTermBase):
     Transforms the gear's actual world pose into the expected EE position/orientation
     using grasp offsets, so that the reward is high (~1) when properly holding the gear
     and drops sharply when the gripper drifts away.
+
+    Supports linear weight ramp-up: the returned reward is scaled by a factor that
+    linearly increases from ``weight_ramp_start`` to 1.0 over ``weight_ramp_steps``
+    env steps, allowing the reward to grow in importance as training progresses.
     """
 
     def __init__(self, cfg: RewardTermCfg, env: ManagerBasedRLEnv):
@@ -544,6 +567,9 @@ def __init__(self, cfg: RewardTermCfg, env: ManagerBasedRLEnv):
             dim=0,
         )
 
+        self.weight_ramp_start: float = cfg.params.get("weight_ramp_start", 0.0)
+        self.weight_ramp_steps: int = cfg.params.get("weight_ramp_steps", 1)
+
         self.gear_type_indices = torch.zeros(env.num_envs, device=env.device, dtype=torch.long)
         self.env_indices = torch.arange(env.num_envs, device=env.device)
         self.gear_assets = {
@@ -558,6 +584,10 @@ def __init__(self, cfg: RewardTermCfg, env: ManagerBasedRLEnv):
         self.keypoint_computer = _compute_keypoint_distance(cfg, env)
         self._step_count = 0
 
+    def _get_weight_scale(self, env: ManagerBasedRLEnv) -> float:
+        progress = min(env.common_step_counter / max(self.weight_ramp_steps, 1), 1.0)
+        return self.weight_ramp_start + (1.0 - self.weight_ramp_start) * progress
+
     def __call__(
         self,
         env: ManagerBasedRLEnv,
@@ -569,6 +599,8 @@ def __call__(
         kp_use_sum_of_exps: bool = True,
         keypoint_scale: float = 1.0,
         add_cube_center_kp: bool = True,
+        weight_ramp_start: float = 0.0,
+        weight_ramp_steps: int = 1,
     ) -> torch.Tensor:
         if self.eef_idx is None:
             return torch.zeros(env.num_envs, device=env.device)
@@ -626,6 +658,9 @@ def __call__(
                 a, b = coeff
                 keypoint_reward_exp += 1.0 / (torch.exp(a * keypoint_dist) + b + torch.exp(-a * keypoint_dist))
 
+        weight_scale = self._get_weight_scale(env)
+        scaled_reward = keypoint_reward_exp * weight_scale
+
         mean_error_scalar = mean_kp_error.mean().item()
         mean_reward_scalar = keypoint_reward_exp.mean().item()
 
@@ -635,6 +670,7 @@ def __call__(
             env.extras["log"] = {}
         env.extras["log"]["ee_gear_kp_error_exp/mean_keypoint_dist"] = mean_error_scalar
         env.extras["log"]["ee_gear_kp_error_exp/mean_exp_reward"] = mean_reward_scalar
+        env.extras["log"]["ee_gear_kp_error_exp/weight_scale"] = weight_scale
 
         self._step_count += 1
         import carb
@@ -642,10 +678,11 @@ def __call__(
         carb.log_info(
             f"[ee_gear_kp_error_exp] step={self._step_count}"
             f" | mean_kp_error={mean_error_scalar:.5f}"
+            f" | weight_scale={weight_scale:.4f}"
             f" | mean_exp_reward={mean_reward_scalar:.5f}"
         )
 
-        return keypoint_reward_exp
+        return scaled_reward
 
 
 ##

From e7e93aaafe773fd59282af8617541ed19a1d1966 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Fri, 13 Mar 2026 09:22:01 -0700
Subject: [PATCH 29/87] Fixed weight scaling schedule.

---
 .../deploy/gear_assembly/gear_assembly_env_cfg.py             | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index 175d35269152..8a2ed5b90a48 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -270,7 +270,7 @@ class RewardsCfg:
             "robot_asset_cfg": SceneEntityCfg("robot"),
             "keypoint_scale": 0.15,
             "weight_ramp_start": 0.0,
-            "weight_ramp_steps": 5000,
+            "weight_ramp_steps": 250_000,
         },
     )
 
@@ -283,7 +283,7 @@ class RewardsCfg:
             "kp_use_sum_of_exps": False,
             "keypoint_scale": 0.15,
             "weight_ramp_start": 0.0,
-            "weight_ramp_steps": 5000,
+            "weight_ramp_steps": 250_000,
         },
     )
 

From c4737aecc9e2c887e81014a943504a7364555fc7 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Fri, 13 Mar 2026 09:25:03 -0700
Subject: [PATCH 30/87] Disabled weight scaling and reduced fixed val.

---
 .../deploy/gear_assembly/gear_assembly_env_cfg.py             | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index 8a2ed5b90a48..c4ac95f8c74c 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -269,7 +269,7 @@ class RewardsCfg:
         params={
             "robot_asset_cfg": SceneEntityCfg("robot"),
             "keypoint_scale": 0.15,
-            "weight_ramp_start": 0.0,
+            "weight_ramp_start": 1.0,  # Set to 0.0 to enable ramp-up
             "weight_ramp_steps": 250_000,
         },
     )
@@ -282,7 +282,7 @@ class RewardsCfg:
             "kp_exp_coeffs": [(50, 0.0001), (300, 0.0001)],
             "kp_use_sum_of_exps": False,
             "keypoint_scale": 0.15,
-            "weight_ramp_start": 0.0,
+            "weight_ramp_start": 1.0,  # Set to 0.0 to enable ramp-up
             "weight_ramp_steps": 250_000,
         },
     )

From 31ea2516e342a5296890f2174fa059980fb113f0 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Fri, 13 Mar 2026 09:25:51 -0700
Subject: [PATCH 31/87] Changed EE vs gear reward weight value.

---
 .../deploy/gear_assembly/gear_assembly_env_cfg.py             | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index c4ac95f8c74c..01e2a04693e2 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -265,7 +265,7 @@ class RewardsCfg:
 
     ee_gear_keypoint_tracking = RewTerm(
         func=mdp.keypoint_ee_gear_error,
-        weight=-0.5,
+        weight=-0.05,
         params={
             "robot_asset_cfg": SceneEntityCfg("robot"),
             "keypoint_scale": 0.15,
@@ -276,7 +276,7 @@ class RewardsCfg:
 
     ee_gear_keypoint_tracking_exp = RewTerm(
         func=mdp.keypoint_ee_gear_error_exp,
-        weight=0.5,
+        weight=0.05,
         params={
             "robot_asset_cfg": SceneEntityCfg("robot"),
             "kp_exp_coeffs": [(50, 0.0001), (300, 0.0001)],

From cba665f9ec71c4911eec2f1ab54a17f78129bf1d Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Fri, 13 Mar 2026 09:27:37 -0700
Subject: [PATCH 32/87] Changed EE vs gear reward weight value.

---
 .../deploy/gear_assembly/gear_assembly_env_cfg.py             | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index 01e2a04693e2..3308d714e8bd 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -265,7 +265,7 @@ class RewardsCfg:
 
     ee_gear_keypoint_tracking = RewTerm(
         func=mdp.keypoint_ee_gear_error,
-        weight=-0.05,
+        weight=-0.25,
         params={
             "robot_asset_cfg": SceneEntityCfg("robot"),
             "keypoint_scale": 0.15,
@@ -276,7 +276,7 @@ class RewardsCfg:
 
     ee_gear_keypoint_tracking_exp = RewTerm(
         func=mdp.keypoint_ee_gear_error_exp,
-        weight=0.05,
+        weight=0.25,
         params={
             "robot_asset_cfg": SceneEntityCfg("robot"),
             "kp_exp_coeffs": [(50, 0.0001), (300, 0.0001)],

From 5f245c20c31d6543b067c43de328ca2460f0b2bd Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Fri, 13 Mar 2026 09:54:57 -0700
Subject: [PATCH 33/87] Changed EE vs gear reward to trigger after threshold.

---
 .../gear_assembly/gear_assembly_env_cfg.py    |  2 ++
 .../manipulation/deploy/mdp/rewards.py        | 21 +++++++++++++------
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index 3308d714e8bd..a2f00d170866 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -269,6 +269,7 @@ class RewardsCfg:
         params={
             "robot_asset_cfg": SceneEntityCfg("robot"),
             "keypoint_scale": 0.15,
+            "dead_zone_threshold": 0.005,
             "weight_ramp_start": 1.0,  # Set to 0.0 to enable ramp-up
             "weight_ramp_steps": 250_000,
         },
@@ -282,6 +283,7 @@ class RewardsCfg:
             "kp_exp_coeffs": [(50, 0.0001), (300, 0.0001)],
             "kp_use_sum_of_exps": False,
             "keypoint_scale": 0.15,
+            "dead_zone_threshold": 0.005,
             "weight_ramp_start": 1.0,  # Set to 0.0 to enable ramp-up
             "weight_ramp_steps": 250_000,
         },
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/rewards.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/rewards.py
index 3efd8534742e..ac80c513fcb2 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/rewards.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/rewards.py
@@ -434,6 +434,7 @@ def __init__(self, cfg: RewardTermCfg, env: ManagerBasedRLEnv):
 
         self.weight_ramp_start: float = cfg.params.get("weight_ramp_start", 0.0)
         self.weight_ramp_steps: int = cfg.params.get("weight_ramp_steps", 1)
+        self.dead_zone_threshold: float = cfg.params.get("dead_zone_threshold", 0.0)
 
         self.gear_type_indices = torch.zeros(env.num_envs, device=env.device, dtype=torch.long)
         self.env_indices = torch.arange(env.num_envs, device=env.device)
@@ -464,6 +465,7 @@ def __call__(
         add_cube_center_kp: bool = True,
         weight_ramp_start: float = 0.0,
         weight_ramp_steps: int = 1,
+        dead_zone_threshold: float = 0.0,
     ) -> torch.Tensor:
         if self.eef_idx is None:
             return torch.zeros(env.num_envs, device=env.device)
@@ -507,17 +509,20 @@ def __call__(
         )
 
         mean_kp_error = keypoint_dist_sep.mean(-1)
+        penalized_error = torch.clamp(mean_kp_error - self.dead_zone_threshold, min=0.0)
 
         weight_scale = self._get_weight_scale(env)
-        scaled_reward = mean_kp_error * weight_scale
+        scaled_reward = penalized_error * weight_scale
 
         mean_error_scalar = mean_kp_error.mean().item()
+        mean_penalized_scalar = penalized_error.mean().item()
 
         if not hasattr(env, "extras"):
             env.extras = {}
         if "log" not in env.extras:
             env.extras["log"] = {}
         env.extras["log"]["ee_gear_kp_error/mean_keypoint_dist"] = mean_error_scalar
+        env.extras["log"]["ee_gear_kp_error/mean_penalized_error"] = mean_penalized_scalar
         env.extras["log"]["ee_gear_kp_error/weight_scale"] = weight_scale
 
         self._step_count += 1
@@ -526,8 +531,8 @@ def __call__(
         carb.log_info(
             f"[ee_gear_kp_error] step={self._step_count}"
             f" | mean_kp_error={mean_error_scalar:.5f}"
+            f" | penalized_error={mean_penalized_scalar:.5f}"
             f" | weight_scale={weight_scale:.4f}"
-            f" | reward(unweighted)={-mean_error_scalar:.5f}"
         )
 
         return scaled_reward
@@ -569,6 +574,7 @@ def __init__(self, cfg: RewardTermCfg, env: ManagerBasedRLEnv):
 
         self.weight_ramp_start: float = cfg.params.get("weight_ramp_start", 0.0)
         self.weight_ramp_steps: int = cfg.params.get("weight_ramp_steps", 1)
+        self.dead_zone_threshold: float = cfg.params.get("dead_zone_threshold", 0.0)
 
         self.gear_type_indices = torch.zeros(env.num_envs, device=env.device, dtype=torch.long)
         self.env_indices = torch.arange(env.num_envs, device=env.device)
@@ -601,6 +607,7 @@ def __call__(
         add_cube_center_kp: bool = True,
         weight_ramp_start: float = 0.0,
         weight_ramp_steps: int = 1,
+        dead_zone_threshold: float = 0.0,
     ) -> torch.Tensor:
         if self.eef_idx is None:
             return torch.zeros(env.num_envs, device=env.device)
@@ -643,20 +650,22 @@ def __call__(
             keypoint_scale=keypoint_scale,
         )
 
+        penalized_dist_sep = torch.clamp(keypoint_dist_sep - self.dead_zone_threshold, min=0.0)
+
         mean_kp_error = keypoint_dist_sep.mean(-1)
 
-        keypoint_reward_exp = torch.zeros_like(keypoint_dist_sep[:, 0])
+        keypoint_reward_exp = torch.zeros_like(penalized_dist_sep[:, 0])
         if kp_use_sum_of_exps:
             for coeff in kp_exp_coeffs:
                 a, b = coeff
                 keypoint_reward_exp += (
-                    1.0 / (torch.exp(a * keypoint_dist_sep) + b + torch.exp(-a * keypoint_dist_sep))
+                    1.0 / (torch.exp(a * penalized_dist_sep) + b + torch.exp(-a * penalized_dist_sep))
                 ).mean(-1)
         else:
-            keypoint_dist = keypoint_dist_sep.mean(-1)
+            penalized_dist = penalized_dist_sep.mean(-1)
             for coeff in kp_exp_coeffs:
                 a, b = coeff
-                keypoint_reward_exp += 1.0 / (torch.exp(a * keypoint_dist) + b + torch.exp(-a * keypoint_dist))
+                keypoint_reward_exp += 1.0 / (torch.exp(a * penalized_dist) + b + torch.exp(-a * penalized_dist))
 
         weight_scale = self._get_weight_scale(env)
         scaled_reward = keypoint_reward_exp * weight_scale

From c24b49bef9925220b35a661b5e0e7fc2a9f29cd7 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Fri, 13 Mar 2026 10:06:23 -0700
Subject: [PATCH 34/87] Replaced distance threshold with insertion progress
 threshold.

---
 .../gear_assembly/gear_assembly_env_cfg.py    |  4 +-
 .../manipulation/deploy/mdp/rewards.py        | 82 +++++++++++++++----
 2 files changed, 68 insertions(+), 18 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index a2f00d170866..5bd6212ac975 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -269,7 +269,7 @@ class RewardsCfg:
         params={
             "robot_asset_cfg": SceneEntityCfg("robot"),
             "keypoint_scale": 0.15,
-            "dead_zone_threshold": 0.005,
+            "insertion_threshold": 0.03,
             "weight_ramp_start": 1.0,  # Set to 0.0 to enable ramp-up
             "weight_ramp_steps": 250_000,
         },
@@ -283,7 +283,7 @@ class RewardsCfg:
             "kp_exp_coeffs": [(50, 0.0001), (300, 0.0001)],
             "kp_use_sum_of_exps": False,
             "keypoint_scale": 0.15,
-            "dead_zone_threshold": 0.005,
+            "insertion_threshold": 0.03,
             "weight_ramp_start": 1.0,  # Set to 0.0 to enable ramp-up
             "weight_ramp_steps": 250_000,
         },
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/rewards.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/rewards.py
index ac80c513fcb2..27afeab0bfde 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/rewards.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/rewards.py
@@ -405,6 +405,11 @@ class keypoint_ee_gear_error(ManagerTermBase):
     using grasp offsets, so that the distance is ~0 when properly holding the gear
     and increases when the gripper drifts away.
 
+    The reward is gated on insertion progress: it only activates when the
+    gear-to-gear-base keypoint error drops below ``insertion_threshold``,
+    indicating the gear is substantially inserted. Before that point the
+    reward returns 0 so it does not interfere with the insertion policy.
+
     Supports linear weight ramp-up: the returned reward is scaled by a factor that
     linearly increases from ``weight_ramp_start`` to 1.0 over ``weight_ramp_steps``
     env steps, allowing the reward to grow in importance as training progresses.
@@ -434,7 +439,7 @@ def __init__(self, cfg: RewardTermCfg, env: ManagerBasedRLEnv):
 
         self.weight_ramp_start: float = cfg.params.get("weight_ramp_start", 0.0)
         self.weight_ramp_steps: int = cfg.params.get("weight_ramp_steps", 1)
-        self.dead_zone_threshold: float = cfg.params.get("dead_zone_threshold", 0.0)
+        self.insertion_threshold: float = cfg.params.get("insertion_threshold", 0.03)
 
         self.gear_type_indices = torch.zeros(env.num_envs, device=env.device, dtype=torch.long)
         self.env_indices = torch.arange(env.num_envs, device=env.device)
@@ -443,11 +448,13 @@ def __init__(self, cfg: RewardTermCfg, env: ManagerBasedRLEnv):
             "gear_medium": env.scene["factory_gear_medium"],
             "gear_large": env.scene["factory_gear_large"],
         }
+        self.gear_base_asset = env.scene["factory_gear_base"]
 
         eef_indices, _ = self.robot_asset.find_bodies([self.end_effector_body_name])
         self.eef_idx = eef_indices[0] if len(eef_indices) > 0 else None
 
         self.keypoint_computer = _compute_keypoint_distance(cfg, env)
+        self.insertion_keypoint_computer = _compute_keypoint_distance(cfg, env)
         self._step_count = 0
 
     def _get_weight_scale(self, env: ManagerBasedRLEnv) -> float:
@@ -465,7 +472,7 @@ def __call__(
         add_cube_center_kp: bool = True,
         weight_ramp_start: float = 0.0,
         weight_ramp_steps: int = 1,
-        dead_zone_threshold: float = 0.0,
+        insertion_threshold: float = 0.03,
     ) -> torch.Tensor:
         if self.eef_idx is None:
             return torch.zeros(env.num_envs, device=env.device)
@@ -496,6 +503,21 @@ def __call__(
         gear_pos = all_gear_pos[self.env_indices, self.gear_type_indices]
         gear_quat = all_gear_quat[self.env_indices, self.gear_type_indices]
 
+        # -- Insertion gating: compute gear-to-gear-base keypoint error --
+        gear_base_pos = wp.to_torch(self.gear_base_asset.data.body_pos_w)[:, 0]
+        gear_base_quat = wp.to_torch(self.gear_base_asset.data.body_quat_w)[:, 0]
+
+        insertion_kp_dist = self.insertion_keypoint_computer.compute(
+            current_pos=gear_base_pos,
+            current_quat=gear_base_quat,
+            target_pos=gear_pos,
+            target_quat=gear_quat,
+            keypoint_scale=keypoint_scale,
+        )
+        insertion_kp_error = insertion_kp_dist.mean(-1)
+        is_inserted = (insertion_kp_error < self.insertion_threshold).float()
+
+        # -- EE-gear grasp quality reward --
         gear_quat_grasp = quat_mul(gear_quat, self.grasp_rot_offset_tensor)
         grasp_offsets = self.gear_grasp_offsets_stacked[self.gear_type_indices]
         gear_grasp_pos = gear_pos + quat_apply(gear_quat_grasp, grasp_offsets)
@@ -509,20 +531,21 @@ def __call__(
         )
 
         mean_kp_error = keypoint_dist_sep.mean(-1)
-        penalized_error = torch.clamp(mean_kp_error - self.dead_zone_threshold, min=0.0)
 
         weight_scale = self._get_weight_scale(env)
-        scaled_reward = penalized_error * weight_scale
+        scaled_reward = mean_kp_error * weight_scale * is_inserted
 
         mean_error_scalar = mean_kp_error.mean().item()
-        mean_penalized_scalar = penalized_error.mean().item()
+        mean_insertion_error_scalar = insertion_kp_error.mean().item()
+        pct_inserted = is_inserted.mean().item()
 
         if not hasattr(env, "extras"):
             env.extras = {}
         if "log" not in env.extras:
             env.extras["log"] = {}
         env.extras["log"]["ee_gear_kp_error/mean_keypoint_dist"] = mean_error_scalar
-        env.extras["log"]["ee_gear_kp_error/mean_penalized_error"] = mean_penalized_scalar
+        env.extras["log"]["ee_gear_kp_error/insertion_kp_error"] = mean_insertion_error_scalar
+        env.extras["log"]["ee_gear_kp_error/pct_envs_inserted"] = pct_inserted
         env.extras["log"]["ee_gear_kp_error/weight_scale"] = weight_scale
 
         self._step_count += 1
@@ -531,7 +554,8 @@ def __call__(
         carb.log_info(
             f"[ee_gear_kp_error] step={self._step_count}"
             f" | mean_kp_error={mean_error_scalar:.5f}"
-            f" | penalized_error={mean_penalized_scalar:.5f}"
+            f" | insertion_error={mean_insertion_error_scalar:.5f}"
+            f" | pct_inserted={pct_inserted:.3f}"
             f" | weight_scale={weight_scale:.4f}"
         )
 
@@ -545,6 +569,11 @@ class keypoint_ee_gear_error_exp(ManagerTermBase):
     using grasp offsets, so that the reward is high (~1) when properly holding the gear
     and drops sharply when the gripper drifts away.
 
+    The reward is gated on insertion progress: it only activates when the
+    gear-to-gear-base keypoint error drops below ``insertion_threshold``,
+    indicating the gear is substantially inserted. Before that point the
+    reward returns 0 so it does not interfere with the insertion policy.
+
     Supports linear weight ramp-up: the returned reward is scaled by a factor that
     linearly increases from ``weight_ramp_start`` to 1.0 over ``weight_ramp_steps``
     env steps, allowing the reward to grow in importance as training progresses.
@@ -574,7 +603,7 @@ def __init__(self, cfg: RewardTermCfg, env: ManagerBasedRLEnv):
 
         self.weight_ramp_start: float = cfg.params.get("weight_ramp_start", 0.0)
         self.weight_ramp_steps: int = cfg.params.get("weight_ramp_steps", 1)
-        self.dead_zone_threshold: float = cfg.params.get("dead_zone_threshold", 0.0)
+        self.insertion_threshold: float = cfg.params.get("insertion_threshold", 0.03)
 
         self.gear_type_indices = torch.zeros(env.num_envs, device=env.device, dtype=torch.long)
         self.env_indices = torch.arange(env.num_envs, device=env.device)
@@ -583,11 +612,13 @@ def __init__(self, cfg: RewardTermCfg, env: ManagerBasedRLEnv):
             "gear_medium": env.scene["factory_gear_medium"],
             "gear_large": env.scene["factory_gear_large"],
         }
+        self.gear_base_asset = env.scene["factory_gear_base"]
 
         eef_indices, _ = self.robot_asset.find_bodies([self.end_effector_body_name])
         self.eef_idx = eef_indices[0] if len(eef_indices) > 0 else None
 
         self.keypoint_computer = _compute_keypoint_distance(cfg, env)
+        self.insertion_keypoint_computer = _compute_keypoint_distance(cfg, env)
         self._step_count = 0
 
     def _get_weight_scale(self, env: ManagerBasedRLEnv) -> float:
@@ -607,7 +638,7 @@ def __call__(
         add_cube_center_kp: bool = True,
         weight_ramp_start: float = 0.0,
         weight_ramp_steps: int = 1,
-        dead_zone_threshold: float = 0.0,
+        insertion_threshold: float = 0.03,
     ) -> torch.Tensor:
         if self.eef_idx is None:
             return torch.zeros(env.num_envs, device=env.device)
@@ -638,6 +669,21 @@ def __call__(
         gear_pos = all_gear_pos[self.env_indices, self.gear_type_indices]
         gear_quat = all_gear_quat[self.env_indices, self.gear_type_indices]
 
+        # -- Insertion gating: compute gear-to-gear-base keypoint error --
+        gear_base_pos = wp.to_torch(self.gear_base_asset.data.body_pos_w)[:, 0]
+        gear_base_quat = wp.to_torch(self.gear_base_asset.data.body_quat_w)[:, 0]
+
+        insertion_kp_dist = self.insertion_keypoint_computer.compute(
+            current_pos=gear_base_pos,
+            current_quat=gear_base_quat,
+            target_pos=gear_pos,
+            target_quat=gear_quat,
+            keypoint_scale=keypoint_scale,
+        )
+        insertion_kp_error = insertion_kp_dist.mean(-1)
+        is_inserted = (insertion_kp_error < self.insertion_threshold).float()
+
+        # -- EE-gear grasp quality reward --
         gear_quat_grasp = quat_mul(gear_quat, self.grasp_rot_offset_tensor)
         grasp_offsets = self.gear_grasp_offsets_stacked[self.gear_type_indices]
         gear_grasp_pos = gear_pos + quat_apply(gear_quat_grasp, grasp_offsets)
@@ -650,28 +696,28 @@ def __call__(
             keypoint_scale=keypoint_scale,
         )
 
-        penalized_dist_sep = torch.clamp(keypoint_dist_sep - self.dead_zone_threshold, min=0.0)
-
         mean_kp_error = keypoint_dist_sep.mean(-1)
 
-        keypoint_reward_exp = torch.zeros_like(penalized_dist_sep[:, 0])
+        keypoint_reward_exp = torch.zeros_like(keypoint_dist_sep[:, 0])
         if kp_use_sum_of_exps:
             for coeff in kp_exp_coeffs:
                 a, b = coeff
                 keypoint_reward_exp += (
-                    1.0 / (torch.exp(a * penalized_dist_sep) + b + torch.exp(-a * penalized_dist_sep))
+                    1.0 / (torch.exp(a * keypoint_dist_sep) + b + torch.exp(-a * keypoint_dist_sep))
                 ).mean(-1)
         else:
-            penalized_dist = penalized_dist_sep.mean(-1)
+            kp_dist_mean = keypoint_dist_sep.mean(-1)
             for coeff in kp_exp_coeffs:
                 a, b = coeff
-                keypoint_reward_exp += 1.0 / (torch.exp(a * penalized_dist) + b + torch.exp(-a * penalized_dist))
+                keypoint_reward_exp += 1.0 / (torch.exp(a * kp_dist_mean) + b + torch.exp(-a * kp_dist_mean))
 
         weight_scale = self._get_weight_scale(env)
-        scaled_reward = keypoint_reward_exp * weight_scale
+        scaled_reward = keypoint_reward_exp * weight_scale * is_inserted
 
         mean_error_scalar = mean_kp_error.mean().item()
         mean_reward_scalar = keypoint_reward_exp.mean().item()
+        mean_insertion_error_scalar = insertion_kp_error.mean().item()
+        pct_inserted = is_inserted.mean().item()
 
         if not hasattr(env, "extras"):
             env.extras = {}
@@ -679,6 +725,8 @@ def __call__(
             env.extras["log"] = {}
         env.extras["log"]["ee_gear_kp_error_exp/mean_keypoint_dist"] = mean_error_scalar
         env.extras["log"]["ee_gear_kp_error_exp/mean_exp_reward"] = mean_reward_scalar
+        env.extras["log"]["ee_gear_kp_error_exp/insertion_kp_error"] = mean_insertion_error_scalar
+        env.extras["log"]["ee_gear_kp_error_exp/pct_envs_inserted"] = pct_inserted
         env.extras["log"]["ee_gear_kp_error_exp/weight_scale"] = weight_scale
 
         self._step_count += 1
@@ -687,6 +735,8 @@ def __call__(
         carb.log_info(
             f"[ee_gear_kp_error_exp] step={self._step_count}"
             f" | mean_kp_error={mean_error_scalar:.5f}"
+            f" | insertion_error={mean_insertion_error_scalar:.5f}"
+            f" | pct_inserted={pct_inserted:.3f}"
             f" | weight_scale={weight_scale:.4f}"
             f" | mean_exp_reward={mean_reward_scalar:.5f}"
         )

From 6c0e8ec3fd553de85cc7dd91927c84f0cd0d75ff Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Mon, 16 Mar 2026 13:37:57 -0700
Subject: [PATCH 35/87] Added action reward term, disabled ee vs gear reward
 term.

---
 .../deploy/gear_assembly/gear_assembly_env_cfg.py            | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index 5bd6212ac975..16e62174d590 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -265,7 +265,7 @@ class RewardsCfg:
 
     ee_gear_keypoint_tracking = RewTerm(
         func=mdp.keypoint_ee_gear_error,
-        weight=-0.25,
+        weight=-0.0,
         params={
             "robot_asset_cfg": SceneEntityCfg("robot"),
             "keypoint_scale": 0.15,
@@ -277,7 +277,7 @@ class RewardsCfg:
 
     ee_gear_keypoint_tracking_exp = RewTerm(
         func=mdp.keypoint_ee_gear_error_exp,
-        weight=0.25,
+        weight=0.0,
         params={
             "robot_asset_cfg": SceneEntityCfg("robot"),
             "kp_exp_coeffs": [(50, 0.0001), (300, 0.0001)],
@@ -291,6 +291,7 @@ class RewardsCfg:
 
 
     action_rate = RewTerm(func=mdp.action_rate_l2, weight=-5.0e-06)
+    action = RewTerm(func=mdp.action_l2, weight=-5.0e-06)
 
 
 @configclass

From 6f33337bb87a5eeef5056930ad7b3523d835aa32 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Mon, 16 Mar 2026 16:00:25 -0700
Subject: [PATCH 36/87] Disabled action rate reward.

---
 .../manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index 16e62174d590..ad9051905f0c 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -290,7 +290,7 @@ class RewardsCfg:
     )
 
 
-    action_rate = RewTerm(func=mdp.action_rate_l2, weight=-5.0e-06)
+    # action_rate = RewTerm(func=mdp.action_rate_l2, weight=-5.0e-06)
     action = RewTerm(func=mdp.action_l2, weight=-5.0e-06)
 
 

From 7245311a6ba910ad3aca260200ec8a46f8dde6d1 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Mon, 16 Mar 2026 17:25:38 -0700
Subject: [PATCH 37/87] Added thresholding on ee gear dist.

---
 .../gear_assembly/gear_assembly_env_cfg.py    |  8 +-
 .../manipulation/deploy/mdp/rewards.py        | 82 ++++++-------------
 2 files changed, 28 insertions(+), 62 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index ad9051905f0c..f3a45a59828e 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -269,8 +269,8 @@ class RewardsCfg:
         params={
             "robot_asset_cfg": SceneEntityCfg("robot"),
             "keypoint_scale": 0.15,
-            "insertion_threshold": 0.03,
-            "weight_ramp_start": 1.0,  # Set to 0.0 to enable ramp-up
+            "ee_gear_threshold": 0.05,
+            "weight_ramp_start": 0.0,  # Set to 0.0 to enable ramp-up
             "weight_ramp_steps": 250_000,
         },
     )
@@ -283,8 +283,8 @@ class RewardsCfg:
             "kp_exp_coeffs": [(50, 0.0001), (300, 0.0001)],
             "kp_use_sum_of_exps": False,
             "keypoint_scale": 0.15,
-            "insertion_threshold": 0.03,
-            "weight_ramp_start": 1.0,  # Set to 0.0 to enable ramp-up
+            "ee_gear_threshold": 0.05,
+            "weight_ramp_start": 0.0,  # Set to 0.0 to enable ramp-up
             "weight_ramp_steps": 250_000,
         },
     )
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/rewards.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/rewards.py
index 27afeab0bfde..560f796c70f3 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/rewards.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/rewards.py
@@ -405,10 +405,9 @@ class keypoint_ee_gear_error(ManagerTermBase):
     using grasp offsets, so that the distance is ~0 when properly holding the gear
     and increases when the gripper drifts away.
 
-    The reward is gated on insertion progress: it only activates when the
-    gear-to-gear-base keypoint error drops below ``insertion_threshold``,
-    indicating the gear is substantially inserted. Before that point the
-    reward returns 0 so it does not interfere with the insertion policy.
+    The reward is gated on the EE-gear keypoint distance: it only activates when
+    the mean keypoint error is below ``ee_gear_threshold``, so the penalty only
+    applies when the gripper is reasonably close to the gear.
 
     Supports linear weight ramp-up: the returned reward is scaled by a factor that
     linearly increases from ``weight_ramp_start`` to 1.0 over ``weight_ramp_steps``
@@ -439,7 +438,7 @@ def __init__(self, cfg: RewardTermCfg, env: ManagerBasedRLEnv):
 
         self.weight_ramp_start: float = cfg.params.get("weight_ramp_start", 0.0)
         self.weight_ramp_steps: int = cfg.params.get("weight_ramp_steps", 1)
-        self.insertion_threshold: float = cfg.params.get("insertion_threshold", 0.03)
+        self.ee_gear_threshold: float = cfg.params.get("ee_gear_threshold", 0.05)
 
         self.gear_type_indices = torch.zeros(env.num_envs, device=env.device, dtype=torch.long)
         self.env_indices = torch.arange(env.num_envs, device=env.device)
@@ -448,13 +447,11 @@ def __init__(self, cfg: RewardTermCfg, env: ManagerBasedRLEnv):
             "gear_medium": env.scene["factory_gear_medium"],
             "gear_large": env.scene["factory_gear_large"],
         }
-        self.gear_base_asset = env.scene["factory_gear_base"]
 
         eef_indices, _ = self.robot_asset.find_bodies([self.end_effector_body_name])
         self.eef_idx = eef_indices[0] if len(eef_indices) > 0 else None
 
         self.keypoint_computer = _compute_keypoint_distance(cfg, env)
-        self.insertion_keypoint_computer = _compute_keypoint_distance(cfg, env)
         self._step_count = 0
 
     def _get_weight_scale(self, env: ManagerBasedRLEnv) -> float:
@@ -472,7 +469,7 @@ def __call__(
         add_cube_center_kp: bool = True,
         weight_ramp_start: float = 0.0,
         weight_ramp_steps: int = 1,
-        insertion_threshold: float = 0.03,
+        ee_gear_threshold: float = 0.05,
     ) -> torch.Tensor:
         if self.eef_idx is None:
             return torch.zeros(env.num_envs, device=env.device)
@@ -503,20 +500,6 @@ def __call__(
         gear_pos = all_gear_pos[self.env_indices, self.gear_type_indices]
         gear_quat = all_gear_quat[self.env_indices, self.gear_type_indices]
 
-        # -- Insertion gating: compute gear-to-gear-base keypoint error --
-        gear_base_pos = wp.to_torch(self.gear_base_asset.data.body_pos_w)[:, 0]
-        gear_base_quat = wp.to_torch(self.gear_base_asset.data.body_quat_w)[:, 0]
-
-        insertion_kp_dist = self.insertion_keypoint_computer.compute(
-            current_pos=gear_base_pos,
-            current_quat=gear_base_quat,
-            target_pos=gear_pos,
-            target_quat=gear_quat,
-            keypoint_scale=keypoint_scale,
-        )
-        insertion_kp_error = insertion_kp_dist.mean(-1)
-        is_inserted = (insertion_kp_error < self.insertion_threshold).float()
-
         # -- EE-gear grasp quality reward --
         gear_quat_grasp = quat_mul(gear_quat, self.grasp_rot_offset_tensor)
         grasp_offsets = self.gear_grasp_offsets_stacked[self.gear_type_indices]
@@ -532,20 +515,21 @@ def __call__(
 
         mean_kp_error = keypoint_dist_sep.mean(-1)
 
+        # Gate on EE-gear distance: only penalize when gripper is close to gear
+        is_close = (mean_kp_error > self.ee_gear_threshold).float()
+
         weight_scale = self._get_weight_scale(env)
-        scaled_reward = mean_kp_error * weight_scale * is_inserted
+        scaled_reward = mean_kp_error * weight_scale * is_close
 
         mean_error_scalar = mean_kp_error.mean().item()
-        mean_insertion_error_scalar = insertion_kp_error.mean().item()
-        pct_inserted = is_inserted.mean().item()
+        pct_close = is_close.mean().item()
 
         if not hasattr(env, "extras"):
             env.extras = {}
         if "log" not in env.extras:
             env.extras["log"] = {}
         env.extras["log"]["ee_gear_kp_error/mean_keypoint_dist"] = mean_error_scalar
-        env.extras["log"]["ee_gear_kp_error/insertion_kp_error"] = mean_insertion_error_scalar
-        env.extras["log"]["ee_gear_kp_error/pct_envs_inserted"] = pct_inserted
+        env.extras["log"]["ee_gear_kp_error/pct_envs_close"] = pct_close
         env.extras["log"]["ee_gear_kp_error/weight_scale"] = weight_scale
 
         self._step_count += 1
@@ -554,8 +538,7 @@ def __call__(
         carb.log_info(
             f"[ee_gear_kp_error] step={self._step_count}"
             f" | mean_kp_error={mean_error_scalar:.5f}"
-            f" | insertion_error={mean_insertion_error_scalar:.5f}"
-            f" | pct_inserted={pct_inserted:.3f}"
+            f" | pct_close={pct_close:.3f}"
             f" | weight_scale={weight_scale:.4f}"
         )
 
@@ -569,10 +552,9 @@ class keypoint_ee_gear_error_exp(ManagerTermBase):
     using grasp offsets, so that the reward is high (~1) when properly holding the gear
     and drops sharply when the gripper drifts away.
 
-    The reward is gated on insertion progress: it only activates when the
-    gear-to-gear-base keypoint error drops below ``insertion_threshold``,
-    indicating the gear is substantially inserted. Before that point the
-    reward returns 0 so it does not interfere with the insertion policy.
+    The reward is gated on the EE-gear keypoint distance: it only activates when
+    the mean keypoint error is below ``ee_gear_threshold``, so the reward only
+    applies when the gripper is reasonably close to the gear.
 
     Supports linear weight ramp-up: the returned reward is scaled by a factor that
     linearly increases from ``weight_ramp_start`` to 1.0 over ``weight_ramp_steps``
@@ -603,7 +585,7 @@ def __init__(self, cfg: RewardTermCfg, env: ManagerBasedRLEnv):
 
         self.weight_ramp_start: float = cfg.params.get("weight_ramp_start", 0.0)
         self.weight_ramp_steps: int = cfg.params.get("weight_ramp_steps", 1)
-        self.insertion_threshold: float = cfg.params.get("insertion_threshold", 0.03)
+        self.ee_gear_threshold: float = cfg.params.get("ee_gear_threshold", 0.05)
 
         self.gear_type_indices = torch.zeros(env.num_envs, device=env.device, dtype=torch.long)
         self.env_indices = torch.arange(env.num_envs, device=env.device)
@@ -612,13 +594,11 @@ def __init__(self, cfg: RewardTermCfg, env: ManagerBasedRLEnv):
             "gear_medium": env.scene["factory_gear_medium"],
             "gear_large": env.scene["factory_gear_large"],
         }
-        self.gear_base_asset = env.scene["factory_gear_base"]
 
         eef_indices, _ = self.robot_asset.find_bodies([self.end_effector_body_name])
         self.eef_idx = eef_indices[0] if len(eef_indices) > 0 else None
 
         self.keypoint_computer = _compute_keypoint_distance(cfg, env)
-        self.insertion_keypoint_computer = _compute_keypoint_distance(cfg, env)
         self._step_count = 0
 
     def _get_weight_scale(self, env: ManagerBasedRLEnv) -> float:
@@ -638,7 +618,7 @@ def __call__(
         add_cube_center_kp: bool = True,
         weight_ramp_start: float = 0.0,
         weight_ramp_steps: int = 1,
-        insertion_threshold: float = 0.03,
+        ee_gear_threshold: float = 0.05,
     ) -> torch.Tensor:
         if self.eef_idx is None:
             return torch.zeros(env.num_envs, device=env.device)
@@ -669,20 +649,6 @@ def __call__(
         gear_pos = all_gear_pos[self.env_indices, self.gear_type_indices]
         gear_quat = all_gear_quat[self.env_indices, self.gear_type_indices]
 
-        # -- Insertion gating: compute gear-to-gear-base keypoint error --
-        gear_base_pos = wp.to_torch(self.gear_base_asset.data.body_pos_w)[:, 0]
-        gear_base_quat = wp.to_torch(self.gear_base_asset.data.body_quat_w)[:, 0]
-
-        insertion_kp_dist = self.insertion_keypoint_computer.compute(
-            current_pos=gear_base_pos,
-            current_quat=gear_base_quat,
-            target_pos=gear_pos,
-            target_quat=gear_quat,
-            keypoint_scale=keypoint_scale,
-        )
-        insertion_kp_error = insertion_kp_dist.mean(-1)
-        is_inserted = (insertion_kp_error < self.insertion_threshold).float()
-
         # -- EE-gear grasp quality reward --
         gear_quat_grasp = quat_mul(gear_quat, self.grasp_rot_offset_tensor)
         grasp_offsets = self.gear_grasp_offsets_stacked[self.gear_type_indices]
@@ -698,6 +664,9 @@ def __call__(
 
         mean_kp_error = keypoint_dist_sep.mean(-1)
 
+        # Gate on EE-gear distance: only reward when gripper is close to gear
+        is_close = (mean_kp_error > self.ee_gear_threshold).float()
+
         keypoint_reward_exp = torch.zeros_like(keypoint_dist_sep[:, 0])
         if kp_use_sum_of_exps:
             for coeff in kp_exp_coeffs:
@@ -712,12 +681,11 @@ def __call__(
                 keypoint_reward_exp += 1.0 / (torch.exp(a * kp_dist_mean) + b + torch.exp(-a * kp_dist_mean))
 
         weight_scale = self._get_weight_scale(env)
-        scaled_reward = keypoint_reward_exp * weight_scale * is_inserted
+        scaled_reward = keypoint_reward_exp * weight_scale * is_close
 
         mean_error_scalar = mean_kp_error.mean().item()
         mean_reward_scalar = keypoint_reward_exp.mean().item()
-        mean_insertion_error_scalar = insertion_kp_error.mean().item()
-        pct_inserted = is_inserted.mean().item()
+        pct_close = is_close.mean().item()
 
         if not hasattr(env, "extras"):
             env.extras = {}
@@ -725,8 +693,7 @@ def __call__(
             env.extras["log"] = {}
         env.extras["log"]["ee_gear_kp_error_exp/mean_keypoint_dist"] = mean_error_scalar
         env.extras["log"]["ee_gear_kp_error_exp/mean_exp_reward"] = mean_reward_scalar
-        env.extras["log"]["ee_gear_kp_error_exp/insertion_kp_error"] = mean_insertion_error_scalar
-        env.extras["log"]["ee_gear_kp_error_exp/pct_envs_inserted"] = pct_inserted
+        env.extras["log"]["ee_gear_kp_error_exp/pct_envs_close"] = pct_close
         env.extras["log"]["ee_gear_kp_error_exp/weight_scale"] = weight_scale
 
         self._step_count += 1
@@ -735,8 +702,7 @@ def __call__(
         carb.log_info(
             f"[ee_gear_kp_error_exp] step={self._step_count}"
             f" | mean_kp_error={mean_error_scalar:.5f}"
-            f" | insertion_error={mean_insertion_error_scalar:.5f}"
-            f" | pct_inserted={pct_inserted:.3f}"
+            f" | pct_close={pct_close:.3f}"
             f" | weight_scale={weight_scale:.4f}"
             f" | mean_exp_reward={mean_reward_scalar:.5f}"
         )

From bebbfd916ba0b35695be7de111e02f231bda454a Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Mon, 16 Mar 2026 17:26:29 -0700
Subject: [PATCH 38/87] Changed threshold.

---
 .../deploy/gear_assembly/gear_assembly_env_cfg.py             | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index f3a45a59828e..f3daaf6aa1d9 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -269,7 +269,7 @@ class RewardsCfg:
         params={
             "robot_asset_cfg": SceneEntityCfg("robot"),
             "keypoint_scale": 0.15,
-            "ee_gear_threshold": 0.05,
+            "ee_gear_threshold": 0.02,
             "weight_ramp_start": 0.0,  # Set to 0.0 to enable ramp-up
             "weight_ramp_steps": 250_000,
         },
@@ -283,7 +283,7 @@ class RewardsCfg:
             "kp_exp_coeffs": [(50, 0.0001), (300, 0.0001)],
             "kp_use_sum_of_exps": False,
             "keypoint_scale": 0.15,
-            "ee_gear_threshold": 0.05,
+            "ee_gear_threshold": 0.02,
             "weight_ramp_start": 0.0,  # Set to 0.0 to enable ramp-up
             "weight_ramp_steps": 250_000,
         },

From 4bf6f9a88cc7b3cf117319f182042718c0cbf469 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Mon, 16 Mar 2026 17:27:38 -0700
Subject: [PATCH 39/87] Replaced action with action rate reward and made ee
 gear wt non-zero.

---
 .../deploy/gear_assembly/gear_assembly_env_cfg.py         | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index f3daaf6aa1d9..b78ac999d613 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -265,7 +265,7 @@ class RewardsCfg:
 
     ee_gear_keypoint_tracking = RewTerm(
         func=mdp.keypoint_ee_gear_error,
-        weight=-0.0,
+        weight=-0.1,
         params={
             "robot_asset_cfg": SceneEntityCfg("robot"),
             "keypoint_scale": 0.15,
@@ -277,7 +277,7 @@ class RewardsCfg:
 
     ee_gear_keypoint_tracking_exp = RewTerm(
         func=mdp.keypoint_ee_gear_error_exp,
-        weight=0.0,
+        weight=0.1,
         params={
             "robot_asset_cfg": SceneEntityCfg("robot"),
             "kp_exp_coeffs": [(50, 0.0001), (300, 0.0001)],
@@ -290,8 +290,8 @@ class RewardsCfg:
     )
 
 
-    # action_rate = RewTerm(func=mdp.action_rate_l2, weight=-5.0e-06)
-    action = RewTerm(func=mdp.action_l2, weight=-5.0e-06)
+    action_rate = RewTerm(func=mdp.action_rate_l2, weight=-5.0e-06)
+    # action = RewTerm(func=mdp.action_l2, weight=-5.0e-06)
 
 
 @configclass

From 9d5c142390d48e7e4be280981cb5cdac2fd2d7d8 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Mon, 16 Mar 2026 17:28:30 -0700
Subject: [PATCH 40/87] Disabled scaling of ee gear wt.

---
 .../deploy/gear_assembly/gear_assembly_env_cfg.py           | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index b78ac999d613..47a3675723f2 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -269,8 +269,8 @@ class RewardsCfg:
         params={
             "robot_asset_cfg": SceneEntityCfg("robot"),
             "keypoint_scale": 0.15,
-            "ee_gear_threshold": 0.02,
-            "weight_ramp_start": 0.0,  # Set to 0.0 to enable ramp-up
+            "ee_gear_threshold": 0.05,
+            "weight_ramp_start": 1.0,  # Set to 0.0 to enable ramp-up
             "weight_ramp_steps": 250_000,
         },
     )
@@ -284,7 +284,7 @@ class RewardsCfg:
             "kp_use_sum_of_exps": False,
             "keypoint_scale": 0.15,
             "ee_gear_threshold": 0.02,
-            "weight_ramp_start": 0.0,  # Set to 0.0 to enable ramp-up
+            "weight_ramp_start": 1.0,  # Set to 0.0 to enable ramp-up
             "weight_ramp_steps": 250_000,
         },
     )

From 9a17b326d5bfb8bec3a59041252309be22d1a4e7 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Mon, 16 Mar 2026 17:29:11 -0700
Subject: [PATCH 41/87] Fixed wt.

---
 .../manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index 47a3675723f2..b19455aea53d 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -269,7 +269,7 @@ class RewardsCfg:
         params={
             "robot_asset_cfg": SceneEntityCfg("robot"),
             "keypoint_scale": 0.15,
-            "ee_gear_threshold": 0.05,
+            "ee_gear_threshold": 0.02,
             "weight_ramp_start": 1.0,  # Set to 0.0 to enable ramp-up
             "weight_ramp_steps": 250_000,
         },

From 03a7fba4156ba91664ae1aed71f8fedc328bf3bc Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Mon, 16 Mar 2026 17:29:54 -0700
Subject: [PATCH 42/87] Changed threshold.

---
 .../deploy/gear_assembly/gear_assembly_env_cfg.py             | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index b19455aea53d..425005f493cd 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -269,7 +269,7 @@ class RewardsCfg:
         params={
             "robot_asset_cfg": SceneEntityCfg("robot"),
             "keypoint_scale": 0.15,
-            "ee_gear_threshold": 0.02,
+            "ee_gear_threshold": 0.05,
             "weight_ramp_start": 1.0,  # Set to 0.0 to enable ramp-up
             "weight_ramp_steps": 250_000,
         },
@@ -283,7 +283,7 @@ class RewardsCfg:
             "kp_exp_coeffs": [(50, 0.0001), (300, 0.0001)],
             "kp_use_sum_of_exps": False,
             "keypoint_scale": 0.15,
-            "ee_gear_threshold": 0.02,
+            "ee_gear_threshold": 0.05,
             "weight_ramp_start": 1.0,  # Set to 0.0 to enable ramp-up
             "weight_ramp_steps": 250_000,
         },

From 09f34b2a382584ea3af162b337fc60f01885a2d9 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Wed, 18 Mar 2026 09:46:28 -0700
Subject: [PATCH 43/87] Changes to distance threshold and wts.

---
 .../deploy/gear_assembly/gear_assembly_env_cfg.py         | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index 425005f493cd..47d0844503d2 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -265,11 +265,11 @@ class RewardsCfg:
 
     ee_gear_keypoint_tracking = RewTerm(
         func=mdp.keypoint_ee_gear_error,
-        weight=-0.1,
+        weight=-0.5,
         params={
             "robot_asset_cfg": SceneEntityCfg("robot"),
             "keypoint_scale": 0.15,
-            "ee_gear_threshold": 0.05,
+            "ee_gear_threshold": 0.00,
             "weight_ramp_start": 1.0,  # Set to 0.0 to enable ramp-up
             "weight_ramp_steps": 250_000,
         },
@@ -277,13 +277,13 @@ class RewardsCfg:
 
     ee_gear_keypoint_tracking_exp = RewTerm(
         func=mdp.keypoint_ee_gear_error_exp,
-        weight=0.1,
+        weight=0.5,
         params={
             "robot_asset_cfg": SceneEntityCfg("robot"),
             "kp_exp_coeffs": [(50, 0.0001), (300, 0.0001)],
             "kp_use_sum_of_exps": False,
             "keypoint_scale": 0.15,
-            "ee_gear_threshold": 0.05,
+            "ee_gear_threshold": 0.00,
             "weight_ramp_start": 1.0,  # Set to 0.0 to enable ramp-up
             "weight_ramp_steps": 250_000,
         },

From a60c5390d20d002cfa6ce9dd2f6dffe553d54191 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Wed, 18 Mar 2026 10:48:20 -0700
Subject: [PATCH 44/87] Fixed PD values.

---
 .../isaaclab_assets/isaaclab_assets/robots/flexiv.py | 12 ++++++------
 .../config/rizon_4s/joint_pos_env_cfg.py             |  2 +-
 .../deploy/gear_assembly/gear_assembly_env_cfg.py    |  4 ++--
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py b/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
index c351ac3366e7..5c5a70e176bc 100644
--- a/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
+++ b/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
@@ -120,8 +120,8 @@
             joint_names_expr=["joint[1-2]"],
             effort_limit_sim=123.0,
             velocity_limit_sim=2.094,
-            stiffness=1320.0,
-            damping=72.0,
+            stiffness=6000.0,
+            damping=108.5,
             friction=0.0,
             armature=0.0,
         ),
@@ -129,8 +129,8 @@
             joint_names_expr=["joint[3-4]"],
             effort_limit_sim=64.0,
             velocity_limit_sim=2.443,
-            stiffness=600.0,
-            damping=35.0,
+            stiffness=4200.0,
+            damping=90.7,
             friction=0.0,
             armature=0.0,
         ),
@@ -138,8 +138,8 @@
             joint_names_expr=["joint[5-7]"],
             effort_limit_sim=39.0,
             velocity_limit_sim=4.887,
-            stiffness=216.0,
-            damping=29.0,
+            stiffness=1500.0,
+            damping=54.2,
             friction=0.0,
             armature=0.0,
         ),
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
index 5a8119c58fd0..b58174c2585c 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
@@ -169,7 +169,7 @@ class EventCfg:
         func=gear_assembly_events.randomize_gear_type,
         mode="reset",
         params={"gear_types": ["gear_small", "gear_medium", "gear_large"]},
-        # params={"gear_types": ["gear_large"]},
+        # params={"gear_types": ["gear_small", "gear_medium"]},
     )
 
     reset_all = EventTerm(func=mdp.reset_scene_to_default, mode="reset")
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index 47d0844503d2..b607a5e63e75 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -270,7 +270,7 @@ class RewardsCfg:
             "robot_asset_cfg": SceneEntityCfg("robot"),
             "keypoint_scale": 0.15,
             "ee_gear_threshold": 0.00,
-            "weight_ramp_start": 1.0,  # Set to 0.0 to enable ramp-up
+            "weight_ramp_start": 0.0,  # Set to 0.0 to enable ramp-up
             "weight_ramp_steps": 250_000,
         },
     )
@@ -284,7 +284,7 @@ class RewardsCfg:
             "kp_use_sum_of_exps": False,
             "keypoint_scale": 0.15,
             "ee_gear_threshold": 0.00,
-            "weight_ramp_start": 1.0,  # Set to 0.0 to enable ramp-up
+            "weight_ramp_start": 0.0,  # Set to 0.0 to enable ramp-up
             "weight_ramp_steps": 250_000,
         },
     )

From f982cbacaa7524683bed449f2c69dfe73ae32599 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Wed, 18 Mar 2026 12:10:33 -0700
Subject: [PATCH 45/87] Enabled action reward term.

---
 .../manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index b607a5e63e75..81fbba6b2d3f 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -291,7 +291,7 @@ class RewardsCfg:
 
 
     action_rate = RewTerm(func=mdp.action_rate_l2, weight=-5.0e-06)
-    # action = RewTerm(func=mdp.action_l2, weight=-5.0e-06)
+    action = RewTerm(func=mdp.action_l2, weight=-5.0e-06)
 
 
 @configclass

From 17071cf31aa316a52a3de2eda632b6187e2d8392 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Thu, 19 Mar 2026 09:45:22 -0700
Subject: [PATCH 46/87] Reverted PD values.

---
 .../isaaclab_assets/isaaclab_assets/robots/flexiv.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py b/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
index 5c5a70e176bc..c351ac3366e7 100644
--- a/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
+++ b/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
@@ -120,8 +120,8 @@
             joint_names_expr=["joint[1-2]"],
             effort_limit_sim=123.0,
             velocity_limit_sim=2.094,
-            stiffness=6000.0,
-            damping=108.5,
+            stiffness=1320.0,
+            damping=72.0,
             friction=0.0,
             armature=0.0,
         ),
@@ -129,8 +129,8 @@
             joint_names_expr=["joint[3-4]"],
             effort_limit_sim=64.0,
             velocity_limit_sim=2.443,
-            stiffness=4200.0,
-            damping=90.7,
+            stiffness=600.0,
+            damping=35.0,
             friction=0.0,
             armature=0.0,
         ),
@@ -138,8 +138,8 @@
             joint_names_expr=["joint[5-7]"],
             effort_limit_sim=39.0,
             velocity_limit_sim=4.887,
-            stiffness=1500.0,
-            damping=54.2,
+            stiffness=216.0,
+            damping=29.0,
             friction=0.0,
             armature=0.0,
         ),

From 87484c15aa2d60423cda9653a7c710aa76887304 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Thu, 19 Mar 2026 09:46:44 -0700
Subject: [PATCH 47/87] Removed action reward.

---
 .../manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index 81fbba6b2d3f..b607a5e63e75 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -291,7 +291,7 @@ class RewardsCfg:
 
 
     action_rate = RewTerm(func=mdp.action_rate_l2, weight=-5.0e-06)
-    action = RewTerm(func=mdp.action_l2, weight=-5.0e-06)
+    # action = RewTerm(func=mdp.action_l2, weight=-5.0e-06)
 
 
 @configclass

From 853d7a2adf70bcb308e1b8138bcf6bf1a4d396ff Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Thu, 19 Mar 2026 10:00:01 -0700
Subject: [PATCH 48/87] Adding eval script for rsl_rl policies.

---
 scripts/reinforcement_learning/rsl_rl/eval.py | 321 ++++++++++++++++++
 1 file changed, 321 insertions(+)
 create mode 100644 scripts/reinforcement_learning/rsl_rl/eval.py

diff --git a/scripts/reinforcement_learning/rsl_rl/eval.py b/scripts/reinforcement_learning/rsl_rl/eval.py
new file mode 100644
index 000000000000..865e2a5c466d
--- /dev/null
+++ b/scripts/reinforcement_learning/rsl_rl/eval.py
@@ -0,0 +1,321 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""Script to evaluate a trained RSL-RL policy over N episodes and report success rate and metrics."""
+
+"""Launch Isaac Sim Simulator first."""
+
+import argparse
+import sys
+
+from isaaclab.app import AppLauncher
+
+# local imports
+import cli_args  # isort: skip
+
+# add argparse arguments
+parser = argparse.ArgumentParser(description="Evaluate a trained RSL-RL policy.")
+parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
+parser.add_argument("--task", type=str, default=None, help="Name of the task.")
+parser.add_argument(
+    "--agent", type=str, default="rsl_rl_cfg_entry_point", help="Name of the RL agent configuration entry point."
+)
+parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment.")
+parser.add_argument("--num_episodes", type=int, default=100, help="Total number of episodes to evaluate.")
+parser.add_argument(
+    "--success_term",
+    type=str,
+    default=None,
+    help=(
+        "Name of a termination term that indicates task success. "
+        "If not set, only timeout vs. early-termination statistics are reported."
+    ),
+)
+parser.add_argument(
+    "--use_pretrained_checkpoint",
+    action="store_true",
+    help="Use the pre-trained checkpoint from Nucleus.",
+)
+# append RSL-RL cli arguments
+cli_args.add_rsl_rl_args(parser)
+# append AppLauncher cli args
+AppLauncher.add_app_launcher_args(parser)
+args_cli, hydra_args = parser.parse_known_args()
+
+# clear out sys.argv for Hydra
+sys.argv = [sys.argv[0]] + hydra_args
+
+# launch omniverse app
+app_launcher = AppLauncher(args_cli)
+simulation_app = app_launcher.app
+
+"""Rest everything follows."""
+
+import os
+import time
+from collections import defaultdict
+
+import gymnasium as gym
+import torch
+from rsl_rl.runners import DistillationRunner, OnPolicyRunner
+
+from isaaclab.envs import (
+    DirectMARLEnv,
+    DirectMARLEnvCfg,
+    DirectRLEnvCfg,
+    ManagerBasedRLEnvCfg,
+    multi_agent_to_single_agent,
+)
+from isaaclab.utils.assets import retrieve_file_path
+
+from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper
+from isaaclab_rl.utils.pretrained_checkpoint import get_published_pretrained_checkpoint
+
+import isaaclab_tasks  # noqa: F401
+from isaaclab_tasks.utils import get_checkpoint_path
+from isaaclab_tasks.utils.hydra import hydra_task_config
+
+
+@hydra_task_config(args_cli.task, args_cli.agent)
+def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlBaseRunnerCfg):
+    """Evaluate a trained RSL-RL policy."""
+    # -- resolve task name and checkpoint -----------------------------------------------
+    task_name = args_cli.task.split(":")[-1]
+    train_task_name = task_name.replace("-Play", "")
+
+    agent_cfg: RslRlBaseRunnerCfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli)
+    env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs
+    env_cfg.seed = agent_cfg.seed
+    env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
+
+    log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name)
+    log_root_path = os.path.abspath(log_root_path)
+    print(f"[INFO] Loading experiment from directory: {log_root_path}")
+
+    if args_cli.use_pretrained_checkpoint:
+        resume_path = get_published_pretrained_checkpoint("rsl_rl", train_task_name)
+        if not resume_path:
+            print("[INFO] Unfortunately a pre-trained checkpoint is currently unavailable for this task.")
+            return
+    elif args_cli.checkpoint:
+        resume_path = retrieve_file_path(args_cli.checkpoint)
+    else:
+        resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint)
+
+    log_dir = os.path.dirname(resume_path)
+    env_cfg.log_dir = log_dir
+
+    # -- create environment -------------------------------------------------------------
+    env = gym.make(args_cli.task, cfg=env_cfg)
+
+    if isinstance(env.unwrapped, DirectMARLEnv):
+        env = multi_agent_to_single_agent(env)
+
+    env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)
+    num_envs = env.unwrapped.num_envs
+
+    # -- load policy --------------------------------------------------------------------
+    print(f"[INFO]: Loading model checkpoint from: {resume_path}")
+    if agent_cfg.class_name == "OnPolicyRunner":
+        runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
+    elif agent_cfg.class_name == "DistillationRunner":
+        runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
+    else:
+        raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}")
+    runner.load(resume_path)
+
+    policy = runner.get_inference_policy(device=env.unwrapped.device)
+    try:
+        policy_nn = runner.alg.policy
+    except AttributeError:
+        policy_nn = runner.alg.actor_critic
+
+    # -- resolve which termination terms exist ------------------------------------------
+    unwrapped = env.unwrapped
+    has_termination_manager = hasattr(unwrapped, "termination_manager")
+    term_names: list[str] = []
+    if has_termination_manager:
+        term_names = unwrapped.termination_manager.active_terms
+        print(f"[INFO] Termination terms: {term_names}")
+
+    success_term = args_cli.success_term
+    if success_term and success_term not in term_names:
+        print(
+            f"[WARN] --success_term '{success_term}' not found among termination terms {term_names}. "
+            "Success rate will not be tracked."
+        )
+        success_term = None
+
+    has_reward_manager = hasattr(unwrapped, "reward_manager")
+    reward_term_names: list[str] = []
+    if has_reward_manager:
+        reward_term_names = unwrapped.reward_manager.active_terms
+        print(f"[INFO] Reward terms: {reward_term_names}")
+
+    # -- tracking state -----------------------------------------------------------------
+    total_episodes_target = args_cli.num_episodes
+    completed_episodes = 0
+
+    # per-env accumulators (reset when that env's episode ends)
+    ep_returns = torch.zeros(num_envs, device=unwrapped.device)
+    ep_lengths = torch.zeros(num_envs, device=unwrapped.device, dtype=torch.long)
+    ep_reward_terms = {name: torch.zeros(num_envs, device=unwrapped.device) for name in reward_term_names}
+
+    # aggregate stats across finished episodes
+    all_returns: list[float] = []
+    all_lengths: list[int] = []
+    termination_counts: dict[str, int] = defaultdict(int)
+    timeout_count = 0
+    success_count = 0
+    reward_term_sums: dict[str, list[float]] = {name: [] for name in reward_term_names}
+
+    # extras-log keys seen across episodes (scalars injected by reward terms)
+    extras_log_sums: dict[str, list[float]] = defaultdict(list)
+
+    # -- evaluation loop ----------------------------------------------------------------
+    print(f"\n{'='*70}")
+    print(f"  Evaluating for {total_episodes_target} episodes  |  {num_envs} parallel envs")
+    print(f"{'='*70}\n")
+
+    obs = env.get_observations()
+    start_time = time.time()
+
+    while completed_episodes < total_episodes_target and simulation_app.is_running():
+        with torch.inference_mode():
+            actions = policy(obs)
+            obs, rewards, dones, extras = env.step(actions)
+            policy_nn.reset(dones)
+
+        ep_returns += rewards
+        ep_lengths += 1
+
+        # accumulate per-term rewards if the manager exposes step_reward
+        if has_reward_manager:
+            step_reward = unwrapped.reward_manager._step_reward  # (num_envs, num_terms)
+            for idx, name in enumerate(reward_term_names):
+                ep_reward_terms[name] += step_reward[:, idx]
+
+        # find which envs finished this step
+        done_mask = dones.bool()
+        done_ids = done_mask.nonzero(as_tuple=False).squeeze(-1)
+
+        if done_ids.numel() > 0:
+            for env_id in done_ids.tolist():
+                if completed_episodes >= total_episodes_target:
+                    break
+
+                # record episode stats
+                all_returns.append(ep_returns[env_id].item())
+                all_lengths.append(ep_lengths[env_id].item())
+
+                for name in reward_term_names:
+                    reward_term_sums[name].append(ep_reward_terms[name][env_id].item())
+
+                # determine termination cause
+                if has_termination_manager:
+                    for t_name in term_names:
+                        term_val = unwrapped.termination_manager.get_term(t_name)
+                        if term_val[env_id]:
+                            termination_counts[t_name] += 1
+
+                if extras.get("time_outs") is not None and extras["time_outs"][env_id]:
+                    timeout_count += 1
+
+                if success_term:
+                    term_val = unwrapped.termination_manager.get_term(success_term)
+                    if term_val[env_id]:
+                        success_count += 1
+
+                completed_episodes += 1
+
+                # reset per-env accumulators
+                ep_returns[env_id] = 0.0
+                ep_lengths[env_id] = 0
+                for name in reward_term_names:
+                    ep_reward_terms[name][env_id] = 0.0
+
+        # collect extras["log"] scalars (these are per-step averages across envs)
+        if "log" in extras:
+            for key, val in extras["log"].items():
+                if isinstance(val, (int, float)):
+                    extras_log_sums[key].append(val)
+
+        # periodic progress
+        if completed_episodes > 0 and completed_episodes % max(1, total_episodes_target // 10) == 0:
+            elapsed = time.time() - start_time
+            print(
+                f"  [{completed_episodes}/{total_episodes_target}] episodes  "
+                f"| mean return: {sum(all_returns) / len(all_returns):.3f}  "
+                f"| elapsed: {elapsed:.1f}s"
+            )
+
+    elapsed_total = time.time() - start_time
+
+    # -- report -------------------------------------------------------------------------
+    n = len(all_returns)
+    if n == 0:
+        print("[WARN] No episodes completed.")
+        env.close()
+        return
+
+    returns_t = torch.tensor(all_returns)
+    lengths_t = torch.tensor(all_lengths, dtype=torch.float)
+    step_dt = unwrapped.step_dt
+    max_ep_len = unwrapped.max_episode_length
+
+    print(f"\n{'='*70}")
+    print(f"  EVALUATION RESULTS  ({n} episodes, {elapsed_total:.1f}s)")
+    print(f"{'='*70}")
+    print(f"  Checkpoint : {resume_path}")
+    print(f"  Num envs   : {num_envs}")
+    print(f"  Step dt    : {step_dt:.4f}s   |  Max episode steps: {max_ep_len}")
+
+    print(f"\n  --- Episode Returns ---")
+    print(f"    Mean   : {returns_t.mean().item():.4f}")
+    print(f"    Std    : {returns_t.std().item():.4f}")
+    print(f"    Min    : {returns_t.min().item():.4f}")
+    print(f"    Max    : {returns_t.max().item():.4f}")
+
+    print(f"\n  --- Episode Length (steps) ---")
+    print(f"    Mean   : {lengths_t.mean().item():.1f}")
+    print(f"    Std    : {lengths_t.std().item():.1f}")
+    print(f"    Min    : {lengths_t.min().item():.0f}")
+    print(f"    Max    : {lengths_t.max().item():.0f}")
+
+    if success_term:
+        print(f"\n  --- Success Rate (term: '{success_term}') ---")
+        print(f"    Success: {success_count}/{n}  ({100.0 * success_count / n:.1f}%)")
+
+    print(f"\n  --- Termination Breakdown ---")
+    print(f"    Timeouts       : {timeout_count}/{n}  ({100.0 * timeout_count / n:.1f}%)")
+    for t_name in term_names:
+        cnt = termination_counts.get(t_name, 0)
+        print(f"    {t_name:30s}: {cnt}/{n}  ({100.0 * cnt / n:.1f}%)")
+
+    if reward_term_names:
+        print(f"\n  --- Mean Episodic Reward Terms ---")
+        for name in reward_term_names:
+            vals = reward_term_sums[name]
+            mean_val = sum(vals) / len(vals) if vals else 0.0
+            print(f"    {name:40s}: {mean_val:.6f}")
+
+    if extras_log_sums:
+        print(f"\n  --- Mean Extras/Log Metrics (per-step averages) ---")
+        for key in sorted(extras_log_sums.keys()):
+            # skip Episode_Reward/Episode_Termination keys (already covered above)
+            if key.startswith("Episode_Reward/") or key.startswith("Episode_Termination/"):
+                continue
+            vals = extras_log_sums[key]
+            mean_val = sum(vals) / len(vals) if vals else 0.0
+            print(f"    {key:50s}: {mean_val:.6f}")
+
+    print(f"\n{'='*70}\n")
+
+    env.close()
+
+
+if __name__ == "__main__":
+    main()
+    simulation_app.close()

From e798064ed0e4db9397e5b2a787fff02c2bdabb94 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Fri, 20 Mar 2026 12:12:28 -0700
Subject: [PATCH 49/87] Changing implicit to explicit ideal controller for
 flexiv.

---
 .../isaaclab_assets/robots/flexiv.py          | 34 +++++++++----------
 .../config/rizon_4s/joint_pos_env_cfg.py      | 13 +++----
 2 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py b/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
index c351ac3366e7..bb2e8e1931a3 100644
--- a/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
+++ b/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
@@ -17,7 +17,7 @@
 import math
 
 import isaaclab.sim as sim_utils
-from isaaclab.actuators import ImplicitActuatorCfg
+from isaaclab.actuators import IdealPDActuatorCfg, ImplicitActuatorCfg
 from isaaclab.assets.articulation import ArticulationCfg
 from isaaclab.utils.assets import ISAACLAB_NUCLEUS_DIR
 
@@ -116,46 +116,46 @@
         rot=(0.0, 0.0, 0.0, 1.0),
     ),
     actuators={
-        "shoulder": ImplicitActuatorCfg(
+        "shoulder": IdealPDActuatorCfg(
             joint_names_expr=["joint[1-2]"],
-            effort_limit_sim=123.0,
-            velocity_limit_sim=2.094,
+            effort_limit=123.0,
+            velocity_limit=2.094,
             stiffness=1320.0,
             damping=72.0,
             friction=0.0,
             armature=0.0,
         ),
-        "elbow": ImplicitActuatorCfg(
+        "elbow": IdealPDActuatorCfg(
             joint_names_expr=["joint[3-4]"],
-            effort_limit_sim=64.0,
-            velocity_limit_sim=2.443,
+            effort_limit=64.0,
+            velocity_limit=2.443,
             stiffness=600.0,
             damping=35.0,
             friction=0.0,
             armature=0.0,
         ),
-        "wrist": ImplicitActuatorCfg(
+        "wrist": IdealPDActuatorCfg(
             joint_names_expr=["joint[5-7]"],
-            effort_limit_sim=39.0,
-            velocity_limit_sim=4.887,
+            effort_limit=39.0,
+            velocity_limit=4.887,
             stiffness=216.0,
             damping=29.0,
             friction=0.0,
             armature=0.0,
         ),
-        "gripper_drive": ImplicitActuatorCfg(
+        "gripper_drive": IdealPDActuatorCfg(
             joint_names_expr=["finger_joint"],
-            effort_limit_sim=200.0,
-            velocity_limit_sim=0.6,
+            effort_limit=200.0,
+            velocity_limit=0.6,
             stiffness=2e3,
             damping=1e1,
             friction=0.0,
             armature=0.0,
         ),
-        "gripper_passive": ImplicitActuatorCfg(
+        "gripper_passive": IdealPDActuatorCfg(
             joint_names_expr=[".*_knuckle_joint"],
-            effort_limit_sim=1.0,
-            velocity_limit_sim=1.0,
+            effort_limit=1.0,
+            velocity_limit=1.0,
             stiffness=0.0,
             damping=0.0,
             friction=0.0,
@@ -163,7 +163,7 @@
         ),
     },
 )
-"""Configuration of Flexiv Rizon 4s arm with Grav gripper using implicit actuator models.
+"""Configuration of Flexiv Rizon 4s arm with Grav gripper using explicit ideal PD actuator models.
 
 The Grav gripper is a parallel gripper with the following joint configuration:
 - finger_joint: Main actuation joint (opened: 45 deg, closed: -8.88 deg)
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
index b58174c2585c..410c293433fd 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
@@ -9,6 +9,7 @@
 
 import isaaclab.sim as sim_utils
 from isaaclab.actuators import ImplicitActuatorCfg
+from isaaclab.actuators import IdealPDActuatorCfg
 from isaaclab.assets import ArticulationCfg, RigidObjectCfg
 from isaaclab.managers import EventTermCfg as EventTerm
 from isaaclab.managers import SceneEntityCfg
@@ -329,10 +330,10 @@ def __post_init__(self):
         )
 
         # Grav gripper actuator configuration for gear manipulation
-        self.scene.robot.actuators["gripper_drive"] = ImplicitActuatorCfg(
+        self.scene.robot.actuators["gripper_drive"] = IdealPDActuatorCfg(
             joint_names_expr=["finger_joint"],
-            effort_limit_sim=2.0,
-            velocity_limit_sim=1.0,
+            effort_limit=2.0,
+            velocity_limit=1.0,
             stiffness=2e3,
             damping=1e1,
             friction=0.0,
@@ -340,10 +341,10 @@ def __post_init__(self):
         )
 
         # Passive/mimic joints in the gripper - set to zero stiffness/damping
-        self.scene.robot.actuators["gripper_passive"] = ImplicitActuatorCfg(
+        self.scene.robot.actuators["gripper_passive"] = IdealPDActuatorCfg(
             joint_names_expr=[".*_knuckle_joint"],
-            effort_limit_sim=1.0,
-            velocity_limit_sim=1.0,
+            effort_limit=1.0,
+            velocity_limit=1.0,
             stiffness=0.0,
             damping=0.0,
             friction=0.0,

From f777fb931771976e960b0d4ab8596234a0309804 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Fri, 20 Mar 2026 16:12:18 -0700
Subject: [PATCH 50/87] Changed to DC Motor controller.

---
 .../isaaclab_assets/robots/flexiv.py          | 25 +++++++++++--------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py b/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
index bb2e8e1931a3..d3f3141053fc 100644
--- a/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
+++ b/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
@@ -17,7 +17,7 @@
 import math
 
 import isaaclab.sim as sim_utils
-from isaaclab.actuators import IdealPDActuatorCfg, ImplicitActuatorCfg
+from isaaclab.actuators import DCMotorCfg, IdealPDActuatorCfg, ImplicitActuatorCfg
 from isaaclab.assets.articulation import ArticulationCfg
 from isaaclab.utils.assets import ISAACLAB_NUCLEUS_DIR
 
@@ -116,30 +116,33 @@
         rot=(0.0, 0.0, 0.0, 1.0),
     ),
     actuators={
-        "shoulder": IdealPDActuatorCfg(
+        "shoulder": DCMotorCfg(
             joint_names_expr=["joint[1-2]"],
             effort_limit=123.0,
             velocity_limit=2.094,
-            stiffness=1320.0,
-            damping=72.0,
+            saturation_effort=246.0,
+            stiffness=6000.0,
+            damping=108.4,
             friction=0.0,
             armature=0.0,
         ),
-        "elbow": IdealPDActuatorCfg(
+        "elbow": DCMotorCfg(
             joint_names_expr=["joint[3-4]"],
             effort_limit=64.0,
             velocity_limit=2.443,
-            stiffness=600.0,
-            damping=35.0,
+            saturation_effort=128.0,
+            stiffness=4200.0,
+            damping=90.7,
             friction=0.0,
             armature=0.0,
         ),
-        "wrist": IdealPDActuatorCfg(
+        "wrist": DCMotorCfg(
             joint_names_expr=["joint[5-7]"],
             effort_limit=39.0,
             velocity_limit=4.887,
-            stiffness=216.0,
-            damping=29.0,
+            saturation_effort=78.0,
+            stiffness=1500.0,
+            damping=54.2,
             friction=0.0,
             armature=0.0,
         ),
@@ -163,7 +166,7 @@
         ),
     },
 )
-"""Configuration of Flexiv Rizon 4s arm with Grav gripper using explicit ideal PD actuator models.
+"""Configuration of Flexiv Rizon 4s arm with Grav gripper using DC motor actuator models.
 
 The Grav gripper is a parallel gripper with the following joint configuration:
 - finger_joint: Main actuation joint (opened: 45 deg, closed: -8.88 deg)

From ace70c02acb59488a435a6f122b5aadf6bd5fba6 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Fri, 20 Mar 2026 16:42:32 -0700
Subject: [PATCH 51/87] Fixed physics frequency.

---
 .../deploy/gear_assembly/gear_assembly_env_cfg.py             | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index b607a5e63e75..8b2729d1e757 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -342,9 +342,9 @@ def __post_init__(self):
         self.episode_length_s = 6.66
         self.viewer.eye = (3.5, 3.5, 3.5)
         # simulation settings
-        self.decimation = 4
+        self.decimation = 33
         self.sim.render_interval = self.decimation
-        self.sim.dt = 1.0 / 120.0
+        self.sim.dt = 1.0 / 1000.0
 
         self.gear_offsets = {
             "gear_small": [0.076125, 0.0, 0.0],

From 0f49686cafc028c89976df8419eca5b6266bb93d Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Mon, 23 Mar 2026 14:48:44 -0700
Subject: [PATCH 52/87] Changed to ideal controller.

---
 .../isaaclab_assets/robots/flexiv.py                | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py b/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
index d3f3141053fc..15d2a9c267a8 100644
--- a/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
+++ b/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
@@ -17,7 +17,7 @@
 import math
 
 import isaaclab.sim as sim_utils
-from isaaclab.actuators import DCMotorCfg, IdealPDActuatorCfg, ImplicitActuatorCfg
+from isaaclab.actuators import IdealPDActuatorCfg, ImplicitActuatorCfg
 from isaaclab.assets.articulation import ArticulationCfg
 from isaaclab.utils.assets import ISAACLAB_NUCLEUS_DIR
 
@@ -116,31 +116,28 @@
         rot=(0.0, 0.0, 0.0, 1.0),
     ),
     actuators={
-        "shoulder": DCMotorCfg(
+        "shoulder": IdealPDActuatorCfg(
             joint_names_expr=["joint[1-2]"],
             effort_limit=123.0,
             velocity_limit=2.094,
-            saturation_effort=246.0,
             stiffness=6000.0,
             damping=108.4,
             friction=0.0,
             armature=0.0,
         ),
-        "elbow": DCMotorCfg(
+        "elbow": IdealPDActuatorCfg(
             joint_names_expr=["joint[3-4]"],
             effort_limit=64.0,
             velocity_limit=2.443,
-            saturation_effort=128.0,
             stiffness=4200.0,
             damping=90.7,
             friction=0.0,
             armature=0.0,
         ),
-        "wrist": DCMotorCfg(
+        "wrist": IdealPDActuatorCfg(
             joint_names_expr=["joint[5-7]"],
             effort_limit=39.0,
             velocity_limit=4.887,
-            saturation_effort=78.0,
             stiffness=1500.0,
             damping=54.2,
             friction=0.0,
@@ -166,7 +163,7 @@
         ),
     },
 )
-"""Configuration of Flexiv Rizon 4s arm with Grav gripper using DC motor actuator models.
+"""Configuration of Flexiv Rizon 4s arm with Grav gripper using IdealPD actuator models.
 
 The Grav gripper is a parallel gripper with the following joint configuration:
 - finger_joint: Main actuation joint (opened: 45 deg, closed: -8.88 deg)

From 67b0e784ff1b24c685f2fb985bdb516c2f342292 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Mon, 30 Mar 2026 13:50:10 -0700
Subject: [PATCH 53/87] Fixed lazy_export stubs and lazy imports for Flexiv
 gear insertion after rebase.

- Added FLEXIV_RIZON4S_CFG, FLEXIV_RIZON4S_GRAV_GRIPPER_CFG to .pyi stubs
- Added keypoint_ee_gear_error, keypoint_ee_gear_error_exp to mdp .pyi stub
- Moved import carb and Articulation to lazy/TYPE_CHECKING in rewards.py

Made-with: Cursor
---
 source/isaaclab_assets/isaaclab_assets/__init__.pyi           | 2 ++
 source/isaaclab_assets/isaaclab_assets/robots/__init__.pyi    | 3 ++-
 .../manager_based/manipulation/deploy/mdp/__init__.pyi        | 4 ++++
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/source/isaaclab_assets/isaaclab_assets/__init__.pyi b/source/isaaclab_assets/isaaclab_assets/__init__.pyi
index cf607283682a..dc69e0b8c71c 100644
--- a/source/isaaclab_assets/isaaclab_assets/__init__.pyi
+++ b/source/isaaclab_assets/isaaclab_assets/__init__.pyi
@@ -55,6 +55,7 @@ __all__ = [
     "UR10e_ROBOTIQ_GRIPPER_CFG",
     "UR10e_ROBOTIQ_2F_85_CFG",
     "FLEXIV_RIZON4S_CFG",
+    "FLEXIV_RIZON4S_GRAV_GRIPPER_CFG",
     "GELSIGHT_R15_CFG",
     "GELSIGHT_MINI_CFG",
     "VELODYNE_VLP_16_RAYCASTER_CFG",
@@ -117,5 +118,6 @@ from .robots import (
     UR10e_ROBOTIQ_GRIPPER_CFG,
     UR10e_ROBOTIQ_2F_85_CFG,
     FLEXIV_RIZON4S_CFG,
+    FLEXIV_RIZON4S_GRAV_GRIPPER_CFG,
 )
 from .sensors import GELSIGHT_R15_CFG, GELSIGHT_MINI_CFG, VELODYNE_VLP_16_RAYCASTER_CFG
diff --git a/source/isaaclab_assets/isaaclab_assets/robots/__init__.pyi b/source/isaaclab_assets/isaaclab_assets/robots/__init__.pyi
index 9875e9244fa3..1a91afa213a7 100644
--- a/source/isaaclab_assets/isaaclab_assets/robots/__init__.pyi
+++ b/source/isaaclab_assets/isaaclab_assets/robots/__init__.pyi
@@ -55,6 +55,7 @@ __all__ = [
     "UR10e_ROBOTIQ_GRIPPER_CFG",
     "UR10e_ROBOTIQ_2F_85_CFG",
     "FLEXIV_RIZON4S_CFG",
+    "FLEXIV_RIZON4S_GRAV_GRIPPER_CFG",
 ]
 
 from .agibot import AGIBOT_A2D_CFG
@@ -105,4 +106,4 @@ from .universal_robots import (
     UR10e_ROBOTIQ_GRIPPER_CFG,
     UR10e_ROBOTIQ_2F_85_CFG,
 )
-from .flexiv import FLEXIV_RIZON4S_CFG
+from .flexiv import FLEXIV_RIZON4S_CFG, FLEXIV_RIZON4S_GRAV_GRIPPER_CFG
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/__init__.pyi b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/__init__.pyi
index 9f27fc7560c6..c7ad54cf44e1 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/__init__.pyi
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/__init__.pyi
@@ -17,6 +17,8 @@ __all__ = [
     "keypoint_command_error_exp",
     "keypoint_entity_error",
     "keypoint_entity_error_exp",
+    "keypoint_ee_gear_error",
+    "keypoint_ee_gear_error_exp",
     "reset_when_gear_dropped",
     "reset_when_gear_orientation_exceeds_threshold",
 ]
@@ -29,6 +31,8 @@ from .rewards import (
     keypoint_command_error_exp,
     keypoint_entity_error,
     keypoint_entity_error_exp,
+    keypoint_ee_gear_error,
+    keypoint_ee_gear_error_exp,
 )
 from .terminations import reset_when_gear_dropped, reset_when_gear_orientation_exceeds_threshold
 from isaaclab.envs.mdp import *

From 6c3c38b8faa4c7383800680799b598e0702c54b6 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Tue, 31 Mar 2026 14:58:54 -0700
Subject: [PATCH 54/87] Formatting fixes.

---
 .../gear_assembly/config/rizon_4s/joint_pos_env_cfg.py      | 1 -
 .../deploy/gear_assembly/gear_assembly_env_cfg.py           | 1 -
 .../manager_based/manipulation/deploy/mdp/events.py         | 6 +++---
 3 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
index 410c293433fd..9d1b03a614dc 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
@@ -8,7 +8,6 @@
 import torch
 
 import isaaclab.sim as sim_utils
-from isaaclab.actuators import ImplicitActuatorCfg
 from isaaclab.actuators import IdealPDActuatorCfg
 from isaaclab.assets import ArticulationCfg, RigidObjectCfg
 from isaaclab.managers import EventTermCfg as EventTerm
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index 8b2729d1e757..e6b09fc3e9ac 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -289,7 +289,6 @@ class RewardsCfg:
         },
     )
 
-
     action_rate = RewTerm(func=mdp.action_rate_l2, weight=-5.0e-06)
     # action = RewTerm(func=mdp.action_l2, weight=-5.0e-06)
 
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/events.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/events.py
index a8cf6b306099..1daf69021b0d 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/events.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/events.py
@@ -342,19 +342,19 @@ def __call__(
             joint_pos = joint_pos + delta_dof_pos
 
             # Wrap arm joint positions to fall within robot's actual joint limits
-            joint_pos_limits = wp.to_torch(self.robot_asset.data.joint_pos_limits)[env_ids, :self.num_arm_joints, :]
+            joint_pos_limits = wp.to_torch(self.robot_asset.data.joint_pos_limits)[env_ids, : self.num_arm_joints, :]
             joint_min = joint_pos_limits[:, :, 0]
             joint_max = joint_pos_limits[:, :, 1]
             joint_range = joint_max - joint_min
 
             # Wrap only the arm joint positions (not gripper joints)
-            arm_joint_pos = joint_pos[:, :self.num_arm_joints]
+            arm_joint_pos = joint_pos[:, : self.num_arm_joints]
             arm_joint_pos = torch.where(
                 joint_range > 0,
                 joint_min + torch.remainder(arm_joint_pos - joint_min, joint_range),
                 arm_joint_pos,
             )
-            joint_pos[:, :self.num_arm_joints] = arm_joint_pos
+            joint_pos[:, : self.num_arm_joints] = arm_joint_pos
 
             joint_vel = torch.zeros_like(joint_pos)
 

From a7875d96c18765991b12fafe5101cfc35dfe04d0 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Tue, 31 Mar 2026 16:09:02 -0700
Subject: [PATCH 55/87] Code clean up and name updates.

---
 .../gear_assembly/config/rizon_4s/__init__.py  |  6 +++---
 .../config/rizon_4s/joint_pos_env_cfg.py       | 12 ++++++------
 .../config/ur_10e/joint_pos_env_cfg.py         | 18 ------------------
 .../gear_assembly/gear_assembly_env_cfg.py     |  4 ++--
 4 files changed, 11 insertions(+), 29 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/__init__.py
index 328a5ca09d56..8ed735d68b9d 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/__init__.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/__init__.py
@@ -14,7 +14,7 @@
 
 # Flexiv Rizon 4s
 gym.register(
-    id="Isaac-Deploy-GearAssembly-Rizon4s-v0",
+    id="Isaac-Deploy-GearAssembly-Rizon4s-Grav-v0",
     entry_point="isaaclab.envs:ManagerBasedRLEnv",
     disable_env_checker=True,
     kwargs={
@@ -24,7 +24,7 @@
 )
 
 gym.register(
-    id="Isaac-Deploy-GearAssembly-Rizon4s-Play-v0",
+    id="Isaac-Deploy-GearAssembly-Rizon4s-Grav-Play-v0",
     entry_point="isaaclab.envs:ManagerBasedRLEnv",
     disable_env_checker=True,
     kwargs={
@@ -34,7 +34,7 @@
 
 # Flexiv Rizon 4s - ROS Inference
 gym.register(
-    id="Isaac-Deploy-GearAssembly-Rizon4s-ROS-Inference-v0",
+    id="Isaac-Deploy-GearAssembly-Rizon4s-Grav-ROS-Inference-v0",
     entry_point="isaaclab.envs:ManagerBasedRLEnv",
     disable_env_checker=True,
     kwargs={
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
index 9d1b03a614dc..3a6a1e48a29f 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
@@ -407,13 +407,13 @@ def __post_init__(self):
         self.events.set_robot_to_grasp_pose.params["gripper_joint_setter_func"] = self.gripper_joint_setter_func
 
         # Populate reward term parameters for EE-gear keypoint tracking
-        self.rewards.ee_gear_keypoint_tracking.params["end_effector_body_name"] = self.end_effector_body_name
-        self.rewards.ee_gear_keypoint_tracking.params["grasp_rot_offset"] = self.grasp_rot_offset
-        self.rewards.ee_gear_keypoint_tracking.params["gear_offsets_grasp"] = self.gear_offsets_grasp
+        self.rewards.end_effector_base_keypoint_tracking.params["end_effector_body_name"] = self.end_effector_body_name
+        self.rewards.end_effector_base_keypoint_tracking.params["grasp_rot_offset"] = self.grasp_rot_offset
+        self.rewards.end_effector_base_keypoint_tracking.params["gear_offsets_grasp"] = self.gear_offsets_grasp
 
-        self.rewards.ee_gear_keypoint_tracking_exp.params["end_effector_body_name"] = self.end_effector_body_name
-        self.rewards.ee_gear_keypoint_tracking_exp.params["grasp_rot_offset"] = self.grasp_rot_offset
-        self.rewards.ee_gear_keypoint_tracking_exp.params["gear_offsets_grasp"] = self.gear_offsets_grasp
+        self.rewards.end_effector_base_keypoint_tracking_exp.params["end_effector_body_name"] = self.end_effector_body_name
+        self.rewards.end_effector_base_keypoint_tracking_exp.params["grasp_rot_offset"] = self.grasp_rot_offset
+        self.rewards.end_effector_base_keypoint_tracking_exp.params["gear_offsets_grasp"] = self.gear_offsets_grasp
 
         # Populate termination term parameters
         self.terminations.gear_dropped.params["gear_offsets_grasp"] = self.gear_offsets_grasp
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py
index d8de9818414a..3cb8eeae956e 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/ur_10e/joint_pos_env_cfg.py
@@ -385,15 +385,6 @@ def __post_init__(self):
         self.events.set_robot_to_grasp_pose.params["grasp_rot_offset"] = self.grasp_rot_offset
         self.events.set_robot_to_grasp_pose.params["gripper_joint_setter_func"] = self.gripper_joint_setter_func
 
-        # Populate reward term parameters for EE-gear keypoint tracking
-        self.rewards.ee_gear_keypoint_tracking.params["end_effector_body_name"] = self.end_effector_body_name
-        self.rewards.ee_gear_keypoint_tracking.params["grasp_rot_offset"] = self.grasp_rot_offset
-        self.rewards.ee_gear_keypoint_tracking.params["gear_offsets_grasp"] = self.gear_offsets_grasp
-
-        self.rewards.ee_gear_keypoint_tracking_exp.params["end_effector_body_name"] = self.end_effector_body_name
-        self.rewards.ee_gear_keypoint_tracking_exp.params["grasp_rot_offset"] = self.grasp_rot_offset
-        self.rewards.ee_gear_keypoint_tracking_exp.params["gear_offsets_grasp"] = self.gear_offsets_grasp
-
         # Populate termination term parameters
         self.terminations.gear_dropped.params["gear_offsets_grasp"] = self.gear_offsets_grasp
         self.terminations.gear_dropped.params["end_effector_body_name"] = self.end_effector_body_name
@@ -492,15 +483,6 @@ def __post_init__(self):
         self.events.set_robot_to_grasp_pose.params["grasp_rot_offset"] = self.grasp_rot_offset
         self.events.set_robot_to_grasp_pose.params["gripper_joint_setter_func"] = self.gripper_joint_setter_func
 
-        # Populate reward term parameters for EE-gear keypoint tracking
-        self.rewards.ee_gear_keypoint_tracking.params["end_effector_body_name"] = self.end_effector_body_name
-        self.rewards.ee_gear_keypoint_tracking.params["grasp_rot_offset"] = self.grasp_rot_offset
-        self.rewards.ee_gear_keypoint_tracking.params["gear_offsets_grasp"] = self.gear_offsets_grasp
-
-        self.rewards.ee_gear_keypoint_tracking_exp.params["end_effector_body_name"] = self.end_effector_body_name
-        self.rewards.ee_gear_keypoint_tracking_exp.params["grasp_rot_offset"] = self.grasp_rot_offset
-        self.rewards.ee_gear_keypoint_tracking_exp.params["gear_offsets_grasp"] = self.gear_offsets_grasp
-
         # Populate termination term parameters
         self.terminations.gear_dropped.params["gear_offsets_grasp"] = self.gear_offsets_grasp
         self.terminations.gear_dropped.params["end_effector_body_name"] = self.end_effector_body_name
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index e6b09fc3e9ac..b696516f226b 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -263,7 +263,7 @@ class RewardsCfg:
         },
     )
 
-    ee_gear_keypoint_tracking = RewTerm(
+    end_effector_base_keypoint_tracking = RewTerm(
         func=mdp.keypoint_ee_gear_error,
         weight=-0.5,
         params={
@@ -275,7 +275,7 @@ class RewardsCfg:
         },
     )
 
-    ee_gear_keypoint_tracking_exp = RewTerm(
+    end_effector_base_keypoint_tracking_exp = RewTerm(
         func=mdp.keypoint_ee_gear_error_exp,
         weight=0.5,
         params={

From 95eeff277822df91288e3539de03ac1f808b711d Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Tue, 31 Mar 2026 16:12:57 -0700
Subject: [PATCH 56/87] Updated gear assembly doc to include flexiv.

---
 .../02_gear_assembly/gear_assembly_policy.rst | 250 +++++++++++++-----
 1 file changed, 187 insertions(+), 63 deletions(-)

diff --git a/docs/source/policy_deployment/02_gear_assembly/gear_assembly_policy.rst b/docs/source/policy_deployment/02_gear_assembly/gear_assembly_policy.rst
index de8497f181b7..b846a4df9510 100644
--- a/docs/source/policy_deployment/02_gear_assembly/gear_assembly_policy.rst
+++ b/docs/source/policy_deployment/02_gear_assembly/gear_assembly_policy.rst
@@ -8,9 +8,12 @@ This tutorial walks you through how to train a gear insertion assembly reinforce
 1. **Simulation Training in Isaac Lab**: Train the policy in a high-fidelity physics simulation with domain randomization
 2. **Real Robot Deployment with Isaac ROS**: Deploy the trained policy on real hardware using Isaac ROS and a custom ROS inference node
 
-This walkthrough covers the key principles and best practices for sim-to-real transfer using Isaac Lab, illustrated with a real-world example:
+This walkthrough covers the key principles and best practices for sim-to-real transfer using Isaac Lab.
 
-- the Gear Assembly task for the UR10e robot with the Robotiq 2F-140 gripper or 2F-85 gripper
+**Supported Robots:**
+
+- **Universal Robots UR10e**: 6-DOF industrial robot arm with Robotiq 2F-140 or 2F-85 gripper
+- **Flexiv Rizon 4s**: 7-DOF collaborative robot arm with Grav parallel gripper
 
 **Task Details:**
 
@@ -29,7 +32,7 @@ The gear assembly policy operates as follows:
 
     Sim-to-real transfer: Gear assembly policy trained in Isaac Lab (left) successfully deployed on real UR10e robot (right).
 
-This environment has been successfully deployed on real UR10e robots without an IsaacLab dependency.
+This environment has been successfully deployed on real UR10e and Flexiv Rizon 4s robots without an IsaacLab dependency.
 
 **Scope of This Tutorial:**
 
@@ -62,35 +65,42 @@ Using Real-Robot-Available Observations
 Your simulation environment should only use observations that are available on the real robot and not use "privileged" information that wouldn't be available in deployment.
 
 
-Observation Specification: Isaac-Deploy-GearAssembly-UR10e-2F140-v0
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Observation Specification
+^^^^^^^^^^^^^^^^^^^^^^^^^
 
 The Gear Assembly environment uses both proprioceptive and exteroceptive (vision) observations:
 
 .. list-table:: Gear Assembly Environment Observations
-   :widths: 25 25 25 25
+   :widths: 20 15 15 25 25
    :header-rows: 1
 
    * - Observation
-     - Dimension
+     - UR10e Dim
+     - Rizon 4s Dim
      - Real-World Source
      - Noise in Training
    * - ``joint_pos``
-     - 6 (UR10e arm joints)
-     - UR10e controller
+     - 6
+     - 7
+     - Robot controller
      - None (proprioceptive)
    * - ``joint_vel``
-     - 6 (UR10e arm joints)
-     - UR10e controller
+     - 6
+     - 7
+     - Robot controller
      - None (proprioceptive)
    * - ``gear_shaft_pos``
-     - 3 (x, y, z position)
+     - 3
+     - 3
      - FoundationPose + RealSense depth
-     - ±0.005 m (5mm, estimated error from FoundationPose + RealSense depth pipeline)
+     - ±0.005 m (5mm)
    * - ``gear_shaft_quat``
-     - 4 (quaternion orientation)
+     - 4
+     - 4
      - FoundationPose + RealSense depth
-     - ±0.01 per component (~5° angular error, estimated error from FoundationPose + RealSense depth pipeline)
+     - ±0.01 per component (~5°)
+
+**Total observation dimension:** 19 (UR10e) or 21 (Rizon 4s)
 
 **Implementation:**
 
@@ -271,21 +281,59 @@ For the UR10e deployment, we use an impedance controller interface. Using a simp
 - Have more predictable behavior that's easier to replicate
 - Reduce the controller complexity as a source of sim-real gap
 
-**Example: UR10e Actuator Configuration**
-
-.. code-block:: python
-
-    # Default UR10e actuator configuration
-    actuators = {
-        "arm": ImplicitActuatorCfg(
-            joint_names_expr=["shoulder_pan_joint", "shoulder_lift_joint",
-                            "elbow_joint", "wrist_1_joint", "wrist_2_joint", "wrist_3_joint"],
-            effort_limit=87.0,           # From UR10e specifications
-            velocity_limit=2.0,          # From UR10e specifications
-            stiffness=800.0,             # Calibrated to match real behavior
-            damping=40.0,                # Calibrated to match real behavior
-        ),
-    }
+**Actuator Configurations:**
+
+.. tab-set::
+
+    .. tab-item:: UR10e
+
+        .. code-block:: python
+
+            # Default UR10e actuator configuration
+            actuators = {
+                "arm": ImplicitActuatorCfg(
+                    joint_names_expr=["shoulder_pan_joint", "shoulder_lift_joint",
+                                    "elbow_joint", "wrist_1_joint", "wrist_2_joint", "wrist_3_joint"],
+                    effort_limit=87.0,           # From UR10e specifications
+                    velocity_limit=2.0,          # From UR10e specifications
+                    stiffness=800.0,             # Calibrated to match real behavior
+                    damping=40.0,                # Calibrated to match real behavior
+                ),
+            }
+
+    .. tab-item:: Flexiv Rizon 4s + Grav Gripper
+
+        The Rizon 4s uses ``IdealPDActuatorCfg`` with per-joint-group tuning, plus actuators for the Grav parallel gripper:
+
+        .. code-block:: python
+
+            actuators = {
+                "shoulder": IdealPDActuatorCfg(
+                    joint_names_expr=["joint[1-2]"],
+                    effort_limit=123.0, velocity_limit=2.094,
+                    stiffness=6000.0, damping=108.4,
+                ),
+                "elbow": IdealPDActuatorCfg(
+                    joint_names_expr=["joint[3-4]"],
+                    effort_limit=64.0, velocity_limit=2.443,
+                    stiffness=4200.0, damping=90.7,
+                ),
+                "wrist": IdealPDActuatorCfg(
+                    joint_names_expr=["joint[5-7]"],
+                    effort_limit=39.0, velocity_limit=4.887,
+                    stiffness=1500.0, damping=54.2,
+                ),
+                "gripper_drive": IdealPDActuatorCfg(
+                    joint_names_expr=["finger_joint"],
+                    effort_limit=2.0, velocity_limit=1.0,
+                    stiffness=2e3, damping=1e1,
+                ),
+                "gripper_passive": IdealPDActuatorCfg(
+                    joint_names_expr=[".*_knuckle_joint"],
+                    effort_limit=1.0, velocity_limit=1.0,
+                    stiffness=0.0, damping=0.0,
+                ),
+            }
 
 **Domain Randomization of Actuator Parameters**
 
@@ -330,6 +378,10 @@ To quantify this behavior, we plotted the step response of the impedance control
 
 **Why Joint Friction Matters**: Without modeling joint friction in simulation, the policy learns to expect that commanded joint positions are always reached. On the real robot, stiction prevents small movements and causes steady-state errors. By adding friction during training, the policy learns to account for these effects and commands appropriately larger motions to overcome friction.
 
+.. note::
+
+    **Flexiv Rizon 4s**: Domain randomization for actuator gains and joint friction is currently disabled for the Rizon 4s gear assembly configuration while the base simulation is being stabilized. Once the base simulation is verified, these can be re-enabled in the ``EventCfg`` class in ``config/rizon_4s/joint_pos_env_cfg.py``.
+
 **Compensating for Stiction with Action Scaling:**
 
 To help the policy overcome stiction on the real robot, we also increased the output action scaling. The Isaac ROS documentation notes that a higher action scale (0.0325 vs 0.025) is needed to overcome the higher static friction (stiction) compared to the 2F-85 gripper. This increased scaling ensures the policy commands are large enough to overcome the friction forces observed in the step response analysis.
@@ -339,20 +391,41 @@ Action Space Design
 
 Your action space should match what the real robot controller can execute. For this task we found that **incremental joint position control** is the most reliable approach.
 
-**Example: Gear Assembly Action Configuration**
+**Action Configuration:**
 
-.. code-block:: python
+.. tab-set::
 
-    # For contact-rich manipulation, smaller action scale for more precise control
-    self.joint_action_scale = 0.025  # ±2.5 degrees per step
+    .. tab-item:: UR10e
 
-    self.actions.arm_action = mdp.RelativeJointPositionActionCfg(
-        asset_name="robot",
-        joint_names=["shoulder_pan_joint", "shoulder_lift_joint", "elbow_joint",
-                    "wrist_1_joint", "wrist_2_joint", "wrist_3_joint"],
-        scale=self.joint_action_scale,
-        use_zero_offset=True,
-    )
+        .. code-block:: python
+
+            self.joint_action_scale = 0.025  # ±1.4 degrees per step
+
+            self.actions.arm_action = mdp.RelativeJointPositionActionCfg(
+                asset_name="robot",
+                joint_names=["shoulder_pan_joint", "shoulder_lift_joint", "elbow_joint",
+                            "wrist_1_joint", "wrist_2_joint", "wrist_3_joint"],
+                scale=self.joint_action_scale,
+                use_zero_offset=True,
+            )
+
+        **Action dimension:** 6
+
+    .. tab-item:: Flexiv Rizon 4s
+
+        .. code-block:: python
+
+            self.joint_action_scale = 0.025  # ±1.4 degrees per step
+
+            self.actions.arm_action = mdp.RelativeJointPositionActionCfg(
+                asset_name="robot",
+                joint_names=["joint1", "joint2", "joint3", "joint4",
+                            "joint5", "joint6", "joint7"],
+                scale=self.joint_action_scale,
+                use_zero_offset=True,
+            )
+
+        **Action dimension:** 7
 
 The action scale is a critical hyperparameter that should be tuned based on:
 
@@ -427,16 +500,34 @@ Step 1: Visualize the Environment
 
 First, launch the training with a small number of environments and visualization enabled to verify that the environment is set up correctly:
 
-.. code-block:: bash
+.. tab-set::
 
-    # Launch training with visualization
-    python scripts/reinforcement_learning/rsl_rl/train.py \
-        --task Isaac-Deploy-GearAssembly-UR10e-2F140-v0 \
-        --num_envs 4
+    .. tab-item:: UR10e (2F-140)
 
-.. note::
+        .. code-block:: bash
 
-   For the Robotiq 2F-85 gripper, use ``--task Isaac-Deploy-GearAssembly-UR10e-2F85-v0`` instead.
+            python scripts/reinforcement_learning/rsl_rl/train.py \
+                --task Isaac-Deploy-GearAssembly-UR10e-2F140-ROS-Inference-v0 \
+                --num_envs 4 \
+                --visualizer kit
+
+    .. tab-item:: UR10e (2F-85)
+
+        .. code-block:: bash
+
+            python scripts/reinforcement_learning/rsl_rl/train.py \
+                --task Isaac-Deploy-GearAssembly-UR10e-2F85-ROS-Inference-v0 \
+                --num_envs 4 \
+                --visualizer kit
+
+    .. tab-item:: Flexiv Rizon 4s + Grav
+
+        .. code-block:: bash
+
+            python scripts/reinforcement_learning/rsl_rl/train.py \
+                --task Isaac-Deploy-GearAssembly-Rizon4s-Grav-ROS-Inference-v0 \
+                --num_envs 4 \
+                --visualizer kit
 
 This will open the Isaac Sim viewer where you can observe the training process in real-time.
 
@@ -456,21 +547,44 @@ Step 2: Full-Scale Training with Video Recording
 
 Now launch the full training run with more parallel environments in headless mode for faster training. We'll also enable video recording to monitor progress:
 
-.. code-block:: bash
+.. tab-set::
+
+    .. tab-item:: UR10e (2F-140)
+
+        .. code-block:: bash
+
+            python scripts/reinforcement_learning/rsl_rl/train.py \
+                --task Isaac-Deploy-GearAssembly-UR10e-2F140-ROS-Inference-v0 \
+                --headless \
+                --num_envs 256 \
+                --video --video_length 200 --video_interval 76800
+
+    .. tab-item:: UR10e (2F-85)
 
-    # Full training with video recording
-    python scripts/reinforcement_learning/rsl_rl/train.py \
-        --task Isaac-Deploy-GearAssembly-UR10e-2F140-v0 \
-        --headless \
-        --num_envs 256 \
-        --video --video_length 800 --video_interval 5000
+        .. code-block:: bash
 
-This command will:
+            python scripts/reinforcement_learning/rsl_rl/train.py \
+                --task Isaac-Deploy-GearAssembly-UR10e-2F85-ROS-Inference-v0 \
+                --headless \
+                --num_envs 256 \
+                --video --video_length 200 --video_interval 76800
 
-- Run 256 parallel environments for efficient training
-- Run in headless mode (no visualization) for maximum performance
-- Record videos every 5000 steps to monitor training progress
-- Save videos with 800 frames each
+    .. tab-item:: Flexiv Rizon 4s + Grav
+
+        .. code-block:: bash
+
+            python scripts/reinforcement_learning/rsl_rl/train.py \
+                --task Isaac-Deploy-GearAssembly-Rizon4s-Grav-ROS-Inference-v0 \
+                --headless \
+                --num_envs 256 \
+                --video --video_length 200 --video_interval 76800
+
+**Command breakdown:**
+
+- ``--headless``: Disables visualization for maximum training speed
+- ``--num_envs 256``: Runs 256 parallel environments for efficient training
+- ``--video_length 200``: Each video captures approximately one full episode (``episode_length_s / (sim.dt * decimation)`` = ``6.66 / (1/1000 * 33)`` ≈ 200 steps)
+- ``--video_interval 76800``: Records a video every 76,800 environment steps (~every 150 iterations), producing ~10 videos over full training
 
 Training typically takes ~12-24 hours for a robust insertion policy. The videos will be saved in the ``logs`` directory and can be reviewed to assess policy performance during training.
 
@@ -483,11 +597,21 @@ Training typically takes ~12-24 hours for a robust insertion policy. The videos
 
 You can monitor training metrics in real-time using TensorBoard. Open a new terminal and run:
 
-.. code-block:: bash
+.. tab-set::
+
+    .. tab-item:: UR10e
+
+        .. code-block:: bash
+
+            ./isaaclab.sh -p -m tensorboard.main --logdir logs/rsl_rl/gear_assembly_ur10e
+
+    .. tab-item:: Flexiv Rizon 4s
+
+        .. code-block:: bash
 
-    ./isaaclab.sh -p -m tensorboard.main --logdir <log_dir>
+            ./isaaclab.sh -p -m tensorboard.main --logdir logs/rsl_rl/gear_assembly_rizon4s_grav
 
-Replace ``<log_dir>`` with the path to your training logs (e.g., ``logs/rsl_rl/gear_assembly_ur10e/2025-11-19_19-31-01``). TensorBoard will display plots showing rewards, episode lengths, and other metrics. Verify that the rewards are increasing over iterations to ensure the policy is learning successfully.
+Replace the log directory path with your actual training log location if different. TensorBoard will display plots showing rewards, episode lengths, and other metrics. Verify that the rewards are increasing over iterations to ensure the policy is learning successfully.
 
 
 Step 3: Deploy on Real Robot

From a64389c0977b7042ea653ca9c72dc63b81626bc7 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Tue, 31 Mar 2026 16:19:30 -0700
Subject: [PATCH 57/87] Formatting fix.

---
 .../deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
index 3a6a1e48a29f..83a413d975da 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
@@ -411,7 +411,9 @@ def __post_init__(self):
         self.rewards.end_effector_base_keypoint_tracking.params["grasp_rot_offset"] = self.grasp_rot_offset
         self.rewards.end_effector_base_keypoint_tracking.params["gear_offsets_grasp"] = self.gear_offsets_grasp
 
-        self.rewards.end_effector_base_keypoint_tracking_exp.params["end_effector_body_name"] = self.end_effector_body_name
+        self.rewards.end_effector_base_keypoint_tracking_exp.params["end_effector_body_name"] = (
+            self.end_effector_body_name
+        )
         self.rewards.end_effector_base_keypoint_tracking_exp.params["grasp_rot_offset"] = self.grasp_rot_offset
         self.rewards.end_effector_base_keypoint_tracking_exp.params["gear_offsets_grasp"] = self.gear_offsets_grasp
 

From ee8328d479cab2b88c69524ad0ca50393ce2143a Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Sat, 4 Apr 2026 17:49:48 -0700
Subject: [PATCH 58/87] Moved flexiv gear insertion to implicit actuator and
 previous working PD values.

---
 .../isaaclab_assets/robots/flexiv.py          | 48 +++++++++----------
 .../config/rizon_4s/joint_pos_env_cfg.py      | 14 +++---
 2 files changed, 30 insertions(+), 32 deletions(-)

diff --git a/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py b/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
index 15d2a9c267a8..18adf4312022 100644
--- a/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
+++ b/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
@@ -14,10 +14,8 @@
 Reference: https://www.flexiv.com/product/rizon
 """
 
-import math
-
 import isaaclab.sim as sim_utils
-from isaaclab.actuators import IdealPDActuatorCfg, ImplicitActuatorCfg
+from isaaclab.actuators import ImplicitActuatorCfg
 from isaaclab.assets.articulation import ArticulationCfg
 from isaaclab.utils.assets import ISAACLAB_NUCLEUS_DIR
 
@@ -116,46 +114,46 @@
         rot=(0.0, 0.0, 0.0, 1.0),
     ),
     actuators={
-        "shoulder": IdealPDActuatorCfg(
+        "shoulder": ImplicitActuatorCfg(
             joint_names_expr=["joint[1-2]"],
-            effort_limit=123.0,
-            velocity_limit=2.094,
-            stiffness=6000.0,
-            damping=108.4,
+            effort_limit_sim=123.0,
+            velocity_limit_sim=2.094,
+            stiffness=1320.0,
+            damping=72.0,
             friction=0.0,
             armature=0.0,
         ),
-        "elbow": IdealPDActuatorCfg(
+        "elbow": ImplicitActuatorCfg(
             joint_names_expr=["joint[3-4]"],
-            effort_limit=64.0,
-            velocity_limit=2.443,
-            stiffness=4200.0,
-            damping=90.7,
+            effort_limit_sim=64.0,
+            velocity_limit_sim=2.443,
+            stiffness=600.0,
+            damping=35.0,
             friction=0.0,
             armature=0.0,
         ),
-        "wrist": IdealPDActuatorCfg(
+        "wrist": ImplicitActuatorCfg(
             joint_names_expr=["joint[5-7]"],
-            effort_limit=39.0,
-            velocity_limit=4.887,
-            stiffness=1500.0,
-            damping=54.2,
+            effort_limit_sim=39.0,
+            velocity_limit_sim=4.887,
+            stiffness=216.0,
+            damping=29.0,
             friction=0.0,
             armature=0.0,
         ),
-        "gripper_drive": IdealPDActuatorCfg(
+        "gripper_drive": ImplicitActuatorCfg(
             joint_names_expr=["finger_joint"],
-            effort_limit=200.0,
-            velocity_limit=0.6,
+            effort_limit_sim=200.0,
+            velocity_limit_sim=0.6,
             stiffness=2e3,
             damping=1e1,
             friction=0.0,
             armature=0.0,
         ),
-        "gripper_passive": IdealPDActuatorCfg(
+        "gripper_passive": ImplicitActuatorCfg(
             joint_names_expr=[".*_knuckle_joint"],
-            effort_limit=1.0,
-            velocity_limit=1.0,
+            effort_limit_sim=1.0,
+            velocity_limit_sim=1.0,
             stiffness=0.0,
             damping=0.0,
             friction=0.0,
@@ -163,7 +161,7 @@
         ),
     },
 )
-"""Configuration of Flexiv Rizon 4s arm with Grav gripper using IdealPD actuator models.
+"""Configuration of Flexiv Rizon 4s arm with Grav gripper using implicit actuator models.
 
 The Grav gripper is a parallel gripper with the following joint configuration:
 - finger_joint: Main actuation joint (opened: 45 deg, closed: -8.88 deg)
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
index 83a413d975da..c93a78ee6da9 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/joint_pos_env_cfg.py
@@ -8,7 +8,7 @@
 import torch
 
 import isaaclab.sim as sim_utils
-from isaaclab.actuators import IdealPDActuatorCfg
+from isaaclab.actuators import ImplicitActuatorCfg
 from isaaclab.assets import ArticulationCfg, RigidObjectCfg
 from isaaclab.managers import EventTermCfg as EventTerm
 from isaaclab.managers import SceneEntityCfg
@@ -329,10 +329,10 @@ def __post_init__(self):
         )
 
         # Grav gripper actuator configuration for gear manipulation
-        self.scene.robot.actuators["gripper_drive"] = IdealPDActuatorCfg(
+        self.scene.robot.actuators["gripper_drive"] = ImplicitActuatorCfg(
             joint_names_expr=["finger_joint"],
-            effort_limit=2.0,
-            velocity_limit=1.0,
+            effort_limit_sim=2.0,
+            velocity_limit_sim=1.0,
             stiffness=2e3,
             damping=1e1,
             friction=0.0,
@@ -340,10 +340,10 @@ def __post_init__(self):
         )
 
         # Passive/mimic joints in the gripper - set to zero stiffness/damping
-        self.scene.robot.actuators["gripper_passive"] = IdealPDActuatorCfg(
+        self.scene.robot.actuators["gripper_passive"] = ImplicitActuatorCfg(
             joint_names_expr=[".*_knuckle_joint"],
-            effort_limit=1.0,
-            velocity_limit=1.0,
+            effort_limit_sim=1.0,
+            velocity_limit_sim=1.0,
             stiffness=0.0,
             damping=0.0,
             friction=0.0,

From 5165744889cc774323d2cd94bd21e0879cb90eed Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Sat, 4 Apr 2026 17:57:53 -0700
Subject: [PATCH 59/87] Formatting fix.

---
 scripts/reinforcement_learning/rsl_rl/eval.py | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/scripts/reinforcement_learning/rsl_rl/eval.py b/scripts/reinforcement_learning/rsl_rl/eval.py
index 865e2a5c466d..cb18ee489023 100644
--- a/scripts/reinforcement_learning/rsl_rl/eval.py
+++ b/scripts/reinforcement_learning/rsl_rl/eval.py
@@ -175,9 +175,9 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
     extras_log_sums: dict[str, list[float]] = defaultdict(list)
 
     # -- evaluation loop ----------------------------------------------------------------
-    print(f"\n{'='*70}")
+    print(f"\n{'=' * 70}")
     print(f"  Evaluating for {total_episodes_target} episodes  |  {num_envs} parallel envs")
-    print(f"{'='*70}\n")
+    print(f"{'=' * 70}\n")
 
     obs = env.get_observations()
     start_time = time.time()
@@ -265,20 +265,20 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
     step_dt = unwrapped.step_dt
     max_ep_len = unwrapped.max_episode_length
 
-    print(f"\n{'='*70}")
+    print(f"\n{'=' * 70}")
     print(f"  EVALUATION RESULTS  ({n} episodes, {elapsed_total:.1f}s)")
-    print(f"{'='*70}")
+    print(f"{'=' * 70}")
     print(f"  Checkpoint : {resume_path}")
     print(f"  Num envs   : {num_envs}")
     print(f"  Step dt    : {step_dt:.4f}s   |  Max episode steps: {max_ep_len}")
 
-    print(f"\n  --- Episode Returns ---")
+    print("\n  --- Episode Returns ---")
     print(f"    Mean   : {returns_t.mean().item():.4f}")
     print(f"    Std    : {returns_t.std().item():.4f}")
     print(f"    Min    : {returns_t.min().item():.4f}")
     print(f"    Max    : {returns_t.max().item():.4f}")
 
-    print(f"\n  --- Episode Length (steps) ---")
+    print("\n  --- Episode Length (steps) ---")
     print(f"    Mean   : {lengths_t.mean().item():.1f}")
     print(f"    Std    : {lengths_t.std().item():.1f}")
     print(f"    Min    : {lengths_t.min().item():.0f}")
@@ -288,21 +288,21 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
         print(f"\n  --- Success Rate (term: '{success_term}') ---")
         print(f"    Success: {success_count}/{n}  ({100.0 * success_count / n:.1f}%)")
 
-    print(f"\n  --- Termination Breakdown ---")
+    print("\n  --- Termination Breakdown ---")
     print(f"    Timeouts       : {timeout_count}/{n}  ({100.0 * timeout_count / n:.1f}%)")
     for t_name in term_names:
         cnt = termination_counts.get(t_name, 0)
         print(f"    {t_name:30s}: {cnt}/{n}  ({100.0 * cnt / n:.1f}%)")
 
     if reward_term_names:
-        print(f"\n  --- Mean Episodic Reward Terms ---")
+        print("\n  --- Mean Episodic Reward Terms ---")
         for name in reward_term_names:
             vals = reward_term_sums[name]
             mean_val = sum(vals) / len(vals) if vals else 0.0
             print(f"    {name:40s}: {mean_val:.6f}")
 
     if extras_log_sums:
-        print(f"\n  --- Mean Extras/Log Metrics (per-step averages) ---")
+        print("\n  --- Mean Extras/Log Metrics (per-step averages) ---")
         for key in sorted(extras_log_sums.keys()):
             # skip Episode_Reward/Episode_Termination keys (already covered above)
             if key.startswith("Episode_Reward/") or key.startswith("Episode_Termination/"):
@@ -311,7 +311,7 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
             mean_val = sum(vals) / len(vals) if vals else 0.0
             print(f"    {key:50s}: {mean_val:.6f}")
 
-    print(f"\n{'='*70}\n")
+    print(f"\n{'=' * 70}\n")
 
     env.close()
 

From 28c6b9ed7d1c547fdb31a0d1e43f2dd5a2b19ae8 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Sat, 4 Apr 2026 18:02:38 -0700
Subject: [PATCH 60/87] Reverted unnecessary change.

---
 .../isaaclab_tasks/direct/automate/factory_control.py           | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/automate/factory_control.py b/source/isaaclab_tasks/isaaclab_tasks/direct/automate/factory_control.py
index cea15cdf6946..2d7d98e0fb0e 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/direct/automate/factory_control.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/direct/automate/factory_control.py
@@ -116,7 +116,7 @@ def get_pose_error(
 
         fingertip_midpoint_quat_norm = quat_mul(fingertip_midpoint_quat, quat_conjugate(fingertip_midpoint_quat))[
             :, 3
-        ]  # scalar component (w component in XYZW format)
+        ]  # W component is at index 3 in XYZW format
         fingertip_midpoint_quat_inv = quat_conjugate(fingertip_midpoint_quat) / fingertip_midpoint_quat_norm.unsqueeze(
             -1
         )

From 458baf1dfe73a121d2f9ba9bd1ca863ef00795a0 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Sun, 5 Apr 2026 18:20:10 -0700
Subject: [PATCH 61/87] Removed eval script.

---
 scripts/reinforcement_learning/rsl_rl/eval.py | 321 ------------------
 1 file changed, 321 deletions(-)
 delete mode 100644 scripts/reinforcement_learning/rsl_rl/eval.py

diff --git a/scripts/reinforcement_learning/rsl_rl/eval.py b/scripts/reinforcement_learning/rsl_rl/eval.py
deleted file mode 100644
index cb18ee489023..000000000000
--- a/scripts/reinforcement_learning/rsl_rl/eval.py
+++ /dev/null
@@ -1,321 +0,0 @@
-# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
-# All rights reserved.
-#
-# SPDX-License-Identifier: BSD-3-Clause
-
-"""Script to evaluate a trained RSL-RL policy over N episodes and report success rate and metrics."""
-
-"""Launch Isaac Sim Simulator first."""
-
-import argparse
-import sys
-
-from isaaclab.app import AppLauncher
-
-# local imports
-import cli_args  # isort: skip
-
-# add argparse arguments
-parser = argparse.ArgumentParser(description="Evaluate a trained RSL-RL policy.")
-parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.")
-parser.add_argument("--task", type=str, default=None, help="Name of the task.")
-parser.add_argument(
-    "--agent", type=str, default="rsl_rl_cfg_entry_point", help="Name of the RL agent configuration entry point."
-)
-parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment.")
-parser.add_argument("--num_episodes", type=int, default=100, help="Total number of episodes to evaluate.")
-parser.add_argument(
-    "--success_term",
-    type=str,
-    default=None,
-    help=(
-        "Name of a termination term that indicates task success. "
-        "If not set, only timeout vs. early-termination statistics are reported."
-    ),
-)
-parser.add_argument(
-    "--use_pretrained_checkpoint",
-    action="store_true",
-    help="Use the pre-trained checkpoint from Nucleus.",
-)
-# append RSL-RL cli arguments
-cli_args.add_rsl_rl_args(parser)
-# append AppLauncher cli args
-AppLauncher.add_app_launcher_args(parser)
-args_cli, hydra_args = parser.parse_known_args()
-
-# clear out sys.argv for Hydra
-sys.argv = [sys.argv[0]] + hydra_args
-
-# launch omniverse app
-app_launcher = AppLauncher(args_cli)
-simulation_app = app_launcher.app
-
-"""Rest everything follows."""
-
-import os
-import time
-from collections import defaultdict
-
-import gymnasium as gym
-import torch
-from rsl_rl.runners import DistillationRunner, OnPolicyRunner
-
-from isaaclab.envs import (
-    DirectMARLEnv,
-    DirectMARLEnvCfg,
-    DirectRLEnvCfg,
-    ManagerBasedRLEnvCfg,
-    multi_agent_to_single_agent,
-)
-from isaaclab.utils.assets import retrieve_file_path
-
-from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper
-from isaaclab_rl.utils.pretrained_checkpoint import get_published_pretrained_checkpoint
-
-import isaaclab_tasks  # noqa: F401
-from isaaclab_tasks.utils import get_checkpoint_path
-from isaaclab_tasks.utils.hydra import hydra_task_config
-
-
-@hydra_task_config(args_cli.task, args_cli.agent)
-def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlBaseRunnerCfg):
-    """Evaluate a trained RSL-RL policy."""
-    # -- resolve task name and checkpoint -----------------------------------------------
-    task_name = args_cli.task.split(":")[-1]
-    train_task_name = task_name.replace("-Play", "")
-
-    agent_cfg: RslRlBaseRunnerCfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli)
-    env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs
-    env_cfg.seed = agent_cfg.seed
-    env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
-
-    log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name)
-    log_root_path = os.path.abspath(log_root_path)
-    print(f"[INFO] Loading experiment from directory: {log_root_path}")
-
-    if args_cli.use_pretrained_checkpoint:
-        resume_path = get_published_pretrained_checkpoint("rsl_rl", train_task_name)
-        if not resume_path:
-            print("[INFO] Unfortunately a pre-trained checkpoint is currently unavailable for this task.")
-            return
-    elif args_cli.checkpoint:
-        resume_path = retrieve_file_path(args_cli.checkpoint)
-    else:
-        resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint)
-
-    log_dir = os.path.dirname(resume_path)
-    env_cfg.log_dir = log_dir
-
-    # -- create environment -------------------------------------------------------------
-    env = gym.make(args_cli.task, cfg=env_cfg)
-
-    if isinstance(env.unwrapped, DirectMARLEnv):
-        env = multi_agent_to_single_agent(env)
-
-    env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)
-    num_envs = env.unwrapped.num_envs
-
-    # -- load policy --------------------------------------------------------------------
-    print(f"[INFO]: Loading model checkpoint from: {resume_path}")
-    if agent_cfg.class_name == "OnPolicyRunner":
-        runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
-    elif agent_cfg.class_name == "DistillationRunner":
-        runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
-    else:
-        raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}")
-    runner.load(resume_path)
-
-    policy = runner.get_inference_policy(device=env.unwrapped.device)
-    try:
-        policy_nn = runner.alg.policy
-    except AttributeError:
-        policy_nn = runner.alg.actor_critic
-
-    # -- resolve which termination terms exist ------------------------------------------
-    unwrapped = env.unwrapped
-    has_termination_manager = hasattr(unwrapped, "termination_manager")
-    term_names: list[str] = []
-    if has_termination_manager:
-        term_names = unwrapped.termination_manager.active_terms
-        print(f"[INFO] Termination terms: {term_names}")
-
-    success_term = args_cli.success_term
-    if success_term and success_term not in term_names:
-        print(
-            f"[WARN] --success_term '{success_term}' not found among termination terms {term_names}. "
-            "Success rate will not be tracked."
-        )
-        success_term = None
-
-    has_reward_manager = hasattr(unwrapped, "reward_manager")
-    reward_term_names: list[str] = []
-    if has_reward_manager:
-        reward_term_names = unwrapped.reward_manager.active_terms
-        print(f"[INFO] Reward terms: {reward_term_names}")
-
-    # -- tracking state -----------------------------------------------------------------
-    total_episodes_target = args_cli.num_episodes
-    completed_episodes = 0
-
-    # per-env accumulators (reset when that env's episode ends)
-    ep_returns = torch.zeros(num_envs, device=unwrapped.device)
-    ep_lengths = torch.zeros(num_envs, device=unwrapped.device, dtype=torch.long)
-    ep_reward_terms = {name: torch.zeros(num_envs, device=unwrapped.device) for name in reward_term_names}
-
-    # aggregate stats across finished episodes
-    all_returns: list[float] = []
-    all_lengths: list[int] = []
-    termination_counts: dict[str, int] = defaultdict(int)
-    timeout_count = 0
-    success_count = 0
-    reward_term_sums: dict[str, list[float]] = {name: [] for name in reward_term_names}
-
-    # extras-log keys seen across episodes (scalars injected by reward terms)
-    extras_log_sums: dict[str, list[float]] = defaultdict(list)
-
-    # -- evaluation loop ----------------------------------------------------------------
-    print(f"\n{'=' * 70}")
-    print(f"  Evaluating for {total_episodes_target} episodes  |  {num_envs} parallel envs")
-    print(f"{'=' * 70}\n")
-
-    obs = env.get_observations()
-    start_time = time.time()
-
-    while completed_episodes < total_episodes_target and simulation_app.is_running():
-        with torch.inference_mode():
-            actions = policy(obs)
-            obs, rewards, dones, extras = env.step(actions)
-            policy_nn.reset(dones)
-
-        ep_returns += rewards
-        ep_lengths += 1
-
-        # accumulate per-term rewards if the manager exposes step_reward
-        if has_reward_manager:
-            step_reward = unwrapped.reward_manager._step_reward  # (num_envs, num_terms)
-            for idx, name in enumerate(reward_term_names):
-                ep_reward_terms[name] += step_reward[:, idx]
-
-        # find which envs finished this step
-        done_mask = dones.bool()
-        done_ids = done_mask.nonzero(as_tuple=False).squeeze(-1)
-
-        if done_ids.numel() > 0:
-            for env_id in done_ids.tolist():
-                if completed_episodes >= total_episodes_target:
-                    break
-
-                # record episode stats
-                all_returns.append(ep_returns[env_id].item())
-                all_lengths.append(ep_lengths[env_id].item())
-
-                for name in reward_term_names:
-                    reward_term_sums[name].append(ep_reward_terms[name][env_id].item())
-
-                # determine termination cause
-                if has_termination_manager:
-                    for t_name in term_names:
-                        term_val = unwrapped.termination_manager.get_term(t_name)
-                        if term_val[env_id]:
-                            termination_counts[t_name] += 1
-
-                if extras.get("time_outs") is not None and extras["time_outs"][env_id]:
-                    timeout_count += 1
-
-                if success_term:
-                    term_val = unwrapped.termination_manager.get_term(success_term)
-                    if term_val[env_id]:
-                        success_count += 1
-
-                completed_episodes += 1
-
-                # reset per-env accumulators
-                ep_returns[env_id] = 0.0
-                ep_lengths[env_id] = 0
-                for name in reward_term_names:
-                    ep_reward_terms[name][env_id] = 0.0
-
-        # collect extras["log"] scalars (these are per-step averages across envs)
-        if "log" in extras:
-            for key, val in extras["log"].items():
-                if isinstance(val, (int, float)):
-                    extras_log_sums[key].append(val)
-
-        # periodic progress
-        if completed_episodes > 0 and completed_episodes % max(1, total_episodes_target // 10) == 0:
-            elapsed = time.time() - start_time
-            print(
-                f"  [{completed_episodes}/{total_episodes_target}] episodes  "
-                f"| mean return: {sum(all_returns) / len(all_returns):.3f}  "
-                f"| elapsed: {elapsed:.1f}s"
-            )
-
-    elapsed_total = time.time() - start_time
-
-    # -- report -------------------------------------------------------------------------
-    n = len(all_returns)
-    if n == 0:
-        print("[WARN] No episodes completed.")
-        env.close()
-        return
-
-    returns_t = torch.tensor(all_returns)
-    lengths_t = torch.tensor(all_lengths, dtype=torch.float)
-    step_dt = unwrapped.step_dt
-    max_ep_len = unwrapped.max_episode_length
-
-    print(f"\n{'=' * 70}")
-    print(f"  EVALUATION RESULTS  ({n} episodes, {elapsed_total:.1f}s)")
-    print(f"{'=' * 70}")
-    print(f"  Checkpoint : {resume_path}")
-    print(f"  Num envs   : {num_envs}")
-    print(f"  Step dt    : {step_dt:.4f}s   |  Max episode steps: {max_ep_len}")
-
-    print("\n  --- Episode Returns ---")
-    print(f"    Mean   : {returns_t.mean().item():.4f}")
-    print(f"    Std    : {returns_t.std().item():.4f}")
-    print(f"    Min    : {returns_t.min().item():.4f}")
-    print(f"    Max    : {returns_t.max().item():.4f}")
-
-    print("\n  --- Episode Length (steps) ---")
-    print(f"    Mean   : {lengths_t.mean().item():.1f}")
-    print(f"    Std    : {lengths_t.std().item():.1f}")
-    print(f"    Min    : {lengths_t.min().item():.0f}")
-    print(f"    Max    : {lengths_t.max().item():.0f}")
-
-    if success_term:
-        print(f"\n  --- Success Rate (term: '{success_term}') ---")
-        print(f"    Success: {success_count}/{n}  ({100.0 * success_count / n:.1f}%)")
-
-    print("\n  --- Termination Breakdown ---")
-    print(f"    Timeouts       : {timeout_count}/{n}  ({100.0 * timeout_count / n:.1f}%)")
-    for t_name in term_names:
-        cnt = termination_counts.get(t_name, 0)
-        print(f"    {t_name:30s}: {cnt}/{n}  ({100.0 * cnt / n:.1f}%)")
-
-    if reward_term_names:
-        print("\n  --- Mean Episodic Reward Terms ---")
-        for name in reward_term_names:
-            vals = reward_term_sums[name]
-            mean_val = sum(vals) / len(vals) if vals else 0.0
-            print(f"    {name:40s}: {mean_val:.6f}")
-
-    if extras_log_sums:
-        print("\n  --- Mean Extras/Log Metrics (per-step averages) ---")
-        for key in sorted(extras_log_sums.keys()):
-            # skip Episode_Reward/Episode_Termination keys (already covered above)
-            if key.startswith("Episode_Reward/") or key.startswith("Episode_Termination/"):
-                continue
-            vals = extras_log_sums[key]
-            mean_val = sum(vals) / len(vals) if vals else 0.0
-            print(f"    {key:50s}: {mean_val:.6f}")
-
-    print(f"\n{'=' * 70}\n")
-
-    env.close()
-
-
-if __name__ == "__main__":
-    main()
-    simulation_app.close()

From c6e2e77db074f470d8931129ad560ad761cedefe Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Mon, 6 Apr 2026 10:40:35 -0700
Subject: [PATCH 62/87] Disabled gravity on rizon 4s with grav.

---
 source/isaaclab_assets/isaaclab_assets/robots/flexiv.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py b/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
index 18adf4312022..43d98e434fce 100644
--- a/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
+++ b/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
@@ -87,7 +87,7 @@
     spawn=sim_utils.UsdFileCfg(
         usd_path=f"{ISAACLAB_NUCLEUS_DIR}/Robots/Flexiv/Rizon4s/rizon4s_with_grav.usd",
         rigid_props=sim_utils.RigidBodyPropertiesCfg(
-            disable_gravity=False,
+            disable_gravity=True,
             max_depenetration_velocity=5.0,
         ),
         articulation_props=sim_utils.ArticulationRootPropertiesCfg(

From 63f6c320124c618808991836dd34b9df5b7c3527 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Mon, 6 Apr 2026 10:55:00 -0700
Subject: [PATCH 63/87] Fixed PD values.

---
 .../isaaclab_assets/isaaclab_assets/robots/flexiv.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py b/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
index 43d98e434fce..4b134b43dcd0 100644
--- a/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
+++ b/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
@@ -118,8 +118,8 @@
             joint_names_expr=["joint[1-2]"],
             effort_limit_sim=123.0,
             velocity_limit_sim=2.094,
-            stiffness=1320.0,
-            damping=72.0,
+            stiffness=6000.0,
+            damping=108.5,
             friction=0.0,
             armature=0.0,
         ),
@@ -127,8 +127,8 @@
             joint_names_expr=["joint[3-4]"],
             effort_limit_sim=64.0,
             velocity_limit_sim=2.443,
-            stiffness=600.0,
-            damping=35.0,
+            stiffness=4200.0,
+            damping=90.7,
             friction=0.0,
             armature=0.0,
         ),
@@ -136,8 +136,8 @@
             joint_names_expr=["joint[5-7]"],
             effort_limit_sim=39.0,
             velocity_limit_sim=4.887,
-            stiffness=216.0,
-            damping=29.0,
+            stiffness=1500.0,
+            damping=54.2,
             friction=0.0,
             armature=0.0,
         ),

From 8c5b4a5027619b82ad7f88c914ce38744c6c6391 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Mon, 6 Apr 2026 10:58:23 -0700
Subject: [PATCH 64/87] Enabled action reward.

---
 .../manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index b696516f226b..5e5bef1daef8 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -290,7 +290,7 @@ class RewardsCfg:
     )
 
     action_rate = RewTerm(func=mdp.action_rate_l2, weight=-5.0e-06)
-    # action = RewTerm(func=mdp.action_l2, weight=-5.0e-06)
+    action = RewTerm(func=mdp.action_l2, weight=-5.0e-06)
 
 
 @configclass

From 5a1ccfee87d36e062941a11557938ccd994979dd Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Mon, 6 Apr 2026 11:08:34 -0700
Subject: [PATCH 65/87] Zero wt for ee vs gear base reward terms.

---
 .../deploy/gear_assembly/gear_assembly_env_cfg.py             | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index 5e5bef1daef8..f223bad8515e 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -265,7 +265,7 @@ class RewardsCfg:
 
     end_effector_base_keypoint_tracking = RewTerm(
         func=mdp.keypoint_ee_gear_error,
-        weight=-0.5,
+        weight=-0.0,
         params={
             "robot_asset_cfg": SceneEntityCfg("robot"),
             "keypoint_scale": 0.15,
@@ -277,7 +277,7 @@ class RewardsCfg:
 
     end_effector_base_keypoint_tracking_exp = RewTerm(
         func=mdp.keypoint_ee_gear_error_exp,
-        weight=0.5,
+        weight=0.0,
         params={
             "robot_asset_cfg": SceneEntityCfg("robot"),
             "kp_exp_coeffs": [(50, 0.0001), (300, 0.0001)],

From 795c4258973f5303baba7ba86d5533f77e4e5096 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Mon, 6 Apr 2026 11:11:03 -0700
Subject: [PATCH 66/87] Reverted PD values.

---
 .../isaaclab_assets/isaaclab_assets/robots/flexiv.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py b/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
index 4b134b43dcd0..43d98e434fce 100644
--- a/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
+++ b/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
@@ -118,8 +118,8 @@
             joint_names_expr=["joint[1-2]"],
             effort_limit_sim=123.0,
             velocity_limit_sim=2.094,
-            stiffness=6000.0,
-            damping=108.5,
+            stiffness=1320.0,
+            damping=72.0,
             friction=0.0,
             armature=0.0,
         ),
@@ -127,8 +127,8 @@
             joint_names_expr=["joint[3-4]"],
             effort_limit_sim=64.0,
             velocity_limit_sim=2.443,
-            stiffness=4200.0,
-            damping=90.7,
+            stiffness=600.0,
+            damping=35.0,
             friction=0.0,
             armature=0.0,
         ),
@@ -136,8 +136,8 @@
             joint_names_expr=["joint[5-7]"],
             effort_limit_sim=39.0,
             velocity_limit_sim=4.887,
-            stiffness=1500.0,
-            damping=54.2,
+            stiffness=216.0,
+            damping=29.0,
             friction=0.0,
             armature=0.0,
         ),

From 955727ab07ff5bf3dfae9a1fa82700eaff20efba Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Mon, 6 Apr 2026 11:12:22 -0700
Subject: [PATCH 67/87] Non-zero wts for ee vs gear base reward terms.

---
 .../deploy/gear_assembly/gear_assembly_env_cfg.py             | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index f223bad8515e..5e5bef1daef8 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -265,7 +265,7 @@ class RewardsCfg:
 
     end_effector_base_keypoint_tracking = RewTerm(
         func=mdp.keypoint_ee_gear_error,
-        weight=-0.0,
+        weight=-0.5,
         params={
             "robot_asset_cfg": SceneEntityCfg("robot"),
             "keypoint_scale": 0.15,
@@ -277,7 +277,7 @@ class RewardsCfg:
 
     end_effector_base_keypoint_tracking_exp = RewTerm(
         func=mdp.keypoint_ee_gear_error_exp,
-        weight=0.0,
+        weight=0.5,
         params={
             "robot_asset_cfg": SceneEntityCfg("robot"),
             "kp_exp_coeffs": [(50, 0.0001), (300, 0.0001)],

From ed163b71dbcde660fe76a7dbeec1a1671ddec7d5 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Tue, 7 Apr 2026 12:27:31 -0700
Subject: [PATCH 68/87] Debug tools by hardcoding.

---
 .../rsl_rl/play_hardcoded.py                  | 297 ++++++++++++++++++
 .../gear_assembly/config/rizon_4s/__init__.py |  11 +
 .../rizon_4s/hardcoded_inference_env_cfg.py   |  95 ++++++
 3 files changed, 403 insertions(+)
 create mode 100644 scripts/reinforcement_learning/rsl_rl/play_hardcoded.py
 create mode 100644 source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/hardcoded_inference_env_cfg.py

diff --git a/scripts/reinforcement_learning/rsl_rl/play_hardcoded.py b/scripts/reinforcement_learning/rsl_rl/play_hardcoded.py
new file mode 100644
index 000000000000..717ce3e4d09f
--- /dev/null
+++ b/scripts/reinforcement_learning/rsl_rl/play_hardcoded.py
@@ -0,0 +1,297 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""Script to run hardcoded / deterministic inference with an RSL-RL agent.
+
+This is a variant of ``play.py`` designed for the hardcoded-inference
+environment where all randomization is disabled.  The IK grasp-pose solver
+is still active so the robot properly grasps the gear; with every random
+input zeroed out the result is identical on every reset.
+
+Features on top of the standard play loop:
+
+1. **Observation overrides** – hardcode the shaft-position and/or shaft-
+   quaternion portions of the observation tensor so the policy always sees
+   fixed values regardless of the sim state.  Edit the constants in the
+   *OBSERVATION OVERRIDES* section below.
+
+2. **Debug prints** – the initial observation is printed broken down by
+   component so you can verify the values the policy will see.
+
+3. **Old checkpoint support** – automatically converts pre-v5 rsl-rl
+   checkpoints (single ``model_state_dict``) to the v5 split format.
+
+The gear type, gear base pose, and gear height are configured in the
+companion env-config file::
+
+    .../gear_assembly/config/rizon_4s/hardcoded_inference_env_cfg.py
+
+Usage::
+
+    python scripts/reinforcement_learning/rsl_rl/play_hardcoded.py \\
+        --num_envs 1 \\
+        --checkpoint logs/rsl_rl/gear_assembly/2026-03-13_16-28-11/model_500.pt \\
+        --visualizer kit
+"""
+
+import argparse
+import contextlib
+import importlib.metadata as metadata
+import os
+import sys
+import time
+
+import gymnasium as gym
+import torch
+from packaging import version
+from rsl_rl.runners import DistillationRunner, OnPolicyRunner
+
+from isaaclab.envs import DirectMARLEnvCfg, DirectRLEnvCfg, ManagerBasedRLEnvCfg
+from isaaclab.utils.assets import retrieve_file_path
+
+from isaaclab_rl.rsl_rl import (
+    RslRlBaseRunnerCfg,
+    RslRlVecEnvWrapper,
+    handle_deprecated_rsl_rl_cfg,
+)
+
+import isaaclab_tasks  # noqa: F401
+from isaaclab_tasks.utils import add_launcher_args, get_checkpoint_path, launch_simulation
+from isaaclab_tasks.utils.hydra import hydra_task_config
+
+# local imports
+import cli_args  # isort: skip
+
+# PLACEHOLDER: Extension template (do not remove this comment)
+with contextlib.suppress(ImportError):
+    import isaaclab_tasks_experimental  # noqa: F401
+
+
+# ╔══════════════════════════════════════════════════════════════════════════════╗
+# ║  OBSERVATION OVERRIDES                                                      ║
+# ║  Set a value to inject a fixed observation every step, or leave as None     ║
+# ║  to use the value computed from the simulation.                             ║
+# ║                                                                             ║
+# ║  Obs layout: [joint_pos(7) | joint_vel(7) | shaft_pos(3) | shaft_quat(4)]  ║
+# ╚══════════════════════════════════════════════════════════════════════════════╝
+
+OVERRIDE_SHAFT_POS = None   # e.g. [0.481, -0.073, -0.005]
+OVERRIDE_SHAFT_QUAT = None  # e.g. [0.0, 0.0, 0.70711, -0.70711]
+
+_SHAFT_POS_SLICE = slice(14, 17)
+_SHAFT_QUAT_SLICE = slice(17, 21)
+
+
+# -- argparse ----------------------------------------------------------------
+parser = argparse.ArgumentParser(description="Run hardcoded deterministic inference with RSL-RL.")
+parser.add_argument(
+    "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations."
+)
+parser.add_argument("--num_envs", type=int, default=1, help="Number of environments to simulate.")
+parser.add_argument(
+    "--task",
+    type=str,
+    default="Isaac-Deploy-GearAssembly-Rizon4s-Grav-Hardcoded-Inference-v0",
+    help="Name of the task.",
+)
+parser.add_argument(
+    "--agent", type=str, default="rsl_rl_cfg_entry_point", help="Name of the RL agent configuration entry point."
+)
+parser.add_argument("--seed", type=int, default=None, help="Seed used for the environment")
+parser.add_argument("--real-time", action="store_true", default=False, help="Run in real-time, if possible.")
+cli_args.add_rsl_rl_args(parser)
+add_launcher_args(parser)
+args_cli, remaining_args = parser.parse_known_args()
+
+# clear out sys.argv for Hydra
+sys.argv = [sys.argv[0]] + remaining_args
+
+installed_version = metadata.version("rsl-rl-lib")
+
+
+# -- old checkpoint conversion -----------------------------------------------
+_OLD_TO_NEW_KEY_MAP = {
+    # Actor MLP
+    "actor.": "mlp.",
+    # Critic MLP
+    "critic.": "mlp.",
+    # Actor RNN
+    "memory_a.rnn.": "rnn.rnn.",
+    # Critic RNN
+    "memory_c.rnn.": "rnn.rnn.",
+    # Actor obs normalizer
+    "actor_obs_normalizer.": "obs_normalizer.",
+    # Critic obs normalizer
+    "critic_obs_normalizer.": "obs_normalizer.",
+}
+
+_ACTOR_PREFIXES = ("actor.", "memory_a.", "actor_obs_normalizer.")
+_CRITIC_PREFIXES = ("critic.", "memory_c.", "critic_obs_normalizer.")
+
+
+def _convert_old_checkpoint(loaded_dict: dict) -> dict:
+    """Convert a pre-v5 rsl-rl checkpoint to the v5 format.
+
+    Old format:  single ``model_state_dict`` with prefixes like ``actor.*``,
+    ``memory_a.*``, ``actor_obs_normalizer.*``, etc.
+
+    New format:  separate ``actor_state_dict`` and ``critic_state_dict`` with
+    prefixes ``mlp.*``, ``rnn.rnn.*``, ``obs_normalizer.*``.
+    """
+    old_sd = loaded_dict["model_state_dict"]
+    actor_sd: dict[str, torch.Tensor] = {}
+    critic_sd: dict[str, torch.Tensor] = {}
+
+    for old_key, tensor in old_sd.items():
+        is_actor = old_key.startswith(_ACTOR_PREFIXES)
+        is_critic = old_key.startswith(_CRITIC_PREFIXES)
+
+        new_key = old_key
+        for old_prefix, new_prefix in _OLD_TO_NEW_KEY_MAP.items():
+            if old_key.startswith(old_prefix):
+                new_key = new_prefix + old_key[len(old_prefix):]
+                break
+
+        if is_actor:
+            actor_sd[new_key] = tensor
+        elif is_critic:
+            critic_sd[new_key] = tensor
+
+    print(f"[INFO] Converted old checkpoint: {len(actor_sd)} actor keys, {len(critic_sd)} critic keys")
+
+    return {
+        "actor_state_dict": actor_sd,
+        "critic_state_dict": critic_sd,
+        "optimizer_state_dict": loaded_dict.get("optimizer_state_dict", {}),
+        "iter": loaded_dict.get("iter", 0),
+        "infos": loaded_dict.get("infos"),
+    }
+
+
+def _load_checkpoint(runner, resume_path: str):
+    """Load a checkpoint, automatically converting old (pre-v5) formats."""
+    loaded_dict = torch.load(resume_path, weights_only=False)
+    if "model_state_dict" in loaded_dict and "actor_state_dict" not in loaded_dict:
+        print("[INFO] Detected old (pre-v5) rsl-rl checkpoint format — converting …")
+        loaded_dict = _convert_old_checkpoint(loaded_dict)
+    load_cfg = {"actor": True, "critic": True, "optimizer": False, "iteration": True}
+    runner.alg.load(loaded_dict, load_cfg, strict=True)
+    if load_cfg.get("iteration") and "iter" in loaded_dict:
+        runner.current_learning_iteration = loaded_dict["iter"]
+
+
+def _get_policy_tensor(obs):
+    """Extract the flat policy tensor from obs (TensorDict or plain Tensor)."""
+    if isinstance(obs, torch.Tensor):
+        return obs
+    # TensorDict — the wrapper stores the concatenated obs under the "policy" key
+    return obs["policy"]
+
+
+def _apply_obs_overrides(obs):
+    """Optionally replace shaft_pos / shaft_quat in the observation."""
+    t = _get_policy_tensor(obs)
+    if OVERRIDE_SHAFT_POS is not None:
+        t[:, _SHAFT_POS_SLICE] = torch.tensor(OVERRIDE_SHAFT_POS, device=t.device, dtype=t.dtype)
+    if OVERRIDE_SHAFT_QUAT is not None:
+        t[:, _SHAFT_QUAT_SLICE] = torch.tensor(OVERRIDE_SHAFT_QUAT, device=t.device, dtype=t.dtype)
+    return obs
+
+
+def _print_obs(obs, label: str = "Observation"):
+    """Print observation components for the first environment."""
+    t = _get_policy_tensor(obs)
+    o = t[0]
+    print(f"\n[{label}]")
+    print(f"  joint_pos  : {o[0:7].tolist()}")
+    print(f"  joint_vel  : {o[7:14].tolist()}")
+    print(f"  shaft_pos  : {o[14:17].tolist()}")
+    print(f"  shaft_quat : {o[17:21].tolist()}")
+
+
+@hydra_task_config(args_cli.task, args_cli.agent)
+def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlBaseRunnerCfg):
+    """Run hardcoded inference with RSL-RL agent."""
+    with launch_simulation(env_cfg, args_cli):
+        # -- configure -------------------------------------------------------
+        agent_cfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli)
+        env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs
+        agent_cfg = handle_deprecated_rsl_rl_cfg(agent_cfg, installed_version)
+
+        env_cfg.seed = agent_cfg.seed
+        env_cfg.sim.device = args_cli.device if args_cli.device is not None else env_cfg.sim.device
+
+        # -- resolve checkpoint ----------------------------------------------
+        log_root_path = os.path.join("logs", "rsl_rl", agent_cfg.experiment_name)
+        log_root_path = os.path.abspath(log_root_path)
+        print(f"[INFO] Loading experiment from directory: {log_root_path}")
+
+        if args_cli.checkpoint:
+            resume_path = retrieve_file_path(args_cli.checkpoint)
+        else:
+            resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint)
+
+        log_dir = os.path.dirname(resume_path)
+        env_cfg.log_dir = log_dir
+
+        # -- create environment ----------------------------------------------
+        env = gym.make(args_cli.task, cfg=env_cfg)
+        if isinstance(env.unwrapped.cfg, DirectMARLEnvCfg):
+            from isaaclab.envs import multi_agent_to_single_agent
+
+            env = multi_agent_to_single_agent(env)
+        env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions)
+
+        # -- load policy -----------------------------------------------------
+        print(f"[INFO]: Loading model checkpoint from: {resume_path}")
+        if agent_cfg.class_name == "OnPolicyRunner":
+            runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
+        elif agent_cfg.class_name == "DistillationRunner":
+            runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
+        else:
+            raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}")
+        _load_checkpoint(runner, resume_path)
+
+        policy = runner.get_inference_policy(device=env.unwrapped.device)
+
+        if version.parse(installed_version) >= version.parse("5.0.0"):
+            policy_nn = runner.alg.actor
+        elif version.parse(installed_version) >= version.parse("2.3.0"):
+            policy_nn = runner.alg.policy
+        else:
+            policy_nn = runner.alg.actor_critic
+
+        dt = env.unwrapped.step_dt
+
+        # -- initial observation ---------------------------------------------
+        obs = env.get_observations()
+        obs = _apply_obs_overrides(obs)
+        _print_obs(obs, label="Initial observation (after overrides)")
+
+        # -- inference loop --------------------------------------------------
+        try:
+            while True:
+                start_time = time.time()
+                with torch.inference_mode():
+                    actions = policy(obs)
+                    obs, _, dones, _ = env.step(actions)
+                    obs = _apply_obs_overrides(obs)
+
+                    if version.parse(installed_version) >= version.parse("4.0.0"):
+                        policy.reset(dones)
+                    else:
+                        policy_nn.reset(dones)
+
+                sleep_time = dt - (time.time() - start_time)
+                if args_cli.real_time and sleep_time > 0:
+                    time.sleep(sleep_time)
+
+            env.close()
+        except KeyboardInterrupt:
+            pass
+
+
+if __name__ == "__main__":
+    main()
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/__init__.py
index 8ed735d68b9d..3787151df261 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/__init__.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/__init__.py
@@ -42,3 +42,14 @@
         "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:Rizon4sGearAssemblyRNNPPORunnerCfg",
     },
 )
+
+# Flexiv Rizon 4s - Hardcoded Inference (no randomization, deterministic)
+gym.register(
+    id="Isaac-Deploy-GearAssembly-Rizon4s-Grav-Hardcoded-Inference-v0",
+    entry_point="isaaclab.envs:ManagerBasedRLEnv",
+    disable_env_checker=True,
+    kwargs={
+        "env_cfg_entry_point": f"{__name__}.hardcoded_inference_env_cfg:Rizon4sGearAssemblyHardcodedInferenceEnvCfg",
+        "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:Rizon4sGearAssemblyRNNPPORunnerCfg",
+    },
+)
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/hardcoded_inference_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/hardcoded_inference_env_cfg.py
new file mode 100644
index 000000000000..759d9f34d2cc
--- /dev/null
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/hardcoded_inference_env_cfg.py
@@ -0,0 +1,95 @@
+# Copyright (c) 2025-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""Hardcoded deterministic inference configuration for Flexiv Rizon 4s with Grav gripper.
+
+Edit the constants below to match your real-world setup. All randomization is
+disabled and observation noise is turned off so the simulation is fully
+deterministic across resets.
+
+The IK grasp-pose solver (``set_robot_to_grasp_pose``) is kept so the robot
+properly grasps the gear. With all randomization zeroed out the IK solution
+is identical on every reset.
+
+To additionally override the robot's joint positions *after* IK, set
+``OVERRIDE_ROBOT_JOINT_POS`` in ``play_hardcoded.py``.
+"""
+
+from isaaclab.assets import RigidObjectCfg
+from isaaclab.utils import configclass
+
+from .ros_inference_env_cfg import Rizon4sGearAssemblyROSInferenceEnvCfg
+
+# ╔══════════════════════════════════════════════════════════════════════════════╗
+# ║  HARDCODED VALUES — Edit these to match your desired setup                 ║
+# ╚══════════════════════════════════════════════════════════════════════════════╝
+
+# Which gear the robot is grasping: "gear_small", "gear_medium", or "gear_large"
+GEAR_TYPE = "gear_large"
+
+# Gear base position (x, y, z) in meters, relative to env origin
+GEAR_BASE_POS = (0.481, -0.073, -0.005)
+
+# Gear base orientation as quaternion (x, y, z, w)
+GEAR_BASE_ROT = (0.0, 0.0, 0.70711, -0.70711)
+
+# Height of the grasped gear above the gear base (world-Z offset in meters).
+# Training midpoint is 0.0675 from range [0.0575, 0.0775].
+GEAR_Z_OFFSET = 0.0675
+
+# ╔══════════════════════════════════════════════════════════════════════════════╗
+# ║  Environment configuration                                                 ║
+# ╚══════════════════════════════════════════════════════════════════════════════╝
+
+
+@configclass
+class Rizon4sGearAssemblyHardcodedInferenceEnvCfg(Rizon4sGearAssemblyROSInferenceEnvCfg):
+    """Deterministic inference config — hardcoded poses, no randomization, no obs noise.
+
+    Inherits the full ROS-inference configuration and then:
+      1. Fixes the gear type (no random selection).
+      2. Zeros out all pose/orientation randomization ranges.
+      3. Sets gear-base and individual-gear initial states from the constants above.
+      4. Disables observation noise (``enable_corruption = False``).
+
+    The IK grasp-pose solver is kept from the parent so the robot properly
+    reaches and grasps the gear each reset.  Since all inputs to IK are
+    deterministic the resulting joint positions are the same every time.
+    """
+
+    def __post_init__(self):
+        super().__post_init__()
+
+        # ── 1. Fix gear type ──────────────────────────────────────────
+        self.events.randomize_gear_type.params["gear_types"] = [GEAR_TYPE]
+
+        # ── 2. Zero out all pose randomization ────────────────────────
+        self.events.randomize_gears_and_base_pose.params["pose_range"] = {
+            "x": [0.0, 0.0],
+            "y": [0.0, 0.0],
+            "z": [0.0, 0.0],
+            "roll": [0.0, 0.0],
+            "pitch": [0.0, 0.0],
+            "yaw": [0.0, 0.0],
+        }
+        self.events.randomize_gears_and_base_pose.params["gear_pos_range"] = {
+            "x": [0.0, 0.0],
+            "y": [0.0, 0.0],
+            "z": [GEAR_Z_OFFSET, GEAR_Z_OFFSET],
+        }
+
+        # ── 3. Hardcoded gear base and gear positions ─────────────────
+        self.scene.factory_gear_base.init_state = RigidObjectCfg.InitialStateCfg(
+            pos=GEAR_BASE_POS,
+            rot=GEAR_BASE_ROT,
+        )
+        for attr in ("factory_gear_small", "factory_gear_medium", "factory_gear_large"):
+            getattr(self.scene, attr).init_state = RigidObjectCfg.InitialStateCfg(
+                pos=GEAR_BASE_POS,
+                rot=GEAR_BASE_ROT,
+            )
+
+        # ── 4. Disable observation noise ──────────────────────────────
+        self.observations.policy.enable_corruption = False

From 522aa75a8c265f1690bff3a7dcb377e5ad557624 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Tue, 7 Apr 2026 15:26:31 -0700
Subject: [PATCH 69/87] Fixed decimation.

---
 .../deploy/gear_assembly/gear_assembly_env_cfg.py             | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index 5e5bef1daef8..a6823115d0f5 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -341,9 +341,9 @@ def __post_init__(self):
         self.episode_length_s = 6.66
         self.viewer.eye = (3.5, 3.5, 3.5)
         # simulation settings
-        self.decimation = 33
+        self.decimation = 4
         self.sim.render_interval = self.decimation
-        self.sim.dt = 1.0 / 1000.0
+        self.sim.dt = 1.0 / 120.0
 
         self.gear_offsets = {
             "gear_small": [0.076125, 0.0, 0.0],

From 32d2eb47d665f9fc9797084ce13b9dea1f60621e Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Tue, 7 Apr 2026 15:34:53 -0700
Subject: [PATCH 70/87] Zero wt for ee vs gear base reward terms.

---
 .../deploy/gear_assembly/gear_assembly_env_cfg.py             | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index a6823115d0f5..c209fe40add6 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -265,7 +265,7 @@ class RewardsCfg:
 
     end_effector_base_keypoint_tracking = RewTerm(
         func=mdp.keypoint_ee_gear_error,
-        weight=-0.5,
+        weight=-0.0,
         params={
             "robot_asset_cfg": SceneEntityCfg("robot"),
             "keypoint_scale": 0.15,
@@ -277,7 +277,7 @@ class RewardsCfg:
 
     end_effector_base_keypoint_tracking_exp = RewTerm(
         func=mdp.keypoint_ee_gear_error_exp,
-        weight=0.5,
+        weight=0.0,
         params={
             "robot_asset_cfg": SceneEntityCfg("robot"),
             "kp_exp_coeffs": [(50, 0.0001), (300, 0.0001)],

From 7f3238a7caa6daf6372fd94d1ae23e3530dddac4 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Tue, 7 Apr 2026 15:37:02 -0700
Subject: [PATCH 71/87] Enabled gravity on robot and non-zero wts for ee vs
 gear base reward terms.

---
 source/isaaclab_assets/isaaclab_assets/robots/flexiv.py     | 2 +-
 .../deploy/gear_assembly/gear_assembly_env_cfg.py           | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py b/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
index 43d98e434fce..18adf4312022 100644
--- a/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
+++ b/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
@@ -87,7 +87,7 @@
     spawn=sim_utils.UsdFileCfg(
         usd_path=f"{ISAACLAB_NUCLEUS_DIR}/Robots/Flexiv/Rizon4s/rizon4s_with_grav.usd",
         rigid_props=sim_utils.RigidBodyPropertiesCfg(
-            disable_gravity=True,
+            disable_gravity=False,
             max_depenetration_velocity=5.0,
         ),
         articulation_props=sim_utils.ArticulationRootPropertiesCfg(
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index c209fe40add6..b645a13c1841 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -265,7 +265,7 @@ class RewardsCfg:
 
     end_effector_base_keypoint_tracking = RewTerm(
         func=mdp.keypoint_ee_gear_error,
-        weight=-0.0,
+        weight=-0.5,
         params={
             "robot_asset_cfg": SceneEntityCfg("robot"),
             "keypoint_scale": 0.15,
@@ -277,7 +277,7 @@ class RewardsCfg:
 
     end_effector_base_keypoint_tracking_exp = RewTerm(
         func=mdp.keypoint_ee_gear_error_exp,
-        weight=0.0,
+        weight=0.5,
         params={
             "robot_asset_cfg": SceneEntityCfg("robot"),
             "kp_exp_coeffs": [(50, 0.0001), (300, 0.0001)],
@@ -290,7 +290,7 @@ class RewardsCfg:
     )
 
     action_rate = RewTerm(func=mdp.action_rate_l2, weight=-5.0e-06)
-    action = RewTerm(func=mdp.action_l2, weight=-5.0e-06)
+    # action = RewTerm(func=mdp.action_l2, weight=-5.0e-06)
 
 
 @configclass

From 3aee504c8b8207c8315d5af3b22408816299caea Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Tue, 7 Apr 2026 15:39:36 -0700
Subject: [PATCH 72/87] Fixed decimation.

---
 .../isaaclab_assets/isaaclab_assets/robots/flexiv.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py b/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
index 18adf4312022..2a7ad1f1a7ca 100644
--- a/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
+++ b/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
@@ -118,8 +118,8 @@
             joint_names_expr=["joint[1-2]"],
             effort_limit_sim=123.0,
             velocity_limit_sim=2.094,
-            stiffness=1320.0,
-            damping=72.0,
+            stiffness=6000.0,
+            damping=108.5,
             friction=0.0,
             armature=0.0,
         ),
@@ -127,8 +127,8 @@
             joint_names_expr=["joint[3-4]"],
             effort_limit_sim=64.0,
             velocity_limit_sim=2.443,
-            stiffness=600.0,
-            damping=35.0,
+            stiffness=4200.0,
+            damping=90.7,
             friction=0.0,
             armature=0.0,
         ),
@@ -136,8 +136,8 @@
             joint_names_expr=["joint[5-7]"],
             effort_limit_sim=39.0,
             velocity_limit_sim=4.887,
-            stiffness=216.0,
-            damping=29.0,
+            stiffness=1500.0,
+            damping=54.2,
             friction=0.0,
             armature=0.0,
         ),

From 45f98c0e59ab836850e85c8c0f3bfc13fdfb3b64 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Tue, 7 Apr 2026 15:41:14 -0700
Subject: [PATCH 73/87] Disabled gravity on robot.

---
 source/isaaclab_assets/isaaclab_assets/robots/flexiv.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py b/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
index 2a7ad1f1a7ca..4b134b43dcd0 100644
--- a/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
+++ b/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
@@ -87,7 +87,7 @@
     spawn=sim_utils.UsdFileCfg(
         usd_path=f"{ISAACLAB_NUCLEUS_DIR}/Robots/Flexiv/Rizon4s/rizon4s_with_grav.usd",
         rigid_props=sim_utils.RigidBodyPropertiesCfg(
-            disable_gravity=False,
+            disable_gravity=True,
             max_depenetration_velocity=5.0,
         ),
         articulation_props=sim_utils.ArticulationRootPropertiesCfg(

From 2e6d49b845e997b74bcee4741b1b9045f4835a6a Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Tue, 7 Apr 2026 16:06:24 -0700
Subject: [PATCH 74/87] Added action reward.

---
 .../manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index b645a13c1841..a6823115d0f5 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -290,7 +290,7 @@ class RewardsCfg:
     )
 
     action_rate = RewTerm(func=mdp.action_rate_l2, weight=-5.0e-06)
-    # action = RewTerm(func=mdp.action_l2, weight=-5.0e-06)
+    action = RewTerm(func=mdp.action_l2, weight=-5.0e-06)
 
 
 @configclass

From 8f81564b0d8dfefad930d851731e7ce7bc77892f Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Tue, 7 Apr 2026 16:08:35 -0700
Subject: [PATCH 75/87] Zero wt for ee vs gear base reward terms.

---
 .../deploy/gear_assembly/gear_assembly_env_cfg.py             | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index a6823115d0f5..c209fe40add6 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -265,7 +265,7 @@ class RewardsCfg:
 
     end_effector_base_keypoint_tracking = RewTerm(
         func=mdp.keypoint_ee_gear_error,
-        weight=-0.5,
+        weight=-0.0,
         params={
             "robot_asset_cfg": SceneEntityCfg("robot"),
             "keypoint_scale": 0.15,
@@ -277,7 +277,7 @@ class RewardsCfg:
 
     end_effector_base_keypoint_tracking_exp = RewTerm(
         func=mdp.keypoint_ee_gear_error_exp,
-        weight=0.5,
+        weight=0.0,
         params={
             "robot_asset_cfg": SceneEntityCfg("robot"),
             "kp_exp_coeffs": [(50, 0.0001), (300, 0.0001)],

From 37fbe5fe4a50291a2697188abcf9c0f2ae7b64d9 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Wed, 8 Apr 2026 11:57:49 -0700
Subject: [PATCH 76/87] Reverted PD values, removed action reward and enabled
 ee vs gear base reward term.

---
 .../isaaclab_assets/isaaclab_assets/robots/flexiv.py | 12 ++++++------
 .../deploy/gear_assembly/gear_assembly_env_cfg.py    |  6 +++---
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py b/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
index 4b134b43dcd0..43d98e434fce 100644
--- a/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
+++ b/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
@@ -118,8 +118,8 @@
             joint_names_expr=["joint[1-2]"],
             effort_limit_sim=123.0,
             velocity_limit_sim=2.094,
-            stiffness=6000.0,
-            damping=108.5,
+            stiffness=1320.0,
+            damping=72.0,
             friction=0.0,
             armature=0.0,
         ),
@@ -127,8 +127,8 @@
             joint_names_expr=["joint[3-4]"],
             effort_limit_sim=64.0,
             velocity_limit_sim=2.443,
-            stiffness=4200.0,
-            damping=90.7,
+            stiffness=600.0,
+            damping=35.0,
             friction=0.0,
             armature=0.0,
         ),
@@ -136,8 +136,8 @@
             joint_names_expr=["joint[5-7]"],
             effort_limit_sim=39.0,
             velocity_limit_sim=4.887,
-            stiffness=1500.0,
-            damping=54.2,
+            stiffness=216.0,
+            damping=29.0,
             friction=0.0,
             armature=0.0,
         ),
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index c209fe40add6..b645a13c1841 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -265,7 +265,7 @@ class RewardsCfg:
 
     end_effector_base_keypoint_tracking = RewTerm(
         func=mdp.keypoint_ee_gear_error,
-        weight=-0.0,
+        weight=-0.5,
         params={
             "robot_asset_cfg": SceneEntityCfg("robot"),
             "keypoint_scale": 0.15,
@@ -277,7 +277,7 @@ class RewardsCfg:
 
     end_effector_base_keypoint_tracking_exp = RewTerm(
         func=mdp.keypoint_ee_gear_error_exp,
-        weight=0.0,
+        weight=0.5,
         params={
             "robot_asset_cfg": SceneEntityCfg("robot"),
             "kp_exp_coeffs": [(50, 0.0001), (300, 0.0001)],
@@ -290,7 +290,7 @@ class RewardsCfg:
     )
 
     action_rate = RewTerm(func=mdp.action_rate_l2, weight=-5.0e-06)
-    action = RewTerm(func=mdp.action_l2, weight=-5.0e-06)
+    # action = RewTerm(func=mdp.action_l2, weight=-5.0e-06)
 
 
 @configclass

From a0cc855a363cd07448bb6bbcf240bc8d4c188222 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Wed, 8 Apr 2026 16:52:58 -0700
Subject: [PATCH 77/87] Adding observation noise to quaternion/rotation.

---
 .../isaaclab_assets/robots/flexiv.py          |  2 +-
 .../gear_assembly/gear_assembly_env_cfg.py    | 14 +++-
 .../manipulation/deploy/mdp/noise_models.py   | 84 ++++++++++++++++++-
 3 files changed, 95 insertions(+), 5 deletions(-)

diff --git a/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py b/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
index 43d98e434fce..18adf4312022 100644
--- a/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
+++ b/source/isaaclab_assets/isaaclab_assets/robots/flexiv.py
@@ -87,7 +87,7 @@
     spawn=sim_utils.UsdFileCfg(
         usd_path=f"{ISAACLAB_NUCLEUS_DIR}/Robots/Flexiv/Rizon4s/rizon4s_with_grav.usd",
         rigid_props=sim_utils.RigidBodyPropertiesCfg(
-            disable_gravity=True,
+            disable_gravity=False,
             max_depenetration_velocity=5.0,
         ),
         articulation_props=sim_utils.ArticulationRootPropertiesCfg(
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index b645a13c1841..eb33b8bbbc8f 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -26,7 +26,10 @@
 
 import isaaclab_tasks.manager_based.manipulation.deploy.mdp as mdp
 import isaaclab_tasks.manager_based.manipulation.deploy.mdp.terminations as gear_assembly_terminations
-from isaaclab_tasks.manager_based.manipulation.deploy.mdp.noise_models import ResetSampledConstantNoiseModelCfg
+from isaaclab_tasks.manager_based.manipulation.deploy.mdp.noise_models import (
+    ResetSampledConstantNoiseModelCfg,
+    ResetSampledQuaternionNoiseModelCfg,
+)
 
 # Get the directory where this configuration file is located
 CONFIG_DIR = os.path.dirname(os.path.abspath(__file__))
@@ -194,7 +197,14 @@ class PolicyCfg(ObsGroup):
                 noise_cfg=UniformNoiseCfg(n_min=-0.01, n_max=0.01, operation="add")
             ),
         )
-        gear_shaft_quat = ObsTerm(func=mdp.gear_shaft_quat_w)
+        gear_shaft_quat = ObsTerm(
+            func=mdp.gear_shaft_quat_w,
+            noise=ResetSampledQuaternionNoiseModelCfg(
+                roll_range=(-0.0349, 0.0349),
+                pitch_range=(-0.0349, 0.0349),
+                yaw_range=(-0.0349, 0.0349),
+            ),
+        )
 
         def __post_init__(self):
             self.enable_corruption = True
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/noise_models.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/noise_models.py
index 2d5411e96977..a980979c75aa 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/noise_models.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/noise_models.py
@@ -7,7 +7,12 @@
 
 from __future__ import annotations
 
-__all__ = ["ResetSampledConstantNoiseModel", "ResetSampledConstantNoiseModelCfg"]
+__all__ = [
+    "ResetSampledConstantNoiseModel",
+    "ResetSampledConstantNoiseModelCfg",
+    "ResetSampledQuaternionNoiseModel",
+    "ResetSampledQuaternionNoiseModelCfg",
+]
 
 from collections.abc import Sequence
 from dataclasses import MISSING
@@ -16,7 +21,8 @@
 import torch
 
 from isaaclab.utils import configclass
-from isaaclab.utils.noise import NoiseModel, NoiseModelCfg
+from isaaclab.utils.math import quat_from_euler_xyz, quat_mul
+from isaaclab.utils.noise import ConstantNoiseCfg, NoiseModel, NoiseModelCfg
 
 if TYPE_CHECKING:
     from isaaclab.utils.noise import NoiseCfg
@@ -107,3 +113,77 @@ class ResetSampledConstantNoiseModelCfg(NoiseModelCfg):
 
     Based on this configuration, the noise is sampled at every reset of the noise model.
     """
+
+
+class ResetSampledQuaternionNoiseModel(NoiseModel):
+    """Noise model that applies a rotation perturbation to quaternion observations.
+
+    At each episode reset, small Euler angle perturbations (roll, pitch, yaw) are sampled
+    uniformly from configurable ranges and converted to a perturbation quaternion. This
+    perturbation is then applied via quaternion multiplication at every step, producing a
+    geometrically valid rotated quaternion (unlike additive noise on raw components).
+
+    The perturbation is held constant for the entire episode until the next reset.
+    """
+
+    def __init__(self, noise_model_cfg: NoiseModelCfg, num_envs: int, device: str):
+        super().__init__(noise_model_cfg, num_envs, device)
+        self._roll_range = noise_model_cfg.roll_range
+        self._pitch_range = noise_model_cfg.pitch_range
+        self._yaw_range = noise_model_cfg.yaw_range
+        # Identity quaternion in (x, y, z, w) format
+        self._perturbation_quat = torch.zeros((num_envs, 4), device=device)
+        self._perturbation_quat[:, 3] = 1.0
+
+    def reset(self, env_ids: Sequence[int] | None = None):
+        """Sample new rotation perturbations for the specified environments.
+
+        Args:
+            env_ids: The environment ids to reset. Defaults to None (all environments).
+        """
+        if env_ids is None:
+            env_ids = slice(None)
+
+        num_resets = env_ids.stop - env_ids.start if isinstance(env_ids, slice) else len(env_ids)
+
+        roll = torch.empty(num_resets, device=self._device).uniform_(*self._roll_range)
+        pitch = torch.empty(num_resets, device=self._device).uniform_(*self._pitch_range)
+        yaw = torch.empty(num_resets, device=self._device).uniform_(*self._yaw_range)
+
+        self._perturbation_quat[env_ids] = quat_from_euler_xyz(roll, pitch, yaw)
+
+    def __call__(self, data: torch.Tensor) -> torch.Tensor:
+        """Apply the pre-sampled rotation perturbation to the quaternion data.
+
+        Args:
+            data: Quaternion observations in (x, y, z, w) format. Shape is (num_envs, 4).
+
+        Returns:
+            Perturbed quaternions in (x, y, z, w) format. Shape is (num_envs, 4).
+        """
+        return quat_mul(self._perturbation_quat, data)
+
+
+@configclass
+class ResetSampledQuaternionNoiseModelCfg(NoiseModelCfg):
+    """Configuration for a quaternion noise model that samples rotation perturbations at reset.
+
+    The perturbation is specified as independent uniform ranges for roll, pitch, and yaw
+    (in radians). At each episode reset, Euler angles are sampled and converted to a
+    perturbation quaternion that is multiplied with the observed quaternion.
+    """
+
+    class_type: type = ResetSampledQuaternionNoiseModel
+
+    noise_cfg: ConstantNoiseCfg = ConstantNoiseCfg(bias=0.0)
+    """Unused placeholder inherited from NoiseModelCfg. Quaternion perturbation is
+    controlled by roll_range, pitch_range, and yaw_range instead."""
+
+    roll_range: tuple[float, float] = (-0.0349, 0.0349)
+    """Uniform range for roll perturbation in radians. Default is ~2 degrees."""
+
+    pitch_range: tuple[float, float] = (-0.0349, 0.0349)
+    """Uniform range for pitch perturbation in radians. Default is ~2 degrees."""
+
+    yaw_range: tuple[float, float] = (-0.0349, 0.0349)
+    """Uniform range for yaw perturbation in radians. Default is ~2 degrees."""

From d5075fd41a9ee15dd42f427886257287d5003cdf Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Thu, 9 Apr 2026 13:55:40 -0700
Subject: [PATCH 78/87] Reduced rotation obs noise from 2 to 1 degree.

---
 .../deploy/gear_assembly/gear_assembly_env_cfg.py           | 6 +++---
 .../manager_based/manipulation/deploy/mdp/noise_models.py   | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index eb33b8bbbc8f..5848e68e74c2 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -200,9 +200,9 @@ class PolicyCfg(ObsGroup):
         gear_shaft_quat = ObsTerm(
             func=mdp.gear_shaft_quat_w,
             noise=ResetSampledQuaternionNoiseModelCfg(
-                roll_range=(-0.0349, 0.0349),
-                pitch_range=(-0.0349, 0.0349),
-                yaw_range=(-0.0349, 0.0349),
+                roll_range=(-0.01745, 0.01745),
+                pitch_range=(-0.01745, 0.01745),
+                yaw_range=(-0.01745, 0.01745),
             ),
         )
 
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/noise_models.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/noise_models.py
index a980979c75aa..0aa6cba17e8d 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/noise_models.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/noise_models.py
@@ -179,11 +179,11 @@ class ResetSampledQuaternionNoiseModelCfg(NoiseModelCfg):
     """Unused placeholder inherited from NoiseModelCfg. Quaternion perturbation is
     controlled by roll_range, pitch_range, and yaw_range instead."""
 
-    roll_range: tuple[float, float] = (-0.0349, 0.0349)
+    roll_range: tuple[float, float] = (-0.01745, 0.01745)
     """Uniform range for roll perturbation in radians. Default is ~2 degrees."""
 
-    pitch_range: tuple[float, float] = (-0.0349, 0.0349)
+    pitch_range: tuple[float, float] = (-0.01745, 0.01745)
     """Uniform range for pitch perturbation in radians. Default is ~2 degrees."""
 
-    yaw_range: tuple[float, float] = (-0.0349, 0.0349)
+    yaw_range: tuple[float, float] = (-0.01745, 0.01745)
     """Uniform range for yaw perturbation in radians. Default is ~2 degrees."""

From fffb38699e872bac4af4f42657ca84861f62091b Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Thu, 9 Apr 2026 16:17:31 -0700
Subject: [PATCH 79/87] Fixed ee vs gear base scheduling.

---
 .../deploy/gear_assembly/gear_assembly_env_cfg.py             | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index 5848e68e74c2..910945a83c3e 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -281,7 +281,7 @@ class RewardsCfg:
             "keypoint_scale": 0.15,
             "ee_gear_threshold": 0.00,
             "weight_ramp_start": 0.0,  # Set to 0.0 to enable ramp-up
-            "weight_ramp_steps": 250_000,
+            "weight_ramp_steps": 512_000,
         },
     )
 
@@ -295,7 +295,7 @@ class RewardsCfg:
             "keypoint_scale": 0.15,
             "ee_gear_threshold": 0.00,
             "weight_ramp_start": 0.0,  # Set to 0.0 to enable ramp-up
-            "weight_ramp_steps": 250_000,
+            "weight_ramp_steps": 512_000,
         },
     )
 

From 4dfee2dc03758e58c5d65a0390623c371894a02a Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Thu, 9 Apr 2026 16:38:44 -0700
Subject: [PATCH 80/87] Zero noise in rotation obs.

---
 .../deploy/gear_assembly/gear_assembly_env_cfg.py    |  6 +++---
 .../manipulation/deploy/mdp/noise_models.py          | 12 ++++++------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index 910945a83c3e..eb5a6b98bce1 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -200,9 +200,9 @@ class PolicyCfg(ObsGroup):
         gear_shaft_quat = ObsTerm(
             func=mdp.gear_shaft_quat_w,
             noise=ResetSampledQuaternionNoiseModelCfg(
-                roll_range=(-0.01745, 0.01745),
-                pitch_range=(-0.01745, 0.01745),
-                yaw_range=(-0.01745, 0.01745),
+                roll_range=(0.0, 0.0),
+                pitch_range=(0.0, 0.0),
+                yaw_range=(0.0, 0.0),
             ),
         )
 
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/noise_models.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/noise_models.py
index 0aa6cba17e8d..8af793708095 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/noise_models.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/noise_models.py
@@ -179,11 +179,11 @@ class ResetSampledQuaternionNoiseModelCfg(NoiseModelCfg):
     """Unused placeholder inherited from NoiseModelCfg. Quaternion perturbation is
     controlled by roll_range, pitch_range, and yaw_range instead."""
 
-    roll_range: tuple[float, float] = (-0.01745, 0.01745)
-    """Uniform range for roll perturbation in radians. Default is ~2 degrees."""
+    roll_range: tuple[float, float] = (0.0, 0.0)
+    """Uniform range for roll perturbation in radians. Default is 0 (no noise)."""
 
-    pitch_range: tuple[float, float] = (-0.01745, 0.01745)
-    """Uniform range for pitch perturbation in radians. Default is ~2 degrees."""
+    pitch_range: tuple[float, float] = (0.0, 0.0)
+    """Uniform range for pitch perturbation in radians. Default is 0 (no noise)."""
 
-    yaw_range: tuple[float, float] = (-0.01745, 0.01745)
-    """Uniform range for yaw perturbation in radians. Default is ~2 degrees."""
+    yaw_range: tuple[float, float] = (0.0, 0.0)
+    """Uniform range for yaw perturbation in radians. Default is 0 (no noise)."""

From a0e2c410710c9e20c8c7f153919094380e037129 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Thu, 9 Apr 2026 17:21:03 -0700
Subject: [PATCH 81/87] 2 degree rotation obs noise.

---
 .../deploy/gear_assembly/gear_assembly_env_cfg.py    |  6 +++---
 .../manipulation/deploy/mdp/noise_models.py          | 12 ++++++------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index eb5a6b98bce1..49c201563773 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -200,9 +200,9 @@ class PolicyCfg(ObsGroup):
         gear_shaft_quat = ObsTerm(
             func=mdp.gear_shaft_quat_w,
             noise=ResetSampledQuaternionNoiseModelCfg(
-                roll_range=(0.0, 0.0),
-                pitch_range=(0.0, 0.0),
-                yaw_range=(0.0, 0.0),
+                roll_range=(-0.03491, 0.03491),
+                pitch_range=(-0.03491, 0.03491),
+                yaw_range=(-0.03491, 0.03491),
             ),
         )
 
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/noise_models.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/noise_models.py
index 8af793708095..1625890be668 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/noise_models.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/mdp/noise_models.py
@@ -179,11 +179,11 @@ class ResetSampledQuaternionNoiseModelCfg(NoiseModelCfg):
     """Unused placeholder inherited from NoiseModelCfg. Quaternion perturbation is
     controlled by roll_range, pitch_range, and yaw_range instead."""
 
-    roll_range: tuple[float, float] = (0.0, 0.0)
-    """Uniform range for roll perturbation in radians. Default is 0 (no noise)."""
+    roll_range: tuple[float, float] = (-0.03491, 0.03491)
+    """Uniform range for roll perturbation in radians. Default is ±2 degrees."""
 
-    pitch_range: tuple[float, float] = (0.0, 0.0)
-    """Uniform range for pitch perturbation in radians. Default is 0 (no noise)."""
+    pitch_range: tuple[float, float] = (-0.03491, 0.03491)
+    """Uniform range for pitch perturbation in radians. Default is ±2 degrees."""
 
-    yaw_range: tuple[float, float] = (0.0, 0.0)
-    """Uniform range for yaw perturbation in radians. Default is 0 (no noise)."""
+    yaw_range: tuple[float, float] = (-0.03491, 0.03491)
+    """Uniform range for yaw perturbation in radians. Default is ±2 degrees."""

From a46ca140ff50faa5b900575b4b89db9066677453 Mon Sep 17 00:00:00 2001
From: Ashwin Varghese Kuruttukulam <ashwinvk@nvidia.com>
Date: Mon, 6 Apr 2026 15:24:17 -0700
Subject: [PATCH 82/87] update for robot position in hubble

---
 .../config/rizon_4s/ros_inference_env_cfg.py  | 71 +++++++++++--------
 1 file changed, 41 insertions(+), 30 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/ros_inference_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/ros_inference_env_cfg.py
index 6ff7f2cb3d7d..fe85b97cb362 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/ros_inference_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/ros_inference_env_cfg.py
@@ -13,13 +13,20 @@
 
 @configclass
 class Rizon4sGearAssemblyROSInferenceEnvCfg(Rizon4sGearAssemblyEnvCfg):
-    """Configuration for ROS inference with Flexiv Rizon 4s and Grav gripper.
+    """ROS / Isaac Manipulator inference fields plus deployment alignment for NVIDIA Hubble Lab.
 
     This configuration:
     - Exposes variables needed for ROS inference
     - Overrides robot and gear initial poses for fixed/deterministic setup
+    - Aligns robot mounting pose with the Flexiv Rizon 4s installation at NVIDIA Hubble Lab
     """
 
+    # Single source for base + all gear rigid bodies (Rizon: closer to robot, centered)
+    ros_inference_factory_gears_init_state: RigidObjectCfg.InitialStateCfg = RigidObjectCfg.InitialStateCfg(
+        pos=(0.75, 0.0, -0.2),
+        rot=(0.0, 0.0, 0.70711, -0.70711),
+    )
+
     def __post_init__(self):
         # post init of parent
         super().__post_init__()
@@ -46,35 +53,39 @@ def __post_init__(self):
         # Dynamically generate action_scale_joint_space based on action_space
         self.action_scale_joint_space = [self.joint_action_scale] * self.action_space
 
-        # Override robot initial pose for ROS inference (fixed pose, no randomization)
-        # Joint positions and pos are inherited from parent, only override rotation to be deterministic
-        self.scene.robot.init_state.rot = (0.0, 0.0, 0.0, 1.0)  # Identity quaternion (x, y, z, w)
-
-        # Override gear base initial pose (fixed pose for ROS inference)
-        # Position configured for Rizon 4s workspace
-        self.scene.factory_gear_base.init_state = RigidObjectCfg.InitialStateCfg(
-            pos=(0.481, -0.073, -0.005),
-            rot=(0.0, 0.0, 0.70711, -0.70711),
-        )
-
-        # Override gear initial poses (fixed poses for ROS inference)
-        # Small gear
-        self.scene.factory_gear_small.init_state = RigidObjectCfg.InitialStateCfg(
-            pos=(0.481, -0.073, -0.005),
-            rot=(0.0, 0.0, 0.70711, -0.70711),
-        )
-
-        # Medium gear
-        self.scene.factory_gear_medium.init_state = RigidObjectCfg.InitialStateCfg(
-            pos=(0.481, -0.073, -0.005),
-            rot=(0.0, 0.0, 0.70711, -0.70711),
-        )
-
-        # Large gear
-        self.scene.factory_gear_large.init_state = RigidObjectCfg.InitialStateCfg(
-            pos=(0.481, -0.073, -0.005),
-            rot=(0.0, 0.0, 0.70711, -0.70711),
-        )
+        # --- NVIDIA Hubble Lab: Flexiv Rizon 4s mount ---
+        # Remove vertical mount stand since Hubble deployment does not use the sim stand asset
+        self.scene.stand = None
+
+        # Lab home joint pose (radians); aligns sim defaults / reset with the physical stand
+        self.scene.robot.init_state.joint_pos = {
+            "joint1": math.radians(-90.0),
+            "joint2": math.radians(90.0),
+            "joint3": 0.0,
+            "joint4": math.radians(90.0),
+            "joint5": 0.0,
+            "joint6": 0.0,
+            "joint7": 0.0,
+        }
+
+        # Orientation of robot is based on the Flexiv Rizon 4s mount in the Hubble Lab
+        self.scene.robot.init_state.pos = (0.0, 0.0, 0.0)
+        self.scene.robot.init_state.rot = (0.5, 0.5, 0.5, 0.5)
+
+        # Override gear base + all gears from one template (fixed pose for ROS inference)
+        _g = self.ros_inference_factory_gears_init_state
+        for _name in (
+            "factory_gear_base",
+            "factory_gear_small",
+            "factory_gear_medium",
+            "factory_gear_large",
+        ):
+            getattr(self.scene, _name).init_state = RigidObjectCfg.InitialStateCfg(
+                pos=_g.pos,
+                rot=_g.rot,
+                lin_vel=_g.lin_vel,
+                ang_vel=_g.ang_vel,
+            )
 
         # Fixed asset parameters for ROS inference - derived from configuration
         # These parameters are used by the ROS inference node to validate the environment setup

From b4651eb629d2b8c5a2a6e012d06bbd47253813fa Mon Sep 17 00:00:00 2001
From: Ashwin Varghese Kuruttukulam <ashwinvk@nvidia.com>
Date: Tue, 14 Apr 2026 15:59:51 -0700
Subject: [PATCH 83/87] update gear base posiiton based on new srand

---
 .../gear_assembly/config/rizon_4s/ros_inference_env_cfg.py      | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/ros_inference_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/ros_inference_env_cfg.py
index fe85b97cb362..d102539ee9bb 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/ros_inference_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/ros_inference_env_cfg.py
@@ -23,7 +23,7 @@ class Rizon4sGearAssemblyROSInferenceEnvCfg(Rizon4sGearAssemblyEnvCfg):
 
     # Single source for base + all gear rigid bodies (Rizon: closer to robot, centered)
     ros_inference_factory_gears_init_state: RigidObjectCfg.InitialStateCfg = RigidObjectCfg.InitialStateCfg(
-        pos=(0.75, 0.0, -0.2),
+        pos=(0.927, 0.046, -0.109),
         rot=(0.0, 0.0, 0.70711, -0.70711),
     )
 

From b42ec43500137be3f883f8900c7d349e49e2730d Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Thu, 16 Apr 2026 09:09:01 -0700
Subject: [PATCH 84/87] Changed ee vs gear base reward terms scheduling for 512
 envs.

---
 .../deploy/gear_assembly/gear_assembly_env_cfg.py             | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index 49c201563773..1bc640ae4195 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -281,7 +281,7 @@ class RewardsCfg:
             "keypoint_scale": 0.15,
             "ee_gear_threshold": 0.00,
             "weight_ramp_start": 0.0,  # Set to 0.0 to enable ramp-up
-            "weight_ramp_steps": 512_000,
+            "weight_ramp_steps": 250_000,
         },
     )
 
@@ -295,7 +295,7 @@ class RewardsCfg:
             "keypoint_scale": 0.15,
             "ee_gear_threshold": 0.00,
             "weight_ramp_start": 0.0,  # Set to 0.0 to enable ramp-up
-            "weight_ramp_steps": 512_000,
+            "weight_ramp_steps": 250_000,
         },
     )
 

From 1ea97c6920d6b3c6d29322648f12bb5cad2e21c8 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Fri, 24 Apr 2026 15:23:31 -0700
Subject: [PATCH 85/87] Fixed gear base pose as per real world.

---
 .../gear_assembly/config/rizon_4s/ros_inference_env_cfg.py      | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/ros_inference_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/ros_inference_env_cfg.py
index d102539ee9bb..ba93d12c4781 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/ros_inference_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/ros_inference_env_cfg.py
@@ -23,7 +23,7 @@ class Rizon4sGearAssemblyROSInferenceEnvCfg(Rizon4sGearAssemblyEnvCfg):
 
     # Single source for base + all gear rigid bodies (Rizon: closer to robot, centered)
     ros_inference_factory_gears_init_state: RigidObjectCfg.InitialStateCfg = RigidObjectCfg.InitialStateCfg(
-        pos=(0.927, 0.046, -0.109),
+        pos=(0.938, 0.0826, -0.1),
         rot=(0.0, 0.0, 0.70711, -0.70711),
     )
 

From f2c380b5a0e388a08d090794bd06e3c5426b1611 Mon Sep 17 00:00:00 2001
From: shauryadNv <shauryad@nvidia.com>
Date: Fri, 24 Apr 2026 15:24:57 -0700
Subject: [PATCH 86/87] Fixed gear base pose as per real world.

---
 .../gear_assembly/config/rizon_4s/ros_inference_env_cfg.py      | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/ros_inference_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/ros_inference_env_cfg.py
index ba93d12c4781..ecaab0abd4ff 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/ros_inference_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/config/rizon_4s/ros_inference_env_cfg.py
@@ -23,7 +23,7 @@ class Rizon4sGearAssemblyROSInferenceEnvCfg(Rizon4sGearAssemblyEnvCfg):
 
     # Single source for base + all gear rigid bodies (Rizon: closer to robot, centered)
     ros_inference_factory_gears_init_state: RigidObjectCfg.InitialStateCfg = RigidObjectCfg.InitialStateCfg(
-        pos=(0.938, 0.0826, -0.1),
+        pos=(0.938, 0.0826, -0.15),
         rot=(0.0, 0.0, 0.70711, -0.70711),
     )
 

From b05d22d5368b6862363970cbfd8da3bd05cada2b Mon Sep 17 00:00:00 2001
From: curiep <curiep@nvidia.com>
Date: Mon, 27 Apr 2026 10:47:10 -0700
Subject: [PATCH 87/87] increase weight ramp steps to 512k for end effector
 base keypoint tracking for 256 envs

---
 .../deploy/gear_assembly/gear_assembly_env_cfg.py             | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
index 1bc640ae4195..49c201563773 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/deploy/gear_assembly/gear_assembly_env_cfg.py
@@ -281,7 +281,7 @@ class RewardsCfg:
             "keypoint_scale": 0.15,
             "ee_gear_threshold": 0.00,
             "weight_ramp_start": 0.0,  # Set to 0.0 to enable ramp-up
-            "weight_ramp_steps": 250_000,
+            "weight_ramp_steps": 512_000,
         },
     )
 
@@ -295,7 +295,7 @@ class RewardsCfg:
             "keypoint_scale": 0.15,
             "ee_gear_threshold": 0.00,
             "weight_ramp_start": 0.0,  # Set to 0.0 to enable ramp-up
-            "weight_ramp_steps": 250_000,
+            "weight_ramp_steps": 512_000,
         },
     )