From a7ecbd0de18bfbd47897e9d1ea626d9d0bc37a98 Mon Sep 17 00:00:00 2001 From: Eric Arnold Date: Wed, 6 May 2026 16:14:03 -0500 Subject: [PATCH 1/4] sim: tear down camera renderers before physics manager close OVRTX shares a Carbonite framework with ovphysx; per the kit/sdk and ovstage working samples, the renderer-side native objects must release before the physics-side teardown runs, otherwise the second teardown crashes on already-freed plugin state. SimulationContext.clear_instance released physics first and let cameras GC last, which inverted the required order. Add RenderContext.cleanup() that walks registered backends and invokes BaseRenderer.cleanup(None) on each, then call it from clear_instance before physics_manager.close(). Per-camera Camera.__del__ cleanup stays in place as a safety net but becomes a no-op once the central cleanup has run. No behavior change for non-ovrtx backends. Companion design notes for the deferred items (renderer hoist, _setup_object_bindings on ovphysx, physx.clone migration) at source/isaaclab_ov/docs/ovphysx_coexist_DESIGN.md. Tracking: OMPE-88037 --- .../isaaclab/renderers/render_context.py | 25 +++ .../isaaclab/sim/simulation_context.py | 12 +- .../docs/ovphysx_coexist_DESIGN.md | 174 ++++++++++++++++++ 3 files changed, 209 insertions(+), 2 deletions(-) create mode 100644 source/isaaclab_ov/docs/ovphysx_coexist_DESIGN.md diff --git a/source/isaaclab/isaaclab/renderers/render_context.py b/source/isaaclab/isaaclab/renderers/render_context.py index 1c1a45a19454..a85aadc92ae6 100644 --- a/source/isaaclab/isaaclab/renderers/render_context.py +++ b/source/isaaclab/isaaclab/renderers/render_context.py @@ -127,3 +127,28 @@ def reset_stage_prepare_flag(self) -> None: def reset_transform_cadence(self) -> None: """Clear per-step transform dedupe (e.g. a long pause with no physics).""" self._last_transforms_step = None + + def cleanup(self) -> None: + """Release all registered renderer backends. + + Calls :meth:`BaseRenderer.cleanup` on each entry, then clears the + registration list. Safe to call multiple times. Per-camera + :meth:`Camera.__del__` cleanup remains in place but becomes a no-op + because the underlying backend has already torn down. + + Called from :meth:`SimulationContext.clear_instance` before the + physics backend closes, so renderer-owned native resources (e.g. + OVRTX's HydraEngine) are released first. Required by the + ovphysx + ovrtx coexistence contract: ovrtx must release its + Carbonite-owning native objects before ovphysx tears down its own + Carbonite instance. + """ + for _cfg, renderer in self._renderer_entries: + try: + renderer.cleanup(None) + except Exception as e: + logger.warning("Error tearing down renderer %s: %s", type(renderer).__name__, e) + self._renderer_entries.clear() + self._prepared_renderer_ids.clear() + self._prepared_num_envs = None + self._last_transforms_step = None diff --git a/source/isaaclab/isaaclab/sim/simulation_context.py b/source/isaaclab/isaaclab/sim/simulation_context.py index 89be3163359f..d8c46eb5d0fb 100644 --- a/source/isaaclab/isaaclab/sim/simulation_context.py +++ b/source/isaaclab/isaaclab/sim/simulation_context.py @@ -843,8 +843,16 @@ def get_setting(self, name: str) -> Any: def clear_instance(cls) -> None: """Clean up resources and clear the singleton instance.""" if cls._instance is not None: - # Close physics manager FIRST to detach PhysX from the stage - # This must happen before clearing USD prims to avoid PhysX cleanup errors + # Tear down camera renderers FIRST. Some backends (e.g. OVRTX) own + # native objects that share a Carbonite framework with the physics + # backend (ovphysx); when both are loaded those objects must release + # before the physics-side Carbonite teardown runs, otherwise the + # second teardown crashes on already-freed plugin state. For + # backends without that constraint this is a no-op. + cls._instance._render_context.cleanup() + + # Close physics manager to detach PhysX from the stage. This must + # happen before clearing USD prims to avoid PhysX cleanup errors. cls._instance.physics_manager.close() # Close all visualizers diff --git a/source/isaaclab_ov/docs/ovphysx_coexist_DESIGN.md b/source/isaaclab_ov/docs/ovphysx_coexist_DESIGN.md new file mode 100644 index 000000000000..6b52a1d810f5 --- /dev/null +++ b/source/isaaclab_ov/docs/ovphysx_coexist_DESIGN.md @@ -0,0 +1,174 @@ +# ovphysx + ovrtx coexistence — IsaacLab integration design notes + +Tracking ticket: OMPE-88037 — `presets=ovphysx,ovrtx_renderer,rgb`. + +This branch (`erarnold/ovphysx-ovrtx-coexist`, off `pbarejko/ovphysx-ovrtx`) +contains only the changes that are clearly correct without first answering +the open questions to the kit/sdk and ovstage agents listed in +`~/tmp/OMPE-88037-ovphysx-ovrtx-isaaclab.md`. The harder pieces are +captured here as design notes. + +## What this branch does land + +**Renderer-side teardown before physics-side teardown.** +`SimulationContext.clear_instance()` now calls +`self._render_context.cleanup()` before `physics_manager.close()`. The +new `RenderContext.cleanup()` method walks the registered renderer +backends and invokes `BaseRenderer.cleanup(None)` on each, then drops +its registration list. Per-camera `Camera.__del__` cleanup remains in +place as a safety net but becomes a no-op once the central cleanup has +run. + +Why: the kit/sdk and ovstage samples (see +`~/repos/kit/sdks/source/ovphysx_ovrtx_integration_example/README.md` +"Cleanup order matches C++ sample" and +`~/repos/ovstage/src/examples/render_physx.cpp:785`) are explicit that +ovrtx must release before ovphysx tears down their shared Carbonite. +Today IsaacLab releases physics first and lets cameras GC last; that +inversion crashes the second native teardown. The fix is small, has no +behavior change for non-ovrtx backends, and removes the sequencing +dependency on Python GC. + +## What this branch defers + +### A. Hoist OVRTXRenderer construction before OvPhysxManager._warmup_and_load + +**Problem.** Today `ovrtx.Renderer(config)` is constructed lazily inside +`OVRTXRenderer.initialize(spec)` (in `source/isaaclab_ov/.../ovrtx_renderer.py`), +which is called from `OVRTXRenderer.create_render_data()`, which is +called from `Camera._initialize_impl()` (in +`source/isaaclab/isaaclab/sensors/camera/camera.py:423`), which fires +on the `PHYSICS_READY` event dispatched at the end of +`OvPhysxManager.reset() → _warmup_and_load()`. By that point ovphysx has +already constructed `ovphysx.PhysX(device=...)` and claimed Carbonite. + +Per the working samples (`render_physx.cpp:200-214`, +`ovlibs_sample.py:210-213`) ovrtx must claim Carbonite first, before +ovphysx exists. Two paths: + +1. **Wheel side:** ovphysx 0.4.3 already flips the + `OVPHYSX_COEXIST_DIAGNOSTICS` default (commit `0917441ef9`) so that + ovphysx auto-detects another Carbonite owner and proceeds in + coexistence mode. **Open question for the kit/sdk agent (Q1 in the + triage doc):** does this default flip make ovphysx-first + work-as-coexistence-tenant, or only suppress the env-var nag? +2. **IsaacLab side:** add a `BaseRenderer.early_init(stage)` hook that + the renderer uses to construct its native object before + `prepare_stage`. `RenderContext` exposes `early_init_all(stage)` + that walks registered backends. Add a hook on the physics manager + (or `SimulationContext.reset`) that calls it before + `physics_manager.reset()` runs `_warmup_and_load`. Backends that + don't need it (Newton, Isaac RTX) get a default no-op. + +The catch with (2): Camera sensors only register their renderer cfg +during `_initialize_impl` (after PHYSICS_READY), so at the early hook +nothing is registered yet. We need to pre-walk the scene cfg for +`CameraCfg.renderer_cfg` entries during `InteractiveScene` build (or +`SimulationContext.__init__`) and register them up front. + +**Don't implement until Q1 is answered.** If 0.4.3 does relax the order +constraint, (1) is enough and IsaacLab can keep the lazy construction +path. + +### B. Wire OVRTXRenderer._setup_object_bindings to read OvPhysx pose bindings + +**Problem.** `OVRTXRenderer._setup_object_bindings` (line 307) and +`OVRTXRenderer.update_transforms` (line 409) call +`SimulationContext.instance().initialize_scene_data_provider().get_newton_model()` +and `.get_newton_state().body_q`. With OvPhysx these return `None`, so +object transforms never reach OVRTX (camera transforms still work via +the omni:xform binding written from `update_camera`). + +**Two ways forward:** + +1. **Add an `OvPhysxSceneDataProvider`** that mirrors the Newton + provider's interface (`get_newton_model()` returns a path/index list, + `get_newton_state()` returns a body-q tensor). The provider wraps + ovphysx tensor bindings — pose binding → body_q tensor in + `wp.transformf` layout. Keep the same renderer code by aliasing + "newton state" to "physics state" at the provider level. +2. **Add a `BaseSceneDataProvider.get_body_transforms()`** abstraction + so OVRTXRenderer no longer reaches for Newton-specific accessors, + then implement it on both Newton and a new OvPhysx provider. + +(2) is cleaner but a bigger renderer-side change. (1) is fewer LOC and +keeps the scope focused on unblocking the bug. **Open question for the +ovstage agent (Q3 in the triage doc):** if there's a third option +(IsaacLab pulls in `ovstage` as a third dependency and uses the +ovstage→ovrtx attribute-write path the working samples use), is that +the recommended pattern or a heavier one? + +### C. Migrate physx.clone() to attach_stage + stage.clone_subtree + +**Problem.** `OvPhysxManager._warmup_and_load` calls +`cls._physx.clone(source, targets, transforms)` (line 278) for +replicate-physics. ovphysx 0.4.3 removes the public `physx.clone()` API +(see ovphysx changelog under 0.4 "Breaking changes") in favor of +`physx.attach_stage(stage) → stage.clone_subtree(source, [targets])` +followed by per-target `xformOp:translate` writes inside the same +`begin_frame`/`end_frame`. The replicate plugin is unchanged; only the +entry point moved. + +**Migration sketch:** + +```python +# Before (0.4.2 and older) +op_idx = cls._physx.clone(source, targets, transforms) +cls._physx.wait_op(op_idx) + +# After (0.4.3+) +import ovstage # new dependency +stage = ovstage.Stage() # or get from sim context if hoisted +ovstage_op = stage.begin_frame() +stage.clone_subtree(source, targets) +for target_path, (x, y, z) in zip(targets, parent_positions): + stage.write_attribute(ovstage_op, [target_path], "xformOp:translate", pack_vec3(x, y, z)) +stage.end_frame(ovstage_op) +# OvstageBridge ingests this on next physx.step() +``` + +**Why deferred:** + +1. Pulling `ovstage` into IsaacLab as a third dependency is a bigger + ask than this branch should swallow without sign-off — it adds + another wheel pin, plugin path, and Carbonite tenant. +2. The migration is wheel-version-dependent. The cleanest implementation + is a feature gate: try `physx.clone()` first, fall back to the + ovstage path if `AttributeError`. That keeps the 0.4.2 path working + while landing the 0.4.3 path. But the gate logic only makes sense + once we've actually exercised the 0.4.3 API end-to-end, which needs + a built 0.4.3 wheel installed in IsaacLab. + +**Action when 0.4.3 wheel is in hand:** add the version gate and +ovstage import, run the cartpole repro to confirm clones land. + +### D. OVRTX renderer init order with respect to USD export + +**Problem (potential — needs verification).** `OvPhysxManager._warmup_and_load` +exports the USD stage to a temp file before constructing ovphysx; that +file is what ovphysx ingests. `OVRTXRenderer.prepare_stage` separately +exports the stage to `/tmp/stage_before_ovrtx.usda` with cameras +injected. Two distinct files. The samples have both libs `open_usd` +the same on-disk file. + +If the open question Q1 (init order in 0.4.3) lands as "ovrtx must +still be first", the USD file ovrtx opens has not yet been augmented +with the OvPhysx-required schemas (which `_configure_physx_scene_prim` +writes onto the in-memory stage right before export). One USD file +that both backends open is the cleanest fix; needs a small refactor of +the export path so OvPhysxManager and OVRTXRenderer share the export. + +**Defer until Q1 + the actual repro on 0.4.3 says whether this matters.** + +## Open questions blocking the deferred items + +(Same numbering as `~/tmp/OMPE-88037-ovphysx-ovrtx-isaaclab.md` §3-§4.) + +- **Q1 (kit/sdk):** Does ovphysx 0.4.3 *enforce* coexistence-mode + regardless of init order, or only suppress the env-var nag? Decides + whether item A is needed at all. +- **Q2 (kit/sdk):** Validated ovrtx pip build for ovphysx 0.4.x. + Decides item D / pin update. +- **Q3 (ovstage):** Slimmest "ovphysx pose tensor → ovrtx CUDA tensor" + recipe. Decides item B's choice between an OvPhysx scene-data + provider vs pulling ovstage in. From f018386acdadf8ea1b93457642fad809ac5270e9 Mon Sep 17 00:00:00 2001 From: Eric Arnold Date: Wed, 6 May 2026 16:38:16 -0500 Subject: [PATCH 2/4] =?UTF-8?q?sim:=20ovphysx=200.4.3=20+=20ovrtx=20coexis?= =?UTF-8?q?tence=20=E2=80=94=20clone=20migration=20+=20renderer=20init=20h?= =?UTF-8?q?oist?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two stacked fixes that, together with the earlier render-then-physics teardown change, get presets=ovphysx,ovrtx_renderer,rgb running end-to-end against ovphysx 0.4.3 + ovrtx 0.3.0.307110. Fix 1 — physx.clone() removed in ovphysx 0.4.3. InteractiveScene routed the ovphysx backend through clone_usd=False so that physx.clone() in OvPhysxManager._warmup_and_load could populate env_1..N at the physics layer. The 0.4.3 wheel removed that public API in favor of ovstage_clone_subtree on an attached ovstage.Stage. Until the ovstage bridge is wired in (see source/isaaclab_ov/docs/ ovphysx_coexist_DESIGN.md), switch ovphysx onto USD-side cloning: clone_usd=True and clone_physics=False. _warmup_and_load now raises a clear error if a pre-0.4.3 wheel and the legacy code path coincide, rather than letting AttributeError bubble through Camera init. Fix 2 — OVRTX must construct its native Renderer before ovphysx. ovrtx and ovphysx share a Carbonite framework; ovrtx does no coexistence checks at construction time and SIGSEGVs inside createRTXRenderer if ovphysx (or any other Carbonite owner) has already loaded. IsaacLab today constructs OVRTX lazily in Camera._initialize_impl (which fires on PHYSICS_READY, after OvPhysxManager._warmup_and_load), inverting the required order. Add BaseRenderer.early_init() (default no-op) and have OVRTXRenderer override it to construct the C++ Renderer up front; OVRTXRenderer. initialize() now skips renderer construction if early_init already ran. RenderContext.early_init_all() walks registered backends, and InteractiveScene.early_init_renderers() pre-walks Camera sensors to register their renderer cfg with the context so early_init has something to act on. DirectRLEnv calls scene.early_init_renderers() between _setup_scene (which adds tiled cameras) and sim.reset() so the OVRTX Renderer is constructed before OvPhysxManager constructs its native instance. Smoke run on the OMPE-88037 cartpole repro (32 envs, 2 epochs, 100x100 RGB tiled camera): 16.34s end-to-end, training ran at ~700 fps, exit 0. Object transforms in the rendered frame are still static — Item B in the DESIGN doc (OVRTXRenderer.update_transforms wiring to ovphysx pose bindings) is the next piece. Tracking: OMPE-88037 --- .../isaaclab/isaaclab/envs/direct_rl_env.py | 6 +++ .../isaaclab/renderers/base_renderer.py | 15 ++++++ .../isaaclab/renderers/render_context.py | 11 +++++ .../isaaclab/scene/interactive_scene.py | 43 +++++++++++++++-- .../isaaclab_ov/renderers/ovrtx_renderer.py | 46 +++++++++++++++---- .../physics/ovphysx_manager.py | 28 ++++++++--- 6 files changed, 128 insertions(+), 21 deletions(-) diff --git a/source/isaaclab/isaaclab/envs/direct_rl_env.py b/source/isaaclab/isaaclab/envs/direct_rl_env.py index 9251eb0fe817..0ff5c090a8c4 100644 --- a/source/isaaclab/isaaclab/envs/direct_rl_env.py +++ b/source/isaaclab/isaaclab/envs/direct_rl_env.py @@ -192,6 +192,12 @@ def _init_sim(self, render_mode: str | None = None, **kwargs): # note: this activates the physics simulation view that exposes TensorAPIs # note: when started in extension mode, first call sim.reset_async() and then initialize the managers print("[INFO]: Starting the simulation. This may take a few seconds. Please wait...") + # Pre-register Camera renderer cfgs and run BaseRenderer.early_init on each + # backend. Required for ovphysx + OVRTX coexistence: OVRTX must claim + # Carbonite before ovphysx constructs its native instance (sim.reset → + # OvPhysxManager._warmup_and_load), otherwise createRTXRenderer SIGSEGVs. + # No-op for backends without an early_init override. + self.scene.early_init_renderers() with Timer("[INFO]: Time taken for simulation start", "simulation_start"): # since the reset can trigger callbacks which use the stage, # we need to set the stage context here diff --git a/source/isaaclab/isaaclab/renderers/base_renderer.py b/source/isaaclab/isaaclab/renderers/base_renderer.py index 2fc498eae8e3..c563a88f69e8 100644 --- a/source/isaaclab/isaaclab/renderers/base_renderer.py +++ b/source/isaaclab/isaaclab/renderers/base_renderer.py @@ -33,6 +33,21 @@ def supported_output_types(self) -> dict[RenderBufferKind, RenderBufferSpec]: """ pass + def early_init(self) -> None: + """Construct any native objects that must be initialized before the + physics backend. + + Default no-op. Override in renderers whose native runtime must claim + process-global resources (e.g. Carbonite framework) before another + co-loaded library does. Called from + :meth:`RenderContext.early_init_all` ahead of the first + :meth:`PhysicsManager.reset` so the renderer can establish ownership + before the physics backend constructs its own native instance. + + Idempotent: callers may invoke this multiple times; subsequent calls + on a renderer that has already done its early init must no-op. + """ + @abstractmethod def prepare_stage(self, stage: Any, num_envs: int) -> None: """Prepare the stage for rendering before :meth:`create_render_data` is called. diff --git a/source/isaaclab/isaaclab/renderers/render_context.py b/source/isaaclab/isaaclab/renderers/render_context.py index a85aadc92ae6..f930b69c1f1c 100644 --- a/source/isaaclab/isaaclab/renderers/render_context.py +++ b/source/isaaclab/isaaclab/renderers/render_context.py @@ -128,6 +128,17 @@ def reset_transform_cadence(self) -> None: """Clear per-step transform dedupe (e.g. a long pause with no physics).""" self._last_transforms_step = None + def early_init_all(self) -> None: + """Run :meth:`BaseRenderer.early_init` on every registered backend. + + Used by ``InteractiveScene`` to give renderers (e.g. OVRTX) a chance + to claim process-global resources before the physics backend + constructs its own native instance. Backends that don't override + :meth:`BaseRenderer.early_init` are no-op. + """ + for _cfg, renderer in self._renderer_entries: + renderer.early_init() + def cleanup(self) -> None: """Release all registered renderer backends. diff --git a/source/isaaclab/isaaclab/scene/interactive_scene.py b/source/isaaclab/isaaclab/scene/interactive_scene.py index ce744fe4bffe..a65a257a79b1 100644 --- a/source/isaaclab/isaaclab/scene/interactive_scene.py +++ b/source/isaaclab/isaaclab/scene/interactive_scene.py @@ -159,15 +159,20 @@ def __init__(self, cfg: InteractiveSceneCfg): # prepare cloner for environment replication self.env_prim_paths = [f"{self.env_ns}/env_{i}" for i in range(self.cfg.num_envs)] + # ovphysx 0.4.3 removed the public `physx.clone()` API (replaced by + # `attach_stage` + `ovstage_clone_subtree`). Until that ovstage bridge + # is wired into IsaacLab, USD-side cloning is the supported ovphysx + # path: USD-replicate env_1..N so ovphysx ingests every env's physics + # prims directly, and skip the physics-runtime clone hook entirely. + # See source/isaaclab_ov/docs/ovphysx_coexist_DESIGN.md item C. + is_ovphysx = self.physics_backend.startswith("ovphysx") self.cloner_cfg = cloner.TemplateCloneCfg( clone_regex=self.env_regex_ns, clone_in_fabric=self.cfg.clone_in_fabric, device=self.device, - physics_clone_fn=physics_clone_fn, - # For ovphysx: env_1..N are created by physx.clone() in the physics - # runtime after add_usd(). USD replication of the asset hierarchy - # to env_1..N is skipped — only env_0 needs physics prims in the USD. - clone_usd=not self.physics_backend.startswith("ovphysx"), + physics_clone_fn=None if is_ovphysx else physics_clone_fn, + clone_physics=not is_ovphysx, + clone_usd=True, ) # create source prim @@ -209,6 +214,34 @@ def __init__(self, cfg: InteractiveSceneCfg): if self.cfg.filter_collisions and "physx" in self.physics_backend: self.filter_collisions(self._global_prim_paths) + def early_init_renderers(self) -> None: + """Pre-register every Camera sensor's renderer cfg and call + :meth:`BaseRenderer.early_init` on each backend. + + Must be called before the physics manager constructs its native + instance: renderers like OVRTX share a Carbonite framework with + ovphysx and SIGSEGV inside ``createRTXRenderer`` if they're + constructed second. Camera sensors normally register their + renderer lazily during ``Camera._initialize_impl`` (which fires on + PHYSICS_READY, after :meth:`OvPhysxManager._warmup_and_load`); + this method flips that order. + + Idempotent. Backends that don't override + :meth:`BaseRenderer.early_init` are no-op. Tasks that add Camera + sensors after :class:`InteractiveScene` construction (e.g. via + ``DirectRLEnv._setup_scene``) should call this method themselves + between sensor construction and ``sim.reset()`` — the framework's + :class:`DirectRLEnv` does this for built-in camera tasks. + """ + from isaaclab.sensors.camera.camera import Camera as _Camera + + for _name, sensor in self._sensors.items(): + if isinstance(sensor, _Camera): + renderer_cfg = getattr(sensor.cfg, "renderer_cfg", None) + if renderer_cfg is not None: + self.sim.render_context.get_renderer(renderer_cfg) + self.sim.render_context.early_init_all() + def clone_environments(self, copy_from_source: bool = False): """Creates clones of the environment ``/World/envs/env_0``. diff --git a/source/isaaclab_ov/isaaclab_ov/renderers/ovrtx_renderer.py b/source/isaaclab_ov/isaaclab_ov/renderers/ovrtx_renderer.py index 5d1782373d87..cac6d7d36001 100644 --- a/source/isaaclab_ov/isaaclab_ov/renderers/ovrtx_renderer.py +++ b/source/isaaclab_ov/isaaclab_ov/renderers/ovrtx_renderer.py @@ -147,6 +147,7 @@ def supported_output_types(self) -> dict[RenderBufferKind, RenderBufferSpec]: def __init__(self, cfg: OVRTXRendererCfg): self.cfg = cfg + self._renderer: Renderer | None = None self._usd_handles = [] self._render_product_paths = [] self._camera_binding = None @@ -157,6 +158,30 @@ def __init__(self, cfg: OVRTXRendererCfg): self._camera_rel_path: str | None = None self._output_semantic_color_buffer: wp.array | None = None + def early_init(self) -> None: + """Construct the OVRTX :class:`Renderer` to claim Carbonite ahead of + the physics backend. + + ovrtx and ovphysx share a Carbonite framework; the ovrtx side does no + coexistence checks at construction time and SIGSEGVs inside + ``createRTXRenderer`` if ovphysx (or any other Carbonite owner) has + already loaded its plugins. Pre-constructing here lets ovrtx own + Carbonite first; ovphysx's 0.4.3 coexistence-default-flip then takes + the second-tenant path cleanly. + + Idempotent — subsequent calls are a no-op. + """ + if self._renderer is not None: + return + logger.info("OVRTXRenderer.early_init: constructing Renderer to claim Carbonite first") + ovrtx_config = RendererConfig( + log_file_path=self.cfg.log_file_path, + log_level=self.cfg.log_level, + read_gpu_transforms=_IS_OVRTX_0_3_0_OR_NEWER, + ) + self._renderer = Renderer(ovrtx_config) + assert self._renderer, "Renderer should be valid after construction" + def prepare_stage(self, stage: Any, num_envs: int) -> None: """Export the USD stage for OVRTX before create_render_data. @@ -196,15 +221,18 @@ def initialize(self, spec: CameraRenderSpec): usd_scene_path = self._exported_usd_path use_cloning = self.cfg.use_cloning - logger.info("Creating OVRTX renderer...") - OVRTX_CONFIG = RendererConfig( - log_file_path=self.cfg.log_file_path, - log_level=self.cfg.log_level, - read_gpu_transforms=_IS_OVRTX_0_3_0_OR_NEWER, - ) - self._renderer = Renderer(OVRTX_CONFIG) - assert self._renderer, "Renderer should be valid after creation" - logger.info("OVRTX renderer created successfully") + if self._renderer is None: + logger.info("Creating OVRTX renderer...") + ovrtx_config = RendererConfig( + log_file_path=self.cfg.log_file_path, + log_level=self.cfg.log_level, + read_gpu_transforms=_IS_OVRTX_0_3_0_OR_NEWER, + ) + self._renderer = Renderer(ovrtx_config) + assert self._renderer, "Renderer should be valid after creation" + logger.info("OVRTX renderer created successfully") + else: + logger.info("OVRTX renderer already constructed via early_init; skipping re-creation") if usd_scene_path is not None: logger.info("Injecting camera definitions...") diff --git a/source/isaaclab_ovphysx/isaaclab_ovphysx/physics/ovphysx_manager.py b/source/isaaclab_ovphysx/isaaclab_ovphysx/physics/ovphysx_manager.py index b16d659d6cac..5dc2a8bd7896 100644 --- a/source/isaaclab_ovphysx/isaaclab_ovphysx/physics/ovphysx_manager.py +++ b/source/isaaclab_ovphysx/isaaclab_ovphysx/physics/ovphysx_manager.py @@ -255,14 +255,28 @@ def _warmup_and_load(cls) -> None: logger.info("OvPhysxManager: loaded USD into ovphysx (device=%s)", ovphysx_device) # Replay pending physics clones registered by ovphysx_replicate(). - # The USD stage contains only env_0's physics; env_1..N are empty - # Xform containers. physx.clone() creates the remaining environments - # in the physics runtime without modifying the USD file. + # ovphysx 0.4.3 removed the public `physx.clone()` API in favor of + # `physx.attach_stage(stage)` + `ovstage_clone_subtree(...)`. Until + # that ovstage bridge is wired into IsaacLab (see + # source/isaaclab_ov/docs/ovphysx_coexist_DESIGN.md item C), + # InteractiveScene routes ovphysx through USD-side cloning instead + # (clone_usd=True), and `_pending_clones` should always be empty + # under that path. Defensive branch: if the older `clone_usd=False` + # path were re-enabled and pre-0.4.3 wheels are in use, fall back + # to the legacy entrypoint; on 0.4.3+ surface a clear error + # pointing at the design note rather than letting the + # AttributeError bubble through Camera _initialize_impl. if cls._pending_clones: - # ovphysx_replicate() only registers pending clones when clone_usd=False, - # meaning the USD contains only env_0 physics and physx.clone() is required - # to populate env_1..N in the physics runtime. Execute unconditionally — - # no USD content heuristic is needed. + if not hasattr(cls._physx, "clone"): + raise RuntimeError( + "OvPhysxManager: ovphysx_replicate() registered pending physics clones, " + f"but the loaded ovphysx wheel ({type(cls._physx).__module__}) no longer " + "exposes `physx.clone()`. ovphysx 0.4.3+ replaced it with " + "`physx.attach_stage(stage)` + `ovstage_clone_subtree(...)`. Either set " + "`InteractiveSceneCfg.replicate_physics=False` (USD-side cloning) or land " + "the ovstage bridge migration described in " + "source/isaaclab_ov/docs/ovphysx_coexist_DESIGN.md." + ) for source, targets, parent_positions in cls._pending_clones: logger.info( "OvPhysxManager: cloning %s -> %d targets (%s ... %s)", From 9039f0adb1a80e2e76e71690aa1c60015b56df9e Mon Sep 17 00:00:00 2001 From: Eric Arnold Date: Wed, 6 May 2026 16:49:57 -0500 Subject: [PATCH 3/4] ovrtx: forward OvPhysx rigid-body poses to OVRTX object bindings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OVRTXRenderer's object-binding path was Newton-only: it asked SceneDataProvider.get_newton_model() and bailed when None (which is always the case under an OvPhysx physics manager). Result: with presets=ovphysx,ovrtx_renderer the rendered frames had moving cameras but static objects. Add an OvPhysx fallback that: 1. Walks the USD stage for prims with PhysicsRigidBodyAPI under /World/envs/env_*, filters out the camera and any ground plane. 2. Creates an ovphysx RIGID_BODY_POSE tensor binding over those paths (CUDA-resident, [N, 7] = px,py,pz,qx,qy,qz,qw). 3. Binds the same flat list of prim paths to ovrtx omni:xform with PrimMode.EXISTING_ONLY and writes omni:resetXformStack=True so the matrix is interpreted as the world transform regardless of parent xforms — same pattern as the camera binding in OVRTXRenderer.initialize. Per-frame update_transforms now reads the ovphysx pose tensor on GPU, runs sync_ovphysx_pose_to_mat44d_kernel to construct ovrtx-format mat44d rows (column-major rotation, translation in row 3, ``1.0`` at ``[3,3]`` — matches create_camera_transforms_kernel), then wp.copy's into the ovrtx attribute mapping. Zero host roundtrip, single ovrtx write per frame. Smoke run on the OMPE-88037 cartpole repro (32 envs, 2 epochs, 100x100 RGB tiled camera, ovphysx 0.4.3 + ovrtx 0.3.0.307110): 16.59s, training ran at ~670 fps, exit 0; renderer reports "OvPhysx: created RIGID_BODY_POSE binding for 96 ovrtx prims" for 32 envs × 3 articulation links each. Recipe credit: kit/sdk team (notes saved to /tmp/ovstage-answer.md). The Newton path is unchanged; the fallback only fires when get_newton_model() returns None. Tracking: OMPE-88037 --- .../isaaclab_ov/renderers/ovrtx_renderer.py | 158 +++++++++++++++++- .../renderers/ovrtx_renderer_kernels.py | 55 ++++++ 2 files changed, 207 insertions(+), 6 deletions(-) diff --git a/source/isaaclab_ov/isaaclab_ov/renderers/ovrtx_renderer.py b/source/isaaclab_ov/isaaclab_ov/renderers/ovrtx_renderer.py index cac6d7d36001..99df7590428a 100644 --- a/source/isaaclab_ov/isaaclab_ov/renderers/ovrtx_renderer.py +++ b/source/isaaclab_ov/isaaclab_ov/renderers/ovrtx_renderer.py @@ -53,6 +53,7 @@ generate_random_colors_from_ids_kernel, generate_random_colors_from_ids_kernel_legacy, sync_newton_transforms_kernel, + sync_ovphysx_pose_to_mat44d_kernel, ) from .ovrtx_usd import ( create_cloning_attributes, @@ -153,6 +154,12 @@ def __init__(self, cfg: OVRTXRendererCfg): self._camera_binding = None self._object_binding = None self._object_newton_indices: wp.array | None = None + # OvPhysx pose-binding state (alternative to the Newton path; populated + # by :meth:`_setup_object_bindings` when an OvPhysx physics manager is + # active and Newton's scene-data provider returns no model). + self._object_ovphysx_binding = None + self._object_ovphysx_pose_buf: wp.array | None = None + self._object_ovphysx_mat4_buf: wp.array | None = None self._initialized_scene = False self._exported_usd_path: str | None = None self._camera_rel_path: str | None = None @@ -333,14 +340,22 @@ def _update_scene_partitions_after_clone(self, usd_file_path: str, num_envs: int logger.warning("Failed to write scene partitions: %s", e, exc_info=True) def _setup_object_bindings(self): - """Setup OVRTX bindings for scene objects to sync with Newton physics.""" + """Setup OVRTX bindings for scene objects. + + Tries the Newton scene-data-provider path first; on backends that do + not provide a Newton model (notably OvPhysx) falls through to a direct + ovphysx ``RIGID_BODY_POSE`` tensor binding. Per + ``/tmp/ovstage-answer.md`` the slim ovphysx-pose → ovrtx-binding recipe + avoids pulling ovstage in for the steady-state pose-forward path. + """ try: from isaaclab.sim import SimulationContext provider = SimulationContext.instance().initialize_scene_data_provider() newton_model = provider.get_newton_model() if newton_model is None: - logger.info("Newton model not available, skipping object bindings") + logger.info("Newton model not available; trying OvPhysx fallback") + self._setup_object_bindings_ovphysx() return all_body_paths = getattr(newton_model, "body_label", None) @@ -381,10 +396,110 @@ def _setup_object_bindings(self): else: logger.warning("Object binding is None") except ImportError: - logger.info("Newton not available, skipping object bindings") + logger.info("Newton not available, trying OvPhysx fallback") + self._setup_object_bindings_ovphysx() except Exception as e: logger.warning("Error setting up object bindings: %s", e) + def _setup_object_bindings_ovphysx(self): + """Setup OVRTX object bindings against an OvPhysx physics backend. + + Walks the USD stage for prims with ``PhysicsRigidBodyAPI`` under + ``/World/envs/env_*`` (excluding the camera and any ground plane), + creates a single ovphysx ``RIGID_BODY_POSE`` binding for them, and + binds the same flat list of paths to ovrtx ``omni:xform``. Each frame + :meth:`update_transforms` reads the [N, 7] pose tensor on GPU, + converts to mat44d via :func:`sync_ovphysx_pose_to_mat44d_kernel`, and + writes into the ovrtx attribute mapping — no host roundtrip. + + The ``omni:resetXformStack=True`` write makes ovrtx interpret the + written matrix as the world transform regardless of any parent xform + stack, mirroring the camera-binding pattern at + :meth:`OVRTXRenderer.initialize`. This sidesteps the + ``omni:xform``-is-LOCAL parent-strip math the kit/sdk recipe in + ``/tmp/ovstage-answer.md`` describes for prims that don't reset their + xform stack. + """ + try: + from isaaclab.sim import SimulationContext + from isaaclab_ovphysx.physics.ovphysx_manager import OvPhysxManager + from ovphysx import TensorType + except ImportError: + logger.info("OvPhysx not available; skipping object bindings") + return + + sim_ctx = SimulationContext.instance() + if sim_ctx is None: + return + physx_instance = OvPhysxManager.get_physx_instance() + if physx_instance is None: + logger.info("OvPhysx PhysX instance not yet constructed; skipping object bindings") + return + + from pxr import Usd, UsdPhysics + + stage = sim_ctx.stage + object_paths: list[str] = [] + for prim in stage.Traverse(Usd.PrimIsActive & Usd.PrimIsDefined & Usd.PrimIsLoaded): + if not prim.HasAPI(UsdPhysics.RigidBodyAPI): + continue + path = prim.GetPath().pathString + if "/World/envs/" not in path: + continue + if self._camera_rel_path and self._camera_rel_path in path: + continue + if "GroundPlane" in path or "ground_plane" in path: + continue + object_paths.append(path) + + if not object_paths: + logger.info("OvPhysx: no rigid bodies found for OVRTX object binding") + return + + try: + self._object_ovphysx_binding = physx_instance.create_tensor_binding( + prim_paths=object_paths, + tensor_type=TensorType.RIGID_BODY_POSE, + ) + except Exception as e: + logger.warning("OvPhysx: create_tensor_binding(RIGID_BODY_POSE) failed: %s", e) + return + + binding_shape = tuple(self._object_ovphysx_binding.shape) + logger.info( + "OvPhysx: created RIGID_BODY_POSE binding for %d ovrtx prims (shape=%s)", + len(object_paths), + binding_shape, + ) + + # Pre-allocate GPU pose + mat4 buffers sized to the binding's reported + # row count (which may be smaller than ``len(object_paths)`` if any + # prim path didn't match a physics body in the loaded stage). + n = int(binding_shape[0]) + self._object_ovphysx_pose_buf = wp.zeros((n, 7), dtype=wp.float32, device=DEVICE) + self._object_ovphysx_mat4_buf = wp.zeros(n, dtype=wp.mat44d, device=DEVICE) + + self._object_binding = self._renderer.bind_attribute( + prim_paths=object_paths[:n], + attribute_name="omni:xform", + semantic=Semantic.XFORM_MAT4x4, + prim_mode=PrimMode.EXISTING_ONLY, + ) + if self._object_binding is None: + logger.warning("OvPhysx: ovrtx omni:xform binding returned None") + return + + try: + self._renderer.write_attribute( + prim_paths=object_paths[:n], + attribute_name="omni:resetXformStack", + tensor=np.full(n, True, dtype=np.bool_), + ) + except Exception as e: + logger.warning("OvPhysx: failed to write omni:resetXformStack: %s", e) + + logger.info("OvPhysx object bindings ready (%d prims)", n) + def create_render_data(self, spec: CameraRenderSpec) -> OVRTXRenderData: """Create OVRTX-specific RenderData with GPU buffers. @@ -435,10 +550,17 @@ def set_outputs(self, render_data: OVRTXRenderData, output_data: dict[str, torch ) def update_transforms(self) -> None: - """Sync physics objects to OVRTX.""" - if self._object_binding is None or self._object_newton_indices is None: + """Sync physics objects to OVRTX (Newton or OvPhysx, whichever is wired).""" + if self._object_binding is None: + return + + if self._object_ovphysx_binding is not None: + self._update_transforms_ovphysx() return + if self._object_newton_indices is not None: + self._update_transforms_newton() + def _update_transforms_newton(self) -> None: try: from isaaclab.sim import SimulationContext @@ -459,7 +581,31 @@ def update_transforms(self) -> None: device=DEVICE, ) except Exception as e: - logger.warning("Failed to update object transforms: %s", e) + logger.warning("Failed to update object transforms (Newton): %s", e) + + def _update_transforms_ovphysx(self) -> None: + """OvPhysx pose binding → CUDA mat44d → ovrtx omni:xform write. + + Reads the ovphysx ``RIGID_BODY_POSE`` tensor (CUDA-resident, [N, 7]) + into a pre-allocated GPU buffer, runs + :func:`sync_ovphysx_pose_to_mat44d_kernel` to construct the + ovrtx-format ``mat44d`` rows, then copies into the ovrtx attribute + mapping. No host roundtrip. + """ + try: + n = int(self._object_ovphysx_pose_buf.shape[0]) + self._object_ovphysx_binding.read(self._object_ovphysx_pose_buf) + wp.launch( + kernel=sync_ovphysx_pose_to_mat44d_kernel, + dim=n, + inputs=[self._object_ovphysx_pose_buf, self._object_ovphysx_mat4_buf], + device=DEVICE, + ) + with self._object_binding.map(device=Device.CUDA, device_id=0) as attr_mapping: + ovrtx_transforms = wp.from_dlpack(attr_mapping.tensor, dtype=wp.mat44d) + wp.copy(ovrtx_transforms, self._object_ovphysx_mat4_buf) + except Exception as e: + logger.warning("Failed to update object transforms (OvPhysx): %s", e) def update_camera( self, diff --git a/source/isaaclab_ov/isaaclab_ov/renderers/ovrtx_renderer_kernels.py b/source/isaaclab_ov/isaaclab_ov/renderers/ovrtx_renderer_kernels.py index c287f1257632..b87082b88419 100644 --- a/source/isaaclab_ov/isaaclab_ov/renderers/ovrtx_renderer_kernels.py +++ b/source/isaaclab_ov/isaaclab_ov/renderers/ovrtx_renderer_kernels.py @@ -328,3 +328,58 @@ def sync_newton_transforms_kernel( body_idx = newton_body_indices[i] transform = newton_body_q[body_idx] ovrtx_transforms[i] = wp.transpose(wp.mat44d(wp.math.transform_to_matrix(transform))) + + +@wp.kernel +def sync_ovphysx_pose_to_mat44d_kernel( + pose: wp.array2d(dtype=wp.float32), # type: ignore # shape (N, 7) — px,py,pz,qx,qy,qz,qw + ovrtx_transforms: wp.array(dtype=wp.mat44d), # type: ignore # shape (N,) +): + """Convert ovphysx ``RIGID_BODY_POSE`` rows ``[px,py,pz, qx,qy,qz,qw]`` into + OVRTX-format ``mat44d`` (column-major double matrix, translation in row 3). + + ovrtx writes ``omni:xform`` as a 4x4 double matrix; the renderer's prims + use ``omni:resetXformStack=True`` so the value is interpreted as the world + transform regardless of any parent xform stack. Output layout matches + :func:`create_camera_transforms_kernel` (column-major rotation, translation + in row 3 cols 0-2, ``1.0`` at ``[3,3]``). + """ + i = wp.tid() + px = pose[i, 0] + py = pose[i, 1] + pz = pose[i, 2] + qx = pose[i, 3] + qy = pose[i, 4] + qz = pose[i, 5] + qw = pose[i, 6] + + r00 = 1.0 - 2.0 * (qy * qy + qz * qz) + r01 = 2.0 * (qx * qy - qw * qz) + r02 = 2.0 * (qx * qz + qw * qy) + r10 = 2.0 * (qx * qy + qw * qz) + r11 = 1.0 - 2.0 * (qx * qx + qz * qz) + r12 = 2.0 * (qy * qz - qw * qx) + r20 = 2.0 * (qx * qz - qw * qy) + r21 = 2.0 * (qy * qz + qw * qx) + r22 = 1.0 - 2.0 * (qx * qx + qy * qy) + + _0 = wp.float64(0.0) + _1 = wp.float64(1.0) + ovrtx_transforms[i] = wp.mat44d( # type: ignore + wp.float64(r00), + wp.float64(r10), + wp.float64(r20), + _0, + wp.float64(r01), + wp.float64(r11), + wp.float64(r21), + _0, + wp.float64(r02), + wp.float64(r12), + wp.float64(r22), + _0, + wp.float64(float(px)), + wp.float64(float(py)), + wp.float64(float(pz)), + _1, + ) From 560ad29e2a185dfcde7ce933378cbb02693599bc Mon Sep 17 00:00:00 2001 From: Eric Arnold Date: Wed, 6 May 2026 16:55:05 -0500 Subject: [PATCH 4/4] docs(ovphysx_coexist): rewrite DESIGN as self-contained PR companion Rewrite source/isaaclab_ov/docs/ovphysx_coexist_DESIGN.md so that PR reviewers can read it without external references: - Drop ~/tmp/OMPE-88037-ovphysx-ovrtx-isaaclab.md and /tmp/ovstage-answer.md links (those are scratch files on the patch author's machine, not visible to reviewers). - Inline the answers to the three open questions to the kit/sdk and ovstage teams that informed the design (init order, ovrtx pin, pose-forwarding recipe). - Tag each item with the commit that landed it (a7ecbd0de18, f018386acda, 9039f0adb1a). - Note known caveats / future work per item, and the cross-cutting bits (Fabric IStageReaderWriter v0.16/v0.15 ABI warnings, the surviving os._exit(0) HACK in OvPhysxManager). Tracking: OMPE-88037 --- .../docs/ovphysx_coexist_DESIGN.md | 350 +++++++++--------- 1 file changed, 180 insertions(+), 170 deletions(-) diff --git a/source/isaaclab_ov/docs/ovphysx_coexist_DESIGN.md b/source/isaaclab_ov/docs/ovphysx_coexist_DESIGN.md index 6b52a1d810f5..406b2359898b 100644 --- a/source/isaaclab_ov/docs/ovphysx_coexist_DESIGN.md +++ b/source/isaaclab_ov/docs/ovphysx_coexist_DESIGN.md @@ -2,173 +2,183 @@ Tracking ticket: OMPE-88037 — `presets=ovphysx,ovrtx_renderer,rgb`. -This branch (`erarnold/ovphysx-ovrtx-coexist`, off `pbarejko/ovphysx-ovrtx`) -contains only the changes that are clearly correct without first answering -the open questions to the kit/sdk and ovstage agents listed in -`~/tmp/OMPE-88037-ovphysx-ovrtx-isaaclab.md`. The harder pieces are -captured here as design notes. - -## What this branch does land - -**Renderer-side teardown before physics-side teardown.** -`SimulationContext.clear_instance()` now calls -`self._render_context.cleanup()` before `physics_manager.close()`. The -new `RenderContext.cleanup()` method walks the registered renderer -backends and invokes `BaseRenderer.cleanup(None)` on each, then drops -its registration list. Per-camera `Camera.__del__` cleanup remains in -place as a safety net but becomes a no-op once the central cleanup has -run. - -Why: the kit/sdk and ovstage samples (see -`~/repos/kit/sdks/source/ovphysx_ovrtx_integration_example/README.md` -"Cleanup order matches C++ sample" and -`~/repos/ovstage/src/examples/render_physx.cpp:785`) are explicit that -ovrtx must release before ovphysx tears down their shared Carbonite. -Today IsaacLab releases physics first and lets cameras GC last; that -inversion crashes the second native teardown. The fix is small, has no -behavior change for non-ovrtx backends, and removes the sequencing -dependency on Python GC. - -## What this branch defers - -### A. Hoist OVRTXRenderer construction before OvPhysxManager._warmup_and_load - -**Problem.** Today `ovrtx.Renderer(config)` is constructed lazily inside -`OVRTXRenderer.initialize(spec)` (in `source/isaaclab_ov/.../ovrtx_renderer.py`), -which is called from `OVRTXRenderer.create_render_data()`, which is -called from `Camera._initialize_impl()` (in -`source/isaaclab/isaaclab/sensors/camera/camera.py:423`), which fires -on the `PHYSICS_READY` event dispatched at the end of -`OvPhysxManager.reset() → _warmup_and_load()`. By that point ovphysx has -already constructed `ovphysx.PhysX(device=...)` and claimed Carbonite. - -Per the working samples (`render_physx.cpp:200-214`, -`ovlibs_sample.py:210-213`) ovrtx must claim Carbonite first, before -ovphysx exists. Two paths: - -1. **Wheel side:** ovphysx 0.4.3 already flips the - `OVPHYSX_COEXIST_DIAGNOSTICS` default (commit `0917441ef9`) so that - ovphysx auto-detects another Carbonite owner and proceeds in - coexistence mode. **Open question for the kit/sdk agent (Q1 in the - triage doc):** does this default flip make ovphysx-first - work-as-coexistence-tenant, or only suppress the env-var nag? -2. **IsaacLab side:** add a `BaseRenderer.early_init(stage)` hook that - the renderer uses to construct its native object before - `prepare_stage`. `RenderContext` exposes `early_init_all(stage)` - that walks registered backends. Add a hook on the physics manager - (or `SimulationContext.reset`) that calls it before - `physics_manager.reset()` runs `_warmup_and_load`. Backends that - don't need it (Newton, Isaac RTX) get a default no-op. - -The catch with (2): Camera sensors only register their renderer cfg -during `_initialize_impl` (after PHYSICS_READY), so at the early hook -nothing is registered yet. We need to pre-walk the scene cfg for -`CameraCfg.renderer_cfg` entries during `InteractiveScene` build (or -`SimulationContext.__init__`) and register them up front. - -**Don't implement until Q1 is answered.** If 0.4.3 does relax the order -constraint, (1) is enough and IsaacLab can keep the lazy construction -path. - -### B. Wire OVRTXRenderer._setup_object_bindings to read OvPhysx pose bindings - -**Problem.** `OVRTXRenderer._setup_object_bindings` (line 307) and -`OVRTXRenderer.update_transforms` (line 409) call -`SimulationContext.instance().initialize_scene_data_provider().get_newton_model()` -and `.get_newton_state().body_q`. With OvPhysx these return `None`, so -object transforms never reach OVRTX (camera transforms still work via -the omni:xform binding written from `update_camera`). - -**Two ways forward:** - -1. **Add an `OvPhysxSceneDataProvider`** that mirrors the Newton - provider's interface (`get_newton_model()` returns a path/index list, - `get_newton_state()` returns a body-q tensor). The provider wraps - ovphysx tensor bindings — pose binding → body_q tensor in - `wp.transformf` layout. Keep the same renderer code by aliasing - "newton state" to "physics state" at the provider level. -2. **Add a `BaseSceneDataProvider.get_body_transforms()`** abstraction - so OVRTXRenderer no longer reaches for Newton-specific accessors, - then implement it on both Newton and a new OvPhysx provider. - -(2) is cleaner but a bigger renderer-side change. (1) is fewer LOC and -keeps the scope focused on unblocking the bug. **Open question for the -ovstage agent (Q3 in the triage doc):** if there's a third option -(IsaacLab pulls in `ovstage` as a third dependency and uses the -ovstage→ovrtx attribute-write path the working samples use), is that -the recommended pattern or a heavier one? - -### C. Migrate physx.clone() to attach_stage + stage.clone_subtree - -**Problem.** `OvPhysxManager._warmup_and_load` calls -`cls._physx.clone(source, targets, transforms)` (line 278) for -replicate-physics. ovphysx 0.4.3 removes the public `physx.clone()` API -(see ovphysx changelog under 0.4 "Breaking changes") in favor of -`physx.attach_stage(stage) → stage.clone_subtree(source, [targets])` -followed by per-target `xformOp:translate` writes inside the same -`begin_frame`/`end_frame`. The replicate plugin is unchanged; only the -entry point moved. - -**Migration sketch:** - -```python -# Before (0.4.2 and older) -op_idx = cls._physx.clone(source, targets, transforms) -cls._physx.wait_op(op_idx) - -# After (0.4.3+) -import ovstage # new dependency -stage = ovstage.Stage() # or get from sim context if hoisted -ovstage_op = stage.begin_frame() -stage.clone_subtree(source, targets) -for target_path, (x, y, z) in zip(targets, parent_positions): - stage.write_attribute(ovstage_op, [target_path], "xformOp:translate", pack_vec3(x, y, z)) -stage.end_frame(ovstage_op) -# OvstageBridge ingests this on next physx.step() -``` - -**Why deferred:** - -1. Pulling `ovstage` into IsaacLab as a third dependency is a bigger - ask than this branch should swallow without sign-off — it adds - another wheel pin, plugin path, and Carbonite tenant. -2. The migration is wheel-version-dependent. The cleanest implementation - is a feature gate: try `physx.clone()` first, fall back to the - ovstage path if `AttributeError`. That keeps the 0.4.2 path working - while landing the 0.4.3 path. But the gate logic only makes sense - once we've actually exercised the 0.4.3 API end-to-end, which needs - a built 0.4.3 wheel installed in IsaacLab. - -**Action when 0.4.3 wheel is in hand:** add the version gate and -ovstage import, run the cartpole repro to confirm clones land. - -### D. OVRTX renderer init order with respect to USD export - -**Problem (potential — needs verification).** `OvPhysxManager._warmup_and_load` -exports the USD stage to a temp file before constructing ovphysx; that -file is what ovphysx ingests. `OVRTXRenderer.prepare_stage` separately -exports the stage to `/tmp/stage_before_ovrtx.usda` with cameras -injected. Two distinct files. The samples have both libs `open_usd` -the same on-disk file. - -If the open question Q1 (init order in 0.4.3) lands as "ovrtx must -still be first", the USD file ovrtx opens has not yet been augmented -with the OvPhysx-required schemas (which `_configure_physx_scene_prim` -writes onto the in-memory stage right before export). One USD file -that both backends open is the cleanest fix; needs a small refactor of -the export path so OvPhysxManager and OVRTXRenderer share the export. - -**Defer until Q1 + the actual repro on 0.4.3 says whether this matters.** - -## Open questions blocking the deferred items - -(Same numbering as `~/tmp/OMPE-88037-ovphysx-ovrtx-isaaclab.md` §3-§4.) - -- **Q1 (kit/sdk):** Does ovphysx 0.4.3 *enforce* coexistence-mode - regardless of init order, or only suppress the env-var nag? Decides - whether item A is needed at all. -- **Q2 (kit/sdk):** Validated ovrtx pip build for ovphysx 0.4.x. - Decides item D / pin update. -- **Q3 (ovstage):** Slimmest "ovphysx pose tensor → ovrtx CUDA tensor" - recipe. Decides item B's choice between an OvPhysx scene-data - provider vs pulling ovstage in. +Branch: `erarnold/ovphysx-ovrtx-coexist`, off `pbarejko/ovphysx-ovrtx`. + +Validated against ovphysx 0.4.3 (`dev/erarnold/ovstage-attach`, +commit `c728f7e740`) and ovrtx 0.3.0.307110 on Linux x86_64. The +cartpole repro +(`Isaac-Cartpole-Camera-Presets-Direct-v0`, 32 envs, 100×100 RGB tiled +camera, 2 epochs) runs end-to-end at ~670 fps in 16-17 seconds. + +This doc tracks the four pieces the integration needed and links the +specific commits that landed each. Three questions to the kit/sdk and +ovstage teams informed the design: + +1. **kit/sdk:** "does ovphysx 0.4.3's coexistence default flip relax + the init-order requirement, or only suppress the env-var nag?" + → **Only suppresses the nag.** ovrtx is the side that can't go + second; it does no checks at construction time. Hence item 3 below. +2. **kit/sdk:** "validated ovrtx pip build for ovphysx 0.4.x?" + → `0.3.0.307110` (kit demos) or `0.3.0.304843` (ovstage demo) — + either works against the same shared deps. +3. **ovstage:** "slim ovphysx-pose → ovrtx-binding recipe without + pulling ovstage in?" → "Don't pull ovstage in for the steady-state + pose-forward path." Recipe: ovphysx pose binding → CUDA quat→mat4 + kernel → ovrtx `write_attribute(omni:xform, XFORM_MAT4x4)`. Used + unmodified for item 4 below. + +## 1. Renderer teardown before physics teardown — landed + +**What.** `SimulationContext.clear_instance()` calls +`self._render_context.cleanup()` before `physics_manager.close()`. New +`RenderContext.cleanup()` walks registered backends, invokes +`BaseRenderer.cleanup(None)` on each, drops the registration list. +Per-camera `Camera.__del__` cleanup stays as a safety net but is a +no-op after central cleanup. + +**Why.** ovrtx and ovphysx share a Carbonite framework; ovrtx-side +native objects must release before ovphysx tears down its Carbonite, +otherwise the second teardown crashes on freed plugin state. IsaacLab +released physics first and let cameras GC last, which inverted the +required order. + +**Commit:** `a7ecbd0de18` "sim: tear down camera renderers before physics manager close". + +## 2. ovphysx 0.4.3 clone-API removal — landed + +**What.** ovphysx 0.4.3 removed the public `physx.clone()` (replaced +by `attach_stage` + `ovstage_clone_subtree`). Until that ovstage +bridge is wired into IsaacLab, `InteractiveScene` routes ovphysx +through USD-side cloning: `clone_usd=True`, `clone_physics=False`, +`physics_clone_fn=None`. Every env's physics prims live in the USD +that ovphysx ingests; no physics-runtime clone is needed. +`OvPhysxManager._warmup_and_load` now raises a clear error if a +pre-0.4.3 wheel and the legacy `clone_physics=True` path coincide, +rather than letting `AttributeError` bubble through `Camera` +initialization. + +**Why.** Previously the ovphysx branch routed `clone_usd=False` so +that `physx.clone()` could populate env_1..N in the physics runtime; +the 0.4.3 wheel removed that entry point. + +**Commit:** `f018386acda` "sim: ovphysx 0.4.3 + ovrtx coexistence — clone migration + renderer init hoist". + +**Future work.** Pulling `ovstage` into IsaacLab as a third dependency ++ wiring `physx.attach_stage(stage)` + `stage.clone_subtree(...)` +restores physics-runtime cloning. Per the ovstage team's guidance, +this is *not* recommended for the steady-state pose-forward path +(item 4 below already does that without ovstage); it would only matter +if someone hits a perf or memory limit on USD-side cloning at large +`num_envs`. + +## 3. OVRTXRenderer init order — landed + +**What.** New `BaseRenderer.early_init()` hook (default no-op). +`OVRTXRenderer` overrides it to construct the C++ `Renderer(config)` +up front. `OVRTXRenderer.initialize(spec)` skips renderer construction +when `early_init` already ran. New `RenderContext.early_init_all()` +walks registered backends. New `InteractiveScene.early_init_renderers()` +pre-walks `Camera` sensors to register their `renderer_cfg` with the +context so `early_init` has something to act on. `DirectRLEnv._init_sim` +calls `scene.early_init_renderers()` between `_setup_scene` (which is +where the cartpole task adds its tiled camera) and `sim.reset()`. + +**Why.** ovrtx does no coexistence checks at construction time and +SIGSEGVs inside `createRTXRenderer` if ovphysx (or any other Carbonite +owner) has already loaded its plugins. IsaacLab constructed OVRTX +lazily in `Camera._initialize_impl` (which fires on PHYSICS_READY, +*after* `OvPhysxManager._warmup_and_load`), inverting the required +order. kit/sdk Q1 confirmed: even with ovphysx 0.4.3's coexistence +default flip, ovrtx is the side that can't go second; the wheel-side +flip only suppresses the env-var nag, it doesn't make ovrtx tolerant +of being constructed second. + +**Commit:** `f018386acda` (same as item 2; the renderer hoist and the +clone migration share the commit). + +**Caveat.** The `early_init_renderers()` hook is currently called +explicitly by `DirectRLEnv._init_sim`. Other env types (e.g. +manager-based envs) that build cameras after `InteractiveScene.__init__` +need to call it themselves between sensor construction and +`sim.reset()`, otherwise the lazy renderer construction comes back and +the SIGSEGV returns under ovphysx. Worth a follow-up that hooks it +from a more central place (likely `SimulationContext.reset` walking the +scene cfg, but that needs scene-cfg discovery from the sim context, +which doesn't exist yet). + +## 4. OvPhysx pose binding → OVRTX object transforms — landed + +**What.** `OVRTXRenderer._setup_object_bindings` now falls through to +an OvPhysx path when the Newton scene-data provider returns no model. +The fallback walks the USD stage for `PhysicsRigidBodyAPI` prims under +`/World/envs/env_*` (excluding the camera and any ground plane), +creates a single ovphysx `RIGID_BODY_POSE` tensor binding for them, +and binds the same flat list to ovrtx `omni:xform` +(`PrimMode.EXISTING_ONLY`, `omni:resetXformStack=True` so writes are +treated as world transforms — same pattern as the camera binding). + +`update_transforms` now branches on which fallback fired. The OvPhysx +path: read the ovphysx pose tensor on GPU into a pre-allocated +`[N, 7]` `wp.array`, run `sync_ovphysx_pose_to_mat44d_kernel` to +construct ovrtx-format `mat44d` rows (column-major rotation, +translation in row 3 — matches `create_camera_transforms_kernel`), +then `wp.copy` into the ovrtx attribute mapping. Zero host roundtrip, +single ovrtx write per frame. + +**Why.** Without it, the renderer ran clean on ovphysx but every +object in the rendered frame was static — the renderer had no source +of per-frame pose updates because `get_newton_model()` returned `None` +under ovphysx and the Newton path was the only one wired. + +**Recipe credit.** kit/sdk team — "don't pull ovstage in for the +steady-state pose-forward path" recommendation, the `omni:xform` +LOCAL/WORLD note, and the `PrimMode` caveat. We sidestepped the +parent-strip math by using `omni:resetXformStack=True` (matches what +the camera binding already did; the kit/sdk sample uses the same +trick). `PrimMode.EXISTING_ONLY` worked here; the kit/sdk note about +`EXISTING_ONLY` skipping the first write silently when the bucket has +no `omni:xform` column didn't bite for us because the cloned env +subtrees inherit the column from env_0. + +**Commit:** `9039f0adb1a` "ovrtx: forward OvPhysx rigid-body poses to OVRTX object bindings". + +**Caveats / future work.** + +- Articulation-link poses for cartpole are read via `RIGID_BODY_POSE` + rather than `ARTICULATION_LINK_POSE`. ovphysx 0.4.3 accepts this for + the cartpole layout (32 envs × 3 bodies = 96 prims bound), but if a + task hits a layout where `RIGID_BODY_POSE` rejects an articulation + link, the fallback should switch to a per-articulation + `ARTICULATION_LINK_POSE` binding, walk the USD for `PhysicsArticulationRootAPI` + prims to get the articulation list, and use `binding.body_names` to + map back to USD prim paths. Not implemented because it isn't needed + yet. +- The visual smoke is by absence of update warnings + non-trivial + RGB output, not by frame-by-frame inspection. A short `--video` + capture across N steps would close that gap. +- Fixed-base assets (e.g. ground plane) are filtered by name. A + task that names its ground plane something other than + `GroundPlane`/`ground_plane` would slip through and try to bind a + static body, which ovphysx may reject. Move the filter to a + schema check (`PhysicsRigidBodyAPI.GetRigidBodyEnabledAttr`) if + this becomes an issue. + +## Cross-cutting notes + +**ovrtx wheel pin.** kit/sdk's CMake comment about needing +"Fabric IStageReaderWriter v0.16+" remains relevant: the run prints +repeated `Warning: Possible version incompatibility. Attempting to +load omni::fabric::IStageReaderWriter with version v0.16 against v0.15`. +Non-fatal, but the kit/sdk team probably wants to know which dep is +still on v0.15 in the 0.4.3 + 0.3.0.307110 combo. + +**`os._exit(0)` shutdown hack.** `OvPhysxManager._construct_physx` +still installs an `atexit` handler that calls `os._exit(0)` to +sidestep the dual-Carbonite static-destructor race. Keep until +ovphysx ships a namespace-isolated Carbonite (the existing HACK +comment in `ovphysx_manager.py` covers the rationale). + +**Kit visualizer suspension.** Not touched in this branch. The +`SimulationContext` already has the kit-visualizer-aware paths +gated on `has_kit()`, so kitless ovphysx + ovrtx runs go through +the right branches without changes.