From ca68f7db7cc1ca6cab9fa5f74737b7dda5efa986 Mon Sep 17 00:00:00 2001 From: Michael Haselton Date: Fri, 29 May 2026 15:43:20 -0400 Subject: [PATCH 1/3] fix(cloudxr): add opt-in to avoid autoTSSkey mapping abort Add NV_CXR_RUNTIME_JOIN_MAIN_THREAD to join the runtime on the main thread instead of a worker thread, avoiding a "Couldn't create autoTSSkey mapping" abort seen on some platforms. Default keeps the original worker-thread behavior. --- src/core/cloudxr/python/runtime.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/core/cloudxr/python/runtime.py b/src/core/cloudxr/python/runtime.py index bb050ea29..a085ec252 100644 --- a/src/core/cloudxr/python/runtime.py +++ b/src/core/cloudxr/python/runtime.py @@ -266,16 +266,23 @@ def stop(sig: int, frame: object) -> None: state["service_created"] = True lib.nv_cxr_service_start(svc) - # Run the blocking join() in a worker thread so the main thread stays in Python - # and can run the signal handler. Otherwise Ctrl+C is not processed while we're - # inside the native nv_cxr_service_join() call. - def join_then_destroy() -> None: + join_on_main = os.environ.get("NV_CXR_RUNTIME_JOIN_MAIN_THREAD", "").strip().lower() + if join_on_main in ("1", "true", "yes", "on"): + # Opt-in: join on the main thread to avoid a "Couldn't create autoTSSkey + # mapping" abort seen on some platforms. lib.nv_cxr_service_join(svc) lib.nv_cxr_service_destroy(svc) - - worker = threading.Thread(target=join_then_destroy, daemon=False) - worker.start() - worker.join() + else: + # Run the blocking join() in a worker thread so the main thread stays in Python + # and can run the signal handler. Otherwise Ctrl+C is not processed while we're + # inside the native nv_cxr_service_join() call. 
+ def join_then_destroy() -> None: + lib.nv_cxr_service_join(svc) + lib.nv_cxr_service_destroy(svc) + + worker = threading.Thread(target=join_then_destroy, daemon=False) + worker.start() + worker.join() if state["interrupted"]: raise KeyboardInterrupt() From 58634155d0c3570ea6d9ffd5aa446223cbc40928 Mon Sep 17 00:00:00 2001 From: Michael Haselton Date: Fri, 5 Jun 2026 03:21:31 +0800 Subject: [PATCH 2/3] fix(cloudxr): tear down runtime on SIGTERM/SIGINT to avoid orphan The runtime is spawned with start_new_session=True, so it isn't killed with the embedding process, and Python only runs atexit handlers on a normal interpreter exit, not when the process is killed by a signal Python does not handle (SIGTERM by default). A signalled shutdown (e.g. pkill of the embedding streamer) therefore orphaned the runtime, which kept holding the streaming port and made the next start fail with ERROR_STREAMSDK_PORT_UNAVAILABLE. Install SIGTERM/SIGINT handlers in CloudXRLauncher that run stop() (which already killpg's the runtime's process group) and then chain to the previously installed handler so embedding apps keep their own shutdown behavior. No-op off the main thread, where signal handlers can't be set. --- src/core/cloudxr/python/launcher.py | 54 +++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/src/core/cloudxr/python/launcher.py b/src/core/cloudxr/python/launcher.py index 54928691a..eb42a7ba4 100644 --- a/src/core/cloudxr/python/launcher.py +++ b/src/core/cloudxr/python/launcher.py @@ -122,6 +122,7 @@ def __init__( self._wss_stop_future: asyncio.Future | None = None self._wss_log_path: Path | None = None self._atexit_registered = False + self._prev_signal_handlers: dict[int, object] = {} env_cfg = EnvConfig.from_args(self._install_dir, self._env_config) try: @@ -155,6 +156,14 @@ def __init__( atexit.register(self.stop) self._atexit_registered = True + # atexit handlers do NOT run on SIGTERM/SIGINT, and the runtime is in its + # own session (start_new_session=True) so it isn't killed with this + # process. 
Without this, a signalled shutdown (e.g. `pkill` of the + # embedding streamer) orphans the runtime, which keeps holding the + # streaming port and makes the next start fail with PORT_UNAVAILABLE. + # Install handlers that run stop() then chain to the prior disposition. + self._install_signal_handlers() + wss_ts = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H%M%SZ") wss_log_path = logs_dir_path / f"wss.{wss_ts}.log" self._wss_log_path = wss_log_path @@ -183,6 +192,7 @@ def stop(self) -> None: The process handle is retained so callers can retry or inspect the still-running process. """ + self._restore_signal_handlers() self._stop_wss_proxy() if self._runtime_proc is not None: @@ -231,6 +241,50 @@ def wss_log_path(self) -> Path | None: # Private helpers # ------------------------------------------------------------------ + def _install_signal_handlers(self) -> None: + """Tear the runtime down on SIGTERM/SIGINT. + + Signals don't trigger ``atexit``, and the runtime runs in its own + session, so a signalled shutdown would otherwise orphan it. Each + handler runs :meth:`stop` then chains to the previously-installed + disposition, so embedding apps keep their own shutdown behaviour. + No-op off the main thread (``signal.signal`` only works there). + """ + if threading.current_thread() is not threading.main_thread(): + return + + def _make_handler(prev): + def _handler(signum, frame): + try: + self.stop() + finally: + if callable(prev): + prev(signum, frame) + else: + # SIG_DFL / SIG_IGN: restore it and re-raise so the + # default (terminate) or ignore behaviour applies. 
+ signal.signal(signum, prev) + if prev == signal.SIG_DFL: + os.kill(os.getpid(), signum) + return _handler + + for sig in (signal.SIGTERM, signal.SIGINT): + try: + prev = signal.getsignal(sig) + signal.signal(sig, _make_handler(prev)) + except (ValueError, OSError): + continue + self._prev_signal_handlers[sig] = prev + + def _restore_signal_handlers(self) -> None: + """Restore signal handlers saved by :meth:`_install_signal_handlers`.""" + while self._prev_signal_handlers: + sig, prev = self._prev_signal_handlers.popitem() + try: + signal.signal(sig, prev) + except (ValueError, OSError): + pass + @staticmethod def _cleanup_stale_runtime(env_cfg: EnvConfig) -> None: """Remove stale sentinel files from a previous runtime that wasn't cleaned up. From dc90bfb9b853cd0d5a949a268f852b82a58799d2 Mon Sep 17 00:00:00 2001 From: Michael Haselton Date: Sat, 6 Jun 2026 06:37:36 +0800 Subject: [PATCH 3/3] camera_viz: add --mode override and fix setup pipefail bugs Add a --mode {window,xr} flag to camera_viz.py that overrides display.mode from the YAML, and forward extra args through camera_viz.sh run, so one config drives both a local window and a headless XR (CloudXR) run without editing the file. Fix two `set -o pipefail` bugs in _install_deps.sh that blocked setup: - the Jetson cuda-nvrtc probe used `find /usr ... | grep -q`, whose pipeline fails when find hits an unreadable /usr subdir, falsely flagging cuda-nvrtc as missing even when libnvrtc is present; - the isaacteleop dist-info probe `ls ... | head -1` aborted the first full install on a fresh venv (glob matches nothing) before it ran. 
--- examples/camera_viz/camera_viz.py | 13 ++++++++++--- examples/camera_viz/camera_viz.sh | 6 +++--- examples/camera_viz/scripts/_install_deps.sh | 6 ++++-- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/examples/camera_viz/camera_viz.py b/examples/camera_viz/camera_viz.py index 5244d8c3d..31be2139c 100755 --- a/examples/camera_viz/camera_viz.py +++ b/examples/camera_viz/camera_viz.py @@ -179,9 +179,10 @@ def _build_rtp_entries(cfg: dict, is_xr: bool) -> List[SourceEntry]: return entries -def _make_session(cfg: dict) -> viz.VizSession: +def _make_session(cfg: dict, mode_override: Optional[str] = None) -> viz.VizSession: display = cfg.get("display", {}) - mode_str = display.get("mode", "window").lower() + # --mode overrides display.mode when given. + mode_str = (mode_override or display.get("mode", "window")).lower() session_cfg = viz.VizSessionConfig() if mode_str == "window": session_cfg.mode = viz.DisplayMode.kWindow @@ -206,6 +207,12 @@ def _make_session(cfg: dict) -> viz.VizSession: def main(argv: Optional[list[str]] = None) -> int: parser = argparse.ArgumentParser(description="Televiz camera_viz — display side") parser.add_argument("config", type=Path, help="YAML config file") + parser.add_argument( + "--mode", + choices=("window", "xr"), + default=None, + help="Override display.mode from the config (default: use the config's value).", + ) args = parser.parse_args(argv) with open(args.config) as f: @@ -223,7 +230,7 @@ def main(argv: Optional[list[str]] = None) -> int: if source_mode not in ("local", "rtp"): raise ValueError(f"camera_viz: source must be local|rtp, got {source_mode!r}") - session = _make_session(cfg) + session = _make_session(cfg, mode_override=args.mode) is_xr = session.is_xr_mode() if source_mode == "local": diff --git a/examples/camera_viz/camera_viz.sh b/examples/camera_viz/camera_viz.sh index 8dadb1feb..5700f9670 100755 --- a/examples/camera_viz/camera_viz.sh +++ b/examples/camera_viz/camera_viz.sh @@ -187,13 +187,13 @@ PY 
} # ────────────────────────────────────────────────────────────────────── -# run (run the viewer with the YAML as-is) +# run (the viewer; args after CONFIG forward to camera_viz.py, e.g. --mode xr) # ────────────────────────────────────────────────────────────────────── cmd_run() { _require_local_config run "${1:-}" log_step "Starting camera_viz — Ctrl-C to exit" - "$LOCAL_VENV/bin/python" "$HERE/camera_viz.py" "$1" + "$LOCAL_VENV/bin/python" "$HERE/camera_viz.py" "$@" } # ────────────────────────────────────────────────────────────────────── @@ -374,7 +374,7 @@ cmd_service_restart() { # ────────────────────────────────────────────────────────────────────── show_help() { - cat </dev/null | grep -q .; then + # capture, not `find | grep -q`: find's non-zero on an unreadable /usr dir trips pipefail. + if $JETSON && [[ -z "$(find /usr -name 'libnvrtc.so*' -print -quit 2>/dev/null)" ]]; then pkgs+=("cuda-nvrtc-${cuda_major}-${cuda_minor}") fi @@ -330,7 +331,8 @@ $WITH_RTP && PKGS+=("pybind11>=2.11" "PyGObject>=3.42,<3.52") EXTRA_UV=() if [[ "$MODE" == full && -f "$WHEEL" ]]; then wheel_mtime=$(stat -c %Y "$WHEEL" 2>/dev/null || echo 0) - installed_dist=$(ls -d "$VENV_DIR"/lib/python*/site-packages/isaacteleop-*.dist-info 2>/dev/null | head -1) + # Empty on a fresh venv; `|| true` keeps the no-match from aborting under pipefail+set -e. + installed_dist=$(ls -d "$VENV_DIR"/lib/python*/site-packages/isaacteleop-*.dist-info 2>/dev/null | head -1 || true) if [[ -n "$installed_dist" ]]; then installed_mtime=$(stat -c %Y "$installed_dist" 2>/dev/null || echo 0) if (( wheel_mtime > installed_mtime )); then