From fbb592b5b4a8f790dc80c236ec74625e724b7b61 Mon Sep 17 00:00:00 2001 From: YuTeh Shen Date: Thu, 7 May 2026 10:15:36 +0800 Subject: [PATCH] Make NVTX Python hook reinstallable. --- .agents/skills/nvtx-python/SKILL.md | 27 ++++++ .../nvtx-python/scripts/sitecustomize.py | 92 ++++++++++++------- 2 files changed, 84 insertions(+), 35 deletions(-) diff --git a/.agents/skills/nvtx-python/SKILL.md b/.agents/skills/nvtx-python/SKILL.md index e7bd275..d209a13 100644 --- a/.agents/skills/nvtx-python/SKILL.md +++ b/.agents/skills/nvtx-python/SKILL.md @@ -56,6 +56,33 @@ uv run python scripts/reinforcement_learning/skrl/train.py \ - Use `NVTX_PROFILE_INCLUDE` to limit scope to modules of interest - To disable: unset `NVTX_PROFILE_PYTHON` or remove the skill's `scripts/` directory from `PYTHONPATH` +## Caveat: Isaac Lab / Kit-based hosts clobber `sys.setprofile` + +Isaac Lab 3.0+ standalone *does* boot Kit/Carb under the hood via `AppLauncher`. Just running `from isaaclab.app import AppLauncher` (or any import that transitively pulls `carb` / `isaacsim`) registers Carb's own Python profile callback during module load, **silently overwriting the hook this skill installed at interpreter startup**. The resulting `.nsys-rep` then shows Python NVTX only for the brief window between interpreter startup and the first Carb-pulling import — typically just `argparse`, then nothing. + +**Fix:** call `sitecustomize.install()` after the Carb-pulling import to re-arm (the snippet below does so once `AppLauncher` has been constructed). `install()` is idempotent, re-reads `NVTX_PROFILE_INCLUDE` / `NVTX_PROFILE_EXCLUDE` on each call, and re-installs the callback through both `sys.setprofile` and `threading.setprofile`. 
+ +```python +import sys +print(f"[NVTX] before AppLauncher: {sys.getprofile()!r}", flush=True) # likely None +app_launcher = AppLauncher(args_cli) +simulation_app = app_launcher.app +print(f"[NVTX] after AppLauncher: {sys.getprofile()!r}", flush=True) # still None + +import sitecustomize +sitecustomize.install() +print(f"[NVTX] re-installed: {sys.getprofile()!r}", flush=True) # callback restored +``` + +The probes are how you confirm the issue and the fix in one run. Drop them once verified. + +### Limitations even after re-install + +- `sys.setprofile` is **per-thread**. `threading.setprofile` only catches threads started via Python's `threading` module *after* the call — Python threads that are already running, and Kit / PhysX / Hydra / Fabric / TBB native threads, stay invisible. Most of `env.step()` runs in C++ on those native threads, so even with the hook re-armed you'll see relatively few Python NVTX zones during the step loop. +- Tracing every module floods the trace and can swamp nsys's NVTX backend; **always set `NVTX_PROFILE_INCLUDE`** to your own packages (e.g. `simulation,isaaclab.envs,isaaclab_tasks`) for Isaac Lab workloads. +- For native-side coverage of Kit/PhysX/Hydra, use `CARB_PROFILING_PYTHON=1` + Carb's NVTX backend (see `profiling` skill) instead of, or alongside, this skill. +- For a clean, low-overhead view of specific hot paths, drop the auto-tracer entirely and decorate the functions you care about with `@nvtx.annotate` (`env.step`, IK solve, observation manager, camera readout). + ## Alternative: nsys Built-in Python Tracing If you know exactly which modules/functions to trace, nsys has a built-in option: diff --git a/.agents/skills/nvtx-python/scripts/sitecustomize.py b/.agents/skills/nvtx-python/scripts/sitecustomize.py index 9d2d1a8..02237cb 100644 --- a/.agents/skills/nvtx-python/scripts/sitecustomize.py +++ b/.agents/skills/nvtx-python/scripts/sitecustomize.py @@ -2,51 +2,73 @@ Enable with NVTX_PROFILE_PYTHON=1 and put this directory on PYTHONPATH. 
This avoids modifying an environment's site-packages/sitecustomize.py. + +If a host (e.g. Omniverse Kit / Carb) replaces ``sys.setprofile`` after import +time, call :func:`install` again to re-arm the NVTX hook on the current thread. """ import os +import sys +import threading -if os.environ.get("NVTX_PROFILE_PYTHON") == "1": - import sys - import threading +_module_cache = {} +_pushed_frames = set() +_include = () +_exclude = () + + +def _module_enabled(module_name): + cached = _module_cache.get(module_name) + if cached is not None: + return cached + if any(module_name.startswith(prefix) for prefix in _exclude): + enabled = False + else: + enabled = not _include or any(module_name.startswith(prefix) for prefix in _include) + _module_cache[module_name] = enabled + return enabled + + +def install(): + """(Re)install the NVTX profile callback on the current thread. + + Returns the installed callback on success, or ``None`` if ``nvtx`` isn't + importable. Re-reads ``NVTX_PROFILE_INCLUDE`` / ``NVTX_PROFILE_EXCLUDE`` on + each call, so the scope can be tightened between calls. 
+ """ + global _include, _exclude try: import nvtx + except Exception as exc: + print(f"[NVTX] install failed: {exc}", file=sys.stderr) + return None + + _include = tuple(part.strip() for part in os.environ.get("NVTX_PROFILE_INCLUDE", "").split(",") if part.strip()) + _exclude = tuple(part.strip() for part in os.environ.get("NVTX_PROFILE_EXCLUDE", "importlib").split(",") if part.strip()) + _module_cache.clear() + + def _profile_callback(frame, event, arg): + frame_id = id(frame) + if event == "call": + module_name = frame.f_globals.get("__name__", "") + if _module_enabled(module_name): + nvtx.push_range(f"{module_name}.{frame.f_code.co_name}") + _pushed_frames.add(frame_id) + elif event == "return" and frame_id in _pushed_frames: + nvtx.pop_range() + _pushed_frames.remove(frame_id) + return _profile_callback + + sys.setprofile(_profile_callback) + threading.setprofile(_profile_callback) + return _profile_callback - _include = tuple(part.strip() for part in os.environ.get("NVTX_PROFILE_INCLUDE", "").split(",") if part.strip()) - _exclude = tuple(part.strip() for part in os.environ.get("NVTX_PROFILE_EXCLUDE", "importlib").split(",") if part.strip()) - _module_cache = {} - _pushed_frames = set() - - def _module_enabled(module_name): - cached = _module_cache.get(module_name) - if cached is not None: - return cached - if any(module_name.startswith(prefix) for prefix in _exclude): - enabled = False - else: - enabled = not _include or any(module_name.startswith(prefix) for prefix in _include) - _module_cache[module_name] = enabled - return enabled - - def _profile_callback(frame, event, arg): - frame_id = id(frame) - if event == "call": - module_name = frame.f_globals.get("__name__", "") - if _module_enabled(module_name): - nvtx.push_range(f"{module_name}.{frame.f_code.co_name}") - _pushed_frames.add(frame_id) - elif event == "return" and frame_id in _pushed_frames: - nvtx.pop_range() - _pushed_frames.remove(frame_id) - return _profile_callback - - 
sys.setprofile(_profile_callback) - threading.setprofile(_profile_callback) + +if os.environ.get("NVTX_PROFILE_PYTHON") == "1": + if install() is not None: print( f"[NVTX] Python profiling enabled (include={_include or 'all'}, exclude={_exclude})", file=sys.stderr, ) - except Exception as exc: - print(f"[NVTX] Failed: {exc}", file=sys.stderr)