NVIDIA · fatimaanes · May 6, 2026 · May 3, 2026 · May 3, 2026 · May 4, 2026
diff --git a/.agents/skills/install-profilers/SKILL.md b/.agents/skills/install-profilers/SKILL.md
@@ -164,6 +164,18 @@ Using the bundled binary guarantees version compatibility with the Tracy protoco
 
 ### Option B: Build Tracy 0.11.1 from source (recommended fallback)
 
+Before building, check Kit's `all-deps.packman.xml` for the `carb_sdk_plugins`
+version so the capture protocol matches the profiled app:
+
+| `carb_sdk_plugins` version | Tracy version |
+|---|---|
+| `< 178` | `0.9.1` legacy protocol |
+| `>= 178` | `0.11.1+nv1` current protocol |
+
+The commands below build Tracy `v0.11.1`, which matches current Kit builds
+using `carb_sdk_plugins >= 178`. For older Kit builds, check out the matching
+legacy Tracy tag (`0.9.1` in the table above) instead.
+
 ```bash
 sudo apt-get install -y build-essential cmake git libcapstone-dev
 

diff --git a/.agents/skills/nsys-analyze/SKILL.md b/.agents/skills/nsys-analyze/SKILL.md
@@ -155,14 +155,16 @@ Do not compare a single frame unless the issue is known to occur in one frame an
 csvexport profile.tracy > zones.csv
 ```
 
-**CSV columns:** `name,src_file,src_line,total_ns,total_perc,counts,mean_ns,min_ns,max_ns,std_ns`
+Inspect the header before scripting against `csvexport` output, because column names differ between Tracy versions and builds:
+- Guide examples use `name`, `mean`, `count`, and `total_time`.
+- Other builds emit nanosecond-specific names such as `total_ns`, `counts`, and `mean_ns`.
+
+Normalize the column names in scripts instead of assuming one schema.
 
 Data is **pre-aggregated** — one row per unique zone, covering the entire trace (no phase separation).
 
 ```bash
-# Top zones, noise filtered
-tail -n+2 zones.csv | grep -v -E '^(Carbonite|carb::|Thread waiting|Executing task|Running fiber)' \
-  | sort -t',' -k4 -rn | head -30
+head -1 zones.csv
 ```
 
 > **Tracy CSV limitation:** No per-invocation timestamps — only aggregates. For phase-aware analysis, prefer the nsys SQLite path.
@@ -192,14 +194,24 @@ Compare with Python:
 ```python
 import csv
 
+def number(row, *names):  # return the first non-empty cell among the `names` columns as a float
+    for name in names:  # candidates are tried in the order given
+        value = row.get(name)
+        if value not in (None, ""):  # skip missing columns and empty cells
+            return float(value)
+    return 0.0  # fallback when none of the candidate columns has a value
+
 def load_zones(path):
     zones = {}
     with open(path) as f:
         for row in csv.DictReader(f):
-            zones[row['name']] = {
-                'total_ms': int(row['total_ns']) / 1e6,
-                'count': int(row['counts']),
-                'mean_ms': int(row['mean_ns']) / 1e6,
+            name = row.get('name') or row.get('zone_name')  # zone label: prefer `name`, fall back to `zone_name`
+            if not name:  # rows without a zone label are skipped
+                continue
+            zones[name] = {  # a later row with the same label overwrites the earlier entry
+                'total_ms': number(row, 'total_ns', 'total_time') / 1e6,  # ns -> ms for total_ns; assumes total_time is also nanoseconds — TODO confirm
+                'count': int(number(row, 'counts', 'count')),  # parsed as float first, then truncated to int
+                'mean_ms': number(row, 'mean_ns', 'mean') / 1e6,  # same scaling; assumes mean shares mean_ns units — TODO confirm
             }
     return zones
 

diff --git a/.agents/skills/nvtx-python/SKILL.md b/.agents/skills/nvtx-python/SKILL.md
@@ -16,8 +16,9 @@ Do not write into an environment's existing `sitecustomize.py`. Load the bundled
 uv pip install nvtx
 
 # Resolve this skill's directory, then put its scripts/ directory on PYTHONPATH.
-# Replace the path if the skills directory is installed somewhere else.
-NVTX_SKILL_DIR=/Users/abaillet/src/omniperf/.agents/skills/nvtx-python
+# When run from the repository root, the default below points at the bundled helper; otherwise export NVTX_SKILL_DIR first.
+NVTX_SKILL_DIR="${NVTX_SKILL_DIR:-$PWD/.agents/skills/nvtx-python}"
+test -f "$NVTX_SKILL_DIR/scripts/sitecustomize.py"
 export PYTHONPATH="$NVTX_SKILL_DIR/scripts:${PYTHONPATH:-}"
 ```
 
@@ -33,15 +34,15 @@ export PYTHONPATH="$NVTX_SKILL_DIR/scripts:${PYTHONPATH:-}"
 
 ```bash
 # Capture all Python modules
-NVTX_SKILL_DIR=/Users/abaillet/src/omniperf/.agents/skills/nvtx-python
+NVTX_SKILL_DIR="${NVTX_SKILL_DIR:-$PWD/.agents/skills/nvtx-python}"
 PYTHONPATH="$NVTX_SKILL_DIR/scripts:${PYTHONPATH:-}" \
 NVTX_PROFILE_PYTHON=1 \
 nsys profile -t nvtx,cuda,osrt \
 uv run python scripts/reinforcement_learning/skrl/train.py \
   --task=Isaac-Velocity-Flat-Anymal-C-v0 --num_envs=1024 --max_iterations=10
 
 # Capture specific modules only (recommended — reduces overhead)
-NVTX_SKILL_DIR=/Users/abaillet/src/omniperf/.agents/skills/nvtx-python
+NVTX_SKILL_DIR="${NVTX_SKILL_DIR:-$PWD/.agents/skills/nvtx-python}"
 PYTHONPATH="$NVTX_SKILL_DIR/scripts:${PYTHONPATH:-}" \
 NVTX_PROFILE_PYTHON=1 NVTX_PROFILE_INCLUDE=isaaclab,skrl \
 nsys profile -t nvtx,cuda,osrt \

diff --git a/.agents/skills/nvtx-python/scripts/sitecustomize.py b/.agents/skills/nvtx-python/scripts/sitecustomize.py
@@ -14,8 +14,8 @@
     try:
         import nvtx
 
-        _include = tuple(filter(None, os.environ.get("NVTX_PROFILE_INCLUDE", "").split(",")))
-        _exclude = tuple(filter(None, os.environ.get("NVTX_PROFILE_EXCLUDE", "importlib").split(",")))
+        _include = tuple(part.strip() for part in os.environ.get("NVTX_PROFILE_INCLUDE", "").split(",") if part.strip())
+        _exclude = tuple(part.strip() for part in os.environ.get("NVTX_PROFILE_EXCLUDE", "importlib").split(",") if part.strip())
         _module_cache = {}
         _pushed_frames = set()
 

diff --git a/.agents/skills/perf-tuning/SKILL.md b/.agents/skills/perf-tuning/SKILL.md
@@ -1,6 +1,6 @@
 ---
 name: perf-tuning
-description: Resolve common Kit/Isaac Sim/Isaac Lab performance issues using specific settings and configuration changes. Covers PresentFrame stalls, resolveSamplerFeedback, headless mode, multi-GPU tradeoffs, DLSS/DLSS-G, PhysX tuning, RTX presets (isaaclab_performance/balanced/quality), viewport gizmos, HydraEngine waitIdle, fsWatcher overhead, and CPU governor. Use when profiling data shows a specific bottleneck and you need the fix, when someone asks "why is it slow" and you have Tracy/nsys evidence, or when tuning RTX settings for GPU-bound workloads. NOT for: initial triage (use diagnose-perf), capturing profiles (use profiling), or analyzing traces (use nsys-analyze).
+description: 'Resolve common Kit/Isaac Sim/Isaac Lab performance issues using specific settings and configuration changes. Covers PresentFrame stalls, resolveSamplerFeedback, headless mode, multi-GPU tradeoffs, DLSS/DLSS-G, PhysX tuning, RTX presets (isaaclab_performance/balanced/quality), viewport gizmos, HydraEngine waitIdle, fsWatcher overhead, and CPU governor. Use when profiling data shows a specific bottleneck and you need the fix, when someone asks "why is it slow" and you have Tracy/nsys evidence, or when tuning RTX settings for GPU-bound workloads. NOT for: initial triage (use diagnose-perf), capturing profiles (use profiling), or analyzing traces (use nsys-analyze).'
 ---
 
 # Performance Tuning for Kit / Isaac Sim / Isaac Lab

diff --git a/.agents/skills/profiling-api/SKILL.md b/.agents/skills/profiling-api/SKILL.md
@@ -101,6 +101,7 @@ profiler.instant(mask, type, name)              # instant event
 profiler.flow(mask, type, id, name)             # cross-thread flow
 profiler.frame(mask, name)                      # frame marker
 profiler.set_python_profiling_enabled(bool)     # toggle auto-profiling
+profiler.is_python_profiling_enabled() -> bool
 ```
 
 Types:
@@ -118,8 +119,11 @@ carb.profiler.FlowType.BEGIN / END  # flow start/end
 constexpr uint64_t kCaptureMaskNone    = 0;              // nothing
 constexpr uint64_t kCaptureMaskAll     = (uint64_t)-1;   // everything (default when no mask arg)
 constexpr uint64_t kCaptureMaskDefault = uint64_t(1);    // bit 0
+constexpr uint64_t kCaptureMaskProfiler = uint64_t(1) << 63; // profiler internals
 ```
 
+If a zone uses mask `0`, Carbonite treats it as `kCaptureMaskDefault` (`1`).
+
 **Workflow:** Start with `--/app/profilerMask=1` (major spans only, minimal overhead). If more detail needed, remove the arg (defaults to ALL). Always start coarse, then zoom in.
 
 ## Profiler Channels
@@ -164,6 +168,12 @@ CARB_PROFILE_VALUE(gpuFrameTimeMs, 1, "GPU Frame Time (ms)");
 
 int32_t triangleCount = 1500000;
 CARB_PROFILE_VALUE(triangleCount, 1, "Triangle Count");
+
+uint32_t gpuMemoryMB = 4096;
+CARB_PROFILE_VALUE(gpuMemoryMB, 1, "GPU Memory (MB)");
+
+int gpuIndex = 0;
+CARB_PROFILE_VALUE(gpuFrameTimeMs, 1, "GPU %d Frame Time", gpuIndex);
 ```
 
 ### Python

diff --git a/.agents/skills/profiling/SKILL.md b/.agents/skills/profiling/SKILL.md
@@ -54,6 +54,7 @@ profiler.set_capture_mask(0)  # stop targeted capture
 ```bash
 export TRACY_NO_SYS_TRACE=1
 export TRACY_NO_CALLSTACK=1
+export TRACY_PORT="${TRACY_PORT:-8086}"  # Isaac Sim 6.0+ commonly uses 8087 to avoid OV Hub
 
 # TRACY analysis phase only. Do not set during COLD/WARM benchmark measurement.
 export CARB_PROFILING_PYTHON=1
@@ -81,7 +82,14 @@ Tracy capture is error-prone. Follow this exact sequence to avoid port conflicts
 
 **Tracy port:** default is `8086`; Isaac Sim 6.0+ commonly uses `8087` to avoid OV Hub. Kit auto-increments to `8087`, `8088`, etc. on conflict. Set `TRACY_PORT` when you know the port.
 
-**Tracy capture binary:** use the bundled `omni.kit.profiler.tracy` capture binary when available, or build Tracy 0.11.1 from source (`capture/build/unix/capture-release`).
+**Tracy capture binary:** use the bundled `omni.kit.profiler.tracy` capture binary when available, or build the Tracy version that matches Kit's `carb_sdk_plugins` from source.
+
+| `carb_sdk_plugins` version | Tracy version |
+|---|---|
+| `< 178` | `0.9.1` legacy protocol |
+| `>= 178` | `0.11.1+nv1` current protocol |
+
+Check the `carb_sdk_plugins` version in Kit's `all-deps.packman.xml` before building a fallback capture binary. For current Kit builds, use Tracy `v0.11.1` and the headless binary at `capture/build/unix/capture-release`.
 
 #### Step-by-step:
 ```bash

diff --git a/.agents/skills/tracy-memory/SKILL.md b/.agents/skills/tracy-memory/SKILL.md
@@ -17,6 +17,7 @@ The `omni.cpumemorytracking` extension uses LD_PRELOAD to intercept malloc/free.
 export LD_PRELOAD=~/.cache/packman/chk/allocmemwrapper/<version>/liballocwrapper.so
 export TRACY_USE_LIB_UNWIND_FOR_BT=1   # libunwind-based backtrace
 export TRACY_NO_SYS_TRACE=1            # reduce overhead
+export TRACY_PORT="${TRACY_PORT:-8086}" # use 8087 for Isaac Sim 6.0+ when needed
 ```
 
 ## Step 2: Kit Flags

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -0,0 +1,31 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+name: Tests
+
+on:  # triggers: pull requests, pushes to main, and manual dispatch
+  pull_request:
+  push:
+    branches: [main]
+  workflow_dispatch:
+
+permissions:  # the workflow token gets read access to repository contents
+  contents: read
+
+jobs:
+  unittest:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.13"
+
+      - name: Run unit tests
+        run: uv run python -m unittest discover -s tests  # unittest discovery rooted at the tests/ directory
diff --git a/README.md b/README.md
@@ -83,13 +83,17 @@ To enable GitHub Pages for a fresh clone:
 This repo ships a set of [Claude Code agent skills](.agents/skills/) for working with Omniverse, Isaac Sim, Isaac Lab, and their profiling tools. They encode install steps, benchmark recipes, and profile-analysis workflows validated from live testing.
 
 - [install-isaacsim](.agents/skills/install-isaacsim/SKILL.md) — install Isaac Sim via pip or source build
-- [install-isaaclab](.agents/skills/install-isaaclab/SKILL.md) — install Isaac Lab and link it to Isaac Sim
+- [install-isaaclab](.agents/skills/install-isaaclab/SKILL.md) — install Isaac Lab for Isaac Sim-backed or kit-less/Newton workflows
 - [install-profilers](.agents/skills/install-profilers/SKILL.md) — set up Nsight Systems, Tracy, and related tooling
 - [benchmark-isaacsim](.agents/skills/benchmark-isaacsim/SKILL.md) — run Isaac Sim benchmarks
 - [benchmark-isaaclab](.agents/skills/benchmark-isaaclab/SKILL.md) — run Isaac Lab RL and environment benchmarks
 - [profiling](.agents/skills/profiling/SKILL.md) — capture traces with Tracy and Nsight Systems
+- [profiling-api](.agents/skills/profiling-api/SKILL.md) — add profiling zones, metrics, and annotations to Kit code
 - [nsys-analyze](.agents/skills/nsys-analyze/SKILL.md) — analyze Kit-based `.nsys-rep` profiles and compare versions
+- [tracy-memory](.agents/skills/tracy-memory/SKILL.md) — profile CPU and GPU memory allocations in Tracy
+- [nvtx-python](.agents/skills/nvtx-python/SKILL.md) — trace Python functions with NVTX outside Kit/Carbonite
 - [diagnose-perf](.agents/skills/diagnose-perf/SKILL.md) — first-responder triage for slow FPS, stutter, or latency
+- [perf-tuning](.agents/skills/perf-tuning/SKILL.md) — apply guide-backed fixes for known performance bottlenecks
 
 ## Security
 

diff --git a/tests/test_skill_docs.py b/tests/test_skill_docs.py
@@ -0,0 +1,121 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+from pathlib import Path
+import re
+import unittest
+
+
+REPO_ROOT = Path(__file__).resolve().parents[1]
+SKILLS_ROOT = REPO_ROOT / ".agents" / "skills"
+
+
+class SkillDocsTests(unittest.TestCase):  # consistency checks over the SKILL.md files and the README indexes
+    def skill_files(self):  # every <skill>/SKILL.md directly under SKILLS_ROOT, sorted for a stable order
+        return sorted(SKILLS_ROOT.glob("*/SKILL.md"))
+
+    def test_skill_frontmatter_names_match_directories(self):  # frontmatter `name` must equal the directory name; `description` must be non-empty
+        for path in self.skill_files():
+            with self.subTest(path=path.relative_to(REPO_ROOT)):
+                text = path.read_text()
+                self.assertTrue(text.startswith("---\n"), "missing YAML frontmatter")
+                end = text.find("\n---\n", 4)  # closing delimiter; the search starts after the opening "---\n"
+                self.assertNotEqual(end, -1, "unterminated YAML frontmatter")
+                frontmatter = text[4:end]
+                fields = dict(re.findall(r"^(name|description):\s*(.*)$", frontmatter, re.MULTILINE))  # only single-line name:/description: entries are captured
+                self.assertEqual(path.parent.name, fields.get("name"))
+                self.assertTrue(fields.get("description"))
+
+    def test_skill_frontmatter_plain_scalars_do_not_contain_mapping_delimiters(self):  # unquoted name/description values must not contain a colon followed by whitespace or end-of-line
+        for path in self.skill_files():
+            with self.subTest(path=path.relative_to(REPO_ROOT)):
+                text = path.read_text()
+                end = text.find("\n---\n", 4)  # NOTE(review): -1 is not checked here, so a missing closing delimiter makes the slice below text[4:-1]
+                frontmatter = text[4:end]
+                fields = dict(re.findall(r"^(name|description):\s*(.*)$", frontmatter, re.MULTILINE))
+                for key, value in fields.items():
+                    stripped = value.strip()
+                    if stripped.startswith(("'", '"', "|", ">")):  # quoted and block-scalar values are exempt
+                        continue
+                    self.assertNotRegex(
+                        stripped,
+                        r":(?:\s|$)",
+                        f"{key} contains an unquoted YAML mapping delimiter",
+                    )
+
+    def test_skill_indexes_cover_all_skills(self):  # both README indexes must reference every skill directory
+        skills = [path.parent.name for path in self.skill_files()]
+        root_readme = (REPO_ROOT / "README.md").read_text()
+        skills_readme = (SKILLS_ROOT / "README.md").read_text()
+
+        for skill in skills:
+            with self.subTest(skill=skill):
+                self.assertIn(f".agents/skills/{skill}/SKILL.md", root_readme)
+                self.assertIn(f"[{skill}]({skill}/)", skills_readme)
+
+    def test_markdown_links_resolve(self):  # relative markdown link targets must exist on disk
+        files = [
+            REPO_ROOT / "README.md",
+            SKILLS_ROOT / "README.md",
+            *self.skill_files(),
+        ]
+        for path in files:
+            text = path.read_text()
+            for _, target in re.findall(r"\[([^\]]+)\]\(([^)]+)\)", text):
+                if "://" in target or target.startswith("#"):  # skip URLs and same-page anchors
+                    continue
+                target = target.split("#", 1)[0]  # drop any #fragment before checking the path
+                if not target:
+                    continue
+                with self.subTest(path=path.relative_to(REPO_ROOT), target=target):
+                    self.assertTrue((path.parent / target).exists())  # targets resolve relative to the linking file's directory
+
+    def test_no_user_absolute_paths_in_skill_docs(self):  # README and skill docs must not embed /Users/ or /home/abaillet paths
+        stale_path = re.compile(r"/Users/|/home/abaillet")
+        for path in [REPO_ROOT / "README.md", *self.skill_files()]:
+            with self.subTest(path=path.relative_to(REPO_ROOT)):
+                self.assertIsNone(stale_path.search(path.read_text()))
+
+    def test_profiling_guide_alignment_markers(self):  # each listed skill doc must contain its expected marker substrings
+        expected_markers = {
+            ".agents/skills/profiling/SKILL.md": [
+                "TRACY_PORT",
+                "carb_sdk_plugins",
+                "0.11.1+nv1",
+            ],
+            ".agents/skills/install-profilers/SKILL.md": [
+                "carb_sdk_plugins",
+                "all-deps.packman.xml",
+                "0.11.1+nv1",
+            ],
+            ".agents/skills/nsys-analyze/SKILL.md": [
+                "total_time",
+                "total_ns",
+                "Normalize the column names",
+            ],
+            ".agents/skills/profiling-api/SKILL.md": [
+                "kCaptureMaskProfiler",
+                "is_python_profiling_enabled",
+                "mask `0`",
+            ],
+            ".agents/skills/tracy-memory/SKILL.md": [
+                "TRACY_PORT",
+                "LD_PRELOAD",
+                "TRACY_USE_LIB_UNWIND_FOR_BT",
+            ],
+            ".agents/skills/nvtx-python/SKILL.md": [
+                'NVTX_SKILL_DIR="${NVTX_SKILL_DIR:-$PWD/.agents/skills/nvtx-python}"',
+                "sitecustomize.py",
+                "PYTHONPATH",
+            ],
+        }
+
+        for rel_path, markers in expected_markers.items():
+            text = (REPO_ROOT / rel_path).read_text()
+            for marker in markers:
+                with self.subTest(path=rel_path, marker=marker):  # one subtest per (file, marker) pair
+                    self.assertIn(marker, text)
+
+
+if __name__ == "__main__":
+    unittest.main()