Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .agents/skills/install-profilers/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,18 @@ Using the bundled binary guarantees version compatibility with the Tracy protoco

### Option B: Build Tracy 0.11.1 from source (recommended fallback)

Before building, check Kit's `all-deps.packman.xml` for the `carb_sdk_plugins`
version so the capture protocol matches the profiled app:

| `carb_sdk_plugins` version | Tracy version |
|---|---|
| `< 178` | `0.9.1` legacy protocol |
| `>= 178` | `0.11.1+nv1` current protocol |

The commands below build Tracy `v0.11.1`, which matches current Kit builds
using `carb_sdk_plugins >= 178`. For older Kit builds, check out the matching
legacy Tracy tag instead.

```bash
sudo apt-get install -y build-essential cmake git libcapstone-dev

Expand Down
28 changes: 20 additions & 8 deletions .agents/skills/nsys-analyze/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -155,14 +155,16 @@ Do not compare a single frame unless the issue is known to occur in one frame an
csvexport profile.tracy > zones.csv
```

**CSV columns:** `name,src_file,src_line,total_ns,total_perc,counts,mean_ns,min_ns,max_ns,std_ns`
Inspect the header before scripting against `csvexport` output. Tracy versions and builds can differ:
- Guide examples use `name`, `mean`, `count`, and `total_time`.
- Other builds emit nanosecond-specific names such as `total_ns`, `counts`, and `mean_ns`.

Normalize the column names in scripts instead of assuming one schema.

Data is **pre-aggregated** — one row per unique zone, covering the entire trace (no phase separation).

```bash
# Top zones, noise filtered
tail -n+2 zones.csv | grep -v -E '^(Carbonite|carb::|Thread waiting|Executing task|Running fiber)' \
| sort -t',' -k4 -rn | head -30
head -1 zones.csv
```

> **Tracy CSV limitation:** No per-invocation timestamps — only aggregates. For phase-aware analysis, prefer the nsys SQLite path.
Expand Down Expand Up @@ -192,14 +194,24 @@ Compare with Python:
```python
import csv

def number(row, *names):
    """Return the first populated value among ``names`` in ``row`` as a float.

    ``csvexport`` column names differ between Tracy builds, so callers pass
    every spelling they are willing to accept (for example ``'total_ns'``
    and ``'total_time'``) and the first one that holds a value wins.

    Args:
        row: Mapping of column name to cell value, e.g. a ``csv.DictReader`` row.
        *names: Candidate column names, tried in order.

    Returns:
        The first non-empty candidate converted with ``float()``, or ``0.0``
        when every candidate is missing, ``None``, or blank.

    Raises:
        ValueError: If the selected cell holds text that is not a number.
    """
    for name in names:
        value = row.get(name)
        if isinstance(value, str):
            # A whitespace-only cell is as empty as "" and must not reach
            # float(), which would raise instead of trying the next name.
            value = value.strip()
        if value not in (None, ""):
            return float(value)
    return 0.0

def load_zones(path):
zones = {}
with open(path) as f:
for row in csv.DictReader(f):
zones[row['name']] = {
'total_ms': int(row['total_ns']) / 1e6,
'count': int(row['counts']),
'mean_ms': int(row['mean_ns']) / 1e6,
name = row.get('name') or row.get('zone_name')
if not name:
continue
zones[name] = {
'total_ms': number(row, 'total_ns', 'total_time') / 1e6,
'count': int(number(row, 'counts', 'count')),
'mean_ms': number(row, 'mean_ns', 'mean') / 1e6,
}
return zones

Expand Down
9 changes: 5 additions & 4 deletions .agents/skills/nvtx-python/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@ Do not write into an environment's existing `sitecustomize.py`. Load the bundled
uv pip install nvtx

# Resolve this skill's directory, then put its scripts/ directory on PYTHONPATH.
# Replace the path if the skills directory is installed somewhere else.
NVTX_SKILL_DIR=/Users/abaillet/src/omniperf/.agents/skills/nvtx-python
# From this repository, the default below points at the bundled helper.
NVTX_SKILL_DIR="${NVTX_SKILL_DIR:-$PWD/.agents/skills/nvtx-python}"
test -f "$NVTX_SKILL_DIR/scripts/sitecustomize.py"
export PYTHONPATH="$NVTX_SKILL_DIR/scripts:${PYTHONPATH:-}"
```

Expand All @@ -33,15 +34,15 @@ export PYTHONPATH="$NVTX_SKILL_DIR/scripts:${PYTHONPATH:-}"

```bash
# Capture all Python modules
NVTX_SKILL_DIR=/Users/abaillet/src/omniperf/.agents/skills/nvtx-python
NVTX_SKILL_DIR="${NVTX_SKILL_DIR:-$PWD/.agents/skills/nvtx-python}"
PYTHONPATH="$NVTX_SKILL_DIR/scripts:${PYTHONPATH:-}" \
NVTX_PROFILE_PYTHON=1 \
nsys profile -t nvtx,cuda,osrt \
uv run python scripts/reinforcement_learning/skrl/train.py \
--task=Isaac-Velocity-Flat-Anymal-C-v0 --num_envs=1024 --max_iterations=10

# Capture specific modules only (recommended — reduces overhead)
NVTX_SKILL_DIR=/Users/abaillet/src/omniperf/.agents/skills/nvtx-python
NVTX_SKILL_DIR="${NVTX_SKILL_DIR:-$PWD/.agents/skills/nvtx-python}"
PYTHONPATH="$NVTX_SKILL_DIR/scripts:${PYTHONPATH:-}" \
NVTX_PROFILE_PYTHON=1 NVTX_PROFILE_INCLUDE=isaaclab,skrl \
nsys profile -t nvtx,cuda,osrt \
Expand Down
4 changes: 2 additions & 2 deletions .agents/skills/nvtx-python/scripts/sitecustomize.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
try:
import nvtx

_include = tuple(filter(None, os.environ.get("NVTX_PROFILE_INCLUDE", "").split(",")))
_exclude = tuple(filter(None, os.environ.get("NVTX_PROFILE_EXCLUDE", "importlib").split(",")))
_include = tuple(part.strip() for part in os.environ.get("NVTX_PROFILE_INCLUDE", "").split(",") if part.strip())
_exclude = tuple(part.strip() for part in os.environ.get("NVTX_PROFILE_EXCLUDE", "importlib").split(",") if part.strip())
_module_cache = {}
_pushed_frames = set()

Expand Down
2 changes: 1 addition & 1 deletion .agents/skills/perf-tuning/SKILL.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
name: perf-tuning
description: Resolve common Kit/Isaac Sim/Isaac Lab performance issues using specific settings and configuration changes. Covers PresentFrame stalls, resolveSamplerFeedback, headless mode, multi-GPU tradeoffs, DLSS/DLSS-G, PhysX tuning, RTX presets (isaaclab_performance/balanced/quality), viewport gizmos, HydraEngine waitIdle, fsWatcher overhead, and CPU governor. Use when profiling data shows a specific bottleneck and you need the fix, when someone asks "why is it slow" and you have Tracy/nsys evidence, or when tuning RTX settings for GPU-bound workloads. NOT for: initial triage (use diagnose-perf), capturing profiles (use profiling), or analyzing traces (use nsys-analyze).
description: 'Resolve common Kit/Isaac Sim/Isaac Lab performance issues using specific settings and configuration changes. Covers PresentFrame stalls, resolveSamplerFeedback, headless mode, multi-GPU tradeoffs, DLSS/DLSS-G, PhysX tuning, RTX presets (isaaclab_performance/balanced/quality), viewport gizmos, HydraEngine waitIdle, fsWatcher overhead, and CPU governor. Use when profiling data shows a specific bottleneck and you need the fix, when someone asks "why is it slow" and you have Tracy/nsys evidence, or when tuning RTX settings for GPU-bound workloads. NOT for: initial triage (use diagnose-perf), capturing profiles (use profiling), or analyzing traces (use nsys-analyze).'
---

# Performance Tuning for Kit / Isaac Sim / Isaac Lab
Expand Down
10 changes: 10 additions & 0 deletions .agents/skills/profiling-api/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ profiler.instant(mask, type, name) # instant event
profiler.flow(mask, type, id, name) # cross-thread flow
profiler.frame(mask, name) # frame marker
profiler.set_python_profiling_enabled(bool) # toggle auto-profiling
profiler.is_python_profiling_enabled() -> bool
```

Types:
Expand All @@ -118,8 +119,11 @@ carb.profiler.FlowType.BEGIN / END # flow start/end
constexpr uint64_t kCaptureMaskNone = 0; // nothing
constexpr uint64_t kCaptureMaskAll = (uint64_t)-1; // everything (default when no mask arg)
constexpr uint64_t kCaptureMaskDefault = uint64_t(1); // bit 0
constexpr uint64_t kCaptureMaskProfiler = uint64_t(1) << 63; // profiler internals
```

If a zone uses mask `0`, Carbonite treats it as `kCaptureMaskDefault` (`1`).

**Workflow:** Start with `--/app/profilerMask=1` (major spans only, minimal overhead). If more detail is needed, remove the arg (the mask then defaults to ALL). Always start coarse, then zoom in.

## Profiler Channels
Expand Down Expand Up @@ -164,6 +168,12 @@ CARB_PROFILE_VALUE(gpuFrameTimeMs, 1, "GPU Frame Time (ms)");

int32_t triangleCount = 1500000;
CARB_PROFILE_VALUE(triangleCount, 1, "Triangle Count");

uint32_t gpuMemoryMB = 4096;
CARB_PROFILE_VALUE(gpuMemoryMB, 1, "GPU Memory (MB)");

int gpuIndex = 0;
CARB_PROFILE_VALUE(gpuFrameTimeMs, 1, "GPU %d Frame Time", gpuIndex);
```

### Python
Expand Down
10 changes: 9 additions & 1 deletion .agents/skills/profiling/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ profiler.set_capture_mask(0) # stop targeted capture
```bash
export TRACY_NO_SYS_TRACE=1
export TRACY_NO_CALLSTACK=1
export TRACY_PORT="${TRACY_PORT:-8086}" # Isaac Sim 6.0+ commonly uses 8087 to avoid OV Hub

# TRACY analysis phase only. Do not set during COLD/WARM benchmark measurement.
export CARB_PROFILING_PYTHON=1
Expand Down Expand Up @@ -81,7 +82,14 @@ Tracy capture is error-prone. Follow this exact sequence to avoid port conflicts

**Tracy port:** default is `8086`; Isaac Sim 6.0+ commonly uses `8087` to avoid OV Hub. Kit auto-increments to `8087`, `8088`, etc. on conflict. Set `TRACY_PORT` when you know the port.

**Tracy capture binary:** use the bundled `omni.kit.profiler.tracy` capture binary when available, or build Tracy 0.11.1 from source (`capture/build/unix/capture-release`).
**Tracy capture binary:** use the bundled `omni.kit.profiler.tracy` capture binary when available, or build the Tracy version that matches Kit's `carb_sdk_plugins` from source.

| `carb_sdk_plugins` version | Tracy version |
|---|---|
| `< 178` | `0.9.1` legacy protocol |
| `>= 178` | `0.11.1+nv1` current protocol |

Check Kit's `all-deps.packman.xml` before building a fallback capture binary. For current Kit builds, use Tracy `v0.11.1` and the headless binary at `capture/build/unix/capture-release`.

#### Step-by-step:
```bash
Expand Down
1 change: 1 addition & 0 deletions .agents/skills/tracy-memory/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ The `omni.cpumemorytracking` extension uses LD_PRELOAD to intercept malloc/free.
export LD_PRELOAD=~/.cache/packman/chk/allocmemwrapper/<version>/liballocwrapper.so
export TRACY_USE_LIB_UNWIND_FOR_BT=1 # libunwind-based backtrace
export TRACY_NO_SYS_TRACE=1 # reduce overhead
export TRACY_PORT="${TRACY_PORT:-8086}" # use 8087 for Isaac Sim 6.0+ when needed
```

## Step 2: Kit Flags
Expand Down
31 changes: 31 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA Corporation
# SPDX-License-Identifier: Apache-2.0

name: Tests

on:
pull_request:
push:
branches: [main]
workflow_dispatch:

permissions:
contents: read

jobs:
unittest:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4

- name: Install uv
uses: astral-sh/setup-uv@v5

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.13"

- name: Run unit tests
run: uv run python -m unittest discover -s tests
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,13 +83,17 @@ To enable GitHub Pages for a fresh clone:
This repo ships a set of [Claude Code agent skills](.agents/skills/) for working with Omniverse, Isaac Sim, Isaac Lab, and their profiling tools. They encode install steps, benchmark recipes, and profile-analysis workflows validated through live testing.

- [install-isaacsim](.agents/skills/install-isaacsim/SKILL.md) — install Isaac Sim via pip or source build
- [install-isaaclab](.agents/skills/install-isaaclab/SKILL.md) — install Isaac Lab and link it to Isaac Sim
- [install-isaaclab](.agents/skills/install-isaaclab/SKILL.md) — install Isaac Lab for Isaac Sim-backed or kit-less/Newton workflows
- [install-profilers](.agents/skills/install-profilers/SKILL.md) — set up Nsight Systems, Tracy, and related tooling
- [benchmark-isaacsim](.agents/skills/benchmark-isaacsim/SKILL.md) — run Isaac Sim benchmarks
- [benchmark-isaaclab](.agents/skills/benchmark-isaaclab/SKILL.md) — run Isaac Lab RL and environment benchmarks
- [profiling](.agents/skills/profiling/SKILL.md) — capture traces with Tracy and Nsight Systems
- [profiling-api](.agents/skills/profiling-api/SKILL.md) — add profiling zones, metrics, and annotations to Kit code
- [nsys-analyze](.agents/skills/nsys-analyze/SKILL.md) — analyze Kit-based `.nsys-rep` profiles and compare versions
- [tracy-memory](.agents/skills/tracy-memory/SKILL.md) — profile CPU and GPU memory allocations in Tracy
- [nvtx-python](.agents/skills/nvtx-python/SKILL.md) — trace Python functions with NVTX outside Kit/Carbonite
- [diagnose-perf](.agents/skills/diagnose-perf/SKILL.md) — first-responder triage for slow FPS, stutter, or latency
- [perf-tuning](.agents/skills/perf-tuning/SKILL.md) — apply guide-backed fixes for known performance bottlenecks

## Security

Expand Down
121 changes: 121 additions & 0 deletions tests/test_skill_docs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA Corporation
# SPDX-License-Identifier: Apache-2.0

from pathlib import Path
import re
import unittest


# Repository root: parents[1] of this file's resolved path, i.e. the
# directory that contains the directory holding this test module.
REPO_ROOT = Path(__file__).resolve().parents[1]
# Root of the skill documentation tree; the tests glob it for */SKILL.md.
SKILLS_ROOT = REPO_ROOT / ".agents" / "skills"


class SkillDocsTests(unittest.TestCase):
    """Consistency checks for the skill documentation under ``SKILLS_ROOT``.

    Every check reads Markdown files from the working tree, so the suite
    needs no fixtures beyond the repository itself. Files are always decoded
    as UTF-8 so results do not depend on the locale of the machine running
    the tests.
    """

    # Captures the two frontmatter keys these checks inspect, one per line.
    _FIELD_PATTERN = re.compile(r"^(name|description):\s*(.*)$", re.MULTILINE)
    # Inline Markdown links of the form [label](target).
    _LINK_PATTERN = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")

    def skill_files(self):
        """Return every ``SKILL.md`` one directory below ``SKILLS_ROOT``, sorted."""
        return sorted(SKILLS_ROOT.glob("*/SKILL.md"))

    def _read_doc(self, path):
        """Return the text of ``path`` decoded as UTF-8."""
        return path.read_text(encoding="utf-8")

    def _frontmatter_fields(self, path):
        """Return the ``name``/``description`` frontmatter fields of ``path``.

        Fails the current test when the file does not open with a ``---``
        line or when the closing ``---`` line is missing, so callers never
        inspect a slice taken from a malformed document.
        """
        text = self._read_doc(path)
        self.assertTrue(text.startswith("---\n"), "missing YAML frontmatter")
        end = text.find("\n---\n", 4)
        self.assertNotEqual(end, -1, "unterminated YAML frontmatter")
        return dict(self._FIELD_PATTERN.findall(text[4:end]))

    def test_skill_frontmatter_names_match_directories(self):
        """Each skill's ``name`` equals its directory and it has a description."""
        for path in self.skill_files():
            with self.subTest(path=path.relative_to(REPO_ROOT)):
                fields = self._frontmatter_fields(path)
                self.assertEqual(path.parent.name, fields.get("name"))
                self.assertTrue(fields.get("description"))

    def test_skill_frontmatter_plain_scalars_do_not_contain_mapping_delimiters(self):
        """Unquoted frontmatter values must not contain ``: `` or end in ``:``."""
        for path in self.skill_files():
            with self.subTest(path=path.relative_to(REPO_ROOT)):
                fields = self._frontmatter_fields(path)
                for key, value in fields.items():
                    stripped = value.strip()
                    # Quoted and block scalars may legally contain ": ".
                    if stripped.startswith(("'", '"', "|", ">")):
                        continue
                    self.assertNotRegex(
                        stripped,
                        r":(?:\s|$)",
                        f"{key} contains an unquoted YAML mapping delimiter",
                    )

    def test_skill_indexes_cover_all_skills(self):
        """Both README indexes link to every skill directory."""
        skills = [path.parent.name for path in self.skill_files()]
        root_readme = self._read_doc(REPO_ROOT / "README.md")
        skills_readme = self._read_doc(SKILLS_ROOT / "README.md")

        for skill in skills:
            with self.subTest(skill=skill):
                self.assertIn(f".agents/skills/{skill}/SKILL.md", root_readme)
                self.assertIn(f"[{skill}]({skill}/)", skills_readme)

    def test_markdown_links_resolve(self):
        """Relative Markdown link targets exist on disk."""
        documents = [
            REPO_ROOT / "README.md",
            SKILLS_ROOT / "README.md",
            *self.skill_files(),
        ]
        for path in documents:
            text = self._read_doc(path)
            for _, target in self._LINK_PATTERN.findall(text):
                # External URLs and same-page anchors are out of scope.
                if "://" in target or target.startswith("#"):
                    continue
                # Drop any fragment; only the file part can be checked.
                target = target.split("#", 1)[0]
                if not target:
                    continue
                with self.subTest(path=path.relative_to(REPO_ROOT), target=target):
                    self.assertTrue(
                        (path.parent / target).exists(),
                        f"link target does not exist: {target}",
                    )

    def test_no_user_absolute_paths_in_skill_docs(self):
        """Docs must not contain paths into a specific user's home directory."""
        stale_path = re.compile(r"/Users/|/home/abaillet")
        for path in [REPO_ROOT / "README.md", *self.skill_files()]:
            with self.subTest(path=path.relative_to(REPO_ROOT)):
                self.assertIsNone(stale_path.search(self._read_doc(path)))

    def test_profiling_guide_alignment_markers(self):
        """Each listed skill doc still contains its expected marker strings."""
        expected_markers = {
            ".agents/skills/profiling/SKILL.md": [
                "TRACY_PORT",
                "carb_sdk_plugins",
                "0.11.1+nv1",
            ],
            ".agents/skills/install-profilers/SKILL.md": [
                "carb_sdk_plugins",
                "all-deps.packman.xml",
                "0.11.1+nv1",
            ],
            ".agents/skills/nsys-analyze/SKILL.md": [
                "total_time",
                "total_ns",
                "Normalize the column names",
            ],
            ".agents/skills/profiling-api/SKILL.md": [
                "kCaptureMaskProfiler",
                "is_python_profiling_enabled",
                "mask `0`",
            ],
            ".agents/skills/tracy-memory/SKILL.md": [
                "TRACY_PORT",
                "LD_PRELOAD",
                "TRACY_USE_LIB_UNWIND_FOR_BT",
            ],
            ".agents/skills/nvtx-python/SKILL.md": [
                'NVTX_SKILL_DIR="${NVTX_SKILL_DIR:-$PWD/.agents/skills/nvtx-python}"',
                "sitecustomize.py",
                "PYTHONPATH",
            ],
        }

        for rel_path, markers in expected_markers.items():
            text = self._read_doc(REPO_ROOT / rel_path)
            for marker in markers:
                with self.subTest(path=rel_path, marker=marker):
                    self.assertIn(marker, text)


if __name__ == "__main__":
unittest.main()
Loading