diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index a4f54f3..3bf4424 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -14,7 +14,7 @@ "name": "ralph-reviewed", "source": "./plugins/ralph-reviewed", "description": "Iterative Ralph loops with Codex CLI review gates at completion", - "version": "1.8.8", + "version": "2.0.0", "author": { "name": "Allen", "email": "bigboss@metalrodeo.xyz" diff --git a/.claude/skills/axe-ios-simulator/SKILL.md b/.claude/skills/axe-ios-simulator/SKILL.md index 83fcc10..d31dbc2 100644 --- a/.claude/skills/axe-ios-simulator/SKILL.md +++ b/.claude/skills/axe-ios-simulator/SKILL.md @@ -1,6 +1,6 @@ --- name: axe-ios-simulator -description: iOS Simulator automation using AXe CLI for touch gestures, text input, hardware buttons, screenshots, video recording, and accessibility inspection. Use when automating iOS Simulator interactions, writing UI tests, capturing screenshots/video, or inspecting accessibility elements. Triggers on iOS Simulator automation, AXe CLI usage, simulator tap/swipe/gesture commands, or accessibility testing tasks. +description: Use when automating iOS Simulator interactions, capturing screenshots/video, or inspecting accessibility via AXe CLI. --- # AXe iOS Simulator Automation diff --git a/.claude/skills/canton-network-repos/SKILL.md b/.claude/skills/canton-network-repos/SKILL.md index 2d7e506..9429744 100644 --- a/.claude/skills/canton-network-repos/SKILL.md +++ b/.claude/skills/canton-network-repos/SKILL.md @@ -1,306 +1,95 @@ --- name: canton-network-repos -description: Canton Network, DAML, and Splice repository knowledge. Use when working with Canton participants, DAML smart contracts, Splice applications, LF version compatibility, or package ID mismatches. Triggers on Canton, DAML, Splice, decentralized-canton-sync, or LF version queries. 
+description: Use when working with Canton Network participants, DAML smart contracts, Splice applications, or debugging LF version and package ID issues. --- -# Canton Network Open-Source Repositories - -This skill provides comprehensive knowledge about the Canton Network open-source ecosystem, repository relationships, and build processes. - -## Activation - -Use this skill when: -- Working with Canton Network, DAML, or Splice repositories -- Investigating version compatibility issues -- Understanding enterprise vs community differences -- Debugging LF version or package ID mismatches -- Building Canton participants or Splice applications +# Canton Network Repositories ## Repository Hierarchy ``` -┌─────────────────────────────────────────────────────────────────┐ -│ Splice Version (e.g., 0.5.4) │ -│ github.com/digital-asset/decentralized-canton-sync │ -│ Applications: Validator, SV, Wallet, Scan, Amulet (CC) │ -└─────────────────────────┬───────────────────────────────────────┘ - │ depends on - ▼ -┌─────────────────────────────────────────────────────────────────┐ -│ Canton Version (e.g., 3.4.9) │ -│ github.com/digital-asset/canton │ -│ Runtime: Participant, Sequencer, Mediator, Admin API │ -└─────────────────────────┬───────────────────────────────────────┘ - │ depends on - ▼ -┌─────────────────────────────────────────────────────────────────┐ -│ DAML SDK (e.g., 3.4.9) │ -│ github.com/digital-asset/daml │ -│ Compiler: damlc, LF Engine, Ledger API, stdlib, protobuf │ -└─────────────────────────────────────────────────────────────────┘ +Splice (e.g., 0.5.4) github.com/digital-asset/decentralized-canton-sync + └─ depends on +Canton (e.g., 3.4.9) github.com/digital-asset/canton + └─ depends on +DAML SDK (e.g., 3.4.9) github.com/digital-asset/daml ``` -## Repository Details - -### 1. DAML SDK (`github.com/digital-asset/daml`) - -**Purpose**: Smart contract language, compiler, and runtime libraries. 
- -**Key Directories**: -``` -daml/ -├── sdk/ -│ ├── compiler/damlc/ # Haskell compiler source -│ │ └── lib/DA/Cli/Options.hs # --target version validation -│ ├── daml-lf/ -│ │ ├── language/ # LF version definitions (Scala) -│ │ ├── engine/ # LF execution engine -│ │ └── archive/ # DALF protobuf format -│ └── canton/ # Canton runtime (submodule) -├── ledger-api/ # gRPC API definitions -└── VERSION # SDK version string -``` +## Version Mapping -**LF Version Definitions** (`LanguageVersion.scala` at v3.4.9): -```scala -// V2 versions defined -val List(v2_1, v2_2, v2_dev) = AllV2 // Line 51 - v2_2 IS defined +| Splice | Canton | DAML SDK | Protocol | LF Default | LF Available | +|--------|--------|----------|----------|------------|--------------| +| 0.5.4 | 3.4.9 | 3.4.9 | PV34 | 2.1* | 2.2 (verified) | +| 0.5.3 | 3.4.8 | 3.4.8 | PV34 | 2.1* | 2.2 | +| 0.4.x | 3.3.x | 3.3.x | PV33 | 2.1 | 2.1 | -// Version ranges -case Major.V2 => VersionRange(v2_1, v2_2) // Line 171 - StableVersions includes v2_2 -def AllVersions = VersionRange(v2_1, v2_dev) +*Open-source Splice 0.5.4 ships with SDK snapshot `3.3.0-snapshot.20250502` (pre-dates LF 2.2). LF 2.2 was added to the SDK on 2025-10-03. Updating to SDK 3.4.9 enables LF 2.2 builds. -// Features at v2_2: -val flatArchive = v2_2 -val kindInterning = flatArchive -val exprInterning = flatArchive -val explicitPkgImports = v2_2 -val unsafeFromInterfaceRemoved = v2_2 -``` +## Key Configuration Files -**Note**: v2_2 IS in SDK v3.4.9 source. Older snapshots may not include it. 
+| Purpose | Repo | File | +|---------|------|------| +| LF version definitions | daml | `sdk/daml-lf/language/.../LanguageVersion.scala` | +| damlc target validation | daml | `sdk/compiler/damlc/lib/DA/Cli/Options.hs` | +| Canton version | canton | `VERSION` | +| Built-in DARs | canton | `community/common/src/main/daml/` | +| Splice LF config | splice | `project/CantonDependencies.scala` | +| Package targets | splice | `daml/*/daml.yaml` | +| Docker builds | splice | `cluster/images/*/Dockerfile` | -**damlc Target Validation** (`Options.hs`): -```haskell -lfVersionOpt :: Parser LF.Version --- Validates against LF.supportedOutputVersions --- Error: "Unknown Daml-LF version: X" if not in list +**Splice LF config** (`project/CantonDependencies.scala`): +```scala +val daml_language_versions = Seq("2.1") // ← LF target; change to "2.2" for upgrade +val daml_compiler_version = sys.env("DAML_COMPILER_VERSION") ``` -### 2. Canton (`github.com/digital-asset/canton`) +## Package ID Derivation -**Purpose**: Distributed ledger runtime implementing the Canton Protocol. +Package IDs are cryptographic hashes of: source content + LF version (`--target`) + SDK/stdlib version + dependency package IDs. -**Key Directories**: -``` -canton/ -├── community/ # Open-source Canton -│ ├── app/ # CantonCommunityApp entry point -│ ├── participant/ # Participant node implementation -│ ├── domain/ # Embedded domain (sequencer/mediator) -│ └── common/src/main/daml/ # Built-in DAML packages -│ └── AdminWorkflows/ # Ping, party replication DARs -├── daml/ # DAML SDK submodule -├── daml_dependencies.json # LF library versions -├── VERSION # Canton version -└── version.sbt # SBT version config -``` +**Changing LF version = different package IDs = incompatible packages.** Canton validates that upgraded packages use equal or newer LF version; mixing LF versions on the same ledger causes validation failures. 
-**Built-in DARs** (embedded in JAR): -- `canton-builtin-admin-workflow-ping.dar` -- `canton-builtin-admin-workflow-party-replication-alpha.dar` -- `CantonExamples.dar` +## Enterprise vs Community Canton -**Enterprise vs Community**: | Feature | Enterprise | Community | |---------|------------|-----------| -| Main class | CantonEnterpriseApp | CantonCommunityApp | | Transaction processing | Parallel | Sequential | -| Pruning | Available | Limited | | Database | PostgreSQL, Oracle | PostgreSQL only | | HA Domain | Supported | Embedded only | +| Pruning | Full | Limited | -### 3. Splice (`github.com/digital-asset/decentralized-canton-sync`) - -**Purpose**: Decentralized synchronizer governance, Amulet (Canton Coin), and network applications. - -**Key Directories**: -``` -decentralized-canton-sync/ -├── project/ -│ ├── CantonDependencies.scala # Version config, LF versions -│ └── DamlPlugin.scala # DAR build logic -├── daml/ -│ ├── splice-amulet/ # Canton Coin token contracts -│ ├── splice-wallet/ # Wallet contracts -│ ├── splice-dso-governance/ # DSO governance -│ └── */daml.yaml # Package configs with --target -├── apps/ -│ ├── sv/ # Super Validator app -│ ├── validator/ # Validator app -│ ├── wallet/ # Wallet backend -│ └── scan/ # Payment scan service -├── cluster/images/ # Docker image builds -│ └── canton-community/ # Community participant image -└── daml-compiler-sources.json # Compiler version reference -``` - -**Critical Configuration** (`CantonDependencies.scala`): -```scala -object CantonDependencies { - val version: String = "3.4.9" - val daml_language_versions = Seq("2.1") // ← LF target version - val daml_libraries_version = version - val daml_compiler_version = sys.env("DAML_COMPILER_VERSION") -} -``` - -**Package Target** (`daml/splice-amulet/daml.yaml`): -```yaml -sdk-version: 3.3.0-snapshot.20250502.13767.0.v2fc6c7e2 -build-options: - - --target=2.1 # Explicit LF 2.1 target -``` - -## Version Mapping - -| Splice | Canton | DAML SDK | Protocol | LF 
(Default) | LF (With SDK 3.4.9) | -|--------|--------|----------|----------|--------------|---------------------| -| 0.5.4 | 3.4.9 | 3.4.9 | PV34 | 2.1* | 2.2 (verified) | -| 0.5.3 | 3.4.8 | 3.4.8 | PV34 | 2.1* | 2.2 | -| 0.4.x | 3.3.x | 3.3.x | PV33 | 2.1 | 2.1 | - -*Open-source Splice 0.5.4 ships with SDK snapshot `3.3.0-snapshot.20250502` which predates LF 2.2. - -**Root Cause (Verified)**: The public Splice release uses an SDK snapshot from **May 2, 2025**, but LF 2.2 was added to the SDK on **October 3, 2025**. Updating to SDK 3.4.9 enables LF 2.2 builds. +## Build Commands -**Key insight**: LF 2.2 is fully available in open-source SDK v3.4.9. The Splice project simply needs to be updated to use the newer SDK. - -## LF Version Implications - -### Package ID Derivation -Package IDs are cryptographic hashes derived from: -1. Package source content -2. **LF version used** (`--target`) -3. SDK/stdlib versions -4. Dependency package IDs - -**Changing LF version = Different package IDs = Incompatible packages** - -### Upgrade Validation -Canton validates package upgrades: -- Upgraded packages must use equal or newer LF version -- LF 2.1 package cannot "upgrade" to LF 2.2 package (different IDs) -- Mixing LF versions on same ledger causes validation failures - -## Building from Open-Source - -### Community Canton Participant ```bash -cd canton -sbt "community/app/assembly" +# Community Canton participant +cd canton && sbt "community/app/assembly" # Output: community/app/target/scala-2.13/canton-community.jar -``` -### Splice Applications -```bash -cd decentralized-canton-sync -sbt compile # Requires DAML_COMPILER_VERSION env var +# Splice applications (requires DAML_COMPILER_VERSION env var) +cd decentralized-canton-sync && sbt compile ``` -### Building with LF 2.2 (Verified Working) - -LF 2.2 is available in SDK v3.4.9. The following steps have been **verified to work**: - -1. 
Edit `project/CantonDependencies.scala`: - ```scala - val daml_language_versions = Seq("2.2") - ``` - -2. Update `nix/daml-compiler-sources.json`: - ```json - { "version": "3.4.9" } - ``` - -3. Update all `daml/*/daml.yaml` files: - ```yaml - sdk-version: 3.4.9 - build-options: - - --target=2.2 - ``` - -4. Remove invalid warning flags (not present in SDK 3.4.9): - ```bash - # Remove -Wno-ledger-time-is-alpha from all daml.yaml files - ``` - -5. Build packages: - ```bash - cd decentralized-canton-sync - nix-shell -p daml-sdk --run "daml build -p daml/splice-util" - nix-shell -p daml-sdk --run "daml build -p daml/splice-amulet" - ``` - -**Verified**: splice-util and splice-amulet build successfully with LF 2.2 and SDK 3.4.9. - -## Fully Open-Source LF 2.2 Build (Verified) +## Upgrading to LF 2.2 (Verified with SDK 3.4.9) -Both Splice and Canton can be built with LF 2.2 from entirely open-source code: +1. `project/CantonDependencies.scala`: `val daml_language_versions = Seq("2.2")` +2. `nix/daml-compiler-sources.json`: `{ "version": "3.4.9" }` +3. All `daml/*/daml.yaml`: set `sdk-version: 3.4.9` and `--target=2.2` +4. Remove `-Wno-ledger-time-is-alpha` from all `daml.yaml` files (not in SDK 3.4.9) +5. Build: `daml build -p daml/splice-util && daml build -p daml/splice-amulet` -### Canton Built-in DARs - -Update Canton's daml.yaml files: -```bash -cd canton/community -# Update all daml.yaml files to sdk-version: 3.4.9 and --target=2.2 -perl -pi -e 's/sdk-version: 3\.3\.0-snapshot\.[^\n]*/sdk-version: 3.4.9/g' **/daml.yaml -perl -pi -e 's/--target=2\.1/--target=2.2/g' **/daml.yaml -``` - -Rebuild Canton: -```bash -sbt "canton-community-app/assembly" -``` - -### Verified Results (2025-12-24) - -Community-built DARs have **identical package IDs** to enterprise: -- `canton-builtin-admin-workflow-ping-3.4.9-fbeb863dab36da66d99...` - -This confirms full compatibility with enterprise deployments. 
- -## Key Files Reference - -| Purpose | Repository | File | -|---------|------------|------| -| LF versions (Scala) | daml | `sdk/daml-lf/language/.../LanguageVersion.scala` | -| damlc validation | daml | `sdk/compiler/damlc/lib/DA/Cli/Options.hs` | -| Canton version | canton | `VERSION` | -| Canton DARs | canton | `community/common/src/main/daml/` | -| Splice LF config | splice | `project/CantonDependencies.scala` | -| Package targets | splice | `daml/*/daml.yaml` | -| Docker builds | splice | `cluster/images/*/Dockerfile` | +Community-built DARs have identical package IDs to enterprise at the same LF version (verified 2025-12-24). ## Troubleshooting -### "Unknown Daml-LF version: 2.2" -- **Cause**: damlc binary doesn't support 2.2 in `supportedOutputVersions` -- **Check**: `daml damlc --help` for supported targets -- **Fix**: Use SDK version that includes 2.2, or use 2.1 +**"Unknown Daml-LF version: 2.2"**: damlc binary doesn't support 2.2. Check `daml damlc --help` for supported targets; upgrade to SDK 3.4.9. -### Package ID Mismatch -- **Cause**: Different LF versions between builds -- **Check**: `unzip -p package.dar META-INF/MANIFEST.MF | grep Sdk-Version` -- **Fix**: Ensure consistent `--target` across all builds +**Package ID mismatch**: different `--target` values between builds. Check: `unzip -p package.dar META-INF/MANIFEST.MF | grep Sdk-Version`. Fix: rebuild every package with the same `--target` and SDK version. -### Upgrade Validation Failed -- **Cause**: Trying to swap enterprise (LF 2.2) with community (LF 2.1) packages -- **Fix**: Use DAR injection to maintain LF 2.2 compatibility +**Upgrade validation failed**: swapping enterprise (LF 2.2) with community (LF 2.1) packages. Use DAR injection to maintain LF 2.2 compatibility. 
-## External References +## References - [DAML SDK Releases](https://github.com/digital-asset/daml/releases) - [Canton Releases](https://github.com/digital-asset/canton/releases) -- [Splice Documentation](https://docs.dev.sync.global/) +- [Splice Docs](https://docs.dev.sync.global/) - [DAML-LF Governance](https://github.com/digital-asset/daml/blob/main/daml-lf/governance.rst) -- [Canton Network Docs](https://docs.digitalasset.com/) diff --git a/.claude/skills/codex/SKILL.md b/.claude/skills/codex/SKILL.md deleted file mode 100644 index 7f8e81b..0000000 --- a/.claude/skills/codex/SKILL.md +++ /dev/null @@ -1,163 +0,0 @@ ---- -name: codex -description: Hand off a task to Codex CLI for autonomous execution. Use when a task would benefit from a capable subagent to implement, fix, investigate, or review code. Codex has full codebase access and can make changes. -argument-hint: [--model ] [--sandbox ] -disable-model-invocation: false -allowed-tools: Bash(codex:*), Bash(git:*), Bash(pwd:*), Bash(mkdir:*), Bash(cat:*), Bash(head:*), Bash(tail:*), Bash(wc:*), Read, Grep, Glob ---- - -# Codex Subagent - -Session ID: ${CLAUDE_SESSION_ID} -Output: `~/.claude/codex/${CLAUDE_SESSION_ID}/` - -## Arguments - -Task: $ARGUMENTS - -**Optional flags** (only if user explicitly requests): -- `--model `: `gpt-5.2-codex` (default), `gpt-5.2`, `gpt-5-mini`, `o3` -- `--sandbox `: `read-only`, `workspace-write`, `danger-full-access` - -Omit flags to use user's config defaults. 
- -## Context Depth - -**Minimal**: Specific file/function → git state only -**Medium**: Feature/multi-file → add recent changes -**Full**: Investigation/unclear → add session summary - -## Gather Context - -**Always:** -```bash -pwd && git rev-parse --show-toplevel 2>/dev/null || echo "Not a git repo" -git branch --show-current 2>/dev/null && git status --short 2>/dev/null | head -20 -``` - -**Medium/Full:** -```bash -git diff --stat 2>/dev/null | tail -20 -git log --oneline -5 --since="4 hours ago" 2>/dev/null -``` - -**Full only:** Session summary (work done, decisions, blockers) - -## Prompt Structure - -Use CTCO format (Context → Task → Constraints → Output): - -``` - -Working directory: {cwd} -Repository: {repo_name} -Branch: {branch} -{git_status} -{recent_changes if medium/full} -{session_summary if full} - - - -{task from arguments} - - - -- Implement EXACTLY what is requested -- Read files before changing -- Run tests/linters to validate -- State interpretation if ambiguous - - - -Summary (≤5 bullets): -- **What changed**: Files and changes -- **Where**: file:line references -- **Validation**: Tests/linters run -- **Risks**: Edge cases to watch -- **Next steps**: Follow-up or "None" - -``` - -## Execute - -Setup: -```bash -mkdir -p ~/.claude/codex/${CLAUDE_SESSION_ID} -git rev-parse --show-toplevel 2>/dev/null && IN_GIT=true || IN_GIT=false -``` - -Run: -```bash -codex exec --json \ - -o ~/.claude/codex/${CLAUDE_SESSION_ID}/summary-{timestamp}.txt \ - {--skip-git-repo-check if not in git} \ - {--full-auto OR --sandbox } \ - {-m if requested} \ - - <<'CODEX_PROMPT' -{prompt} -CODEX_PROMPT > ~/.claude/codex/${CLAUDE_SESSION_ID}/progress-{timestamp}.jsonl -``` - -**Flags:** -- Not in git: add `--skip-git-repo-check` -- Default: `--full-auto` (workspace-write + auto-approval) -- User-requested: `--sandbox ` or `-m ` - -### Background vs Foreground - -**Background tasks** (>30 seconds expected): -- Multi-file changes, investigations, tests, feature work -- Use 
`run_in_background: true` → returns `task_id` - -**Foreground tasks** (<30 seconds): -- Single-line fixes, simple queries - -### Monitoring Background Tasks - -**Token-efficient approach:** -1. Use `TaskOutput(task_id, block=true)` to wait for completion -2. Ignore TaskOutput's content (stdout redirected to progress file) -3. Read summary file directly: `cat ~/.claude/codex/${CLAUDE_SESSION_ID}/summary-*.txt` - -The summary file contains only Codex's final message (token-efficient). - -**Progress checks** (if needed before completion): -- `TaskOutput(task_id, block=false)` - check if still running -- `tail -n 3 ~/.claude/codex/${CLAUDE_SESSION_ID}/progress-*.jsonl` - last 3 events only - -**Do NOT** read entire progress files or use `tail -f`. - -## Report Result - -Read summary: -```bash -cat ~/.claude/codex/${CLAUDE_SESSION_ID}/summary-*.txt -``` - -Report format (≤5 bullets): -``` -**Status:** {success/error/partial} -**Changed:** {files and changes} -**Validation:** {tests/linters} -**Risks:** {if any} -**Next:** {follow-up or "None"} -``` - -## Examples - -```bash -# Simple fix -/codex fix the null pointer in utils/parser.ts line 42 - -# Feature work -/codex add rate limiting to the /api/submit endpoint - -# Investigation (background) -/codex investigate why the CI build fails on arm64 - -# Model override -/codex --model o3 design a caching strategy - -# Read-only -/codex --sandbox read-only review the auth implementation -``` diff --git a/.claude/skills/data-driven-testing/SKILL.md b/.claude/skills/data-driven-testing/SKILL.md deleted file mode 100644 index f437110..0000000 --- a/.claude/skills/data-driven-testing/SKILL.md +++ /dev/null @@ -1,20 +0,0 @@ ---- -name: data-driven-testing -description: "DEPRECATED: Use testing-best-practices instead. This skill has been retired." ---- - -## Deprecated - -This skill has been replaced by **testing-best-practices**. - -Use `testing-best-practices` for all test design, test case generation, and test strategy work. 
- -### What changed - -- Test layering policy (unit / integration / e2e) replaces the unit-only DDT focus. -- Markdown tables replace the rigid canonical JSON test-case schema. -- Output is strategy + matrix + implementation plan, not JSON blocks. -- Added: hard rules against fabricated fixtures and invented source locations. -- Added: e2e execution guidance (preflight, async polling, flake handling). -- Added: CI lane guidance (PR smoke vs nightly full). -- Auth-state reuse and idempotent/state-tolerant e2e are first-class concerns. diff --git a/.claude/skills/e2e/SKILL.md b/.claude/skills/e2e/SKILL.md index 66303eb..36d0519 100644 --- a/.claude/skills/e2e/SKILL.md +++ b/.claude/skills/e2e/SKILL.md @@ -1,29 +1,20 @@ --- name: e2e -description: Run e2e tests, fix flake and outdated tests, identify bugs against spec. Use when running e2e tests, debugging test failures, or fixing flaky tests. Never changes source code logic or API without spec backing. +description: Use when running e2e tests, debugging test failures, or fixing flaky tests. Covers failure taxonomy, fix rules, and workflow. Never changes source code logic or API without spec backing. --- # E2E Testing -## Principles (Always Active) - -These apply whenever working with e2e tests, test failures, or test flakiness: - -### Failure Taxonomy +## Failure Taxonomy Every e2e failure is exactly one of: **A. Flaky** (test infrastructure issue) -- Race conditions, timing-dependent assertions -- Stale selectors after UI changes -- Missing waits, incorrect wait targets -- Network timing, mock setup ordering +- Race conditions, timing-dependent assertions, stale selectors, missing waits - Symptom: passes on retry, fails intermittently **B. 
Outdated** (test no longer matches implementation) -- Test asserts old behavior that was intentionally changed -- Selectors reference removed/renamed elements -- API contract changed, test wasn't updated +- Test asserts old behavior that was intentionally changed; selectors reference removed elements - Symptom: consistent failure, app works correctly **C. Bug** (implementation doesn't match spec) @@ -31,7 +22,7 @@ Every e2e failure is exactly one of: - **Only classify as bug when a spec exists to validate against** - If no spec exists, classify as "unverified failure" and report to the user -### Fix Rules by Category +## Fix Rules by Category **Flaky fixes:** - Replace `waitForTimeout` with auto-waiting locators @@ -39,7 +30,6 @@ Every e2e failure is exactly one of: - Fix race conditions with `expect()` web-first assertions - Fix mock/route setup ordering (before navigation) - **Never add arbitrary delays** - fix the underlying wait -- **Never weaken assertions** to make flaky tests pass - **Never add retry loops around assertions** - use the framework's built-in retry **Outdated fixes:** @@ -50,24 +40,16 @@ Every e2e failure is exactly one of: **Bug fixes:** - Quote the spec section that defines expected behavior - Fix the source code to match the spec -- **Unit tests MUST exist** before the fix is complete - - If unit tests exist, run them to confirm - - If unit tests don't exist, write them first (TDD) +- **Unit tests MUST exist** before the fix is complete — write them first if missing (TDD) - **Never change e2e assertions** to match buggy code - **Never change API contracts or interfaces** without spec backing - If no spec exists, ask the user: bug or outdated test? 
-### Source Code Boundary - -E2e test fixes must not change: -- Application logic or business rules -- API contracts, request/response shapes -- Database schemas or migrations -- Configuration defaults +## Source Code Boundary -The only exception: bug fixes where a spec explicitly defines the correct behavior and unit tests cover the fix. +E2e test fixes must not change application logic, API contracts, database schemas, or configuration defaults. The only exception: bug fixes where a spec explicitly defines the correct behavior and unit tests cover the fix. -## Workflow (When Explicitly Running E2E) +## Workflow ### Step 1: Discover Test Infrastructure @@ -78,8 +60,6 @@ The only exception: bug fixes where a spec explicitly defines the correct behavi ### Step 2: Run Tests -Run with minimal reporter to avoid context overflow: - ```bash # Playwright yarn playwright test --reporter=line @@ -88,12 +68,6 @@ yarn playwright test --reporter=line yarn test:e2e ``` -If a filter is specified, apply it: -```bash -yarn playwright test --reporter=line -g "transfer" -yarn test:e2e -- --grep "transfer" -``` - Parse failures into: | Test | File | Error | Category | @@ -102,23 +76,14 @@ Parse failures into: ### Step 3: Categorize -For each failure: -1. Read the test file -2. Read the source code it exercises -3. Check for a corresponding spec file -4. Assign category: flaky, outdated, bug, or unverified +For each failure: read the test file, read the source code it exercises, check for a corresponding spec file, assign category (flaky / outdated / bug / unverified). ### Step 4: Fix by Category -Apply fixes following the Principles above, in order: -1. **Flaky** - fix test infrastructure issues first (unblocks other tests) -2. **Outdated** - update stale assertions -3. **Bug** - fix with spec + unit test gate +Apply fixes in order: flaky first (unblocks other tests), then outdated, then bug. 
### Step 5: Re-run and Report -After all fixes, re-run the suite: - ``` ## E2E Results @@ -136,3 +101,5 @@ After all fixes, re-run the suite: ### Unit Tests Added - `src/transfer.test.ts` - amount validation edge cases (covers BUG fix) ``` + +See `testing-best-practices` for async handling, flake classification, and preflight check patterns. diff --git a/.claude/skills/extract-transcripts/PLAN.md b/.claude/skills/extract-transcripts/PLAN.md deleted file mode 100644 index 84b689f..0000000 --- a/.claude/skills/extract-transcripts/PLAN.md +++ /dev/null @@ -1,196 +0,0 @@ -# Transcript Analytics with DuckDB - Implementation Plan - -## Overview - -Extend the existing transcript extraction tools with a DuckDB-based index for querying past Claude Code and Codex CLI sessions at scale. - -## Schema Design (v2) - -```sql --- sessions table: file_path is the unique key (not session_id) -CREATE TABLE sessions ( - file_path TEXT PRIMARY KEY, -- unique identifier (filename handles subagent collision) - session_id TEXT, -- original session_id (for reference, not unique) - source TEXT NOT NULL, -- 'claude_code' | 'codex' - started_at TIMESTAMP, - ended_at TIMESTAMP, - duration_seconds INTEGER, - model TEXT, - cwd TEXT, - git_branch TEXT, - git_repo TEXT, -- derived from cwd - message_count INTEGER, - tool_count INTEGER, - file_mtime REAL, -- for incremental indexing - file_size INTEGER, -- for change detection - indexed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP -); - --- messages table -CREATE TABLE messages ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - file_path TEXT NOT NULL REFERENCES sessions(file_path), - message_idx INTEGER NOT NULL, - role TEXT NOT NULL, -- 'user' | 'assistant' - content TEXT, - timestamp TIMESTAMP, - has_thinking BOOLEAN DEFAULT FALSE, - UNIQUE(file_path, message_idx) -); - --- tool_calls table (simplified - no success tracking) -CREATE TABLE tool_calls ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - file_path TEXT NOT NULL REFERENCES sessions(file_path), - 
message_idx INTEGER, -- nullable: Codex function_call events lack message context - tool_name TEXT NOT NULL - -- NOTE: succeeded/input_summary removed - not derivable from current parsing -); - --- Full-text search index (DuckDB native) --- DuckDB doesn't have fts5; use LIKE/ILIKE for simple search or: --- Option 1: Use DuckDB's full-text search extension (duckdb_fts) --- Option 2: Use PRAGMA create_fts_index (experimental) --- For Phase 1, use simple ILIKE queries; add FTS extension in later phase -CREATE INDEX idx_messages_content ON messages(content); -``` - -### Design Decisions - -1. **`file_path` as primary key**: The existing `extract_transcript.py` explicitly uses filename (not session_id) as the unique identifier because session_id can be shared across subagents (see line 130-131). This schema follows that pattern. - -2. **No `tool_calls.succeeded`**: Current extractors only capture `tool_use` blocks. `tool_result` blocks are skipped in Codex parsing (line 86-87 of `extract_codex_transcript.py`) and not correlated in Claude parsing. Adding success tracking would require new extraction logic. - -3. **No `messages.token_count`**: Current extractors don't capture usage/token data. Would require parsing additional fields from session JSONL. - -4. **Session linking deferred**: No parent/subagent metadata exists in current session format. Tree construction would require heuristics or new metadata. - -5. **`tool_calls.message_idx` nullable**: Claude Code tool_use blocks are nested in assistant messages (so message_idx is available), but Codex function_call events are standalone entries without message context (see `extract_codex_transcript.py:67-69`). Making this nullable allows both sources to populate the schema. - -6. **FTS via ILIKE for Phase 1**: DuckDB doesn't support SQLite's fts5 syntax. Phase 1 uses simple `ILIKE` queries on an indexed column. The `duckdb_fts` extension can be added later for better performance. 
- ---- - -## Incremental Indexing Strategy - -Per-file tracking stored in DuckDB (no separate JSON file): - -```python -def should_reindex(file_path: Path, db: DuckDB) -> bool: - """Check if file needs reindexing.""" - current_mtime = file_path.stat().st_mtime - current_size = file_path.stat().st_size - - result = db.execute(""" - SELECT file_mtime, file_size FROM sessions - WHERE file_path = ? - """, [str(file_path)]).fetchone() - - if result is None: - return True # New file - - stored_mtime, stored_size = result - return current_mtime != stored_mtime or current_size != stored_size - -def reindex_file(file_path: Path, db: DuckDB): - """Delete old data and reindex file.""" - db.execute("DELETE FROM tool_calls WHERE file_path = ?", [str(file_path)]) - db.execute("DELETE FROM messages WHERE file_path = ?", [str(file_path)]) - db.execute("DELETE FROM sessions WHERE file_path = ?", [str(file_path)]) - # ... parse and insert fresh data - -def delete_session(file_path: str, db: DuckDB): - """Remove all data for a session file.""" - db.execute("DELETE FROM tool_calls WHERE file_path = ?", [file_path]) - db.execute("DELETE FROM messages WHERE file_path = ?", [file_path]) - db.execute("DELETE FROM sessions WHERE file_path = ?", [file_path]) - -def cleanup_deleted_files(db: DuckDB): - """Remove entries for files that no longer exist.""" - indexed_files = db.execute("SELECT file_path FROM sessions").fetchall() - for (file_path,) in indexed_files: - if not Path(file_path).exists(): - delete_session(file_path, db) # Just delete, don't reindex -``` - -### Handles - -| Scenario | Detection | Action | -|----------|-----------|--------| -| New file | Not in DB | Full index | -| Modified file | mtime or size changed | Delete + reindex | -| Deleted file | Path no longer exists | Delete from DB | -| Append-only growth | Size increased | Delete + reindex | - ---- - -## CLI Commands - -```bash -# Index/reindex sessions -transcript index # Incremental index of all sessions -transcript 
index --full # Force full reindex -transcript index --path # Index specific directory - -# Search -transcript search "error handling" # FTS across message content -transcript search "error" --cwd ~/myproject # Filter by project - -# List sessions -transcript recent # Last 10 sessions -transcript recent --project myapp # Filter by cwd containing "myapp" -transcript recent --since 7d # Last 7 days - -# Analytics -transcript tools # Top 10 tools by usage -transcript tools --top 20 # Top 20 -transcript stats # Session counts, durations, model breakdown - -# View session -transcript show # Full transcript -transcript show --summary # Summary only -``` - ---- - -## Directory Structure - -``` -~/.claude/transcript-index/ -└── sessions.duckdb # Single database file with all tables + FTS -``` - ---- - -## Implementation Phases - -### Phase 1: Core indexing -- DuckDB schema creation -- Parse Claude Code JSONL → sessions/messages/tool_calls tables -- Incremental indexing with mtime/size tracking -- Basic CLI: `index`, `recent`, `search` - -### Phase 2: Codex support -- Add Codex session parsing -- Unified schema handles both sources via `source` column - -### Phase 3: Analytics -- `tools` command with aggregations -- `stats` command for usage patterns -- Time-series queries - -### Phase 4: Future considerations -- Session linking heuristics (if metadata becomes available) -- Token counting (if extraction adds usage parsing) -- Semantic search via embeddings - ---- - -## Out of Scope (with rationale) - -| Feature | Reason | Reference | -|---------|--------|-----------| -| `tool_calls.succeeded` | Requires `tool_result` parsing not in current extractors | `extract_codex_transcript.py:86-87` | -| `messages.token_count` | Not captured by current extraction | `extract_transcript.py:108-125` | -| Parent/subagent linking | No metadata available in session format | `extract_transcript.py:93-100` | -| Real-time updates | Batch indexing only; run `transcript index` as needed | Design 
choice | diff --git a/.claude/skills/extract-transcripts/SKILL.md b/.claude/skills/extract-transcripts/SKILL.md deleted file mode 100644 index 2b7ed5a..0000000 --- a/.claude/skills/extract-transcripts/SKILL.md +++ /dev/null @@ -1,103 +0,0 @@ ---- -name: extract-transcripts -description: Extract readable transcripts from Claude Code and Codex CLI session JSONL files ---- - -# Extract Transcripts - -Extracts readable markdown transcripts from Claude Code and Codex CLI session JSONL files. - -## Scripts - -### Claude Code Sessions - -```bash -# Extract a single session -uv run ~/.claude/skills/extract-transcripts/extract_transcript.py - -# With tool calls and thinking blocks -uv run ~/.claude/skills/extract-transcripts/extract_transcript.py --include-tools --include-thinking - -# Extract all sessions from a directory -uv run ~/.claude/skills/extract-transcripts/extract_transcript.py --all - -# Output to file -uv run ~/.claude/skills/extract-transcripts/extract_transcript.py -o output.md - -# Summary only (quick overview) -uv run ~/.claude/skills/extract-transcripts/extract_transcript.py --summary - -# Skip empty/warmup-only sessions -uv run ~/.claude/skills/extract-transcripts/extract_transcript.py --all --skip-empty -``` - -**Options:** -- `--include-tools`: Include tool calls and results -- `--include-thinking`: Include Claude's thinking blocks -- `--all`: Process all .jsonl files in directory -- `-o, --output`: Output file path (default: stdout) -- `--summary`: Only output brief summary -- `--skip-empty`: Skip empty and warmup-only sessions -- `--min-messages N`: Minimum messages for --skip-empty (default: 2) - -### Codex CLI Sessions - -```bash -# Extract a Codex session -uv run ~/.claude/skills/extract-transcripts/extract_codex_transcript.py - -# Extract from Codex history file -uv run ~/.claude/skills/extract-transcripts/extract_codex_transcript.py ~/.codex/history.jsonl --history -``` - -## Session File Locations - -### Claude Code -- Sessions: 
`~/.claude/projects//.jsonl` - -### Codex CLI -- Sessions: `~/.codex/sessions//rollout.jsonl` -- History: `~/.codex/history.jsonl` - -## DuckDB-Based Transcript Index - -For querying across many sessions, use the DuckDB-based indexer: - -```bash -# Index all sessions (incremental - only new/changed files) -uv run ~/.claude/skills/extract-transcripts/transcript_index.py index - -# Force full reindex -uv run ~/.claude/skills/extract-transcripts/transcript_index.py index --full - -# Limit number of files to process -uv run ~/.claude/skills/extract-transcripts/transcript_index.py index --limit 10 - -# List recent sessions -uv run ~/.claude/skills/extract-transcripts/transcript_index.py recent -uv run ~/.claude/skills/extract-transcripts/transcript_index.py recent --limit 20 -uv run ~/.claude/skills/extract-transcripts/transcript_index.py recent --project myapp -uv run ~/.claude/skills/extract-transcripts/transcript_index.py recent --since 7d - -# Search across sessions -uv run ~/.claude/skills/extract-transcripts/transcript_index.py search "error handling" -uv run ~/.claude/skills/extract-transcripts/transcript_index.py search "query" --cwd ~/myproject - -# Show a session transcript -uv run ~/.claude/skills/extract-transcripts/transcript_index.py show -uv run ~/.claude/skills/extract-transcripts/transcript_index.py show --summary -``` - -**Requirements:** uv (dependencies auto-installed via inline script metadata) - -**Database location:** `~/.claude/transcript-index/sessions.duckdb` - -## Output Format - -Transcripts are formatted as markdown with: -- Session metadata (date, duration, model, working directory, git branch) -- User messages prefixed with `## User` -- Assistant responses prefixed with `## Assistant` -- Tool calls in code blocks (if --include-tools) -- Thinking in blockquotes (if --include-thinking) -- Tool usage summary for Codex sessions diff --git a/.claude/skills/extract-transcripts/extract_codex_transcript.py 
b/.claude/skills/extract-transcripts/extract_codex_transcript.py deleted file mode 100644 index 824cce7..0000000 --- a/.claude/skills/extract-transcripts/extract_codex_transcript.py +++ /dev/null @@ -1,211 +0,0 @@ -#!/usr/bin/env python3 -"""Extract readable transcripts from Codex CLI session JSONL files.""" - -import json -import sys -import os -from datetime import datetime -from pathlib import Path - - -def parse_timestamp(ts: str) -> datetime: - """Parse ISO timestamp.""" - return datetime.fromisoformat(ts.replace('Z', '+00:00')) - - -def process_codex_session(filepath: Path) -> str: - """Process a Codex session file and return formatted transcript.""" - output = [] - session_meta = None - messages = [] - tool_calls = [] - - with open(filepath, 'r') as f: - for line in f: - line = line.strip() - if not line: - continue - try: - entry = json.loads(line) - except json.JSONDecodeError: - continue - - entry_type = entry.get('type') - - if entry_type == 'session_meta': - payload = entry.get('payload', {}) - session_meta = { - 'id': payload.get('id', 'unknown'), - 'timestamp': payload.get('timestamp'), - 'cwd': payload.get('cwd'), - 'cli_version': payload.get('cli_version'), - 'git': payload.get('git', {}), - } - elif entry_type == 'event_msg': - # Codex wraps messages in event_msg payloads - payload = entry.get('payload', {}) - msg_type = payload.get('type') - - if msg_type == 'user_message': - text = payload.get('message', '') - if text: - messages.append({ - 'role': 'user', - 'text': text, - 'tools': [], - 'timestamp': entry.get('timestamp') - }) - elif msg_type == 'agent_message': - text = payload.get('message', '') - if text: - messages.append({ - 'role': 'assistant', - 'text': text, - 'tools': [], - 'timestamp': entry.get('timestamp') - }) - elif msg_type == 'function_call': - name = payload.get('name', 'unknown') - tool_calls.append({'name': name}) - elif entry_type == 'message': - # Legacy format support - payload = entry.get('payload', {}) - role = 
payload.get('role', 'unknown') - content = payload.get('content', []) - - # Extract text from content - text_parts = [] - for item in content: - if isinstance(item, dict): - if item.get('type') == 'text': - text_parts.append(item.get('text', '')) - elif item.get('type') == 'tool_use': - tool_calls.append({ - 'name': item.get('name'), - }) - elif item.get('type') == 'tool_result': - pass # Skip tool results for brevity - elif isinstance(item, str): - text_parts.append(item) - - if text_parts: - messages.append({ - 'role': role, - 'text': '\n'.join(text_parts), - 'tools': [], - 'timestamp': entry.get('timestamp') - }) - - # Build output - output.append(f"# Codex Session: {filepath.stem}") - output.append("") - - if session_meta: - if session_meta.get('timestamp'): - try: - ts = parse_timestamp(session_meta['timestamp']) - output.append(f"**Date:** {ts.strftime('%Y-%m-%d %H:%M')}") - except: - pass - if session_meta.get('cwd'): - output.append(f"**Working Directory:** {session_meta['cwd']}") - if session_meta.get('cli_version'): - output.append(f"**Codex Version:** {session_meta['cli_version']}") - git = session_meta.get('git', {}) - if git.get('branch'): - output.append(f"**Git Branch:** {git['branch']}") - if git.get('commit_hash'): - output.append(f"**Commit:** {git['commit_hash'][:8]}") - - output.append("") - user_count = len([m for m in messages if m['role'] == 'user']) - assistant_count = len([m for m in messages if m['role'] == 'assistant']) - output.append(f"**Messages:** {user_count} user, {assistant_count} assistant, {len(tool_calls)} tool calls") - output.append("") - output.append("---") - output.append("") - - # Output messages - for msg in messages: - role_header = "## User" if msg['role'] == 'user' else "## Assistant" - output.append(role_header) - output.append("") - - if msg['text']: - # Truncate very long messages - text = msg['text'] - if len(text) > 2000: - text = text[:2000] + "\n\n... 
(truncated)" - output.append(text) - output.append("") - - # Append tool summary if any - if tool_calls: - output.append("## Tools Used") - output.append("") - tool_names = {} - for t in tool_calls: - name = t.get('name', 'unknown') - tool_names[name] = tool_names.get(name, 0) + 1 - for name, count in sorted(tool_names.items(), key=lambda x: -x[1])[:10]: - output.append(f"- `{name}`: {count}") - output.append("") - - return '\n'.join(output) - - -def process_history_entry(entry: dict) -> str: - """Format a single history entry.""" - session_id = entry.get('session_id', 'unknown')[:8] - ts = entry.get('ts', 0) - text = entry.get('text', '') - - # Format timestamp - try: - dt = datetime.fromtimestamp(ts) - date_str = dt.strftime('%Y-%m-%d %H:%M') - except: - date_str = 'unknown' - - output = [] - output.append(f"## Session {session_id} ({date_str})") - output.append("") - - # Truncate very long prompts - if len(text) > 3000: - text = text[:3000] + "\n\n... (truncated)" - - output.append(text) - output.append("") - output.append("---") - output.append("") - - return '\n'.join(output) - - -def main(): - if len(sys.argv) < 2: - print("Usage: extract_codex_transcript.py [--history]") - sys.exit(1) - - filepath = Path(sys.argv[1]) - is_history = '--history' in sys.argv - - if is_history: - # Process history.jsonl format - output = ["# Codex History Entries", "", "---", ""] - with open(filepath, 'r') as f: - for line in f: - try: - entry = json.loads(line.strip()) - output.append(process_history_entry(entry)) - except json.JSONDecodeError: - continue - print('\n'.join(output)) - else: - # Process session rollout format - print(process_codex_session(filepath)) - - -if __name__ == '__main__': - main() diff --git a/.claude/skills/extract-transcripts/extract_transcript.py b/.claude/skills/extract-transcripts/extract_transcript.py deleted file mode 100755 index 743f42d..0000000 --- a/.claude/skills/extract-transcripts/extract_transcript.py +++ /dev/null @@ -1,296 +0,0 @@ 
-#!/usr/bin/env python3 -"""Extract readable transcripts from Claude Code session JSONL files.""" - -import json -import sys -import os -import argparse -from datetime import datetime -from pathlib import Path -from typing import Optional, TextIO - - -def parse_timestamp(ts: str) -> datetime: - """Parse ISO timestamp.""" - return datetime.fromisoformat(ts.replace('Z', '+00:00')) - - -def format_duration(start: datetime, end: datetime) -> str: - """Format duration between two timestamps.""" - delta = end - start - hours, remainder = divmod(int(delta.total_seconds()), 3600) - minutes, seconds = divmod(remainder, 60) - if hours > 0: - return f"{hours}h {minutes}m {seconds}s" - elif minutes > 0: - return f"{minutes}m {seconds}s" - return f"{seconds}s" - - -def extract_text_content(content) -> str: - """Extract text from message content (handles both string and array formats).""" - if isinstance(content, str): - return content - if isinstance(content, list): - texts = [] - for block in content: - if isinstance(block, dict): - if block.get('type') == 'text': - texts.append(block.get('text', '')) - return '\n'.join(texts) - return '' - - -def extract_thinking(content) -> Optional[str]: - """Extract thinking from message content.""" - if isinstance(content, list): - for block in content: - if isinstance(block, dict) and block.get('type') == 'thinking': - return block.get('thinking', '') - return None - - -def extract_tool_calls(content) -> list: - """Extract tool calls from message content.""" - tools = [] - if isinstance(content, list): - for block in content: - if isinstance(block, dict) and block.get('type') == 'tool_use': - tools.append({ - 'name': block.get('name', 'unknown'), - 'input': block.get('input', {}) - }) - return tools - - -def process_session(filepath: Path, include_tools: bool = False, - include_thinking: bool = False, summary_only: bool = False) -> str: - """Process a single session file and return formatted transcript.""" - messages = [] - metadata = {} 
- first_ts = None - last_ts = None - - with open(filepath, 'r') as f: - for line in f: - line = line.strip() - if not line: - continue - try: - entry = json.loads(line) - except json.JSONDecodeError: - continue - - entry_type = entry.get('type') - timestamp = entry.get('timestamp') - - if timestamp: - ts = parse_timestamp(timestamp) - if first_ts is None: - first_ts = ts - last_ts = ts - - # Extract session metadata - if entry_type == 'user' and not metadata: - metadata = { - 'sessionId': entry.get('sessionId', 'unknown'), - 'version': entry.get('version', 'unknown'), - 'cwd': entry.get('cwd', 'unknown'), - 'gitBranch': entry.get('gitBranch', 'unknown'), - } - - # Extract model from assistant messages - if entry_type == 'assistant': - msg = entry.get('message', {}) - if 'model' in msg and 'model' not in metadata: - metadata['model'] = msg['model'] - - # Process user and assistant messages - if entry_type in ('user', 'assistant'): - msg = entry.get('message', {}) - role = msg.get('role', entry_type) - content = msg.get('content', '') - - text = extract_text_content(content) - thinking = extract_thinking(content) if include_thinking else None - tools = extract_tool_calls(content) if include_tools else [] - - if text or thinking or tools: - messages.append({ - 'role': role, - 'text': text, - 'thinking': thinking, - 'tools': tools, - 'timestamp': timestamp - }) - - # Build output - output = [] - - # Header - use filename to ensure uniqueness (session_id can be shared by subagents) - file_id = filepath.stem - output.append(f"# Session: {file_id}") - output.append("") - - if first_ts and last_ts: - output.append(f"**Date:** {first_ts.strftime('%Y-%m-%d %H:%M')}") - output.append(f"**Duration:** {format_duration(first_ts, last_ts)}") - - if metadata.get('model'): - output.append(f"**Model:** {metadata['model']}") - if metadata.get('cwd'): - output.append(f"**Working Directory:** {metadata['cwd']}") - if metadata.get('gitBranch'): - output.append(f"**Git Branch:** 
{metadata['gitBranch']}") - - output.append("") - output.append("---") - output.append("") - - if summary_only: - user_count = sum(1 for m in messages if m['role'] == 'user') - assistant_count = sum(1 for m in messages if m['role'] == 'assistant') - tool_count = sum(len(m['tools']) for m in messages) - - output.append(f"**Messages:** {user_count} user, {assistant_count} assistant") - output.append(f"**Tool calls:** {tool_count}") - - # First user message preview - find first substantive prompt - for m in messages: - if m['role'] == 'user' and m['text']: - text = m['text'].strip() - # Skip very short prompts (likely just "Warmup" or partial) - if len(text) < 20: - continue - preview = text[:500].replace('\n', ' ') - if len(text) > 500: - preview += '...' - output.append(f"\n**First prompt:** {preview}") - break - else: - # No substantive prompt found - output.append(f"\n**First prompt:** (no substantive prompt found)") - - return '\n'.join(output) - - # Full transcript - for msg in messages: - role_header = "## User" if msg['role'] == 'user' else "## Assistant" - output.append(role_header) - output.append("") - - if msg['thinking']: - output.append("> **Thinking:**") - for line in msg['thinking'].split('\n'): - output.append(f"> {line}") - output.append("") - - if msg['text']: - output.append(msg['text']) - output.append("") - - if msg['tools']: - for tool in msg['tools']: - output.append(f"**Tool:** `{tool['name']}`") - input_str = json.dumps(tool['input'], indent=2) - if len(input_str) > 500: - input_str = input_str[:500] + '\n ...(truncated)' - output.append(f"```json\n{input_str}\n```") - output.append("") - - return '\n'.join(output) - - -def has_substantive_content(filepath: Path, min_messages: int = 2) -> bool: - """Check if session has substantive content (not just warmups or empty).""" - user_count = 0 - assistant_count = 0 - has_real_content = False - - with open(filepath, 'r') as f: - for line in f: - line = line.strip() - if not line: - continue - try: - 
entry = json.loads(line) - except json.JSONDecodeError: - continue - - entry_type = entry.get('type') - if entry_type == 'user': - msg = entry.get('message', {}) - content = msg.get('content', '') - text = content if isinstance(content, str) else '' - if isinstance(content, list): - text = ' '.join(b.get('text', '') for b in content if isinstance(b, dict)) - # Skip warmup-only sessions - if text.strip().lower() not in ('warmup', ''): - has_real_content = True - user_count += 1 - elif entry_type == 'assistant': - assistant_count += 1 - - return has_real_content and (user_count + assistant_count) >= min_messages - - -def main(): - parser = argparse.ArgumentParser(description='Extract transcripts from Claude Code sessions') - parser.add_argument('path', help='Session file or directory') - parser.add_argument('--include-tools', action='store_true', help='Include tool calls') - parser.add_argument('--include-thinking', action='store_true', help='Include thinking blocks') - parser.add_argument('--all', action='store_true', help='Process all .jsonl files in directory') - parser.add_argument('-o', '--output', help='Output file (default: stdout)') - parser.add_argument('--summary', action='store_true', help='Only output summary') - parser.add_argument('--skip-empty', action='store_true', help='Skip empty and warmup-only sessions') - parser.add_argument('--min-messages', type=int, default=2, help='Minimum messages for --skip-empty (default: 2)') - - args = parser.parse_args() - - path = Path(args.path) - - if args.all and path.is_dir(): - files = sorted(path.glob('*.jsonl'), key=lambda p: p.stat().st_mtime) - elif path.is_file(): - files = [path] - else: - print(f"Error: {path} not found or invalid", file=sys.stderr) - sys.exit(1) - - # Filter out empty/warmup sessions if requested - if args.skip_empty: - files = [f for f in files if has_substantive_content(f, args.min_messages)] - - output_file: Optional[TextIO] = None - if args.output: - output_file = open(args.output, 
'w') - - seen_sessions = set() - try: - for filepath in files: - # Track unique sessions by session ID to avoid duplicates - session_id = filepath.stem - if session_id in seen_sessions: - continue - seen_sessions.add(session_id) - - transcript = process_session( - filepath, - include_tools=args.include_tools, - include_thinking=args.include_thinking, - summary_only=args.summary - ) - - if output_file: - output_file.write(transcript) - output_file.write('\n\n---\n\n') - else: - print(transcript) - print('\n---\n') - finally: - if output_file: - output_file.close() - - -if __name__ == '__main__': - main() diff --git a/.claude/skills/extract-transcripts/transcript_index.py b/.claude/skills/extract-transcripts/transcript_index.py deleted file mode 100755 index 60bb81e..0000000 --- a/.claude/skills/extract-transcripts/transcript_index.py +++ /dev/null @@ -1,602 +0,0 @@ -#!/usr/bin/env -S uv run -# /// script -# requires-python = ">=3.10" -# dependencies = ["duckdb"] -# /// -"""DuckDB-based indexer for Claude Code session transcripts.""" - -import argparse -import json -import os -import sys -from datetime import datetime, timedelta -from pathlib import Path -from typing import Optional - -import duckdb - - -# Default paths -DEFAULT_DB_PATH = Path.home() / ".claude" / "transcript-index" / "sessions.duckdb" -# Check both possible session locations -DEFAULT_SESSIONS_PATHS = [ - Path.home() / "Library" / "Application Support" / "Claude" / "sessions", # macOS - Path.home() / ".claude" / "projects", # Claude Code CLI projects - Path.home() / ".config" / "claude" / "sessions", # Linux -] - -# Schema - matches PLAN.md -SCHEMA = """ --- sessions table: file_path is the unique key (not session_id) -CREATE TABLE IF NOT EXISTS sessions ( - file_path TEXT PRIMARY KEY, - session_id TEXT, - source TEXT NOT NULL, - started_at TIMESTAMP, - ended_at TIMESTAMP, - duration_seconds INTEGER, - model TEXT, - cwd TEXT, - git_branch TEXT, - git_repo TEXT, - message_count INTEGER, - tool_count 
INTEGER, - file_mtime DOUBLE, - file_size BIGINT, - indexed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP -); - --- messages table with id and foreign key reference -CREATE SEQUENCE IF NOT EXISTS messages_id_seq; -CREATE TABLE IF NOT EXISTS messages ( - id INTEGER DEFAULT nextval('messages_id_seq') PRIMARY KEY, - file_path TEXT NOT NULL REFERENCES sessions(file_path), - message_idx INTEGER NOT NULL, - role TEXT NOT NULL, - content TEXT, - timestamp TIMESTAMP, - has_thinking BOOLEAN DEFAULT FALSE, - UNIQUE(file_path, message_idx) -); - --- tool_calls table with id and foreign key reference -CREATE SEQUENCE IF NOT EXISTS tool_calls_id_seq; -CREATE TABLE IF NOT EXISTS tool_calls ( - id INTEGER DEFAULT nextval('tool_calls_id_seq') PRIMARY KEY, - file_path TEXT NOT NULL REFERENCES sessions(file_path), - message_idx INTEGER, - tool_name TEXT NOT NULL -); - --- Indexes for search and lookup --- Note: No index on messages.content - ILIKE search works without it and --- avoids DuckDB's ART index key size limit (122KB) for large message content -CREATE INDEX IF NOT EXISTS idx_messages_file_path ON messages(file_path); -CREATE INDEX IF NOT EXISTS idx_tool_calls_file_path ON tool_calls(file_path); -""" - - -def parse_timestamp(ts: str) -> datetime: - """Parse ISO timestamp.""" - return datetime.fromisoformat(ts.replace('Z', '+00:00')) - - -def extract_text_content(content) -> str: - """Extract text from message content.""" - if isinstance(content, str): - return content - if isinstance(content, list): - texts = [] - for block in content: - if isinstance(block, dict) and block.get('type') == 'text': - texts.append(block.get('text', '')) - return '\n'.join(texts) - return '' - - -def extract_tool_calls(content) -> list: - """Extract tool calls from message content.""" - tools = [] - if isinstance(content, list): - for block in content: - if isinstance(block, dict) and block.get('type') == 'tool_use': - tools.append(block.get('name', 'unknown')) - return tools - - -def 
has_thinking(content) -> bool: - """Check if content has thinking blocks.""" - if isinstance(content, list): - for block in content: - if isinstance(block, dict) and block.get('type') == 'thinking': - return True - return False - - -def parse_session_file(filepath: Path) -> dict: - """Parse a Claude Code session JSONL file.""" - messages = [] - tool_calls = [] - metadata = {} - first_ts = None - last_ts = None - message_idx = 0 - - with open(filepath, 'r') as f: - for line in f: - line = line.strip() - if not line: - continue - try: - entry = json.loads(line) - except json.JSONDecodeError: - continue - - entry_type = entry.get('type') - timestamp = entry.get('timestamp') - - if timestamp: - try: - ts = parse_timestamp(timestamp) - if first_ts is None: - first_ts = ts - last_ts = ts - except (ValueError, TypeError): - pass - - # Extract session metadata from first user entry - if entry_type == 'user' and not metadata: - metadata = { - 'session_id': entry.get('sessionId', 'unknown'), - 'cwd': entry.get('cwd'), - 'git_branch': entry.get('gitBranch'), - } - - # Extract model from assistant messages - if entry_type == 'assistant': - msg = entry.get('message', {}) - if 'model' in msg and 'model' not in metadata: - metadata['model'] = msg['model'] - - # Process user and assistant messages - if entry_type in ('user', 'assistant'): - msg = entry.get('message', {}) - role = msg.get('role', entry_type) - content = msg.get('content', '') - - text = extract_text_content(content) - tools = extract_tool_calls(content) - thinking = has_thinking(content) - - if text or tools: - messages.append({ - 'message_idx': message_idx, - 'role': role, - 'content': text, - 'timestamp': timestamp, - 'has_thinking': thinking, - }) - - for tool_name in tools: - tool_calls.append({ - 'message_idx': message_idx, - 'tool_name': tool_name, - }) - - message_idx += 1 - - # Calculate duration - duration_seconds = None - if first_ts and last_ts: - duration_seconds = int((last_ts - 
first_ts).total_seconds()) - - # Derive git_repo from cwd - git_repo = None - if metadata.get('cwd'): - git_repo = Path(metadata['cwd']).name - - return { - 'session_id': metadata.get('session_id'), - 'source': 'claude_code', - 'started_at': first_ts, - 'ended_at': last_ts, - 'duration_seconds': duration_seconds, - 'model': metadata.get('model'), - 'cwd': metadata.get('cwd'), - 'git_branch': metadata.get('git_branch'), - 'git_repo': git_repo, - 'messages': messages, - 'tool_calls': tool_calls, - } - - -def should_reindex(file_path: Path, con: duckdb.DuckDBPyConnection) -> bool: - """Check if file needs reindexing.""" - try: - stat = file_path.stat() - current_mtime = stat.st_mtime - current_size = stat.st_size - except OSError: - return False - - result = con.execute(""" - SELECT file_mtime, file_size FROM sessions - WHERE file_path = ? - """, [str(file_path)]).fetchone() - - if result is None: - return True # New file - - stored_mtime, stored_size = result - return current_mtime != stored_mtime or current_size != stored_size - - -def delete_session(file_path: str, con: duckdb.DuckDBPyConnection): - """Remove all data for a session file.""" - con.execute("DELETE FROM tool_calls WHERE file_path = ?", [file_path]) - con.execute("DELETE FROM messages WHERE file_path = ?", [file_path]) - con.execute("DELETE FROM sessions WHERE file_path = ?", [file_path]) - - -def index_file(file_path: Path, con: duckdb.DuckDBPyConnection) -> bool: - """Index a single session file. 
Returns True if indexed.""" - if not should_reindex(file_path, con): - return False - - # Delete existing data - delete_session(str(file_path), con) - - # Parse the file - data = parse_session_file(file_path) - - # Get file stats - stat = file_path.stat() - - # Insert session - con.execute(""" - INSERT INTO sessions ( - file_path, session_id, source, started_at, ended_at, - duration_seconds, model, cwd, git_branch, git_repo, - message_count, tool_count, file_mtime, file_size - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) - """, [ - str(file_path), - data['session_id'], - data['source'], - data['started_at'], - data['ended_at'], - data['duration_seconds'], - data['model'], - data['cwd'], - data['git_branch'], - data['git_repo'], - len(data['messages']), - len(data['tool_calls']), - stat.st_mtime, - stat.st_size, - ]) - - # Insert messages - for msg in data['messages']: - con.execute(""" - INSERT INTO messages (file_path, message_idx, role, content, timestamp, has_thinking) - VALUES (?, ?, ?, ?, ?, ?) - """, [ - str(file_path), - msg['message_idx'], - msg['role'], - msg['content'], - msg['timestamp'], - msg['has_thinking'], - ]) - - # Insert tool calls - for tool in data['tool_calls']: - con.execute(""" - INSERT INTO tool_calls (file_path, message_idx, tool_name) - VALUES (?, ?, ?) 
- """, [ - str(file_path), - tool['message_idx'], - tool['tool_name'], - ]) - - return True - - -def cleanup_deleted_files(con: duckdb.DuckDBPyConnection) -> int: - """Remove entries for files that no longer exist.""" - indexed_files = con.execute("SELECT file_path FROM sessions").fetchall() - deleted = 0 - for (file_path,) in indexed_files: - if not Path(file_path).exists(): - delete_session(file_path, con) - deleted += 1 - return deleted - - -def cmd_index(args, con: duckdb.DuckDBPyConnection): - """Index command handler.""" - if args.path: - # User-specified path - expand ~ and check existence - sessions_path = Path(args.path).expanduser() - if not sessions_path.exists(): - print(f"Error: Sessions directory not found: {sessions_path}", file=sys.stderr) - sys.exit(1) - sessions_paths = [sessions_path] - else: - # Use default paths - check all that exist - sessions_paths = [p for p in DEFAULT_SESSIONS_PATHS if p.exists()] - if not sessions_paths: - print("Error: No sessions directory found. 
Checked:", file=sys.stderr) - for p in DEFAULT_SESSIONS_PATHS: - print(f" - {p}", file=sys.stderr) - sys.exit(1) - - # Get all JSONL files from all paths (recursively for project directories) - all_files = [] - for sessions_path in sessions_paths: - all_files.extend(sessions_path.glob('**/*.jsonl')) - files = sorted(all_files, key=lambda p: p.stat().st_mtime, reverse=True) - - if args.limit: - files = files[:args.limit] - - if args.full: - # Force full reindex - delete all data first - con.execute("DELETE FROM tool_calls") - con.execute("DELETE FROM messages") - con.execute("DELETE FROM sessions") - print("Full reindex: cleared existing data") - - indexed = 0 - skipped = 0 - for filepath in files: - if index_file(filepath, con): - indexed += 1 - if not args.quiet: - print(f"Indexed: {filepath.name}") - else: - skipped += 1 - - # Cleanup deleted files - deleted = cleanup_deleted_files(con) - - print(f"\nSummary: {indexed} indexed, {skipped} skipped (unchanged), {deleted} removed (deleted files)") - - -def cmd_recent(args, con: duckdb.DuckDBPyConnection): - """Recent sessions command handler.""" - limit = args.limit or 10 - - query = "SELECT file_path, session_id, started_at, duration_seconds, model, cwd, git_branch, message_count, tool_count FROM sessions" - params = [] - - conditions = [] - if args.project: - conditions.append("cwd ILIKE ?") - params.append(f"%{args.project}%") - - if args.since: - # Parse duration like "7d", "24h" - since = args.since.lower() - try: - if since.endswith('d'): - days = int(since[:-1]) - cutoff = datetime.now() - timedelta(days=days) - elif since.endswith('h'): - hours = int(since[:-1]) - cutoff = datetime.now() - timedelta(hours=hours) - else: - print(f"Invalid --since format: {args.since}. Use '7d' or '24h'", file=sys.stderr) - sys.exit(1) - except ValueError: - print(f"Invalid --since value: {args.since}. 
Use format like '7d' or '24h'", file=sys.stderr) - sys.exit(1) - conditions.append("started_at >= ?") - params.append(cutoff) - - if conditions: - query += " WHERE " + " AND ".join(conditions) - - query += " ORDER BY started_at DESC LIMIT ?" - params.append(limit) - - results = con.execute(query, params).fetchall() - - if not results: - print("No sessions found.") - return - - for row in results: - file_path, session_id, started_at, duration, model, cwd, git_branch, msg_count, tool_count = row - duration_str = f"{duration // 60}m {duration % 60}s" if duration else "?" - date_str = started_at.strftime('%Y-%m-%d %H:%M') if started_at else "?" - cwd_short = Path(cwd).name if cwd else "?" - - print(f"{date_str} | {duration_str:>8} | {msg_count:>3} msgs | {tool_count:>4} tools | {cwd_short}") - print(f" {file_path}") - print() - - -def cmd_search(args, con: duckdb.DuckDBPyConnection): - """Search command handler.""" - query_text = args.query - limit = args.limit or 20 - - query = """ - SELECT DISTINCT s.file_path, s.started_at, s.cwd, s.git_branch, - m.content, m.role - FROM messages m - JOIN sessions s ON m.file_path = s.file_path - WHERE m.content ILIKE ? - """ - params = [f"%{query_text}%"] - - if args.cwd: - query += " AND s.cwd ILIKE ?" - params.append(f"%{args.cwd}%") - - query += " ORDER BY s.started_at DESC LIMIT ?" - params.append(limit) - - results = con.execute(query, params).fetchall() - - if not results: - print(f"No matches for '{query_text}'") - return - - current_file = None - for row in results: - file_path, started_at, cwd, git_branch, content, role = row - - if file_path != current_file: - current_file = file_path - date_str = started_at.strftime('%Y-%m-%d %H:%M') if started_at else "?" - cwd_short = Path(cwd).name if cwd else "?" 
- print(f"\n{'='*60}") - print(f"{date_str} | {cwd_short} | {git_branch or '?'}") - print(f" {file_path}") - - # Show context around match - content_lower = content.lower() - query_lower = query_text.lower() - idx = content_lower.find(query_lower) - if idx >= 0: - start = max(0, idx - 50) - end = min(len(content), idx + len(query_text) + 50) - snippet = content[start:end].replace('\n', ' ') - if start > 0: - snippet = "..." + snippet - if end < len(content): - snippet = snippet + "..." - print(f" [{role}] {snippet}") - - -def cmd_show(args, con: duckdb.DuckDBPyConnection): - """Show session command handler.""" - file_path = args.file_path - - # Check if session exists - session = con.execute(""" - SELECT file_path, session_id, started_at, ended_at, duration_seconds, - model, cwd, git_branch, message_count, tool_count - FROM sessions WHERE file_path = ? - """, [file_path]).fetchone() - - if not session: - print(f"Session not found: {file_path}", file=sys.stderr) - sys.exit(1) - - file_path, session_id, started_at, ended_at, duration, model, cwd, git_branch, msg_count, tool_count = session - - print(f"# Session: {Path(file_path).stem}") - print() - if started_at: - print(f"**Date:** {started_at.strftime('%Y-%m-%d %H:%M')}") - if duration: - hours, remainder = divmod(duration, 3600) - minutes, seconds = divmod(remainder, 60) - if hours > 0: - print(f"**Duration:** {hours}h {minutes}m {seconds}s") - elif minutes > 0: - print(f"**Duration:** {minutes}m {seconds}s") - else: - print(f"**Duration:** {seconds}s") - if model: - print(f"**Model:** {model}") - if cwd: - print(f"**Working Directory:** {cwd}") - if git_branch: - print(f"**Git Branch:** {git_branch}") - print(f"**Messages:** {msg_count}") - print(f"**Tool Calls:** {tool_count}") - print() - print("---") - print() - - if args.summary: - # Get first user message as preview - first_msg = con.execute(""" - SELECT content FROM messages - WHERE file_path = ? 
AND role = 'user' AND LENGTH(content) > 20 - ORDER BY message_idx LIMIT 1 - """, [file_path]).fetchone() - - if first_msg: - preview = first_msg[0][:500].replace('\n', ' ') - if len(first_msg[0]) > 500: - preview += "..." - print(f"**First prompt:** {preview}") - return - - # Full transcript - messages = con.execute(""" - SELECT message_idx, role, content, has_thinking - FROM messages WHERE file_path = ? - ORDER BY message_idx - """, [file_path]).fetchall() - - for msg_idx, role, content, thinking in messages: - role_header = "## User" if role == 'user' else "## Assistant" - print(role_header) - print() - if content: - print(content) - print() - - -def main(): - parser = argparse.ArgumentParser(description='DuckDB-based transcript indexer') - parser.add_argument('--db', type=str, help=f'Database path (default: {DEFAULT_DB_PATH})') - - subparsers = parser.add_subparsers(dest='command', required=True) - - # index command - index_parser = subparsers.add_parser('index', help='Index session files') - index_parser.add_argument('--path', type=str, help='Sessions directory (default: auto-detect)') - index_parser.add_argument('--full', action='store_true', help='Force full reindex') - index_parser.add_argument('--limit', type=int, help='Limit number of files to process') - index_parser.add_argument('--quiet', '-q', action='store_true', help='Quiet mode') - - # recent command - recent_parser = subparsers.add_parser('recent', help='List recent sessions') - recent_parser.add_argument('--limit', '-n', type=int, default=10, help='Number of sessions') - recent_parser.add_argument('--project', type=str, help='Filter by project (cwd contains)') - recent_parser.add_argument('--since', type=str, help='Filter by time (e.g., 7d, 24h)') - - # search command - search_parser = subparsers.add_parser('search', help='Search sessions') - search_parser.add_argument('query', type=str, help='Search query') - search_parser.add_argument('--cwd', type=str, help='Filter by working directory') - 
search_parser.add_argument('--limit', '-n', type=int, default=20, help='Max results') - - # show command - show_parser = subparsers.add_parser('show', help='Show session transcript') - show_parser.add_argument('file_path', type=str, help='Session file path') - show_parser.add_argument('--summary', action='store_true', help='Summary only') - - args = parser.parse_args() - - # Setup database - db_path = Path(args.db) if args.db else DEFAULT_DB_PATH - db_path.parent.mkdir(parents=True, exist_ok=True) - - con = duckdb.connect(str(db_path)) - con.execute(SCHEMA) - - # Dispatch command - if args.command == 'index': - cmd_index(args, con) - elif args.command == 'recent': - cmd_recent(args, con) - elif args.command == 'search': - cmd_search(args, con) - elif args.command == 'show': - cmd_show(args, con) - - con.close() - - -if __name__ == '__main__': - main() diff --git a/.claude/skills/git-best-practices/SKILL.md b/.claude/skills/git-best-practices/SKILL.md index 13c04df..7b4acd6 100644 --- a/.claude/skills/git-best-practices/SKILL.md +++ b/.claude/skills/git-best-practices/SKILL.md @@ -1,6 +1,6 @@ --- name: git-best-practices -description: Git workflow patterns for commits, branching, PRs, and history management across heterogeneous repositories. Use when creating commits, managing branches, opening pull requests, or rewriting history. Do not use for non-git implementation tasks or repo-specific release policy decisions without repository documentation. +description: Use when creating commits, managing branches, opening PRs, or rewriting history. Not for non-git implementation tasks or repo-specific release policy decisions. --- # Git Best Practices @@ -16,17 +16,15 @@ When this skill is loaded, follow these directives for all git operations: ## Agent Git Workflow -Follow this sequence when performing git operations: - -1. **Check state** — run `git status` and `git diff HEAD`; output: working tree and unstaged/staged delta -2. 
**Discover branches** — identify and store default/current/(optional) production branch names (see Branch Discovery) +1. **Check state** — run `git status` and `git diff HEAD` +2. **Discover branches** — identify default/current/(optional) production branch names (see Branch Discovery) 3. **Stage by name** — `git add path/to/file` for each file; verify with `git status` 4. **Write a conventional commit** — `type(scope): description` with optional body -5. **Push safely** — use regular push by default; use `git push --force-with-lease origin {branch}` only for rewritten history and only after user confirmation +5. **Push safely** — regular push by default; `git push --force-with-lease origin {branch}` only for rewritten history after user confirmation ### Checkpoint Commits -Agents may create WIP checkpoint commits during long-running tasks. These are development artifacts, cleaned up before PR. +Agents may create WIP checkpoint commits during long-running tasks, cleaned up before PR. - Prefix with `wip:` or use standard conventional commit format - Keep changes logically grouped even in WIP state @@ -82,84 +80,7 @@ Add a body when: - Multi-part changes benefit from a bullet list - External context is needed (links, issue references, root cause) -### Examples - - - - -Single-line fix, no body needed: - -``` -fix(shell): restore Alt+F terminal navigation -``` - - - -Non-obvious fix with body explaining root cause: - -``` -fix(shell): use HOMEBREW_PREFIX to avoid path_helper breaking plugins in login shells - -macOS path_helper reorders PATH in login shells, putting /usr/local/bin -before /opt/homebrew/bin. This caused `brew --prefix` to resolve the stale -Intel Homebrew, so fzf, zsh-autosuggestions, and zsh-syntax-highlighting -all silently failed to load in Ghostty (which spawns login shells). - -Use the HOMEBREW_PREFIX env var (set by brew shellenv in .zshenv) instead -of calling `brew --prefix` — it survives path_helper and is faster. 
-``` - - - -Feature with bullet-list body for multi-part changes: - -``` -feat(install): add claude bootstrap runtime management - -- migrate Claude defaults to declarative files under claude/defaults -- add claude-bootstrap check/fix/uninstall with backup-first migration -- stop stowing full claude/codex runtime trees and tighten drift checks -``` - - - -Monorepo commit with ticket reference in branch and scope: - -``` -fix(pool-party): handle stale settlement state on reconnect - -PoolSettlement contract stays in pending state when the participant -disconnects mid-settlement. Check settlement timestamp and expire -stale entries on reconnect. - -Fixes SEND-718 -``` - - - -Submodule update with downstream commit info: - -``` -chore(submodule): update claude-code - -Bump claude-code to 88d0c75 (feat(skills): add tiltup, specalign, and e2e skills). -``` - -For trivial bumps, `bump` or `bump claude-code submodule` is acceptable. - - - -Breaking change using `!` suffix: - -``` -refactor(api)!: change auth endpoint response format - -The /auth/token endpoint now returns { access_token, expires_in } -instead of { token, expiry }. All clients must update their parsers. -``` - - - +See git-examples.md for commit message examples. ## Branch Discovery @@ -176,22 +97,7 @@ git branch --show-current git branch -r --list 'origin/main' 'origin/master' 'origin/production' ``` -**Fallback when `gh` is unavailable or the repo has no remote:** - -```bash -# Infer default branch from local refs -git symbolic-ref refs/remotes/origin/HEAD 2>/dev/null | sed 's@^refs/remotes/origin/@@' - -# Last resort: check local branches and fail loudly if unknown -if git rev-parse --verify main >/dev/null 2>&1; then - echo main -elif git rev-parse --verify master >/dev/null 2>&1; then - echo master -else - echo "ERROR: unable to determine default branch (main/master not found)." >&2 - exit 1 -fi -``` +If `gh` is unavailable or the repo has no remote, see the fallback commands in git-examples.md. 
Store the discovered branch name and reference it throughout. Use the actual branch name in all subsequent commands. diff --git a/.claude/skills/git-best-practices/git-examples.md b/.claude/skills/git-best-practices/git-examples.md new file mode 100644 index 0000000..d9aa7a7 --- /dev/null +++ b/.claude/skills/git-best-practices/git-examples.md @@ -0,0 +1,99 @@ +# Git Examples Reference + +## Commit Message Examples + + + + +Single-line fix, no body needed: + +``` +fix(shell): restore Alt+F terminal navigation +``` + + + +Non-obvious fix with body explaining root cause: + +``` +fix(shell): use HOMEBREW_PREFIX to avoid path_helper breaking plugins in login shells + +macOS path_helper reorders PATH in login shells, putting /usr/local/bin +before /opt/homebrew/bin. This caused `brew --prefix` to resolve the stale +Intel Homebrew, so fzf, zsh-autosuggestions, and zsh-syntax-highlighting +all silently failed to load in Ghostty (which spawns login shells). + +Use the HOMEBREW_PREFIX env var (set by brew shellenv in .zshenv) instead +of calling `brew --prefix` — it survives path_helper and is faster. +``` + + + +Feature with bullet-list body for multi-part changes: + +``` +feat(install): add claude bootstrap runtime management + +- migrate Claude defaults to declarative files under claude/defaults +- add claude-bootstrap check/fix/uninstall with backup-first migration +- stop stowing full claude/codex runtime trees and tighten drift checks +``` + + + +Monorepo commit with ticket reference in branch and scope: + +``` +fix(pool-party): handle stale settlement state on reconnect + +PoolSettlement contract stays in pending state when the participant +disconnects mid-settlement. Check settlement timestamp and expire +stale entries on reconnect. + +Fixes SEND-718 +``` + + + +Submodule update with downstream commit info: + +``` +chore(submodule): update claude-code + +Bump claude-code to 88d0c75 (feat(skills): add tiltup, specalign, and e2e skills). 
+``` + +For trivial bumps, `bump` or `bump claude-code submodule` is acceptable. + + + +Breaking change using `!` suffix: + +``` +refactor(api)!: change auth endpoint response format + +The /auth/token endpoint now returns { access_token, expires_in } +instead of { token, expiry }. All clients must update their parsers. +``` + + + + +## Branch Discovery Fallback + +Use when `gh` is unavailable or the repo has no remote: + +```bash +# Infer default branch from local refs +git symbolic-ref refs/remotes/origin/HEAD 2>/dev/null | sed 's@^refs/remotes/origin/@@' + +# Last resort: check local branches and fail loudly if unknown +if git rev-parse --verify main >/dev/null 2>&1; then + echo main +elif git rev-parse --verify master >/dev/null 2>&1; then + echo master +else + echo "ERROR: unable to determine default branch (main/master not found)." >&2 + exit 1 +fi +``` diff --git a/.claude/skills/git-rebase-sync/SKILL.md b/.claude/skills/git-rebase-sync/SKILL.md index 5fa9af7..896cdef 100644 --- a/.claude/skills/git-rebase-sync/SKILL.md +++ b/.claude/skills/git-rebase-sync/SKILL.md @@ -1,6 +1,6 @@ --- name: git-rebase-sync -description: Sync a feature branch onto the latest origin base branch via git rebase, with safety rails, deliberate conflict resolution, and safe force-with-lease pushing. +description: Use when syncing a feature branch onto the latest origin base branch via git rebase. metadata: short-description: Rebase branch sync --- diff --git a/.claude/skills/git-worktree-tidy/SKILL.md b/.claude/skills/git-worktree-tidy/SKILL.md new file mode 100644 index 0000000..361f903 --- /dev/null +++ b/.claude/skills/git-worktree-tidy/SKILL.md @@ -0,0 +1,127 @@ +--- +name: git-worktree-tidy +description: Fetch latest from origin, prune remote-tracking refs, delete stale local branches and worktrees, and fast-forward important branches. Use when tidying up a worktree-based repo layout. 
+metadata: + short-description: Worktree hygiene cleanup +--- + +# git-worktree-tidy + +Routine hygiene for bare-repo + worktree layouts. Fetches origin, prunes +gone branches and orphaned worktrees, and fast-forwards important local +branches. + +## When to use + +User asks to "fetch prune", "clean up stale branches/worktrees", or +"update main/dev to latest" in a worktree-based repo. + +## Hard Rules + +- All destructive actions (worktree remove, branch delete) require user + confirmation. Present the full list and wait. +- Never force-delete a worktree with uncommitted changes without explicit + approval. Flag dirty worktrees separately. +- Use `--ff-only` when updating branches. If ff-only fails, stop and ask. +- Operate from the `.bare` directory (or repo root) for branch/worktree + management commands. + +## Workflow + +### 1) Locate the bare root + +Determine the bare repo directory: +- If cwd contains `.bare/`, use it +- Otherwise: `git rev-parse --git-common-dir` + +All branch and worktree management commands run from this directory. + +### 2) Fetch + prune + +```bash +git fetch --prune origin +``` + +Report what was pruned (deleted remote-tracking branches, updated refs). + +### 3) Discover stale branches + +```bash +git branch -vv | grep ': gone]' +``` + +Collect branch names whose upstream is gone. + +### 4) Discover stale worktrees + +```bash +git worktree list +git worktree prune --dry-run +``` + +Cross-reference worktrees against the gone-branch list. 
Check each stale +worktree for dirty state: + +```bash +cd <worktree-path> && git status --short +``` + +Categorize: +- **Clean + gone**: safe to remove +- **Dirty + gone**: flag for user review +- **Prunable metadata**: orphaned worktree entries (directory already gone) + +### 5) Confirm deletions + +Present a summary table: + +``` +Stale worktrees to remove: + <worktree-path> (<branch>) [clean] + <worktree-path> (<branch>) [dirty — N uncommitted changes] + +Stale branches to delete: + <branch> + +Prunable worktree metadata: + <worktree-path> +``` + +Wait for user confirmation before proceeding. + +### 6) Remove stale worktrees + +For each confirmed worktree: + +```bash +git worktree remove <worktree-path> +``` + +If removal fails (dirty), report and skip unless user approved force. + +### 7) Delete stale branches + +```bash +git branch -D <branch1> <branch2> ... +``` + +### 8) Prune worktree metadata + +```bash +git worktree prune -v +``` + +### 9) Update important branches + +Identify which branches have dedicated worktrees for `main`, `dev`, or +other important branches (user may specify). For each: + +```bash +cd <worktree-path> && git pull --ff-only origin <branch> +``` + +If ff-only fails, report the divergence and ask for guidance. + +### 10) Final status + +Show a summary: what was removed, what was updated, any items skipped. diff --git a/.claude/skills/go-best-practices/SKILL.md b/.claude/skills/go-best-practices/SKILL.md index 12d407b..b4d2fab 100644 --- a/.claude/skills/go-best-practices/SKILL.md +++ b/.claude/skills/go-best-practices/SKILL.md @@ -1,44 +1,16 @@ --- name: go-best-practices -description: Provides Go patterns for type-first development with custom types, interfaces, functional options, and error handling. Must use when reading or writing Go files. +description: Use when reading or writing Go files (.go, go.mod). --- # Go Best Practices -## Type-First Development +Follows type-first, functional, and error handling patterns from CLAUDE.md. This skill covers language-specific idioms only. -Types define the contract before implementation. Follow this workflow: - -1.
**Define data structures** - structs and interfaces first -2. **Define function signatures** - parameters, return types, and error conditions -3. **Implement to satisfy types** - let the compiler guide completeness -4. **Validate at boundaries** - check inputs where data enters the system - -### Make Illegal States Unrepresentable +## Make Illegal States Unrepresentable Use Go's type system to prevent invalid states at compile time. -**Structs for domain models:** -```go -// Define the data model first -type User struct { - ID UserID - Email string - Name string - CreatedAt time.Time -} - -type CreateUserRequest struct { - Email string - Name string -} - -// Functions follow from the types -func CreateUser(req CreateUserRequest) (*User, error) { - // implementation -} -``` - **Custom types for domain primitives:** ```go // Distinct types prevent mixing up IDs @@ -49,10 +21,6 @@ func GetUser(id UserID) (*User, error) { // Compiler prevents passing OrderID here } -func NewUserID(raw string) UserID { - return UserID(raw) -} - // Methods attach behavior to the type func (id UserID) String() string { return string(id) @@ -62,22 +30,18 @@ func (id UserID) String() string { **Interfaces for behavior contracts:** ```go // Define what you need, not what you have -type Reader interface { - Read(p []byte) (n int, err error) -} - type UserRepository interface { GetByID(ctx context.Context, id UserID) (*User, error) Save(ctx context.Context, user *User) error } // Accept interfaces, return structs -func ProcessInput(r Reader) ([]byte, error) { +func ProcessInput(r io.Reader) ([]byte, error) { return io.ReadAll(r) } ``` -**Enums with iota:** +**Enums with iota and exhaustive switch:** ```go type Status int @@ -87,20 +51,6 @@ const ( StatusPending ) -func (s Status) String() string { - switch s { - case StatusActive: - return "active" - case StatusInactive: - return "inactive" - case StatusPending: - return "pending" - default: - return fmt.Sprintf("Status(%d)", s) - } -} - -// 
Exhaustive handling in switch func ProcessStatus(s Status) (string, error) { switch s { case StatusActive: @@ -120,28 +70,16 @@ func ProcessStatus(s Status) (string, error) { type ServerOption func(*Server) func WithPort(port int) ServerOption { - return func(s *Server) { - s.port = port - } -} - -func WithTimeout(d time.Duration) ServerOption { - return func(s *Server) { - s.timeout = d - } + return func(s *Server) { s.port = port } } func NewServer(opts ...ServerOption) *Server { - s := &Server{ - port: 8080, // sensible defaults - timeout: 30 * time.Second, - } + s := &Server{port: 8080, timeout: 30 * time.Second} for _, opt := range opts { opt(s) } return s } - // Usage: NewServer(WithPort(3000), WithTimeout(time.Minute)) ``` @@ -153,65 +91,25 @@ type Timestamps struct { } type User struct { - Timestamps // embedded - User has CreatedAt, UpdatedAt + Timestamps // User gains CreatedAt, UpdatedAt ID UserID Email string } ``` -## Module Structure - -Prefer smaller files within packages: one type or concern per file. Split when a file handles multiple unrelated types or exceeds ~300 lines. Keep tests in `_test.go` files alongside implementation. Package boundaries define the API; internal organization is flexible. - -## Functional Patterns - -- Use value receivers when methods don't mutate state; reserve pointer receivers for mutation. -- Avoid package-level mutable variables; pass dependencies explicitly via function parameters. -- Return new structs/slices rather than mutating inputs; makes data flow explicit. -- Use closures and higher-order functions where they simplify code (e.g., `sort.Slice`, iterators). +## Go-Specific Error Handling -## Instructions - -- Return errors with context using `fmt.Errorf` and `%w` for wrapping. This preserves the error chain for debugging. -- Every function returns a value or an error; unimplemented paths return descriptive errors. Explicit failures are debuggable. 
-- Handle all branches in `switch` statements; include a `default` case that returns an error. Exhaustive handling prevents silent bugs. -- Pass `context.Context` to external calls with explicit timeouts. Runaway requests cause cascading failures. -- Reserve `panic` for truly unrecoverable situations; prefer returning errors. Panics crash the program. -- Add or update table-driven tests for new logic; cover edge cases (empty input, nil, boundaries). - -## Examples - -Explicit failure for unimplemented logic: -```go -func buildWidget(widgetType string) (*Widget, error) { - return nil, fmt.Errorf("buildWidget not implemented for type: %s", widgetType) -} -``` - -Wrap errors with context to preserve the chain: +Wrap errors with `%w` to preserve the chain for `errors.Is` / `errors.As`: ```go out, err := client.Do(ctx, req) if err != nil { return nil, fmt.Errorf("fetch widget failed: %w", err) } -return out, nil ``` -Exhaustive switch with default error: -```go -func processStatus(status string) (string, error) { - switch status { - case "active": - return "processing", nil - case "inactive": - return "skipped", nil - default: - return "", fmt.Errorf("unhandled status: %s", status) - } -} -``` +## Structured Logging -Structured logging with slog: +Use `log/slog` with structured key-value pairs: ```go import "log/slog" @@ -224,46 +122,3 @@ func createWidget(name string) (*Widget, error) { return widget, nil } ``` - -## Configuration - -- Load config from environment variables at startup; validate required values before use. Missing config should cause immediate exit. -- Define a Config struct as single source of truth; avoid `os.Getenv` scattered throughout code. -- Use sensible defaults for development; require explicit values for production secrets. 
- -### Examples - -Typed config struct: -```go -type Config struct { - Port int - DatabaseURL string - APIKey string - Env string -} - -func LoadConfig() (*Config, error) { - dbURL := os.Getenv("DATABASE_URL") - if dbURL == "" { - return nil, fmt.Errorf("DATABASE_URL is required") - } - apiKey := os.Getenv("API_KEY") - if apiKey == "" { - return nil, fmt.Errorf("API_KEY is required") - } - port := 3000 - if p := os.Getenv("PORT"); p != "" { - var err error - port, err = strconv.Atoi(p) - if err != nil { - return nil, fmt.Errorf("invalid PORT: %w", err) - } - } - return &Config{ - Port: port, - DatabaseURL: dbURL, - APIKey: apiKey, - Env: getEnvOrDefault("ENV", "development"), - }, nil -} -``` diff --git a/.claude/skills/improve/SKILL.md b/.claude/skills/improve/SKILL.md new file mode 100644 index 0000000..26ee7fd --- /dev/null +++ b/.claude/skills/improve/SKILL.md @@ -0,0 +1,119 @@ +--- +name: improve +description: Use after completing a task, before claiming done, before handoff, or when asked "what are some improvements?" Surfaces concrete suggestions grounded in session observations. +--- + +# Improve + +Structured improvement pass grounded in what was observed during this session. + +## Principles (Always Active) + +- Every suggestion must cite a specific observation — file path, error seen, pattern noticed, or behavior verified. "I saw X" beats "common practice suggests Y." +- Distinguish grounded suggestions (observed evidence) from speculative ones (general knowledge). Mark speculative suggestions explicitly. +- Prioritize by impact to the user, not by ease of description. +- Do not suggest improvements that would change public API contracts, auth boundaries, or data schemas without flagging as high-risk. +- Simplicity is a feature. Do not suggest adding complexity unless it prevents a concrete problem observed in this session. + +## When to Use + +- User asks "knowing what you know now, what are some improvements?" 
or similar +- Before claiming done in a ralph loop (self-check) +- Before generating a handoff +- After completing a significant chunk of work +- When reviewing code that was just written or modified + +## When Not to Use + +- At the start of a session before meaningful work has been done +- When the user explicitly asks to skip review and ship +- For architectural redesigns (use spec/plan flow instead) + +## Dimensions + +Analyze across these dimensions. Skip any dimension with nothing grounded to say. + +### Correctness +Error handling gaps, unhandled edge cases, type safety issues, race conditions, missing validation at system boundaries. + +### Simplicity +Unnecessary indirection, dead code, naming that obscures intent, over-engineered abstractions, complexity that doesn't earn its keep. Reference the `simplify` skill for implementation. + +### Security +Auth boundary gaps, input validation holes, secret handling issues, OWASP concerns observed in touched code. + +### Test Coverage +Untested code paths observed during the session, missing test layers (unit/integration/e2e), fixture realism gaps, assertions that don't verify meaningful behavior. + +### Performance +Bottlenecks observed during the session (slow queries, N+1 patterns, unnecessary recomputation). Only flag what was actually observed, not hypothetical. + +### Developer Experience +API ergonomics issues, missing documentation for non-obvious behavior, CLI friction, configuration that should be extracted. + +## Workflow + +1. **Gather context**: Review what happened this session — files changed, commands run, errors encountered, tests written, patterns established. + +2. **Analyze each dimension**: For each, check whether session observations surface anything concrete. Skip dimensions with nothing to say. + +3. **Structure each suggestion**: + +``` +[IMP-N] effort/gate: summary + Observation: what was seen (file:line, error message, test gap, etc.) 
+ Suggestion: concrete change + Grounded: yes/no +``` + +Where: +- `effort`: `trivial` (< 5 min) | `small` (< 30 min) | `medium` (hours) +- `gate`: which delivery gate this strengthens: `TDD` | `DEV` | `E2E` | `REVIEW` | `CI` + +4. **Sort by impact** (highest first), not by dimension. + +5. **Present as actionable list**. If in a ralph loop, address trivial/small improvements inline before claiming done. Log medium improvements as decisions for the user to prioritize. + +## Integration with Ralph Loops + +When running inside a ralph loop (`.rl/state.json` exists): + +- Run the improvement pass before `.rl/rl done` +- Address `trivial` improvements inline — just fix them +- Address `small` improvements if they're within the current milestone scope +- Log `medium` improvements: `.rl/rl log decision "IMP-N deferred: [reason]"` +- Do not expand scope beyond the task's milestones for medium improvements + +## Integration with Handoffs + +When generating a handoff, include unaddressed improvements in the `` section. Use the same `[IMP-N]` format so the receiving agent can reference them. 
+ +## Output Format + +```markdown +## Improvements + +N suggestions (X grounded, Y speculative) + +[IMP-1] trivial/DEV: Extract magic number to config constant + Observation: `src/server.ts:42` uses hardcoded port 3000, but other services read from env + Suggestion: Read from `PORT` env var with 3000 as default + Grounded: yes + +[IMP-2] small/TDD: Add boundary test for empty input + Observation: `processItems([])` path untested — saw it handle non-empty arrays only in test suite + Suggestion: Add test case for empty array input in `process-items.test.ts` + Grounded: yes + +[IMP-3] medium/E2E: Add timeout handling for external API calls + Observation: Catalog search took 1.2s avg in profiling — no timeout configured + Suggestion: Add configurable timeout with bounded retry + Grounded: yes +``` + +## Red Flags + +- Suggestions without observations — if you can't point to something specific, don't suggest it +- More than 10 suggestions — focus on the top 5-7 by impact +- Suggestions that expand scope beyond what was touched — stay within the blast radius of the current work +- Generic "add more tests" without specifying which paths are untested diff --git a/.claude/skills/ios-device-screenshot/SKILL.md b/.claude/skills/ios-device-screenshot/SKILL.md index 7350307..681a892 100644 --- a/.claude/skills/ios-device-screenshot/SKILL.md +++ b/.claude/skills/ios-device-screenshot/SKILL.md @@ -1,6 +1,6 @@ --- name: ios-device-screenshot -description: Take screenshots from physical iOS devices connected via USB using pymobiledevice3. Use when capturing screenshots from real iPhones/iPads (not simulators), debugging on-device, or needing high-fidelity device captures. Triggers on physical iOS device screenshots, pymobiledevice3 usage, or USB-connected device capture tasks. +description: Use when capturing screenshots from physical iOS devices connected via USB using pymobiledevice3. 
--- # iOS Device Screenshot diff --git a/.claude/skills/nix-best-practices/SKILL.md b/.claude/skills/nix-best-practices/SKILL.md index 44866c1..afc659c 100644 --- a/.claude/skills/nix-best-practices/SKILL.md +++ b/.claude/skills/nix-best-practices/SKILL.md @@ -1,6 +1,6 @@ --- name: nix-best-practices -description: Nix patterns for flakes, overlays, unfree handling, and binary overlays. Use when working with flake.nix or shell.nix. +description: Use when working with Nix flakes, overlays, shell.nix, or flake.nix files. --- # Nix Best Practices diff --git a/.claude/skills/op-cli/SKILL.md b/.claude/skills/op-cli/SKILL.md index ba5e111..b75e15a 100644 --- a/.claude/skills/op-cli/SKILL.md +++ b/.claude/skills/op-cli/SKILL.md @@ -1,6 +1,6 @@ --- name: op-cli -description: Secure 1Password CLI patterns for reading secrets, discovering vaults/items, and piping credentials to other tools. Use when reading from 1Password, rotating secrets, or piping credentials to wrangler/kubectl/etc. Triggers on op CLI, 1Password, secret rotation, or credential piping tasks. +description: Use when reading from 1Password, discovering vaults/items, rotating secrets, or piping credentials to other tools via op CLI. --- # 1Password CLI (`op`) — Secure Handling diff --git a/.claude/skills/openai-image-gen/SKILL.md b/.claude/skills/openai-image-gen/SKILL.md index 855eee8..518f7c6 100644 --- a/.claude/skills/openai-image-gen/SKILL.md +++ b/.claude/skills/openai-image-gen/SKILL.md @@ -1,6 +1,6 @@ --- name: openai-image-gen -description: Generate images using OpenAI's DALL-E 3 API. Use when needing to create graphics, icons, backgrounds, or any visual assets. Requires OPENAI_API_KEY in environment. +description: Use when generating images, graphics, icons, or visual assets via OpenAI DALL-E 3 API. Requires OPENAI_API_KEY. 
--- # OpenAI Image Generation (DALL-E 3) diff --git a/.claude/skills/orbstack-best-practices/SKILL.md b/.claude/skills/orbstack-best-practices/SKILL.md index 4f1e9c0..e38c531 100644 --- a/.claude/skills/orbstack-best-practices/SKILL.md +++ b/.claude/skills/orbstack-best-practices/SKILL.md @@ -1,192 +1,78 @@ --- name: orbstack-best-practices -description: Patterns for OrbStack Linux VMs and Docker on macOS. Covers orbctl/orb commands, machine lifecycle, cloud-init, networking, file sharing, and SSH access. Must use when working with OrbStack, orbctl commands, or Linux VMs on macOS. +description: Use when working with OrbStack Linux VMs, Docker on macOS, orbctl commands, or orb machine lifecycle. --- # OrbStack Best Practices -OrbStack is a fast, lightweight Docker and Linux VM runtime for macOS. Replaces Docker Desktop with better performance and seamless macOS integration. +OrbStack is a fast Docker and Linux VM runtime for macOS. Replaces Docker Desktop with better performance and seamless macOS integration. 
## Core Commands ```bash -# Start/stop -orb # Start + open default machine shell -orb start # Start OrbStack -orb stop # Stop OrbStack +# Machine lifecycle +orb list # List machines +orb create ubuntu:noble myvm # Create (distro:version name) +orb create --arch amd64 ubuntu x86vm # x86 emulation via Rosetta +orb create ubuntu myvm -c cloud.yml # With cloud-init +orb start/stop/restart/delete myvm +orb default myvm # Set default machine -# Machine management -orb list # List machines -orb create ubuntu # Create with latest version -orb create ubuntu:jammy myvm # Specific version + name -orb create --arch amd64 ubuntu intel # x86 on Apple Silicon -orb delete myvm # Delete machine - -# Shell access -orb # Default machine shell -orb -m myvm # Specific machine -orb -u root # As root -orb -m myvm -u root # Combined - -# Run commands -orb uname -a # Run in default machine -orb -m myvm ./script.sh # Run in specific machine +# Shell and exec +orb # Shell into default machine +orb -m myvm -u root # Specific machine + user +orb -m myvm ./script.sh # Run command in machine # File transfer -orb push ~/local.txt # Copy to Linux -orb pull ~/remote.txt # Copy from Linux -orb push -m vm ~/f.txt /dest/ # Push to specific machine/path +orb push ~/local.txt # Copy to default machine home +orb pull ~/remote.txt # Copy from default machine +orb push -m vm ~/f.txt /tmp/ # Specific machine + path # Docker/K8s orb restart docker # Restart Docker engine orb logs docker # Docker engine logs -orb start k8s # Start Kubernetes -orb delete k8s # Delete K8s cluster +orb start k8s / orb delete k8s # Config -orb config set memory_mib 8192 # Set memory limit -orb config docker # Edit daemon.json +orb config set memory_mib 8192 +orb config set cpu 4 +orb config set rosetta true +orb config set network_proxy http://proxy:8080 ``` ## Key Paths -| Path | Description | -|------|-------------| -| `~/OrbStack//` | Linux files from macOS | -| `~/OrbStack/docker/volumes/` | Docker volumes from macOS | -| 
`/mnt/mac/Users/...` | macOS files from Linux | -| `/mnt/machines/<machine>/` | Other machines from Linux | -| `~/.orbstack/ssh/id_ed25519` | SSH private key | -| `~/.orbstack/config/docker.json` | Docker daemon config | +| Location | Path | +|----------|------| +| Linux files from macOS | `~/OrbStack/<machine>/` | +| Docker volumes from macOS | `~/OrbStack/docker/volumes/` | +| macOS files from Linux | `/mnt/mac/Users/...` (also at same path directly) | +| Other machines from Linux | `/mnt/machines/<machine>/` | +| SSH key | `~/.orbstack/ssh/id_ed25519` | +| Docker daemon config | `~/.orbstack/config/docker.json` | -## DNS Names +## Networking (OrbStack-Specific) -| Pattern | Description | +Servers in Linux machines are **automatically available on `localhost`** on macOS — no port mapping required. + +**DNS names:** +| Pattern | Resolves to | |---------|-------------| -| `<machine>.orb.local` | Linux machine | +| `<machine>.orb.local` | Linux VM | | `<container>.orb.local` | Docker container | | `<service>.<project>.orb.local` | Compose service | -| `host.orb.internal` | macOS from Linux machine | -| `host.docker.internal` | macOS from container | -| `docker.orb.internal` | Docker from Linux machine | - -## Machine Lifecycle - -### Creation - -```bash -orb create ubuntu # Latest Ubuntu -orb create ubuntu:noble devbox # Ubuntu 24.04 named "devbox" -orb create --arch amd64 debian x86vm # x86 emulation via Rosetta -orb create --set-password ubuntu pwvm # With password set -orb create ubuntu myvm -c cloud.yml # With cloud-init -``` - -Supported distros: Alma, Alpine, Arch, CentOS, Debian, Devuan, Fedora, Gentoo, Kali, NixOS, openSUSE, Oracle, Rocky, Ubuntu, Void - -### Lifecycle - -```bash -orb start myvm # Start stopped machine -orb stop myvm # Stop machine -orb restart myvm # Restart -orb delete myvm # Delete permanently -orb default myvm # Set as default machine -orb logs myvm # View boot logs -``` - -## Cloud-Init - -Create machines with automated provisioning: - -```bash -orb create ubuntu myvm -c user-data.yml -``` - -Example 
`user-data.yml`: - -```yaml -#cloud-config -packages: - - git - - vim - - docker.io - -users: - - name: dev - groups: sudo, docker - shell: /bin/bash - sudo: ALL=(ALL) NOPASSWD:ALL - -runcmd: - - systemctl enable docker - - systemctl start docker -``` - -Debug cloud-init: - -```bash -orb logs myvm # Boot logs from macOS -orb -m myvm cloud-init status --long # Status inside machine -orb -m myvm cat /var/log/cloud-init-output.log -``` - -## Networking +| `host.orb.internal` | macOS host (from Linux machine) | +| `host.docker.internal` | macOS host (from container) | -### Port Access +All `.orb.local` domains get **zero-config HTTPS** automatically. -Servers in Linux machines are automatically on `localhost`: +Custom container domain: `docker run -l dev.orbstack.domains=myapp.local nginx` -```bash -# In Linux: python3 -m http.server 8000 -# From macOS: curl localhost:8000 or curl myvm.orb.local:8000 -``` - -### Connecting from Linux to macOS - -```bash -# From Linux machine -curl host.orb.internal:3000 - -# From Docker container -curl host.docker.internal:3000 -``` - -### VPN/Proxy - -- Fully VPN-compatible with automatic DNS handling -- Follows macOS proxy settings automatically -- Custom proxy: `orb config set network_proxy http://proxy:8080` -- Disable: `orb config set network_proxy none` - -## File Sharing - -### macOS Files from Linux - -```bash -# Same paths work -cat /Users/allen/file.txt -cat /mnt/mac/Users/allen/file.txt # Explicit prefix -``` - -### Linux Files from macOS - -```bash -ls ~/OrbStack/myvm/home/user/ -ls ~/OrbStack/docker/volumes/myvolume/ -``` - -### Transfer Commands - -```bash -orb push ~/local.txt # To default machine home -orb pull ~/remote.txt # From default machine -orb push -m vm ~/f.txt /tmp/ # To specific path -``` +VPN-compatible; follows macOS proxy settings automatically. 
## SSH Access -Built-in multiplexed SSH server (no per-machine setup): +Single multiplexed SSH server — no per-machine setup needed: ```bash ssh orb # Default machine @@ -194,144 +80,34 @@ ssh myvm@orb # Specific machine ssh user@myvm@orb # Specific user + machine ``` -### IDE Setup - -**VS Code**: Install "Remote - SSH" extension, connect to `orb` or `myvm@orb` - -**JetBrains**: Host `localhost`, Port `32222`, Key `~/.orbstack/ssh/id_ed25519` - -### Ansible - -```ini -[servers] -myvm@orb ansible_user=ubuntu -``` - -SSH agent forwarding is automatic. +IDE config: VS Code "Remote - SSH" → `orb` or `myvm@orb`. JetBrains: host `localhost`, port `32222`, key `~/.orbstack/ssh/id_ed25519`. SSH agent forwarding is automatic. -## Docker Integration +## Docker Differences from Docker Desktop -### Container Domains +- Container domains resolve without port mapping (`web.orb.local` instead of `localhost:8080`). +- Prefer named volumes over bind mounts — data stays in Linux, no cross-filesystem overhead. +- x86 images on Apple Silicon: `docker run --platform linux/amd64 ubuntu` or `export DOCKER_DEFAULT_PLATFORM=linux/amd64`. +- SSH agent in containers: `-v /run/host-services/ssh-auth.sock:/agent.sock -e SSH_AUTH_SOCK=/agent.sock`. +- Kubernetes: all service types accessible from macOS without `kubectl port-forward`; `cluster.local` DNS works directly. 
-```bash -docker run --name web nginx -# Access: http://web.orb.local (no port needed for web servers) - -# Compose: ..orb.local -``` - -### HTTPS - -Zero-config HTTPS for all `.orb.local` domains: - -```bash -curl https://mycontainer.orb.local -``` - -### Custom Domains - -```bash -docker run -l dev.orbstack.domains=myapp.local nginx -``` - -### Host Networking - -```bash -docker run --net=host nginx -# localhost works both directions -``` - -### x86 Emulation - -```bash -docker run --platform linux/amd64 ubuntu -export DOCKER_DEFAULT_PLATFORM=linux/amd64 # Default to x86 -``` - -### SSH Agent in Containers - -```bash -docker run -v /run/host-services/ssh-auth.sock:/agent.sock \ - -e SSH_AUTH_SOCK=/agent.sock alpine -``` - -### Volumes vs Bind Mounts - -Prefer volumes for performance (data stays in Linux): - -```bash -docker run -v mydata:/data alpine # Volume (fast) -docker run -v ~/code:/code alpine # Bind mount (slower) -``` - -## Kubernetes - -```bash -orb start k8s # Start cluster -kubectl get nodes # kubectl included -``` - -All service types accessible from macOS without port-forward: +## macOS Commands from Linux ```bash -curl myservice.default.svc.cluster.local # cluster.local works -curl 192.168.194.20 # Pod IPs work -curl myservice.k8s.orb.local # LoadBalancer wildcard +mac open https://example.com # Open in macOS browser +mac notify "Build done" # macOS notification +ORBENV=AWS_PROFILE:EDITOR orb ./deploy.sh # Forward env vars ``` -Local images available immediately (use non-`latest` tag or `imagePullPolicy: IfNotPresent`). 
- ## Troubleshooting ```bash orb report # Generate diagnostic report orb logs myvm # Machine boot logs -orb logs docker # Docker engine logs -orb restart docker # Restart Docker +orb restart docker # Restart Docker engine orb reset # Factory reset (deletes everything) +docker context use orbstack # Fix "cannot connect to Docker daemon" ``` -**Cannot connect to Docker daemon**: Start OrbStack with `orb start`, or fix context with `docker context use orbstack` - -**Machine not starting**: Check `orb logs myvm`, try `orb restart myvm` - -**Rosetta x86 error**: Install x86 libc: -```bash -sudo dpkg --add-architecture amd64 -sudo apt update && sudo apt install libc6:amd64 -``` - -## Configuration - -```bash -orb config set rosetta true # Enable x86 emulation -orb config set memory_mib 8192 # Memory limit (MiB) -orb config set cpu 4 # CPU limit (cores) -orb config set network_proxy auto # Proxy (auto/none/url) -``` - -Docker daemon config at `~/.orbstack/config/docker.json`: - -```json -{ - "insecure-registries": ["registry.local:5000"], - "registry-mirrors": ["https://mirror.gcr.io"] -} -``` - -Apply with `orb restart docker`. 
- -## macOS Commands from Linux - -```bash -mac open https://example.com # Open URL in macOS browser -mac uname -a # Run macOS command -mac link brew # Link command for reuse -mac notify "Build done" # Send notification -``` - -Forward env vars: +**Rosetta x86 error**: `sudo dpkg --add-architecture amd64 && sudo apt update && sudo apt install libc6:amd64` -```bash -ORBENV=AWS_PROFILE:EDITOR orb ./deploy.sh -``` +Cloud-init debug: `orb -m myvm cloud-init status --long` or `orb -m myvm cat /var/log/cloud-init-output.log` diff --git a/.claude/skills/playwright-best-practices/SKILL.md b/.claude/skills/playwright-best-practices/SKILL.md index fb5a724..87cede7 100644 --- a/.claude/skills/playwright-best-practices/SKILL.md +++ b/.claude/skills/playwright-best-practices/SKILL.md @@ -1,6 +1,6 @@ --- name: playwright-best-practices -description: Provides Playwright test patterns for resilient locators, Page Object Models, fixtures, web-first assertions, and network mocking. Must use when writing or modifying Playwright tests (.spec.ts, .test.ts files with @playwright/test imports). +description: Use when writing or modifying Playwright tests (.spec.ts, .test.ts with @playwright/test imports). --- # Playwright Best Practices @@ -9,488 +9,50 @@ description: Provides Playwright test patterns for resilient locators, Page Obje When running Playwright tests from Claude Code or any CLI agent, always use minimal reporters to prevent verbose output from consuming the context window. -**Use `--reporter=line` or `--reporter=dot` for CLI test runs:** - -```bash -# REQUIRED: Use minimal reporter to prevent context overflow -npx playwright test --reporter=line -npx playwright test --reporter=dot - -# BAD: Default reporter generates thousands of lines, floods context -npx playwright test -``` - -Configure `playwright.config.ts` to use minimal reporters by default when `CI` or `CLAUDE` env vars are set: - -```ts -reporter: process.env.CI || process.env.CLAUDE - ? 
[['line'], ['html', { open: 'never' }]] - : 'list', -``` +**Use `--reporter=line` or `--reporter=dot` for CLI test runs.** Configure `playwright.config.ts` to default to minimal reporters when `CI` or `CLAUDE` env vars are set — see `playwright-patterns.md` for the config snippet. ## Locator Priority (Most to Least Resilient) Always prefer user-facing attributes: -1. `page.getByRole('button', { name: 'Submit' })` - accessibility roles -2. `page.getByLabel('Email')` - form control labels -3. `page.getByPlaceholder('Search...')` - input placeholders -4. `page.getByText('Welcome')` - visible text (non-interactive) -5. `page.getByAltText('Logo')` - image alt text -6. `page.getByTitle('Settings')` - title attributes -7. `page.getByTestId('submit-btn')` - explicit test contracts -8. CSS/XPath - last resort, avoid - -```ts -// BAD: Brittle selectors tied to implementation -page.locator('button.btn-primary.submit-form') -page.locator('//div[@class="container"]/form/button') -page.locator('#app > div:nth-child(2) > button') - -// GOOD: User-facing, resilient locators -page.getByRole('button', { name: 'Submit' }) -page.getByLabel('Password') -``` - -### Chaining and Filtering - -```ts -// Scope within a region -const card = page.getByRole('listitem').filter({ hasText: 'Product A' }); -await card.getByRole('button', { name: 'Add to cart' }).click(); - -// Filter by child locator -const row = page.getByRole('row').filter({ - has: page.getByRole('cell', { name: 'John' }) -}); - -// Combine conditions -const visibleSubmit = page.getByRole('button', { name: 'Submit' }).and(page.locator(':visible')); -const primaryOrSecondary = page.getByRole('button', { name: 'Save' }).or(page.getByRole('button', { name: 'Update' })); -``` - -### Strictness - -Locators throw if multiple elements match. 
Use `first()`, `last()`, `nth()` only when intentional: - -```ts -// Throws if multiple buttons match -await page.getByRole('button', { name: 'Delete' }).click(); - -// Explicit selection when needed -await page.getByRole('listitem').first().click(); -await page.getByRole('row').nth(2).getByRole('button').click(); -``` - -## Web-First Assertions - -Use async assertions that auto-wait and retry: - -```ts -// BAD: No auto-wait, flaky -expect(await page.getByText('Success').isVisible()).toBe(true); - -// GOOD: Auto-waits up to timeout -await expect(page.getByText('Success')).toBeVisible(); -await expect(page.getByRole('button')).toBeEnabled(); -await expect(page.getByTestId('status')).toHaveText('Submitted'); -await expect(page).toHaveURL(/dashboard/); -await expect(page).toHaveTitle('Dashboard'); - -// Collections -await expect(page.getByRole('listitem')).toHaveCount(5); -await expect(page.getByRole('listitem')).toHaveText(['Item 1', 'Item 2', 'Item 3']); - -// Soft assertions (continue on failure, report all) -await expect.soft(locator).toBeVisible(); -await expect.soft(locator).toHaveText('Expected'); -// Test continues, failures compiled at end -``` - -## Page Object Model - -Encapsulate page interactions. Define locators as readonly properties in constructor. - -```ts -// pages/base.page.ts -import { type Page, type Locator, expect } from '@playwright/test'; -import debug from 'debug'; - -export abstract class BasePage { - protected readonly log: debug.Debugger; - - constructor( - protected readonly page: Page, - protected readonly timeout = 30_000 - ) { - this.log = debug(`test:page:${this.constructor.name}`); - } - - protected async safeClick(locator: Locator, description?: string) { - this.log('clicking: %s', description ?? 
locator); - await expect(locator).toBeVisible({ timeout: this.timeout }); - await expect(locator).toBeEnabled({ timeout: this.timeout }); - await locator.click(); - } - - protected async safeFill(locator: Locator, value: string) { - await expect(locator).toBeVisible({ timeout: this.timeout }); - await locator.fill(value); - } - - abstract isLoaded(): Promise; -} -``` - -```ts -// pages/login.page.ts -import { type Locator, type Page, expect } from '@playwright/test'; -import { BasePage } from './base.page'; - -export class LoginPage extends BasePage { - readonly emailInput: Locator; - readonly passwordInput: Locator; - readonly submitButton: Locator; - readonly errorMessage: Locator; - - constructor(page: Page) { - super(page); - this.emailInput = page.getByLabel('Email'); - this.passwordInput = page.getByLabel('Password'); - this.submitButton = page.getByRole('button', { name: 'Sign in' }); - this.errorMessage = page.getByRole('alert'); - } - - async goto() { - await this.page.goto('/login'); - await this.isLoaded(); - } - - async isLoaded() { - await expect(this.emailInput).toBeVisible(); - } - - async login(email: string, password: string) { - await this.safeFill(this.emailInput, email); - await this.safeFill(this.passwordInput, password); - await this.safeClick(this.submitButton, 'Sign in button'); - } - - async expectError(message: string) { - await expect(this.errorMessage).toHaveText(message); - } -} -``` - -## Fixtures - -Prefer fixtures over beforeEach/afterEach. Fixtures encapsulate setup + teardown, run on-demand, and compose with dependencies. 
- -```ts -// fixtures/index.ts -import { test as base, expect } from '@playwright/test'; -import { LoginPage } from '../pages/login.page'; -import { DashboardPage } from '../pages/dashboard.page'; - -type TestFixtures = { - loginPage: LoginPage; - dashboardPage: DashboardPage; -}; - -export const test = base.extend({ - loginPage: async ({ page }, use) => { - const loginPage = new LoginPage(page); - await loginPage.goto(); - await use(loginPage); - }, - - dashboardPage: async ({ page }, use) => { - await use(new DashboardPage(page)); - }, -}); - -export { expect }; -``` - -### Worker-Scoped Fixtures - -Use for expensive setup shared across tests (database connections, authenticated users): +1. `page.getByRole('button', { name: 'Submit' })` — accessibility roles +2. `page.getByLabel('Email')` — form control labels +3. `page.getByPlaceholder('Search...')` — input placeholders +4. `page.getByText('Welcome')` — visible text (non-interactive) +5. `page.getByAltText('Logo')` — image alt text +6. `page.getByTitle('Settings')` — title attributes +7. `page.getByTestId('submit-btn')` — explicit test contracts +8. 
CSS/XPath — last resort, avoid -```ts -// fixtures/auth.fixture.ts -import { test as base } from '@playwright/test'; +## Core Rules -type WorkerFixtures = { - authenticatedUser: { token: string; userId: string }; -}; - -export const test = base.extend<{}, WorkerFixtures>({ - authenticatedUser: [async ({}, use) => { - // Expensive setup - runs once per worker - const user = await createTestUser(); - const token = await authenticateUser(user); - - await use({ token, userId: user.id }); - - // Cleanup after all tests in worker - await deleteTestUser(user.id); - }, { scope: 'worker' }], -}); -``` - -### Automatic Fixtures - -Run for every test without explicit declaration: - -```ts -export const test = base.extend<{ autoLog: void }>({ - autoLog: [async ({ page }, use) => { - page.on('console', msg => console.log(`[browser] ${msg.text()}`)); - await use(); - }, { auto: true }], -}); -``` - -## Authentication - -Save authenticated state to reuse. Never log in via UI in every test. - -```ts -// auth.setup.ts -import { test as setup, expect } from '@playwright/test'; - -const authFile = 'playwright/.auth/user.json'; - -setup('authenticate', async ({ page }) => { - await page.goto('/login'); - await page.getByLabel('Email').fill(process.env.TEST_USER_EMAIL!); - await page.getByLabel('Password').fill(process.env.TEST_USER_PASSWORD!); - await page.getByRole('button', { name: 'Sign in' }).click(); - await page.waitForURL('/dashboard'); - await page.context().storageState({ path: authFile }); -}); -``` - -```ts -// playwright.config.ts -export default defineConfig({ - projects: [ - { name: 'setup', testMatch: /.*\.setup\.ts/ }, - { - name: 'chromium', - use: { - ...devices['Desktop Chrome'], - storageState: 'playwright/.auth/user.json', - }, - dependencies: ['setup'], - }, - ], -}); -``` - -### API Authentication (Faster) - -```ts -setup('authenticate via API', async ({ request }) => { - const response = await request.post('/api/auth/login', { - data: { email: 
process.env.TEST_USER_EMAIL, password: process.env.TEST_USER_PASSWORD }, - }); - expect(response.ok()).toBeTruthy(); - await request.storageState({ path: authFile }); -}); -``` - -## Network Mocking - -Set up routes before navigation. - -```ts -test('displays mocked data', async ({ page }) => { - await page.route('**/api/users', route => route.fulfill({ - json: [{ id: 1, name: 'Test User' }], - })); - - await page.goto('/users'); - await expect(page.getByText('Test User')).toBeVisible(); -}); - -// Modify real response -test('injects item into response', async ({ page }) => { - await page.route('**/api/items', async route => { - const response = await route.fetch(); - const json = await response.json(); - json.push({ id: 999, name: 'Injected' }); - await route.fulfill({ response, json }); - }); - await page.goto('/items'); -}); - -// HAR recording -test('uses recorded responses', async ({ page }) => { - await page.routeFromHAR('./fixtures/api.har', { - url: '**/api/**', - update: false, // true to record - }); - await page.goto('/'); -}); -``` - -## Test Isolation - -Each test gets fresh browser context. Never share state between tests. - -```ts -// BAD: Tests depend on each other -let userId: string; -test('create user', async ({ request }) => { - userId = (await (await request.post('/api/users', { data: { name: 'Test' } })).json()).id; -}); -test('delete user', async ({ request }) => { - await request.delete(`/api/users/${userId}`); // Depends on previous! 
-}); - -// GOOD: Each test creates its own data -test('can delete created user', async ({ request }) => { - const { id } = await (await request.post('/api/users', { data: { name: 'Test' } })).json(); - const deleteResponse = await request.delete(`/api/users/${id}`); - expect(deleteResponse.ok()).toBeTruthy(); -}); -``` - -## Configuration - -```ts -// playwright.config.ts -import { defineConfig, devices } from '@playwright/test'; - -export default defineConfig({ - testDir: './tests', - fullyParallel: true, - forbidOnly: !!process.env.CI, - retries: process.env.CI ? 2 : 0, - workers: process.env.CI ? 1 : undefined, - // Use minimal reporter in CI/agent contexts to prevent context overflow - reporter: process.env.CI || process.env.CLAUDE - ? [['line'], ['html', { open: 'never' }]] - : 'list', - - use: { - baseURL: process.env.BASE_URL ?? 'http://localhost:3000', - trace: 'on-first-retry', - screenshot: 'only-on-failure', - video: 'on-first-retry', - }, - - projects: [ - { name: 'setup', testMatch: /.*\.setup\.ts/ }, - { - name: 'chromium', - use: { ...devices['Desktop Chrome'] }, - dependencies: ['setup'], - }, - { - name: 'firefox', - use: { ...devices['Desktop Firefox'] }, - dependencies: ['setup'], - }, - { - name: 'webkit', - use: { ...devices['Desktop Safari'] }, - dependencies: ['setup'], - }, - ], - - webServer: { - command: 'npm run start', - url: 'http://localhost:3000', - reuseExistingServer: !process.env.CI, - }, -}); -``` - -## Project Structure - -``` -tests/ - fixtures/ # Custom fixtures (extend base test) - pages/ # Page Object Models - helpers/ # Utility functions (API clients, data factories) - auth.setup.ts # Authentication setup project - *.spec.ts # Test files -playwright/ - .auth/ # Auth state storage (gitignored) -playwright.config.ts -``` - -Organize tests by feature or user journey. Colocate page objects with tests when possible. 
- -## Helpers (Separate from Pages) - -```ts -// helpers/user.helper.ts -import type { Page } from '@playwright/test'; -import debug from 'debug'; - -const log = debug('test:helper:user'); - -export class UserHelper { - constructor(private page: Page) {} - - async createUser(data: { name: string; email: string }) { - log('creating user: %s', data.email); - const response = await this.page.request.post('/api/users', { data }); - return response.json(); - } - - async deleteUser(id: string) { - log('deleting user: %s', id); - await this.page.request.delete(`/api/users/${id}`); - } -} - -// helpers/data.factory.ts -export function createTestUser(overrides: Partial = {}): User { - return { - id: crypto.randomUUID(), - email: `test-${Date.now()}@example.com`, - name: 'Test User', - ...overrides, - }; -} -``` - -## Debugging - -```bash -npx playwright test --debug # Step through with inspector -npx playwright test --trace on # Record trace for all tests -npx playwright test --ui # Interactive UI mode -npx playwright codegen localhost:3000 # Generate locators interactively -npx playwright show-report # View HTML report -``` - -Enable debug logs: `DEBUG=test:* npx playwright test` +- **Web-first assertions**: always `await expect(locator).toBeVisible()`, never `expect(await locator.isVisible()).toBe(true)` — web-first matchers auto-wait and retry +- **Test isolation**: each test creates its own data; never share state between tests +- **Auth state reuse**: save authenticated state via setup project + `storageState`; never log in via UI in every test +- **Fixtures over beforeEach**: fixtures encapsulate setup + teardown, run on-demand, and compose ## Anti-Patterns -- `page.waitForTimeout(ms)` - use auto-waiting locators instead -- `page.locator('.class')` - use role/label/testid -- XPath selectors - fragile, use user-facing attributes -- Shared state between tests - each test creates own data -- UI login in every test - use setup project + storageState -- Manual assertions 
without await - use web-first assertions -- Hardcoded waits - rely on Playwright's auto-waiting -- Default reporter in CI/agent - use `--reporter=line` or `--reporter=dot` to prevent context overflow +- `page.waitForTimeout(ms)` — use auto-waiting locators instead +- `page.locator('.class')` — use role/label/testid +- XPath selectors — fragile, use user-facing attributes +- Shared state between tests — each test creates own data +- UI login in every test — use setup project + storageState +- Manual assertions without await — use web-first assertions +- Hardcoded waits — rely on Playwright's auto-waiting +- Default reporter in CI/agent — use `--reporter=line` or `--reporter=dot` ## Checklist - [ ] Locators use role/label/testid, not CSS classes or XPath - [ ] All assertions use `await expect()` web-first matchers - [ ] Page objects define locators in constructor -- [ ] No `page.waitForTimeout()` - use auto-waiting -- [ ] Tests isolated - no shared state +- [ ] No `page.waitForTimeout()` — use auto-waiting +- [ ] Tests isolated — no shared state - [ ] Auth state reused via setup project - [ ] Network mocks set up before navigation - [ ] Test data created per-test or via fixtures - [ ] Debug logging added for complex flows - [ ] Minimal reporter (`line`/`dot`) used in CI/agent contexts + +See `playwright-patterns.md` for Page Object Model, fixtures, network mocking, and configuration examples. diff --git a/.claude/skills/playwright-best-practices/playwright-patterns.md b/.claude/skills/playwright-best-practices/playwright-patterns.md new file mode 100644 index 0000000..718558e --- /dev/null +++ b/.claude/skills/playwright-best-practices/playwright-patterns.md @@ -0,0 +1,422 @@ +# Playwright Patterns Reference + +Code examples for patterns summarized in `SKILL.md`. Load this file when you need to see or produce a concrete implementation. 
+ +## Locator Chaining and Filtering + +```ts +// Scope within a region +const card = page.getByRole('listitem').filter({ hasText: 'Product A' }); +await card.getByRole('button', { name: 'Add to cart' }).click(); + +// Filter by child locator +const row = page.getByRole('row').filter({ + has: page.getByRole('cell', { name: 'John' }) +}); + +// Combine conditions +const visibleSubmit = page.getByRole('button', { name: 'Submit' }).and(page.locator(':visible')); +const primaryOrSecondary = page.getByRole('button', { name: 'Save' }).or(page.getByRole('button', { name: 'Update' })); +``` + +### Strictness + +Locators throw if multiple elements match. Use `first()`, `last()`, `nth()` only when intentional: + +```ts +// Throws if multiple buttons match — forces you to be precise +await page.getByRole('button', { name: 'Delete' }).click(); + +// Explicit selection when multiple matches are expected +await page.getByRole('listitem').first().click(); +await page.getByRole('row').nth(2).getByRole('button').click(); +``` + +## Web-First Assertions + +```ts +// BAD: No auto-wait, flaky +expect(await page.getByText('Success').isVisible()).toBe(true); + +// GOOD: Auto-waits up to timeout +await expect(page.getByText('Success')).toBeVisible(); +await expect(page.getByRole('button')).toBeEnabled(); +await expect(page.getByTestId('status')).toHaveText('Submitted'); +await expect(page).toHaveURL(/dashboard/); +await expect(page).toHaveTitle('Dashboard'); + +// Collections +await expect(page.getByRole('listitem')).toHaveCount(5); +await expect(page.getByRole('listitem')).toHaveText(['Item 1', 'Item 2', 'Item 3']); + +// Soft assertions — continue on failure, report all at end +await expect.soft(locator).toBeVisible(); +await expect.soft(locator).toHaveText('Expected'); +``` + +## Page Object Model + +Encapsulate page interactions. Define locators as readonly properties in constructor so they are computed once and reused. 
+ +```ts +// pages/base.page.ts +import { type Page, type Locator, expect } from '@playwright/test'; +import debug from 'debug'; + +export abstract class BasePage { + protected readonly log: debug.Debugger; + + constructor( + protected readonly page: Page, + protected readonly timeout = 30_000 + ) { + this.log = debug(`test:page:${this.constructor.name}`); + } + + protected async safeClick(locator: Locator, description?: string) { + this.log('clicking: %s', description ?? locator); + await expect(locator).toBeVisible({ timeout: this.timeout }); + await expect(locator).toBeEnabled({ timeout: this.timeout }); + await locator.click(); + } + + protected async safeFill(locator: Locator, value: string) { + await expect(locator).toBeVisible({ timeout: this.timeout }); + await locator.fill(value); + } + + abstract isLoaded(): Promise<void>; +} +``` + +```ts +// pages/login.page.ts +import { type Locator, type Page, expect } from '@playwright/test'; +import { BasePage } from './base.page'; + +export class LoginPage extends BasePage { + readonly emailInput: Locator; + readonly passwordInput: Locator; + readonly submitButton: Locator; + readonly errorMessage: Locator; + + constructor(page: Page) { + super(page); + this.emailInput = page.getByLabel('Email'); + this.passwordInput = page.getByLabel('Password'); + this.submitButton = page.getByRole('button', { name: 'Sign in' }); + this.errorMessage = page.getByRole('alert'); + } + + async goto() { + await this.page.goto('/login'); + await this.isLoaded(); + } + + async isLoaded() { + await expect(this.emailInput).toBeVisible(); + } + + async login(email: string, password: string) { + await this.safeFill(this.emailInput, email); + await this.safeFill(this.passwordInput, password); + await this.safeClick(this.submitButton, 'Sign in button'); + } + + async expectError(message: string) { + await expect(this.errorMessage).toHaveText(message); + } +} +``` + +## Fixtures + +Prefer fixtures over `beforeEach`/`afterEach`. 
Fixtures encapsulate setup + teardown, run on-demand, and compose with dependencies. + +```ts +// fixtures/index.ts +import { test as base, expect } from '@playwright/test'; +import { LoginPage } from '../pages/login.page'; +import { DashboardPage } from '../pages/dashboard.page'; + +type TestFixtures = { + loginPage: LoginPage; + dashboardPage: DashboardPage; +}; + +export const test = base.extend<TestFixtures>({ + loginPage: async ({ page }, use) => { + const loginPage = new LoginPage(page); + await loginPage.goto(); + await use(loginPage); + }, + + dashboardPage: async ({ page }, use) => { + await use(new DashboardPage(page)); + }, +}); + +export { expect }; +``` + +### Worker-Scoped Fixtures + +Use for expensive setup shared across tests (database connections, authenticated users). Runs once per worker, not once per test. + +```ts +// fixtures/auth.fixture.ts +import { test as base } from '@playwright/test'; + +type WorkerFixtures = { + authenticatedUser: { token: string; userId: string }; +}; + +export const test = base.extend<{}, WorkerFixtures>({ + authenticatedUser: [async ({}, use) => { + const user = await createTestUser(); + const token = await authenticateUser(user); + + await use({ token, userId: user.id }); + + // Cleanup after all tests in worker complete + await deleteTestUser(user.id); + }, { scope: 'worker' }], +}); +``` + +### Automatic Fixtures + +Run for every test without explicit declaration in the test body: + +```ts +export const test = base.extend<{ autoLog: void }>({ + autoLog: [async ({ page }, use) => { + page.on('console', msg => console.log(`[browser] ${msg.text()}`)); + await use(); + }, { auto: true }], +}); +``` + +## Authentication + +Save authenticated state to reuse across tests. Never log in via UI in every test — it's slow and fragile. 
+ +```ts +// auth.setup.ts +import { test as setup, expect } from '@playwright/test'; + +const authFile = 'playwright/.auth/user.json'; + +setup('authenticate', async ({ page }) => { + await page.goto('/login'); + await page.getByLabel('Email').fill(process.env.TEST_USER_EMAIL!); + await page.getByLabel('Password').fill(process.env.TEST_USER_PASSWORD!); + await page.getByRole('button', { name: 'Sign in' }).click(); + await page.waitForURL('/dashboard'); + await page.context().storageState({ path: authFile }); +}); +``` + +```ts +// playwright.config.ts — wire up setup project +export default defineConfig({ + projects: [ + { name: 'setup', testMatch: /.*\.setup\.ts/ }, + { + name: 'chromium', + use: { + ...devices['Desktop Chrome'], + storageState: 'playwright/.auth/user.json', + }, + dependencies: ['setup'], + }, + ], +}); +``` + +### API Authentication (Faster) + +Bypass the UI entirely when your API supports it: + +```ts +setup('authenticate via API', async ({ request }) => { + const response = await request.post('/api/auth/login', { + data: { email: process.env.TEST_USER_EMAIL, password: process.env.TEST_USER_PASSWORD }, + }); + expect(response.ok()).toBeTruthy(); + await request.storageState({ path: authFile }); +}); +``` + +## Network Mocking + +Always set up routes before navigation — routes must be registered before the page makes requests. 
+ +```ts +test('displays mocked data', async ({ page }) => { + await page.route('**/api/users', route => route.fulfill({ + json: [{ id: 1, name: 'Test User' }], + })); + + await page.goto('/users'); + await expect(page.getByText('Test User')).toBeVisible(); +}); + +// Modify real response — fetch it then augment before fulfilling +test('injects item into response', async ({ page }) => { + await page.route('**/api/items', async route => { + const response = await route.fetch(); + const json = await response.json(); + json.push({ id: 999, name: 'Injected' }); + await route.fulfill({ response, json }); + }); + await page.goto('/items'); +}); + +// HAR recording — record once, replay in CI +test('uses recorded responses', async ({ page }) => { + await page.routeFromHAR('./fixtures/api.har', { + url: '**/api/**', + update: false, // set true to re-record + }); + await page.goto('/'); +}); +``` + +## Test Isolation + +Each test gets a fresh browser context. Never share mutable state between tests. + +```ts +// BAD: Tests depend on each other — order-sensitive, fragile +let userId: string; +test('create user', async ({ request }) => { + userId = (await (await request.post('/api/users', { data: { name: 'Test' } })).json()).id; +}); +test('delete user', async ({ request }) => { + await request.delete(`/api/users/${userId}`); // Breaks if run alone +}); + +// GOOD: Each test is self-contained +test('can delete created user', async ({ request }) => { + const { id } = await (await request.post('/api/users', { data: { name: 'Test' } })).json(); + const deleteResponse = await request.delete(`/api/users/${id}`); + expect(deleteResponse.ok()).toBeTruthy(); +}); +``` + +## Configuration + +```ts +// playwright.config.ts +import { defineConfig, devices } from '@playwright/test'; + +export default defineConfig({ + testDir: './tests', + fullyParallel: true, + forbidOnly: !!process.env.CI, + retries: process.env.CI ? 2 : 0, + workers: process.env.CI ? 
1 : undefined, + // Minimal reporter prevents context overflow in CI/agent contexts + reporter: process.env.CI || process.env.CLAUDE + ? [['line'], ['html', { open: 'never' }]] + : 'list', + + use: { + baseURL: process.env.BASE_URL ?? 'http://localhost:3000', + trace: 'on-first-retry', + screenshot: 'only-on-failure', + video: 'on-first-retry', + }, + + projects: [ + { name: 'setup', testMatch: /.*\.setup\.ts/ }, + { + name: 'chromium', + use: { ...devices['Desktop Chrome'] }, + dependencies: ['setup'], + }, + { + name: 'firefox', + use: { ...devices['Desktop Firefox'] }, + dependencies: ['setup'], + }, + { + name: 'webkit', + use: { ...devices['Desktop Safari'] }, + dependencies: ['setup'], + }, + ], + + webServer: { + command: 'npm run start', + url: 'http://localhost:3000', + reuseExistingServer: !process.env.CI, + }, +}); +``` + +## Project Structure + +``` +tests/ + fixtures/ # Custom fixtures (extend base test) + pages/ # Page Object Models + helpers/ # Utility functions (API clients, data factories) + auth.setup.ts # Authentication setup project + *.spec.ts # Test files +playwright/ + .auth/ # Auth state storage (gitignored) +playwright.config.ts +``` + +Organize tests by feature or user journey. Colocate page objects with tests when possible. + +## Helpers + +Keep helpers separate from page objects. Page objects model UI structure; helpers handle data setup and cross-cutting concerns. 
+ +```ts +// helpers/user.helper.ts +import type { Page } from '@playwright/test'; +import debug from 'debug'; + +const log = debug('test:helper:user'); + +export class UserHelper { + constructor(private page: Page) {} + + async createUser(data: { name: string; email: string }) { + log('creating user: %s', data.email); + const response = await this.page.request.post('/api/users', { data }); + return response.json(); + } + + async deleteUser(id: string) { + log('deleting user: %s', id); + await this.page.request.delete(`/api/users/${id}`); + } +} + +// helpers/data.factory.ts +export function createTestUser(overrides: Partial<User> = {}): User { + return { + id: crypto.randomUUID(), + email: `test-${Date.now()}@example.com`, + name: 'Test User', + ...overrides, + }; +} +``` + +## Debugging + +```bash +npx playwright test --debug # Step through with inspector +npx playwright test --trace on # Record trace for all tests +npx playwright test --ui # Interactive UI mode +npx playwright codegen localhost:3000 # Generate locators interactively +npx playwright show-report # View HTML report +``` + +Enable debug logs: `DEBUG=test:* npx playwright test` diff --git a/.claude/skills/python-best-practices/SKILL.md b/.claude/skills/python-best-practices/SKILL.md index a7533a4..46ae9b8 100644 --- a/.claude/skills/python-best-practices/SKILL.md +++ b/.claude/skills/python-best-practices/SKILL.md @@ -1,24 +1,17 @@ --- name: python-best-practices -description: Provides Python patterns for type-first development with dataclasses, discriminated unions, NewType, and Protocol. Must use when reading or writing Python files. +description: Use when reading or writing Python files (.py, pyproject.toml, requirements.txt). --- # Python Best Practices -## Type-First Development +Follows type-first, functional, and error handling patterns from CLAUDE.md. This skill covers language-specific idioms only. -Types define the contract before implementation. Follow this workflow: - -1.
**Define data models** - dataclasses, Pydantic models, or TypedDict first -2. **Define function signatures** - parameter and return type hints -3. **Implement to satisfy types** - let the type checker guide completeness -4. **Validate at boundaries** - runtime checks where data enters the system - -### Make Illegal States Unrepresentable +## Make Illegal States Unrepresentable Use Python's type system to prevent invalid states at type-check time. -**Dataclasses for structured data:** +**Frozen dataclasses for immutable domain models:** ```python from dataclasses import dataclass from datetime import datetime @@ -30,12 +23,7 @@ class User: name: str created_at: datetime -@dataclass(frozen=True) -class CreateUser: - email: str - name: str - -# Frozen dataclasses are immutable - no accidental mutation +# Frozen dataclasses are immutable — no accidental mutation ``` **Discriminated unions with Literal:** @@ -43,14 +31,6 @@ class CreateUser: from dataclasses import dataclass from typing import Literal -@dataclass -class Idle: - status: Literal["idle"] = "idle" - -@dataclass -class Loading: - status: Literal["loading"] = "loading" - @dataclass class Success: status: Literal["success"] = "success" @@ -61,14 +41,10 @@ class Failure: status: Literal["error"] = "error" error: Exception -RequestState = Idle | Loading | Success | Failure +RequestState = Success | Failure def handle_state(state: RequestState) -> None: match state: - case Idle(): - pass - case Loading(): - show_spinner() case Success(data=data): render(data) case Failure(error=err): @@ -85,29 +61,6 @@ OrderId = NewType("OrderId", str) def get_user(user_id: UserId) -> User: # Type checker prevents passing OrderId here ... 
- -def create_user_id(raw: str) -> UserId: - return UserId(raw) -``` - -**Enums for constrained values:** -```python -from enum import Enum, auto - -class Role(Enum): - ADMIN = auto() - USER = auto() - GUEST = auto() - -def check_permission(role: Role) -> bool: - match role: - case Role.ADMIN: - return True - case Role.USER: - return limited_check() - case Role.GUEST: - return False - # Type checker warns if case is missing ``` **Protocol for structural typing:** @@ -118,57 +71,13 @@ class Readable(Protocol): def read(self, n: int = -1) -> bytes: ... def process_input(source: Readable) -> bytes: - # Accepts any object with a read() method + # Accepts any object with a read() method — no inheritance required return source.read() ``` -**TypedDict for external data shapes:** -```python -from typing import TypedDict, Required, NotRequired - -class UserResponse(TypedDict): - id: Required[str] - email: Required[str] - name: Required[str] - avatar_url: NotRequired[str] - -def parse_user(data: dict) -> UserResponse: - # Runtime validation needed - TypedDict is structural - return UserResponse( - id=data["id"], - email=data["email"], - name=data["name"], - ) -``` - -## Module Structure - -Prefer smaller, focused files: one class or closely related set of functions per module. Split when a file handles multiple concerns or exceeds ~300 lines. Use `__init__.py` to expose public API; keep implementation details in private modules (`_internal.py`). Colocate tests in `tests/` mirroring the source structure. - -## Functional Patterns +## Python-Specific Error Handling -- Use list/dict/set comprehensions and generator expressions over explicit loops. -- Prefer `@dataclass(frozen=True)` for immutable data; avoid mutable default arguments. -- Use `functools.partial` for partial application; compose small functions over large classes. -- Avoid class-level mutable state; prefer pure functions that take inputs and return outputs. 
- -## Instructions - -- Raise descriptive exceptions for unsupported cases; every code path returns a value or raises. This makes failures debuggable and prevents silent corruption. -- Propagate exceptions with context using `from err`; catching requires re-raising or returning a meaningful result. Swallowed exceptions hide root causes. -- Handle edge cases explicitly: empty inputs, `None`, boundary values. Include `else` clauses in conditionals where appropriate. -- Use context managers for I/O; prefer `pathlib` and explicit encodings. Resource leaks cause production issues. -- Add or adjust unit tests when touching logic; prefer minimal repros that isolate the failure. - -## Examples - -Explicit failure for unimplemented logic: -```python -def build_widget(widget_type: str) -> Widget: - raise NotImplementedError(f"build_widget not implemented for type: {widget_type}") -``` - -Propagate with context to preserve the original traceback: +Chain exceptions with `from err` to preserve the original traceback: ```python try: data = json.loads(raw) @@ -176,19 +85,9 @@ except json.JSONDecodeError as err: raise ValueError(f"invalid JSON payload: {err}") from err ``` -Exhaustive match with explicit default: -```python -def process_status(status: str) -> str: - match status: - case "active": - return "processing" - case "inactive": - return "skipped" - case _: - raise ValueError(f"unhandled status: {status}") -``` +## Structured Logging -Debug-level tracing with namespaced logger: +Use a module-level logger with `%s` formatting (deferred string interpolation): ```python import logging @@ -201,70 +100,22 @@ def create_widget(name: str) -> Widget: return widget ``` -## Configuration - -- Load config from environment variables at startup; validate required values before use. Missing config should fail immediately. -- Define a config dataclass or Pydantic model as single source of truth; avoid `os.getenv` scattered throughout code. 
-- Use sensible defaults for development; require explicit values for production secrets. - -### Examples - -Typed config with dataclass: -```python -import os -from dataclasses import dataclass - -@dataclass(frozen=True) -class Config: - port: int = 3000 - database_url: str = "" - api_key: str = "" - env: str = "development" - - @classmethod - def from_env(cls) -> "Config": - database_url = os.environ.get("DATABASE_URL", "") - if not database_url: - raise ValueError("DATABASE_URL is required") - return cls( - port=int(os.environ.get("PORT", "3000")), - database_url=database_url, - api_key=os.environ["API_KEY"], # required, will raise if missing - env=os.environ.get("ENV", "development"), - ) - -config = Config.from_env() -``` - ## Optional: ty -For fast type checking, consider [ty](https://docs.astral.sh/ty/) from Astral (creators of ruff and uv). Written in Rust, it's significantly faster than mypy or pyright. +For fast type checking, consider [ty](https://docs.astral.sh/ty/) from Astral (creators of ruff and uv). Written in Rust, significantly faster than mypy or pyright. 
-**Installation and usage:** ```bash -# Run directly with uvx (no install needed) -uvx ty check - -# Check specific files -uvx ty check src/main.py - -# Install permanently -uv tool install ty +uvx ty check # run directly, no install needed +uvx ty check src/ # check specific path ``` -**Key features:** -- Automatic virtual environment detection (via `VIRTUAL_ENV` or `.venv`) -- Project discovery from `pyproject.toml` -- Fast incremental checking -- Compatible with standard Python type hints - -**Configuration in `pyproject.toml`:** ```toml +# pyproject.toml [tool.ty] python-version = "3.12" ``` -**When to use ty vs alternatives:** -- `ty` - fastest, good for CI and large codebases (early stage, rapidly evolving) -- `pyright` - most complete type inference, VS Code integration -- `mypy` - mature, extensive plugin ecosystem +When to choose: +- `ty` — fastest, good for CI and large codebases (early stage, rapidly evolving) +- `pyright` — most complete type inference, VS Code integration +- `mypy` — mature, extensive plugin ecosystem diff --git a/.claude/skills/react-best-practices/SKILL.md b/.claude/skills/react-best-practices/SKILL.md index 1d8fc2e..4f9fa36 100644 --- a/.claude/skills/react-best-practices/SKILL.md +++ b/.claude/skills/react-best-practices/SKILL.md @@ -1,6 +1,6 @@ --- name: react-best-practices -description: Provides React patterns for hooks, effects, refs, and component design. Covers escape hatches, anti-patterns, and correct effect usage. Must use when reading or writing React components (.tsx, .jsx files with React imports). +description: Use when reading or writing React components (.tsx, .jsx files with React imports). --- # React Best Practices @@ -13,558 +13,47 @@ When working with React, always load both this skill and `typescript-best-practi Effects let you "step outside" React to synchronize with external systems. **Most component logic should NOT use Effects.** Before writing an Effect, ask: "Is there a way to do this without an Effect?" 
-## When to Use Effects - -Effects are for synchronizing with **external systems**: -- Subscribing to browser APIs (WebSocket, IntersectionObserver, resize) -- Connecting to third-party libraries not written in React -- Setting up/cleaning up event listeners on window/document -- Fetching data on mount (though prefer React Query or framework data fetching) -- Controlling non-React DOM elements (video players, maps, modals) - -## When NOT to Use Effects - -### Derived State (Calculate During Render) - -```tsx -// BAD: Effect for derived state -const [firstName, setFirstName] = useState('Taylor'); -const [lastName, setLastName] = useState('Swift'); -const [fullName, setFullName] = useState(''); -useEffect(() => { - setFullName(firstName + ' ' + lastName); -}, [firstName, lastName]); - -// GOOD: Calculate during render -const [firstName, setFirstName] = useState('Taylor'); -const [lastName, setLastName] = useState('Swift'); -const fullName = firstName + ' ' + lastName; -``` - -### Expensive Calculations (Use useMemo) - -```tsx -// BAD: Effect for caching -const [visibleTodos, setVisibleTodos] = useState([]); -useEffect(() => { - setVisibleTodos(getFilteredTodos(todos, filter)); -}, [todos, filter]); - -// GOOD: useMemo for expensive calculations -const visibleTodos = useMemo( - () => getFilteredTodos(todos, filter), - [todos, filter] -); -``` - -### Resetting State on Prop Change (Use key) - -```tsx -// BAD: Effect to reset state -function ProfilePage({ userId }) { - const [comment, setComment] = useState(''); - useEffect(() => { - setComment(''); - }, [userId]); - // ... -} - -// GOOD: Use key to reset component state -function ProfilePage({ userId }) { - return <Profile userId={userId} key={userId} />; -} - -function Profile({ userId }) { - const [comment, setComment] = useState(''); // Resets automatically - // ...
-} -``` - -### User Event Handling (Use Event Handlers) - -```tsx -// BAD: Event-specific logic in Effect -function ProductPage({ product, addToCart }) { - useEffect(() => { - if (product.isInCart) { - showNotification(`Added ${product.name} to cart`); - } - }, [product]); - // ... -} - -// GOOD: Logic in event handler -function ProductPage({ product, addToCart }) { - function buyProduct() { - addToCart(product); - showNotification(`Added ${product.name} to cart`); - } - // ... -} -``` - -### Notifying Parent of State Changes - -```tsx -// BAD: Effect to notify parent -function Toggle({ onChange }) { - const [isOn, setIsOn] = useState(false); - useEffect(() => { - onChange(isOn); - }, [isOn, onChange]); - // ... -} - -// GOOD: Update both in event handler -function Toggle({ onChange }) { - const [isOn, setIsOn] = useState(false); - function updateToggle(nextIsOn) { - setIsOn(nextIsOn); - onChange(nextIsOn); - } - // ... -} - -// BEST: Fully controlled component -function Toggle({ isOn, onChange }) { - function handleClick() { - onChange(!isOn); - } - // ... 
-} -``` - -### Chains of Effects - -```tsx -// BAD: Effect chain -useEffect(() => { - if (card !== null && card.gold) { - setGoldCardCount(c => c + 1); - } -}, [card]); - -useEffect(() => { - if (goldCardCount > 3) { - setRound(r => r + 1); - setGoldCardCount(0); - } -}, [goldCardCount]); - -// GOOD: Calculate derived state, update in event handler -const isGameOver = round > 5; - -function handlePlaceCard(nextCard) { - setCard(nextCard); - if (nextCard.gold) { - if (goldCardCount < 3) { - setGoldCardCount(goldCardCount + 1); - } else { - setGoldCardCount(0); - setRound(round + 1); - } - } -} -``` - -## Effect Dependencies - -### Never Suppress the Linter - -```tsx -// BAD: Suppressing linter hides bugs -useEffect(() => { - const id = setInterval(() => { - setCount(count + increment); - }, 1000); - return () => clearInterval(id); - // eslint-disable-next-line react-hooks/exhaustive-deps -}, []); - -// GOOD: Fix the code, not the linter -useEffect(() => { - const id = setInterval(() => { - setCount(c => c + increment); - }, 1000); - return () => clearInterval(id); -}, [increment]); -``` - -### Use Updater Functions to Remove State Dependencies - -```tsx -// BAD: messages in dependencies causes reconnection on every message -useEffect(() => { - connection.on('message', (msg) => { - setMessages([...messages, msg]); - }); - // ... -}, [messages]); // Reconnects on every message! - -// GOOD: Updater function removes dependency -useEffect(() => { - connection.on('message', (msg) => { - setMessages(msgs => [...msgs, msg]); - }); - // ... -}, []); // No messages dependency needed -``` - -### Move Objects/Functions Inside Effects - -```tsx -// BAD: Object created each render triggers Effect -function ChatRoom({ roomId }) { - const options = { serverUrl, roomId }; // New object each render - useEffect(() => { - const connection = createConnection(options); - connection.connect(); - return () => connection.disconnect(); - }, [options]); // Reconnects every render! 
-} - -// GOOD: Create object inside Effect -function ChatRoom({ roomId }) { - useEffect(() => { - const options = { serverUrl, roomId }; - const connection = createConnection(options); - connection.connect(); - return () => connection.disconnect(); - }, [roomId, serverUrl]); // Only reconnects when values change -} -``` - -### useEffectEvent for Non-Reactive Logic - -```tsx -// BAD: theme change reconnects chat -function ChatRoom({ roomId, theme }) { - useEffect(() => { - const connection = createConnection(serverUrl, roomId); - connection.on('connected', () => { - showNotification('Connected!', theme); - }); - connection.connect(); - return () => connection.disconnect(); - }, [roomId, theme]); // Reconnects on theme change! -} - -// GOOD: useEffectEvent for non-reactive logic -function ChatRoom({ roomId, theme }) { - const onConnected = useEffectEvent(() => { - showNotification('Connected!', theme); - }); - - useEffect(() => { - const connection = createConnection(serverUrl, roomId); - connection.on('connected', () => { - onConnected(); - }); - connection.connect(); - return () => connection.disconnect(); - }, [roomId]); // theme no longer causes reconnection -} -``` - -### Wrap Callback Props with useEffectEvent - -```tsx -// BAD: Callback prop in dependencies -function ChatRoom({ roomId, onReceiveMessage }) { - useEffect(() => { - connection.on('message', onReceiveMessage); - // ... - }, [roomId, onReceiveMessage]); // Reconnects if parent re-renders -} - -// GOOD: Wrap callback in useEffectEvent -function ChatRoom({ roomId, onReceiveMessage }) { - const onMessage = useEffectEvent(onReceiveMessage); - - useEffect(() => { - connection.on('message', onMessage); - // ... 
- }, [roomId]); // Stable dependency list -} -``` - -## Effect Cleanup - -### Always Clean Up Subscriptions - -```tsx -useEffect(() => { - const connection = createConnection(serverUrl, roomId); - connection.connect(); - return () => connection.disconnect(); // REQUIRED -}, [roomId]); - -useEffect(() => { - function handleScroll(e) { - console.log(window.scrollY); - } - window.addEventListener('scroll', handleScroll); - return () => window.removeEventListener('scroll', handleScroll); // REQUIRED -}, []); -``` - -### Data Fetching with Ignore Flag - -```tsx -useEffect(() => { - let ignore = false; +## Decision Tree - async function fetchData() { - const result = await fetchTodos(userId); - if (!ignore) { - setTodos(result); - } - } - - fetchData(); - - return () => { - ignore = true; // Prevents stale data from old requests - }; -}, [userId]); -``` +1. **Need to respond to user interaction?** Use event handler +2. **Need computed value from props/state?** Calculate during render +3. **Need cached expensive calculation?** Use `useMemo` +4. **Need to reset state on prop change?** Use `key` prop +5. **Need to synchronize with external system?** Use Effect with cleanup +6. **Need non-reactive code in Effect?** Use `useEffectEvent` +7. **Need mutable value that doesn't trigger render?** Use ref -### Development Double-Fire Is Intentional +## When to Use Effects -React remounts components in development to verify cleanup works. If you see effects firing twice, don't try to prevent it with refs: +Synchronizing with **external systems**: browser APIs (WebSocket, IntersectionObserver), third-party non-React libraries, window/document event listeners, non-React DOM elements (video, maps). -```tsx -// BAD: Hiding the symptom -const didInit = useRef(false); -useEffect(() => { - if (didInit.current) return; - didInit.current = true; - // ... 
-}, []); +## When NOT to Use Effects -// GOOD: Fix the cleanup -useEffect(() => { - const connection = createConnection(); - connection.connect(); - return () => connection.disconnect(); // Proper cleanup -}, []); -``` +- Derived state — calculate during render +- Expensive calculations — use `useMemo` +- Resetting state on prop change — use `key` prop +- Responding to user events — use event handlers +- Notifying parent of state changes — update both in the same event handler +- Chains of effects — calculate derived state and update in one event handler ## Refs -### Use Refs for Values That Don't Affect Rendering - -```tsx -// GOOD: Ref for timeout ID (doesn't affect UI) -const timeoutRef = useRef(null); - -function handleClick() { - clearTimeout(timeoutRef.current); - timeoutRef.current = setTimeout(() => { - // ... - }, 1000); -} - -// BAD: Using ref for displayed value -const countRef = useRef(0); -countRef.current++; // UI won't update! -``` - -### Never Read/Write ref.current During Render - -```tsx -// BAD: Reading ref during render -function MyComponent() { - const ref = useRef(0); - ref.current++; // Mutating during render! - return
{ref.current}
; // Reading during render! -} - -// GOOD: Read/write refs in event handlers and effects -function MyComponent() { - const ref = useRef(0); - - function handleClick() { - ref.current++; // OK in event handler - } - - useEffect(() => { - ref.current = someValue; // OK in effect - }, [someValue]); -} -``` - -### Ref Callbacks for Dynamic Lists - -```tsx -// BAD: Can't call useRef in a loop -{items.map((item) => { - const ref = useRef(null); // Rule violation! - return
  • ; -})} - -// GOOD: Ref callback with Map -const itemsRef = useRef(new Map()); - -{items.map((item) => ( -
  • { - if (node) { - itemsRef.current.set(item.id, node); - } else { - itemsRef.current.delete(item.id); - } - }} - /> -))} -``` - -### useImperativeHandle for Controlled Exposure - -```tsx -// Limit what parent can access -function MyInput({ ref }) { - const realInputRef = useRef(null); - - useImperativeHandle(ref, () => ({ - focus() { - realInputRef.current.focus(); - }, - // Parent can ONLY call focus(), not access full DOM node - })); - - return ; -} -``` +- Use for values that don't affect rendering (timer IDs, DOM node references) +- Never read or write `ref.current` during render; only in event handlers and effects +- Use ref callbacks (not `useRef` in loops) for dynamic lists +- Use `useImperativeHandle` to limit what parent can access ## Custom Hooks -### Hooks Share Logic, Not State - -```tsx -// Each call gets independent state -function StatusBar() { - const isOnline = useOnlineStatus(); // Own state -} - -function SaveButton() { - const isOnline = useOnlineStatus(); // Separate state instance -} -``` - -### Name Hooks useXxx Only If They Use Hooks - -```tsx -// BAD: useXxx but doesn't use hooks -function useSorted(items) { - return items.slice().sort(); -} - -// GOOD: Regular function -function getSorted(items) { - return items.slice().sort(); -} - -// GOOD: Uses hooks, so prefix with use -function useAuth() { - return useContext(AuthContext); -} -``` - -### Avoid "Lifecycle" Hooks - -```tsx -// BAD: Custom lifecycle hooks -function useMount(fn) { - useEffect(() => { - fn(); - }, []); // Missing dependency, linter can't catch it -} - -// GOOD: Use useEffect directly -useEffect(() => { - doSomething(); -}, [doSomething]); -``` - -### Keep Custom Hooks Focused - -```tsx -// GOOD: Focused, concrete use cases -useChatRoom({ serverUrl, roomId }); -useOnlineStatus(); -useFormInput(initialValue); - -// BAD: Generic, abstract hooks -useMount(fn); -useEffectOnce(fn); -useUpdateEffect(fn); -``` +- Share logic, not state — each call gets an independent state 
instance +- Name `useXxx` only if it actually calls other hooks; otherwise use a regular function +- Avoid lifecycle hooks (`useMount`, `useEffectOnce`) — use `useEffect` directly so the linter catches missing deps +- Keep focused on a single concrete use case ## Component Patterns -### Controlled vs Uncontrolled - -```tsx -// Uncontrolled: component owns state -function SearchInput() { - const [query, setQuery] = useState(''); - return setQuery(e.target.value)} />; -} - -// Controlled: parent owns state -function SearchInput({ query, onQueryChange }) { - return onQueryChange(e.target.value)} />; -} -``` +- Controlled: parent owns state; uncontrolled: component owns state +- Prefer composition with `children` over prop drilling; use Context only for truly global state +- Use `flushSync` when you need to read the DOM synchronously after a state update -### Prefer Composition Over Prop Drilling - -```tsx -// BAD: Prop drilling - - -
    - -
    -
    -
    - -// GOOD: Composition with children - - -
    } /> - - - -// GOOD: Context for truly global state - - - -``` - -### flushSync for Synchronous DOM Updates - -```tsx -// When you need to read DOM immediately after state update -import { flushSync } from 'react-dom'; - -function handleAdd() { - flushSync(() => { - setTodos([...todos, newTodo]); - }); - // DOM is now updated, safe to read - listRef.current.lastChild.scrollIntoView(); -} -``` - -## Summary: Decision Tree - -1. **Need to respond to user interaction?** Use event handler -2. **Need computed value from props/state?** Calculate during render -3. **Need cached expensive calculation?** Use useMemo -4. **Need to reset state on prop change?** Use key prop -5. **Need to synchronize with external system?** Use Effect with cleanup -6. **Need non-reactive code in Effect?** Use useEffectEvent -7. **Need mutable value that doesn't trigger render?** Use ref +See `react-patterns.md` for code examples and detailed patterns. diff --git a/.claude/skills/react-best-practices/react-patterns.md b/.claude/skills/react-best-practices/react-patterns.md new file mode 100644 index 0000000..43e44e2 --- /dev/null +++ b/.claude/skills/react-best-practices/react-patterns.md @@ -0,0 +1,516 @@ +# React Patterns Reference + +Code examples for patterns summarized in `SKILL.md`. Load this file when you need to see or produce a concrete implementation. 
+ +## Effect Anti-Patterns + +### Derived State (Calculate During Render) + +```tsx +// BAD: Effect for derived state +const [firstName, setFirstName] = useState('Taylor'); +const [lastName, setLastName] = useState('Swift'); +const [fullName, setFullName] = useState(''); +useEffect(() => { + setFullName(firstName + ' ' + lastName); +}, [firstName, lastName]); + +// GOOD: Calculate during render +const [firstName, setFirstName] = useState('Taylor'); +const [lastName, setLastName] = useState('Swift'); +const fullName = firstName + ' ' + lastName; +``` + +### Expensive Calculations (Use useMemo) + +```tsx +// BAD: Effect for caching +const [visibleTodos, setVisibleTodos] = useState([]); +useEffect(() => { + setVisibleTodos(getFilteredTodos(todos, filter)); +}, [todos, filter]); + +// GOOD: useMemo for expensive calculations +const visibleTodos = useMemo( + () => getFilteredTodos(todos, filter), + [todos, filter] +); +``` + +### Resetting State on Prop Change (Use key) + +```tsx +// BAD: Effect to reset state +function ProfilePage({ userId }) { + const [comment, setComment] = useState(''); + useEffect(() => { + setComment(''); + }, [userId]); +} + +// GOOD: Use key to reset component state +function ProfilePage({ userId }) { + return <Profile userId={userId} key={userId} />; +} + +function Profile({ userId }) { + const [comment, setComment] = useState(''); // Resets automatically when key changes +} +``` + +### User Event Handling (Use Event Handlers) + +```tsx +// BAD: Event-specific logic in Effect +function ProductPage({ product, addToCart }) { + useEffect(() => { + if (product.isInCart) { + showNotification(`Added ${product.name} to cart`); + } + }, [product]); +} + +// GOOD: Logic in event handler +function ProductPage({ product, addToCart }) { + function buyProduct() { + addToCart(product); + showNotification(`Added ${product.name} to cart`); + } +} +``` + +### Notifying Parent of State Changes + +```tsx +// BAD: Effect to notify parent +function Toggle({ onChange }) { + const [isOn, setIsOn] =
useState(false); + useEffect(() => { + onChange(isOn); + }, [isOn, onChange]); +} + +// GOOD: Update both in event handler +function Toggle({ onChange }) { + const [isOn, setIsOn] = useState(false); + function updateToggle(nextIsOn) { + setIsOn(nextIsOn); + onChange(nextIsOn); + } +} + +// BEST: Fully controlled component +function Toggle({ isOn, onChange }) { + function handleClick() { + onChange(!isOn); + } +} +``` + +### Chains of Effects + +```tsx +// BAD: Effect chain — each effect re-renders before the next fires +useEffect(() => { + if (card !== null && card.gold) { + setGoldCardCount(c => c + 1); + } +}, [card]); + +useEffect(() => { + if (goldCardCount > 3) { + setRound(r => r + 1); + setGoldCardCount(0); + } +}, [goldCardCount]); + +// GOOD: Calculate derived state, update everything in one event handler +const isGameOver = round > 5; + +function handlePlaceCard(nextCard) { + setCard(nextCard); + if (nextCard.gold) { + if (goldCardCount < 3) { + setGoldCardCount(goldCardCount + 1); + } else { + setGoldCardCount(0); + setRound(round + 1); + } + } +} +``` + +## Effect Dependencies + +### Never Suppress the Linter + +```tsx +// BAD: Suppressing linter hides bugs +useEffect(() => { + const id = setInterval(() => { + setCount(count + increment); + }, 1000); + return () => clearInterval(id); + // eslint-disable-next-line react-hooks/exhaustive-deps +}, []); + +// GOOD: Fix the code, not the linter +useEffect(() => { + const id = setInterval(() => { + setCount(c => c + increment); + }, 1000); + return () => clearInterval(id); +}, [increment]); +``` + +### Use Updater Functions to Remove State Dependencies + +```tsx +// BAD: messages in dependencies causes reconnection on every message +useEffect(() => { + connection.on('message', (msg) => { + setMessages([...messages, msg]); + }); +}, [messages]); // Reconnects on every message! 
+ +// GOOD: Updater function removes the dependency +useEffect(() => { + connection.on('message', (msg) => { + setMessages(msgs => [...msgs, msg]); + }); +}, []); // No messages dependency needed +``` + +### Move Objects/Functions Inside Effects + +```tsx +// BAD: Object created each render triggers Effect +function ChatRoom({ roomId }) { + const options = { serverUrl, roomId }; // New object each render + useEffect(() => { + const connection = createConnection(options); + connection.connect(); + return () => connection.disconnect(); + }, [options]); // Reconnects every render! +} + +// GOOD: Create object inside Effect +function ChatRoom({ roomId }) { + useEffect(() => { + const options = { serverUrl, roomId }; + const connection = createConnection(options); + connection.connect(); + return () => connection.disconnect(); + }, [roomId, serverUrl]); // Only reconnects when values change +} +``` + +### useEffectEvent for Non-Reactive Logic + +```tsx +// BAD: theme change reconnects chat +function ChatRoom({ roomId, theme }) { + useEffect(() => { + const connection = createConnection(serverUrl, roomId); + connection.on('connected', () => { + showNotification('Connected!', theme); + }); + connection.connect(); + return () => connection.disconnect(); + }, [roomId, theme]); // Reconnects on theme change! 
+} + +// GOOD: useEffectEvent for non-reactive logic +function ChatRoom({ roomId, theme }) { + const onConnected = useEffectEvent(() => { + showNotification('Connected!', theme); + }); + + useEffect(() => { + const connection = createConnection(serverUrl, roomId); + connection.on('connected', () => { + onConnected(); + }); + connection.connect(); + return () => connection.disconnect(); + }, [roomId]); // theme no longer causes reconnection +} +``` + +### Wrap Callback Props with useEffectEvent + +```tsx +// BAD: Callback prop in dependencies reconnects if parent re-renders +function ChatRoom({ roomId, onReceiveMessage }) { + useEffect(() => { + connection.on('message', onReceiveMessage); + }, [roomId, onReceiveMessage]); +} + +// GOOD: Wrap callback in useEffectEvent +function ChatRoom({ roomId, onReceiveMessage }) { + const onMessage = useEffectEvent(onReceiveMessage); + + useEffect(() => { + connection.on('message', onMessage); + }, [roomId]); // Stable dependency list +} +``` + +## Effect Cleanup + +### Always Clean Up Subscriptions + +```tsx +useEffect(() => { + const connection = createConnection(serverUrl, roomId); + connection.connect(); + return () => connection.disconnect(); // REQUIRED +}, [roomId]); + +useEffect(() => { + function handleScroll(e) { + console.log(window.scrollY); + } + window.addEventListener('scroll', handleScroll); + return () => window.removeEventListener('scroll', handleScroll); // REQUIRED +}, []); +``` + +### Data Fetching with Ignore Flag + +```tsx +useEffect(() => { + let ignore = false; + + async function fetchData() { + const result = await fetchTodos(userId); + if (!ignore) { + setTodos(result); + } + } + + fetchData(); + + return () => { + ignore = true; // Prevents stale data from superseded requests + }; +}, [userId]); +``` + +### Development Double-Fire Is Intentional + +React remounts components in development to verify cleanup works. 
If effects fire twice, fix the cleanup — don't suppress the double-fire: + +```tsx +// BAD: Hiding the symptom +const didInit = useRef(false); +useEffect(() => { + if (didInit.current) return; + didInit.current = true; + // ... +}, []); + +// GOOD: Fix the cleanup so remounting is safe +useEffect(() => { + const connection = createConnection(); + connection.connect(); + return () => connection.disconnect(); +}, []); +``` + +## Ref Patterns + +### Use Refs for Values That Don't Affect Rendering + +```tsx +// GOOD: Ref for timeout ID (doesn't affect UI) +const timeoutRef = useRef(null); + +function handleClick() { + clearTimeout(timeoutRef.current); + timeoutRef.current = setTimeout(() => { + // ... + }, 1000); +} + +// BAD: Using ref for displayed value — UI won't update +const countRef = useRef(0); +countRef.current++; +``` + +### Never Read/Write ref.current During Render + +```tsx +// BAD: Reading/writing ref during render +function MyComponent() { + const ref = useRef(0); + ref.current++; // Mutating during render! + return
    <div>{ref.current}</div>
    ; // Reading during render! +} + +// GOOD: Read/write refs in event handlers and effects +function MyComponent() { + const ref = useRef(0); + + function handleClick() { + ref.current++; // OK in event handler + } + + useEffect(() => { + ref.current = someValue; // OK in effect + }, [someValue]); +} +``` + +### Ref Callbacks for Dynamic Lists + +```tsx +// BAD: Can't call useRef in a loop +{items.map((item) => { + const ref = useRef(null); // Rules of Hooks violation! + return
  <li ref={ref} />; +})} + +// GOOD: Ref callback with Map +const itemsRef = useRef(new Map()); + +{items.map((item) => ( +
  <li + key={item.id} + ref={(node) => { + if (node) { + itemsRef.current.set(item.id, node); + } else { + itemsRef.current.delete(item.id); + } + }} + /> +))} +``` + +### useImperativeHandle for Controlled Exposure + +```tsx +// Limit what parent can access through a ref — expose only the API surface you intend +function MyInput({ ref }) { + const realInputRef = useRef(null); + + useImperativeHandle(ref, () => ({ + focus() { + realInputRef.current.focus(); + }, + // Parent can ONLY call focus(), not access the full DOM node + })); + + return <input ref={realInputRef} />; +} +``` + +## Custom Hook Patterns + +### Hooks Share Logic, Not State + +```tsx +// Each call gets independent state — these are two separate online status subscriptions +function StatusBar() { + const isOnline = useOnlineStatus(); +} + +function SaveButton() { + const isOnline = useOnlineStatus(); +} +``` + +### Name Hooks useXxx Only If They Use Hooks + +```tsx +// BAD: useXxx prefix but doesn't call any hooks +function useSorted(items) { + return items.slice().sort(); +} + +// GOOD: Regular function +function getSorted(items) { + return items.slice().sort(); +} + +// GOOD: Uses hooks, so prefix with use +function useAuth() { + return useContext(AuthContext); +} +``` + +### Avoid "Lifecycle" Hooks + +```tsx +// BAD: Custom lifecycle hooks prevent linter from catching missing dependencies +function useMount(fn) { + useEffect(() => { + fn(); + }, []); // fn is missing from dependencies — linter can't catch it +} + +// GOOD: Use useEffect directly +useEffect(() => { + doSomething(); +}, [doSomething]); +``` + +## Component Patterns + +### Controlled vs Uncontrolled + +```tsx +// Uncontrolled: component owns state +function SearchInput() { + const [query, setQuery] = useState(''); + return <input value={query} onChange={(e) => setQuery(e.target.value)} />; +} + +// Controlled: parent owns state — more composable, easier to test +function SearchInput({ query, onQueryChange }) { + return <input value={query} onChange={(e) => onQueryChange(e.target.value)} />; +} +``` + +### Prefer Composition Over Prop Drilling + +```tsx +// BAD: Prop
drilling through intermediate components that don't use the value + + +
    + +
    +
    +
    + +// GOOD: Pass the rendered element, not raw data + + +
    } /> + + + +// GOOD: Context for truly global state (auth, theme, locale) + + + +``` + +### flushSync for Synchronous DOM Updates + +```tsx +// When you need to read the DOM immediately after a state update +// (e.g., scroll to a newly added list item before the next paint) +import { flushSync } from 'react-dom'; + +function handleAdd() { + flushSync(() => { + setTodos([...todos, newTodo]); + }); + // DOM is now updated synchronously — safe to read layout + listRef.current.lastChild.scrollIntoView(); +} +``` diff --git a/.claude/skills/rl-clean b/.claude/skills/rl-clean new file mode 120000 index 0000000..6aedce1 --- /dev/null +++ b/.claude/skills/rl-clean @@ -0,0 +1 @@ +../../../../../.agents/skills/rl-clean \ No newline at end of file diff --git a/.claude/skills/rl-done b/.claude/skills/rl-done new file mode 120000 index 0000000..830c829 --- /dev/null +++ b/.claude/skills/rl-done @@ -0,0 +1 @@ +../../../../../.agents/skills/rl-done \ No newline at end of file diff --git a/.claude/skills/rl-init b/.claude/skills/rl-init new file mode 120000 index 0000000..37200c8 --- /dev/null +++ b/.claude/skills/rl-init @@ -0,0 +1 @@ +../../../../../.agents/skills/rl-init \ No newline at end of file diff --git a/.claude/skills/rl-log b/.claude/skills/rl-log new file mode 120000 index 0000000..1dbc5de --- /dev/null +++ b/.claude/skills/rl-log @@ -0,0 +1 @@ +../../../../../.agents/skills/rl-log \ No newline at end of file diff --git a/.claude/skills/rl-prompt b/.claude/skills/rl-prompt new file mode 120000 index 0000000..c78a742 --- /dev/null +++ b/.claude/skills/rl-prompt @@ -0,0 +1 @@ +../../../../../.agents/skills/rl-prompt \ No newline at end of file diff --git a/.claude/skills/rl-state b/.claude/skills/rl-state new file mode 120000 index 0000000..3a18549 --- /dev/null +++ b/.claude/skills/rl-state @@ -0,0 +1 @@ +../../../../../.agents/skills/rl-state \ No newline at end of file diff --git a/.claude/skills/rl-status b/.claude/skills/rl-status new file mode 120000 index 
0000000..0ecceec --- /dev/null +++ b/.claude/skills/rl-status @@ -0,0 +1 @@ +../../../../../.agents/skills/rl-status \ No newline at end of file diff --git a/.claude/skills/spec-best-practices/SKILL.md b/.claude/skills/spec-best-practices/SKILL.md index 22650bd..2383367 100644 --- a/.claude/skills/spec-best-practices/SKILL.md +++ b/.claude/skills/spec-best-practices/SKILL.md @@ -1,154 +1,61 @@ --- name: spec-best-practices -description: Spec authoring conventions for naming, placement, structure, and lifecycle. Use when creating, reviewing, or updating SPEC.md files, running /specout, or entering the ADF SPEC gate. +description: Use when creating, reviewing, or updating SPEC.md files, running /specout, or entering the ADF SPEC gate. --- -## When to activate - -Engage when: -- Creating a new spec (greenfield or retroactive) -- Reviewing or updating an existing `SPEC.md` -- Entering the ADF `SPEC` gate -- Running `/specout` -- An agent proposes a spec file with the wrong name or location - ## Naming -Always `SPEC.md`. No exceptions for the primary spec file. +Always `SPEC.md`. No exceptions for the primary spec file. Not `feature.spec.md`, not `SPEC-feature.md`. -- Not `feature.spec.md`, not `thing-spec.md`, not `SPEC-feature.md` -- The file name is always exactly `SPEC.md` - -Supporting documents linked from a `SPEC.md` TOC may use descriptive names (e.g., `commands.spec.md`, `config-and-state.spec.md`), but only when the root or package `SPEC.md` exists and links to them. +Supporting documents linked from a `SPEC.md` TOC may use descriptive names (e.g., `commands.spec.md`), but only when a root `SPEC.md` exists and links to them. ## Placement -Specs are colocated with the code they describe. 
- -### Standard layout - -``` -repo/ - SPEC.md # root spec: project-level scope - apps/foo/SPEC.md # app-level spec - packages/bar/SPEC.md # package-level spec - src/lib/module/SPEC.md # module-level spec (non-monorepo) -``` - -### Rules +Specs are colocated with the code they describe: root `SPEC.md` for project scope, `apps/foo/SPEC.md` for app scope, `packages/bar/SPEC.md` for package scope. -- Root `SPEC.md` covers the project/repo scope: problem, solution, domain model, cross-cutting requirements. -- Package/app/module `SPEC.md` files cover the behavior of that unit. -- Avoid `spec/`, `docs/specs/`, and `docs/plans/` directories by default. Prefer colocated `SPEC.md` files and adjacent supporting docs. -- Plan documents are ephemeral. If a plan captures durable decisions, absorb them into the relevant `SPEC.md` and delete the plan doc. - -### When a spec gets long - -Add a TOC to the `SPEC.md` linking to adjacent supporting files: - -```markdown -## Specifications - -- [Commands](./commands.spec.md) -- [Config and State](./config-and-state.spec.md) -- [Error Handling](./errors-and-observability.spec.md) -``` - -Supporting files live alongside the `SPEC.md` that references them, not in a subdirectory. Exception: large single-binary projects with many cross-cutting spec topics may use a `spec/` directory with a contracts index when the domain is complex enough that colocated `SPEC.md` trees would be awkward. Treat this as an explicit exception, not the default layout. +- Avoid `spec/`, `docs/specs/`, and `docs/plans/` directories. Prefer colocated `SPEC.md` files. +- Plan documents are ephemeral. Absorb durable decisions into the relevant `SPEC.md` and delete the plan doc. +- When a spec gets long, add a TOC linking to adjacent supporting files (`./commands.spec.md`, etc.). Supporting files live alongside the `SPEC.md`, not in a subdirectory. ## Content -Specs are freeform markdown. No rigid template, no YAML frontmatter, no required section ordering. 
The following elements must be present, arranged in whatever order suits the domain. - -### Required elements +Specs are freeform markdown. No rigid template, no YAML frontmatter, no required section ordering. These elements must be present: **Problem and solution** -- narrative context for why this system/feature exists. Lead with the problem. -**Domain model** -- types, relationships, data flow. Required for new systems. For retroactive specs, derive from inspected code. +**Domain model** -- types, relationships, data flow. For retroactive specs, derive from inspected code. -**Requirements with `REQ-*` IDs** -- every behavioral requirement gets a stable identifier. -- Format: `REQ-{DOMAIN}-{NNN}` (e.g., `REQ-AUTH-001`, `REQ-SYNC-003`) -- Domain prefix matches the module/package scope -- Append-only; never renumber -- Each requirement is testable and traceable +**Requirements with `REQ-*` IDs** -- every behavioral requirement gets a stable identifier. Format: `REQ-{DOMAIN}-{NNN}` (e.g., `REQ-AUTH-001`). Append-only; never renumber. Each requirement is testable and traceable. -**Invariants** -- conditions that must always hold. State inline with requirements or in a dedicated section. +**Invariants** -- conditions that must always hold. -**Non-goals** -- explicit scope boundary. What this spec intentionally does not cover. Prevents scope creep and sets expectations for reviewers. +**Non-goals** -- explicit scope boundary. What this spec intentionally does not cover. -**Acceptance criteria** -- checklistable verification items. Use markdown checklists, not prose. +**Acceptance criteria** -- markdown checklist, not prose: ```markdown -## Acceptance Criteria - - [ ] Auth endpoint returns JWT with tier claim - [ ] Rate limiter rejects >100 req/min per IP -- [ ] Drift scan completes in <5s for repos with <1000 managed files ``` -### Conditional elements - -**Risk tags** -- flag high-risk items (schema migrations, auth changes, public API contracts, infra changes). 
Include them when those risks exist or when the ADF `PLAN` gate requires approval. - -**Test traceability** -- maps `REQ-*` IDs to test file:line references. Added during or after the TDD/DEV phase, not at initial authoring. - -```markdown -## Test Traceability - -| Requirement | Test | -|-------------|------| -| REQ-AUTH-001 | src/auth/auth.test.ts:42 | -| REQ-SYNC-003 | src/sync/sync.test.ts:87 | -``` +**Risk tags** (conditional) -- flag high-risk items (schema migrations, auth changes, public API contracts, infra changes) when those risks exist or the ADF `PLAN` gate requires approval. -**`[Normative]` / `[Informative]` section labels** -- use when multiple specs cross-reference each other and precision matters about which sections define binding contracts vs. provide examples. +**Test traceability** (conditional) -- `REQ-*` to test file:line mapping. Added during/after TDD, not at initial authoring. ## Authoring rules -### Evidence-based - -Read code before writing spec content. Do not invent behavior, signatures, or file paths. For retroactive specs, derive requirements from the actual implementation. +**Evidence-based**: read code before writing spec content. Do not invent behavior, signatures, or file paths. For retroactive specs, derive requirements from the actual implementation. -### Retroactive specs are first-class +**Retroactive specs are first-class**: documenting existing behavior is valid and encouraged. Read the implementation, extract requirements from actual behavior, note inconsistencies as open items (not silent omissions), map traceability to existing tests. -Documenting existing behavior in a `SPEC.md` is valid and encouraged. Retroactive specs follow the same structure and naming rules. When writing retroactively: -1. Read the implementation thoroughly -2. Extract requirements from actual behavior -3. Note any discovered inconsistencies as open items, not silent omissions -4. 
Map test traceability to existing tests +**Mutation policy**: do not edit a spec without explicit user direction. When drift is found, surface it immediately using `specalign` patterns. Never silently tolerate or fix drift — the user decides whether to update spec or code. -### Mutation policy - -- Do not edit a spec without explicit user direction -- When drift is found between spec and code, surface it immediately (use `specalign` patterns) -- Never silently tolerate drift; never silently fix it -- The user decides whether to update spec or code for each discrepancy - -### Spec vs. plan - -Specs describe **what** the system does and **why**. Plans describe **how** and **when** to build it. Plans are ephemeral work artifacts; specs are durable project documentation. - -If a plan doc contains decisions that should outlive the implementation sprint, those decisions belong in the spec. Delete the plan doc after absorption. +**Spec vs. plan**: specs describe what and why; plans describe how and when. Plans are ephemeral. Absorb durable decisions into the spec; delete the plan doc. ## Lifecycle -### Creation (SPEC gate) - -The ADF `SPEC` gate requires: IDs, invariants, non-goals, acceptance criteria, and risk tags when high-risk items exist. - -When entering the SPEC gate: -1. Determine placement: which `SPEC.md` file should this go in? -2. If the file exists, read it and identify gaps -3. If the file doesn't exist, create it at the correct colocated path -4. Ensure all required elements are present before passing the gate - -### Maintenance - -- Update spec when behavior changes (spec leads code changes; code leads retroactive spec updates) -- Append new `REQ-*` IDs; never renumber existing ones -- Add test traceability as tests are written -- Run `specalign` when both spec and implementation are in context +**Creation (SPEC gate)**: see ADF SPEC gate in CLAUDE.md for gate requirements. 
Determine placement, read existing file and identify gaps (or create at the correct colocated path), ensure all required elements are present. -### Retirement +**Maintenance**: update spec when behavior changes; append new `REQ-*` IDs, never renumber; add test traceability as tests are written; run `specalign` when spec and implementation are both in context. -When a feature is removed, remove or archive its `SPEC.md`. Do not leave stale specs that describe deleted behavior. +**Retirement**: when a feature is removed, remove or archive its `SPEC.md`. Do not leave stale specs describing deleted behavior. diff --git a/.claude/skills/specalign/SKILL.md b/.claude/skills/specalign/SKILL.md deleted file mode 100644 index f0f5574..0000000 --- a/.claude/skills/specalign/SKILL.md +++ /dev/null @@ -1,112 +0,0 @@ ---- -name: specalign -description: Align spec files with implementation. Detects drift between spec and code, surfaces discrepancies, user decides whether to update spec or code. Use when both a spec file and its implementation are in context. ---- - -# Spec Alignment - -## Principles (Always Active) - -These apply whenever a spec file and its corresponding implementation are both in context: - -### Spec and Code Must Agree - -- A spec describes intended behavior; code implements it. When they disagree, one is wrong. -- Never silently tolerate drift - surface it immediately when noticed. -- The user decides which is the source of truth for each discrepancy. Do not assume. 
- -### Drift Categories - -- **Type drift**: spec defines fields/types that don't match the implementation -- **Behavior drift**: spec describes logic the code doesn't follow -- **Missing implementation**: spec defines something with no corresponding code -- **Missing spec**: code implements behavior not described in the spec -- **Constraint drift**: spec states invariants the code doesn't enforce -- **Error handling drift**: spec defines error cases the code doesn't handle (or vice versa) - -### Mutation Policy - -- Do not edit spec files unless the user explicitly chooses "update spec" for a discrepancy. -- Do not change implementation logic unless the user explicitly chooses "update code". -- When updating code, run lint/typecheck after changes. -- When updating spec, preserve formatting and structure of unrelated sections. - -### Bidirectional Awareness - -When reading code, check if a spec exists and note divergences. -When reading a spec, check if the implementation matches and note divergences. -This awareness should be passive - flag drift in your responses without interrupting the user's primary task, unless the drift is directly relevant. - -## Workflow (When Explicitly Aligning) - -### Step 1: Locate the Spec - -A spec file is required. Search for `SPEC.md` at colocated paths: -- `SPEC.md` (root, `apps/*/`, `packages/*/`, `src/lib/*/`) -- Supporting files linked from a `SPEC.md` TOC (e.g., `commands.spec.md`) -- Legacy patterns as fallback: `*.spec.md`, `*-spec.md`, `spec/*.md` - -If multiple specs exist, ask which to align. If none exist, stop - this workflow requires an existing spec. - -Read the spec file completely. - -### Step 2: Map Spec to Code - -For each spec section, identify the corresponding implementation: - -| Spec Section | Source File(s) | Status | -|---|---|---| -| `## Types` | `src/types.ts:10-40` | aligned / drifted / missing-impl / missing-spec | - -Read each mapped source file before assessing. 
- -### Step 3: Present Discrepancies - -For each discrepancy: - -``` -### DRIFT-01: - -**Spec says** (spec-file.md:L42): -> - -**Code does** (src/module.ts:L87): -> - -**Impact**: -``` - -Number with stable IDs (`DRIFT-NN`). Batch related discrepancies that share a root cause. - -### Step 4: User Decision - -For each discrepancy, ask: - -- **Update spec** - the code is correct, update the spec to match -- **Update code** - the spec is correct, update the code to match -- **Skip** - defer this discrepancy - -### Step 5: Apply Changes - -**Spec updates:** -- Edit the spec file with corrected text -- Preserve formatting and structure of unrelated sections - -**Code updates:** -- Fix the implementation to match the spec -- Run lint/typecheck after changes -- If non-trivial, outline the change and confirm before editing -- If unit tests exist for the affected code, run them -- If unit tests don't exist and the spec defines testable behavior, flag it - -### Step 6: Summary - -``` -## Spec Alignment: - -**Discrepancies found**: N -**Resolved**: X (spec: A, code: B, skipped: C) - -### Remaining -- DRIFT-04: (skipped) -``` diff --git a/.claude/skills/tamagui-best-practices/SKILL.md b/.claude/skills/tamagui-best-practices/SKILL.md index 05fd349..fd104ff 100644 --- a/.claude/skills/tamagui-best-practices/SKILL.md +++ b/.claude/skills/tamagui-best-practices/SKILL.md @@ -1,393 +1,112 @@ --- name: tamagui-best-practices -description: Provides Tamagui patterns for config v4, compiler optimization, styled context, and cross-platform styling. Must use when working with Tamagui projects (tamagui.config.ts, @tamagui imports). +description: Use when working with Tamagui projects (tamagui.config.ts, @tamagui imports). --- -This skill provides patterns for Tamagui v1.x that go beyond fundamentals. It focuses on Config v4, compiler optimization, compound components, and common mistakes. 
+# Tamagui Best Practices -## Mandatory Context Loading +Tamagui v1.x patterns beyond fundamentals: Config v4, compiler optimization, compound components, and gotchas. -When working with these components, read the corresponding pattern file BEFORE writing code: +## Reference Files — Read Before Writing Code -| Component Type | Required Reading | Cross-Skills | -|---------------|------------------|--------------| -| Dialog, Sheet, modal overlays | @DIALOG_PATTERNS.md | | -| Form, Input, Label, validation | @FORM_PATTERNS.md | `typescript-best-practices` (zod) | -| Animations, transitions | @ANIMATION_PATTERNS.md | | -| Popover, Tooltip, Select | @OVERLAY_PATTERNS.md | | -| Compiler optimization | @COMPILER_PATTERNS.md | | -| Design tokens, theming | @DESIGN_SYSTEM.md | | +| Context | File | What it covers | +|---------|------|----------------| +| Dialog, Sheet, modal overlays | @DIALOG_PATTERNS.md | Adapt component, accessibility | +| Form, Input, Label, validation | @FORM_PATTERNS.md | zod integration | +| Animations, transitions | @ANIMATION_PATTERNS.md | drivers, enterStyle/exitStyle | +| Popover, Tooltip, Select | @OVERLAY_PATTERNS.md | overlay primitives | +| Compiler optimization | @COMPILER_PATTERNS.md | what the compiler can/cannot flatten | +| Design tokens, theming | @DESIGN_SYSTEM.md | palette, token structure | -## Config v4 Quick Start +## Config v4 -Use `@tamagui/config/v4` for simplified setup: +Minimal setup with `@tamagui/config/v4`. 
Add `styleCompat: 'react-native'` for new projects to align `flexBasis` with React Native behavior: ```tsx -// tamagui.config.ts import { defaultConfig } from '@tamagui/config/v4' import { createTamagui } from 'tamagui' -export const config = createTamagui(defaultConfig) - -type CustomConfig = typeof config - -declare module 'tamagui' { - interface TamaguiCustomConfig extends CustomConfig {} -} -``` - -**Recommended setting** for new projects (aligns flexBasis to React Native): -```tsx export const config = createTamagui({ ...defaultConfig, - settings: { - ...defaultConfig.settings, - styleCompat: 'react-native', - }, + settings: { ...defaultConfig.settings, styleCompat: 'react-native' }, }) -``` - -### createThemes Pattern - -For custom themes, use `createThemes` with palette/accent/childrenThemes: -```tsx -import { createThemes, defaultComponentThemes } from '@tamagui/config/v4' - -const generatedThemes = createThemes({ - componentThemes: defaultComponentThemes, - base: { - palette: { - dark: ['#050505', '#151515', /* ...12 colors */ '#fff'], - light: ['#fff', '#f8f8f8', /* ...12 colors */ '#000'], - }, - extra: { - light: { ...Colors.blue, shadowColor: 'rgba(0,0,0,0.04)' }, - dark: { ...Colors.blueDark, shadowColor: 'rgba(0,0,0,0.2)' }, - }, - }, - accent: { - palette: { dark: lightPalette, light: darkPalette }, // inverted - }, - childrenThemes: { - blue: { palette: { dark: Object.values(Colors.blueDark), light: Object.values(Colors.blue) } }, - red: { /* ... */ }, - green: { /* ... */ }, - }, -}) +type CustomConfig = typeof config + +declare module 'tamagui' { + interface TamaguiCustomConfig extends CustomConfig {} +} ``` -## Token and Theme Syntax - -### $ Prefix Rules - -- **Props**: Use `$` prefix for token references: `` -- **Theme keys**: Access without `$` in theme definitions: `{ color: palette[11] }` -- **Token access in variants**: Use `tokens.size[name]` pattern +For custom themes use `createThemes` with `palette`/`accent`/`childrenThemes` — see @DESIGN_SYSTEM.md. 
-### Variant Spread Operators +## Compiler Optimization Rules -Special spread operators map token categories to variant values: +- Use `styled()` variants instead of inline conditionals — dynamic values break flattening. +- Avoid `style={{ ... }}` with variables; use variant props instead. +- The `context` pattern (createStyledContext) disables compiler flattening — use for higher-level components (Button, Card), not primitives. ```tsx -const Button = styled(View, { +// BAD — breaks compiler + + +// GOOD — use variants +const Box = styled(View, { variants: { - size: { - // Maps size tokens: $1, $2, $true, etc. - '...size': (size, { tokens }) => ({ - height: tokens.size[size] ?? size, - borderRadius: tokens.radius[size] ?? size, - gap: tokens.space[size]?.val * 0.2, - }), - }, - textSize: { - // Maps fontSize tokens - '...fontSize': (name, { font }) => ({ - fontSize: font?.size[name], - }), - }, - } as const, + dark: { true: { backgroundColor: '$gray1' }, false: { backgroundColor: '$gray12' } }, + }, }) ``` -**Important**: Use `as const` on variants object until TypeScript supports inferred const generics. +## styled() vs Inline -## Compound Components with createStyledContext +- `styled()`: reusable components, variant-driven behavior, compiler-optimizable primitives. +- Inline props: one-off layout adjustments on already-styled components. +- Always use `as const` on `variants` objects (TypeScript limitation until inferred const generics). -For compound APIs like ``: +## Key Gotchas +**Prop order determines override priority** — props after a spread cannot be overridden by callers: ```tsx -import { - SizeTokens, - View, - Text, - createStyledContext, - styled, - withStaticProperties, -} from '@tamagui/core' - -// 1. Create context with shared variant types -export const ButtonContext = createStyledContext<{ size: SizeTokens }>({ - size: '$medium', -}) - -// 2. 
Create frame with context -export const ButtonFrame = styled(View, { - name: 'Button', - context: ButtonContext, - variants: { - size: { - '...size': (name, { tokens }) => ({ - height: tokens.size[name], - borderRadius: tokens.radius[name], - gap: tokens.space[name].val * 0.2, - }), - }, - } as const, - defaultVariants: { - size: '$medium', - }, -}) - -// 3. Create text with same context (variants auto-sync) -export const ButtonText = styled(Text, { - name: 'ButtonText', - context: ButtonContext, - variants: { - size: { - '...fontSize': (name, { font }) => ({ - fontSize: font?.size[name], - }), - }, - } as const, -}) - -// 4. Compose with withStaticProperties -export const Button = withStaticProperties(ButtonFrame, { - Props: ButtonContext.Provider, - Text: ButtonText, -}) +// width is locked; backgroundColor can be overridden + ``` -**Usage**: +**Variant order matters** — later props win: ```tsx - - -// Or override defaults from above: - - - + // scale = 3 (scale listed first) + // scale = 2 (huge overrides, comes first in variants) ``` -**Note**: `context` pattern does not work with compiler flattening. Use for higher-level components (Button, Card), not primitives (Stack, Text). 
- -## styleable() for Wrapper Components - -When wrapping a styled component in a functional component, use `.styleable()` to preserve variant inheritance: - +**Use `.styleable()` when wrapping styled components** — preserves variant inheritance: ```tsx -const StyledText = styled(Text) - -// WITHOUT styleable - BROKEN variant inheritance -const BrokenWrapper = (props) => - -// WITH styleable - CORRECT const CorrectWrapper = StyledText.styleable((props, ref) => ( )) - -// Now this works: -const StyledCorrectWrapper = styled(CorrectWrapper, { - variants: { - bold: { true: { fontWeight: 'bold' } }, - }, -}) -``` - -### Adding Extra Props - -Pass generic type argument for additional props: - -```tsx -type ExtraProps = { icon?: React.ReactNode } - -const IconText = StyledText.styleable((props, ref) => { - const { icon, ...rest } = props - return ( - - {icon} - - - ) -}) ``` -## accept Prop for Custom Components - -Enable token/theme resolution on non-standard props: - -```tsx -// For SVG fill/stroke that should accept theme colors -const StyledSVG = styled(SVG, {}, { - accept: { fill: 'color', stroke: 'color' } as const, -}) - -// Usage: - -// For style objects (like ScrollView's contentContainerStyle) -const MyScrollView = styled(ScrollView, {}, { - accept: { contentContainerStyle: 'style' } as const, -}) - -// Usage: -``` - -**Important**: Use `as const` on the accept object. 
- -## Prop Order Matters - -In `styled()`, prop order determines override priority: - -```tsx -// backgroundColor can be overridden by props -const Overridable = (props) => ( - -) -// width CANNOT be overridden (comes after spread) - -// Variant order matters too: - // scale = 3 (scale comes first) - // scale = 2 (huge overrides) -``` - -## Anti-Patterns - -### Dynamic Styles Break Optimization - -```tsx -// BAD - breaks compiler optimization - - - -// GOOD - use variants -const Box = styled(View, { - variants: { - dark: { true: { backgroundColor: '$gray1' }, false: { backgroundColor: '$gray12' } }, - }, -}) - -``` - -### Inline Functions - +**`accept` prop for non-standard token resolution** (SVG fill/stroke, contentContainerStyle): ```tsx -// BAD - new function every render - handlePress(id)} /> - -// GOOD - stable reference -const handlePressCallback = useCallback(() => handlePress(id), [id]) - +const StyledSVG = styled(SVG, {}, { accept: { fill: 'color', stroke: 'color' } as const }) ``` -### Wrong Import Paths - -```tsx -// These are different packages with different contents: -import { View } from 'tamagui' // Full UI kit -import { View } from '@tamagui/core' // Core only (smaller) -import { Button } from '@tamagui/button' // Individual component - -// Pick one approach and be consistent -``` +**Import consistency** — `tamagui`, `@tamagui/core`, and `@tamagui/button` are different packages; pick one approach per project. -### Mixing RN StyleSheet with Tamagui +**Never mix RN StyleSheet with Tamagui** — StyleSheet values don't resolve tokens. -```tsx -// BAD - StyleSheet values don't resolve tokens -const styles = StyleSheet.create({ box: { padding: 20 } }) - +**Platform branching for Dialog/Sheet** — use `Adapt` instead of `Platform.OS` checks (see @DIALOG_PATTERNS.md). 
-// GOOD - all Tamagui - -``` +## Quick Reference -### Platform.OS Branching for Dialog/Sheet +**Config v4 shorthands**: `bg` backgroundColor, `p` padding, `m` margin, `w` width, `h` height, `br` borderRadius -```tsx -// BAD - manual platform branching -if (Platform.OS === 'web') { - return ... -} -return ... +**Media breakpoints**: `$xs` 660px, `$sm` 800px, `$md` 1020px, `$lg` 1280px, `$xl` 1420px -// GOOD - use Adapt (see @DIALOG_PATTERNS.md) - - ... - - - - -``` +**Animation drivers**: `css` (web, default), `react-native-reanimated` (native, required) -## Fetching Current Documentation +**Token `$` prefix**: use in props (`color="$color"`), omit in theme definitions (`{ color: palette[11] }`) -For latest API details, fetch markdown docs directly: +## Fetching Current Docs ```bash -# Core docs curl -sL "https://tamagui.dev/docs/core/configuration.md" -curl -sL "https://tamagui.dev/docs/core/styled.md" -curl -sL "https://tamagui.dev/docs/core/variants.md" -curl -sL "https://tamagui.dev/docs/core/animations.md" - -# Component docs -curl -sL "https://tamagui.dev/ui/sheet.md" -curl -sL "https://tamagui.dev/ui/dialog.md" -curl -sL "https://tamagui.dev/ui/select.md" - -# Full docs index -curl -sL "https://tamagui.dev/llms.txt" +curl -sL "https://tamagui.dev/llms.txt" # full index ``` - -For HTML pages, use the web-fetch skill with appropriate selectors. 
- -## Quick Reference - -### Config v4 Shorthands (Tailwind-aligned) - -| Shorthand | Property | -|-----------|----------| -| `bg` | backgroundColor | -| `p` | padding | -| `m` | margin | -| `w` | width | -| `h` | height | -| `br` | borderRadius | - -### Media Query Breakpoints - -| Token | Default | Server Default | -|-------|---------|----------------| -| `$xs` | 660px | true | -| `$sm` | 800px | false | -| `$md` | 1020px | false | -| `$lg` | 1280px | false | -| `$xl` | 1420px | false | - -### Animation Drivers - -| Driver | Platform | Use Case | -|--------|----------|----------| -| `css` | Web | Default, best performance | -| `react-native-reanimated` | Native | Required for native animations | - -## Additional Pattern Files - -- @DIALOG_PATTERNS.md - Dialog, Sheet, Adapt, accessibility -- @FORM_PATTERNS.md - Form, Input, Label, validation with zod -- @ANIMATION_PATTERNS.md - Animation drivers, enterStyle/exitStyle -- @OVERLAY_PATTERNS.md - Popover, Tooltip, Select -- @COMPILER_PATTERNS.md - Compiler optimization details -- @DESIGN_SYSTEM.md - Design tokens and theming diff --git a/.claude/skills/testing-best-practices/SKILL.md b/.claude/skills/testing-best-practices/SKILL.md index 6d53c2e..e6b382f 100644 --- a/.claude/skills/testing-best-practices/SKILL.md +++ b/.claude/skills/testing-best-practices/SKILL.md @@ -1,23 +1,8 @@ --- name: testing-best-practices -description: Test layering, execution, and CI guidance across unit, integration, and e2e. Use when designing tests, writing test cases, or planning test strategy for a module. +description: Use when designing tests, writing test cases, or planning test strategy for a module. Covers unit, integration, and e2e layering. 
--- -## When to activate - -Engage when: -- Working with spec files (`*.spec.md`, `SPEC.md`, `spec/*.md`) -- Designing test cases or test strategy for a module -- Writing or reviewing unit, integration, or e2e tests -- After `/specout` completes -- Planning CI test lanes - -## Mutation policy - -- Default: analyze code and produce test strategy, matrix, and implementation plan. -- Do not edit spec files unless the user explicitly requests spec maintenance. -- When this skill conflicts with system/project rules, follow system/project rules. - ## Test layering policy ### Unit tests @@ -27,7 +12,6 @@ Purpose: verify individual functions and invariants in isolation. - **Data-driven**: parameterized tables covering happy path, boundary, error, and edge cases. - **Property-based**: fuzz invariants that must hold across all inputs (e.g., idempotency, sort stability, roundtrip serialization). - Derive cases from the module's public API surface: input types/constraints, output shape, error modes, invariants. -- Cover categories per function: happy path, boundary values, error cases, edge cases, invariants. ### Integration / contract tests @@ -45,7 +29,6 @@ Purpose: verify real user workflows through the full stack. - No mocks; exercise real services, databases, and APIs. - Happy-path workflows only; save edge cases for lower layers. -- Fast: each test should complete within a reasonable timeout. - **State-tolerant**: never assume a clean slate; tolerate and work with prior state. - **Idempotent**: safe to run repeatedly without cleanup between runs. - **Flow-oriented**: validate real data paths end-to-end rather than isolated assertions. @@ -56,8 +39,6 @@ Purpose: verify real user workflows through the full stack. - **No fabricated fixtures.** Derive test data from actual schemas, types, or seed data in the repo. - **No test-only hacks in product code.** No `if (process.env.TEST)` branches, no test-specific exports, no test backdoors. 
- **E2E must not rely on clean slate.** Tests must tolerate pre-existing data, prior test runs, and shared environments. -- **Never weaken assertions to make tests pass.** Fix the underlying issue. -- **Never hard-code values matching test assertions.** Implement general-purpose logic. ## Execution guidance @@ -95,56 +76,16 @@ Before generating test cases: - Confirm scope from the user request and inspected code context; if ambiguous, state assumptions and proceed conservatively. - For each function: input types/constraints, output shape, error modes, invariants. - Probe for state dependencies and ordering constraints between functions. -- Decide granularity from context: unit-level (individual functions) vs integration-level (compositions). ## Output format -Keep outputs actionable and concise. Use markdown, not rigid JSON schemas. - -### Test strategy - -Brief summary of what to test and at which layer: - -```markdown -## Test Strategy - -- **Unit**: [functions/modules], data-driven + property-based for [invariants] -- **Integration**: [API contracts], auth scoping, error envelopes -- **E2E**: [workflows], happy-path flows against real services -``` - -### Test matrix - -Tabular case listing per function or flow: - -```markdown -## Test Matrix - -### `functionName` - -| ID | Category | Name | Input | Expected | -|----|----------|------|-------|----------| -| HP-01 | happy_path | basic uppercase | "hello" | "HELLO" | -| BV-01 | boundary | empty string | "" | "" | -| ERR-01 | error | null input | null | INVALID_ARGUMENT | -| EDGE-01 | edge | unicode combining | "cafe\u0301" | "CAFE\u0301" | -``` - -Case ID scheme: `{CATEGORY}-{NN}` (HP, BV, ERR, EDGE). Append-only; never renumber. - -### Implementation plan +Use markdown. Produce three sections: -Ordered steps to write and run the tests: +**Test Strategy** -- one bullet per layer (unit/integration/e2e) naming the functions/flows and their coverage type. 
-```markdown -## Implementation Plan +**Test Matrix** -- table per function: columns `ID | Category | Name | Input | Expected`. Case ID scheme: `{CATEGORY}-{NN}` (HP, BV, ERR, EDGE). Append-only; never renumber. -1. Add factory for [fixture] using seeded data -2. Write parameterized unit tests for [function] (X cases) -3. Write integration test for [API endpoint] auth + error contract -4. Write e2e flow for [workflow] with preflight checks -5. Run suite: `[command]` -``` +**Implementation Plan** -- ordered steps: fixtures, unit tests, integration tests, e2e flows, run command. ## CI guidance @@ -156,10 +97,7 @@ Ordered steps to write and run the tests: ### Nightly full lane -- Full unit + integration + e2e suite. -- Include property-based tests with higher iteration counts. -- Idempotency verification: run critical setup paths twice, assert no side effects on second run. -- Flake detection: flag tests that pass on retry but failed initially. +Full unit + integration + e2e suite with higher property-based iteration counts. Flag tests that pass on retry but failed initially. ## Workflow diff --git a/.claude/skills/tilt/SKILL.md b/.claude/skills/tilt/SKILL.md index f829bb8..d0c85d2 100644 --- a/.claude/skills/tilt/SKILL.md +++ b/.claude/skills/tilt/SKILL.md @@ -1,6 +1,6 @@ --- name: tilt -description: Queries Tilt resource status, logs, and manages dev environments. Use when checking deployment health, investigating errors, reading logs, or working with Tiltfiles. +description: Use when checking deployment health, investigating errors, reading logs, or working with Tiltfiles. Queries Tilt resource status, logs, and manages dev environments. 
--- # Tilt @@ -62,24 +62,17 @@ tilt down # Stop and clean up ## Running tilt up -**tmux session rules** (mandatory — see `tmux` skill for full patterns): - -- **MUST** check `tmux has-session` before `tmux new-session` — never create duplicate sessions -- **MUST** derive session name from git root — never hardcode -- **MUST** add a window to an existing session — never create a parallel session -- **MUST** use `send-keys` — never pass inline commands to `new-session` +Follow `zmx` skill patterns — check for existing sessions, derive name from git root, use `zmx run` (not attach): ```bash -SESSION=$(basename $(git rev-parse --show-toplevel 2>/dev/null) || basename $PWD) - -if ! tmux has-session -t "$SESSION" 2>/dev/null; then - tmux new-session -d -s "$SESSION" -n tilt - tmux send-keys -t "$SESSION:tilt" 'tilt up' Enter -elif ! tmux list-windows -t "$SESSION" -F '#{window_name}' | grep -q "^tilt$"; then - tmux new-window -t "$SESSION" -n tilt - tmux send-keys -t "$SESSION:tilt" 'tilt up' Enter +PROJECT=$(basename "$(git rev-parse --show-toplevel 2>/dev/null || pwd)") +SESSION="${PROJECT}-tilt" + +if zmx list --short 2>/dev/null | grep -q "^${SESSION}$"; then + echo "Tilt session already exists: $SESSION" else - echo "Tilt window already exists in session: $SESSION" + zmx run "$SESSION" 'tilt up' + echo "Started tilt in zmx session: $SESSION" fi ``` diff --git a/.claude/skills/tiltup/SKILL.md b/.claude/skills/tiltup/SKILL.md index 9cf9919..73a99ee 100644 --- a/.claude/skills/tiltup/SKILL.md +++ b/.claude/skills/tiltup/SKILL.md @@ -1,6 +1,6 @@ --- name: tiltup -description: Start Tilt dev environment in tmux, monitor bootstrap to healthy state, fix Tiltfile bugs without hard-coding or fallbacks. Use when starting tilt, debugging Tiltfile errors, or bootstrapping a dev environment. +description: Use when starting tilt, debugging Tiltfile errors, or bootstrapping a dev environment.
Starts Tilt in zmx, monitors bootstrap to healthy state, fixes Tiltfile bugs without hard-coding or fallbacks. --- # Tilt Up @@ -40,8 +40,8 @@ Restart only for: Tilt version upgrades, port/host config changes, crashes, clus 1. Check if tilt is already running: ```bash - SESSION=$(basename $(git rev-parse --show-toplevel 2>/dev/null) || basename $PWD) - tmux list-windows -t "$SESSION" -F '#{window_name}' 2>/dev/null | grep -q "^tilt$" + PROJECT=$(basename "$(git rev-parse --show-toplevel 2>/dev/null || pwd)") + zmx list --short 2>/dev/null | grep -q "^${PROJECT}-tilt$" ``` If running, check health via `tilt get uiresources -o json` and skip to Step 3. @@ -52,20 +52,18 @@ Restart only for: Tilt version upgrades, port/host config changes, crashes, clus 3. Check for k3d cluster or Docker prerequisites. -### Step 2: Start Tilt in tmux +### Step 2: Start Tilt in zmx -Follow the `tmux` skill patterns: +Follow the `zmx` skill patterns: ```bash -SESSION=$(basename $(git rev-parse --show-toplevel 2>/dev/null) || basename $PWD) - -if ! tmux has-session -t "$SESSION" 2>/dev/null; then - tmux new-session -d -s "$SESSION" -n tilt - tmux send-keys -t "$SESSION:tilt" 'tilt up' Enter -elif !
tmux list-windows -t "$SESSION" -F '#{window_name}' | grep -q "^tilt$"; then - tmux new-window -t "$SESSION" -n tilt - tmux send-keys -t "$SESSION:tilt" 'tilt up' Enter +PROJECT=$(basename "$(git rev-parse --show-toplevel 2>/dev/null || pwd)") +SESSION="${PROJECT}-tilt" + +if zmx list --short 2>/dev/null | grep -q "^${SESSION}$"; then + echo "Tilt session already exists: $SESSION" else - echo "Tilt window already exists in session: $SESSION" + zmx run "$SESSION" 'tilt up' + echo "Started tilt in zmx session: $SESSION" fi ``` @@ -103,7 +101,7 @@ After 3 fix iterations on the same resource without progress: ## Tilt Status: **Resources**: X/Y ok -**Session**: tmux $SESSION:tilt +**Session**: zmx $SESSION ### Errors (if any) - : diff --git a/.claude/skills/tmux/SKILL.md b/.claude/skills/tmux/SKILL.md deleted file mode 100644 index f9e84fc..0000000 --- a/.claude/skills/tmux/SKILL.md +++ /dev/null @@ -1,216 +0,0 @@ ---- -name: tmux -description: Patterns for running long-lived processes in tmux. Use when starting dev servers, watchers, tilt, or any process expected to outlive the conversation. ---- - -# tmux Process Management - -## Session Reuse Rules - -These are **hard requirements**, not suggestions: - -- **MUST** check `tmux has-session` before ever calling `tmux new-session` -- **MUST** derive session name from `git rev-parse --show-toplevel`, never hardcode -- **MUST** add windows to an existing project session, never create a parallel session -- **MUST** use `send-keys` to run commands, never pass inline commands to `new-session` -- **NEVER** create a new session if one already exists for the current project - -One project = one tmux session. Multiple processes = multiple windows within that session. - -## Interactive Shell Requirement - -**Use send-keys pattern for reliable shell initialization.** Creating a session spawns an interactive shell automatically.
Use `send-keys` to run commands within that shell, ensuring PATH, direnv, and other initialization runs properly. - -```bash -# WRONG - inline command bypasses shell init, breaks PATH/direnv -tmux new-session -d -s "$SESSION" -n main 'tilt up' - -# CORRECT - check for session, then use send-keys in interactive shell -if ! tmux has-session -t "$SESSION" 2>/dev/null; then - tmux new-session -d -s "$SESSION" -n main -fi -tmux send-keys -t "$SESSION:main" 'tilt up' Enter -``` - -## Session Naming Convention - -Always derive session name from the project: - -```bash -SESSION=$(basename $(git rev-parse --show-toplevel 2>/dev/null) || basename $PWD) -``` - -For multiple processes in one project, use windows not separate sessions: -- Session: `myapp` -- Windows: `server`, `tests`, `logs` - -## Starting Processes - -### Single Process - -```bash -SESSION=$(basename $(git rev-parse --show-toplevel 2>/dev/null) || basename $PWD) - -if ! tmux has-session -t "$SESSION" 2>/dev/null; then - tmux new-session -d -s "$SESSION" -n main - tmux send-keys -t "$SESSION:main" '' Enter -else - echo "Session $SESSION already exists" -fi -``` - -### Adding a Window to Existing Session - -```bash -SESSION=$(basename $(git rev-parse --show-toplevel 2>/dev/null) || basename $PWD) - -# Add a new window if it doesn't exist -if ! tmux list-windows -t "$SESSION" -F '#{window_name}' | grep -q "^server$"; then - tmux new-window -t "$SESSION" -n server - tmux send-keys -t "$SESSION:server" 'npm run dev' Enter -else - echo "Window 'server' already exists" -fi -``` - -### Multiple Processes (Windows) - -```bash -SESSION=$(basename $(git rev-parse --show-toplevel 2>/dev/null) || basename $PWD) - -# Create session if needed, then add windows -if ! tmux has-session -t "$SESSION" 2>/dev/null; then - tmux new-session -d -s "$SESSION" -n server - tmux send-keys -t "$SESSION:server" 'npm run dev' Enter -fi - -# Add more windows (idempotent) -for win in tests logs; do - if ! 
tmux list-windows -t "$SESSION" -F '#{window_name}' | grep -q "^${win}$"; then - tmux new-window -t "$SESSION" -n "$win" - fi -done -tmux send-keys -t "$SESSION:tests" 'npm run test:watch' Enter -tmux send-keys -t "$SESSION:logs" 'tail -f logs/app.log' Enter -``` - -## Monitoring Output - -```bash -SESSION=$(basename $(git rev-parse --show-toplevel 2>/dev/null) || basename $PWD) - -# Last 50 lines from first window -tmux capture-pane -p -t "$SESSION" -S -50 - -# From specific window -tmux capture-pane -p -t "$SESSION:server" -S -50 - -# Check for errors -tmux capture-pane -p -t "$SESSION" -S -100 | rg -i "error|fail|exception" - -# Check for ready indicators -tmux capture-pane -p -t "$SESSION:server" -S -50 | rg -i "listening|ready|started" -``` - -## Lifecycle Management - -```bash -SESSION=$(basename $(git rev-parse --show-toplevel 2>/dev/null) || basename $PWD) - -# List all sessions (see what exists) -tmux ls - -# List windows in current session -tmux list-windows -t "$SESSION" - -# Kill only this project's session -tmux kill-session -t "$SESSION" - -# Kill specific window -tmux kill-window -t "$SESSION:tests" - -# Send keys to a window (e.g., Ctrl+C to stop) -tmux send-keys -t "$SESSION:server" C-c -``` - -## Isolation Rules - -- **Never** use `tmux kill-server` -- **Never** kill sessions not matching current project -- **Always** derive session name from git root or pwd -- **Always** verify session name before kill operations -- Other Claude Code instances may have their own sessions running - -## When to Use tmux - -| Scenario | Use tmux? 
| -|----------|-----------| -| `tilt up` | Yes, always | -| Dev server (`npm run dev`, `rails s`) | Yes | -| File watcher (`npm run watch`) | Yes | -| Test watcher (`npm run test:watch`) | Yes | -| Database server | Yes | -| One-shot build (`npm run build`) | No | -| Quick command (<10s) | No | -| Need stdout directly in conversation | No | - -## Checking Process Status - -```bash -SESSION=$(basename $(git rev-parse --show-toplevel 2>/dev/null) || basename $PWD) - -# Check session exists -tmux has-session -t "$SESSION" 2>/dev/null && echo "session exists" || echo "no session" - -# List windows and their status -tmux list-windows -t "$SESSION" -F '#{window_name}: #{pane_current_command}' - -# Check if specific window exists -tmux list-windows -t "$SESSION" -F '#{window_name}' | grep -q "^server$" && echo "server window exists" -``` - -## Restarting a Process - -```bash -SESSION=$(basename $(git rev-parse --show-toplevel 2>/dev/null) || basename $PWD) - -# Send Ctrl+C then restart command -tmux send-keys -t "$SESSION:server" C-c -sleep 1 -tmux send-keys -t "$SESSION:server" 'npm run dev' Enter -``` - -## Common Patterns - -### Start dev server if not running - -```bash -SESSION=$(basename $(git rev-parse --show-toplevel 2>/dev/null) || basename $PWD) - -if ! tmux has-session -t "$SESSION" 2>/dev/null; then - tmux new-session -d -s "$SESSION" -n server - tmux send-keys -t "$SESSION:server" 'npm run dev' Enter - echo "Started dev server in tmux session: $SESSION" -elif ! 
tmux list-windows -t "$SESSION" -F '#{window_name}' | grep -q "^server$"; then - tmux new-window -t "$SESSION" -n server - tmux send-keys -t "$SESSION:server" 'npm run dev' Enter - echo "Added server window to session: $SESSION" -else - echo "Server already running in session: $SESSION" -fi -``` - -### Wait for server ready - -```bash -SESSION=$(basename $(git rev-parse --show-toplevel 2>/dev/null) || basename $PWD) - -# Poll for ready message -for i in {1..30}; do - if tmux capture-pane -p -t "$SESSION:server" -S -20 | rg -q "listening|ready"; then - echo "Server ready" - break - fi - sleep 1 -done -``` diff --git a/.claude/skills/typescript-best-practices/SKILL.md b/.claude/skills/typescript-best-practices/SKILL.md index 703db2b..7d1a488 100644 --- a/.claude/skills/typescript-best-practices/SKILL.md +++ b/.claude/skills/typescript-best-practices/SKILL.md @@ -1,24 +1,17 @@ --- name: typescript-best-practices -description: Provides TypeScript patterns for type-first development, making illegal states unrepresentable, exhaustive handling, and runtime validation. Must use when reading or writing TypeScript/JavaScript files. +description: Use when reading or writing TypeScript or JavaScript files (.ts, .tsx, .js, tsconfig.json). --- # TypeScript Best Practices +Follows type-first, functional, and error handling patterns from CLAUDE.md. This skill covers language-specific idioms only. + ## Pair with React Best Practices When working with React components (`.tsx`, `.jsx` files or `@react` imports), always load `react-best-practices` alongside this skill. This skill covers TypeScript fundamentals; React-specific patterns (effects, hooks, refs, component design) are in the dedicated React skill. -## Type-First Development - -Types define the contract before implementation. Follow this workflow: - -1. **Define the data model** - types, interfaces, and schemas first -2. **Define function signatures** - input/output types before logic -3. 
**Implement to satisfy types** - let the compiler guide completeness -4. **Validate at boundaries** - runtime checks where data enters the system - -### Make Illegal States Unrepresentable +## Make Illegal States Unrepresentable Use the type system to prevent invalid states at compile time. @@ -46,10 +39,6 @@ type OrderId = string & { readonly __brand: 'OrderId' }; // Compiler prevents passing OrderId where UserId expected function getUser(id: UserId): Promise { /* ... */ } - -function createUserId(id: string): UserId { - return id as UserId; -} ``` **Const assertions for literal unions:** @@ -63,58 +52,11 @@ function isValidRole(role: string): role is Role { } ``` -**Required vs optional fields - be explicit:** -```ts -// Creation: some fields required -type CreateUser = { - email: string; - name: string; -}; - -// Update: all fields optional -type UpdateUser = Partial; - -// Database row: all fields present -type User = CreateUser & { - id: UserId; - createdAt: Date; -}; -``` - -## Module Structure - -Prefer smaller, focused files: one component, hook, or utility per file. Split when a file handles multiple concerns or exceeds ~200 lines. Colocate tests with implementation (`foo.test.ts` alongside `foo.ts`). Group related files by feature rather than by type. - -## Functional Patterns - -- Prefer `const` over `let`; use `readonly` and `Readonly` for immutable data. -- Use `array.map/filter/reduce` over `for` loops; chain transformations in pipelines. -- Write pure functions for business logic; isolate side effects in dedicated modules. -- Avoid mutating function parameters; return new objects/arrays instead. - -## Instructions - -- Enable `strict` mode; model data with interfaces and types. Strong typing catches bugs at compile time. -- Every code path returns a value or throws; use exhaustive `switch` with `never` checks in default. Unhandled cases become compile errors. 
-- Propagate errors with context; catching requires re-throwing or returning a meaningful result. Hidden failures delay debugging. -- Handle edge cases explicitly: empty arrays, null/undefined inputs, boundary values. Defensive checks prevent runtime surprises. -- Use `await` for async calls; wrap external calls with contextual error messages. Unhandled rejections crash Node processes. -- Add or update focused tests when changing logic; test behavior, not implementation details. - -## Examples - -Explicit failure for unimplemented logic: -```ts -export function buildWidget(widgetType: string): never { - throw new Error(`buildWidget not implemented for type: ${widgetType}`); -} -``` - -Exhaustive switch with never check: +**Exhaustive switch with never check:** ```ts type Status = "active" | "inactive"; -export function processStatus(status: Status): string { +function processStatus(status: Status): string { switch (status) { case "active": return "processing"; @@ -128,42 +70,13 @@ export function processStatus(status: Status): string { } ``` -Wrap external calls with context: -```ts -export async function fetchWidget(id: string): Promise { - const response = await fetch(`/api/widgets/${id}`); - if (!response.ok) { - throw new Error(`fetch widget ${id} failed: ${response.status}`); - } - return response.json(); -} -``` - -Debug logging with namespaced logger: -```ts -import debug from "debug"; - -const log = debug("myapp:widgets"); - -export function createWidget(name: string): Widget { - log("creating widget: %s", name); - const widget = { id: crypto.randomUUID(), name }; - log("created widget: %s", widget.id); - return widget; -} -``` - ## Runtime Validation with Zod - Define schemas as single source of truth; infer TypeScript types with `z.infer<>`. Avoid duplicating types and schemas. - Use `safeParse` for user input where failure is expected; use `parse` at trust boundaries where invalid data is a bug. 
- Compose schemas with `.extend()`, `.pick()`, `.omit()`, `.merge()` for DRY definitions. - Add `.transform()` for data normalization at parse time (trim strings, parse dates). -- Include descriptive error messages; use `.refine()` for custom validation logic. -### Examples - -Schema as source of truth with type inference: ```ts import { z } from "zod"; @@ -175,74 +88,22 @@ const UserSchema = z.object({ }); type User = z.infer; -``` - -Return parse results to callers (never swallow errors): -```ts -import { z, SafeParseReturnType } from "zod"; - -export function parseUserInput(raw: unknown): SafeParseReturnType { - return UserSchema.safeParse(raw); -} - -// Caller handles both success and error: -const result = parseUserInput(formData); -if (!result.success) { - setErrors(result.error.flatten().fieldErrors); - return; -} -await submitUser(result.data); -``` -Strict parsing at trust boundaries: -```ts +// Strict parsing at trust boundaries — throws if API contract violated export async function fetchUser(id: string): Promise { const response = await fetch(`/api/users/${id}`); if (!response.ok) { throw new Error(`fetch user ${id} failed: ${response.status}`); } - const data = await response.json(); - return UserSchema.parse(data); // throws if API contract violated + return UserSchema.parse(await response.json()); } -``` - -Schema composition: -```ts -const CreateUserSchema = UserSchema.omit({ id: true, createdAt: true }); -const UpdateUserSchema = CreateUserSchema.partial(); -const UserWithPostsSchema = UserSchema.extend({ - posts: z.array(PostSchema), -}); -``` -## Configuration - -- Load config from environment variables at startup; validate with Zod before use. Invalid config should crash immediately. -- Define a typed config object as single source of truth; avoid accessing `process.env` throughout the codebase. -- Use sensible defaults for development; require explicit values for production secrets. 
- -### Examples - -Typed config with Zod validation: -```ts -import { z } from "zod"; - -const ConfigSchema = z.object({ - PORT: z.coerce.number().default(3000), - DATABASE_URL: z.string().url(), - API_KEY: z.string().min(1), - NODE_ENV: z.enum(["development", "production", "test"]).default("development"), -}); - -export const config = ConfigSchema.parse(process.env); -``` - -Access config values (not process.env directly): -```ts -import { config } from "./config"; - -const server = app.listen(config.PORT); -const db = connect(config.DATABASE_URL); +// Caller handles both success and error from user input +const result = UserSchema.safeParse(formData); +if (!result.success) { + setErrors(result.error.flatten().fieldErrors); + return; +} ``` ## Optional: type-fest @@ -252,19 +113,12 @@ For advanced type utilities beyond TypeScript builtins, consider [type-fest](htt - `Opaque` - cleaner branded types than manual `& { __brand }` pattern - `PartialDeep` - recursive partial for nested objects - `ReadonlyDeep` - recursive readonly for immutable data -- `LiteralUnion` - literals with autocomplete + string fallback - `SetRequired` / `SetOptional` - targeted field modifications - `Simplify` - flatten complex intersection types in IDE tooltips ```ts -import type { Opaque, PartialDeep, SetRequired } from 'type-fest'; +import type { Opaque, PartialDeep } from 'type-fest'; -// Branded type (cleaner than manual approach) type UserId = Opaque; - -// Deep partial for patch operations type UserPatch = PartialDeep; - -// Make specific fields required -type UserWithEmail = SetRequired, 'email'>; ``` diff --git a/.claude/skills/zig-best-practices/SKILL.md b/.claude/skills/zig-best-practices/SKILL.md index a222f43..e747d77 100644 --- a/.claude/skills/zig-best-practices/SKILL.md +++ b/.claude/skills/zig-best-practices/SKILL.md @@ -1,497 +1,92 @@ --- name: zig-best-practices -description: Provides Zig patterns for type-first development with tagged unions, explicit error sets, comptime 
validation, and memory management. Must use when reading or writing Zig files. +description: Use when reading or writing Zig files (.zig, build.zig, build.zig.zon). --- # Zig Best Practices -## Type-First Development +Follows type-first, functional, and error handling patterns from CLAUDE.md. This skill covers Zig-specific idioms only. -Types define the contract before implementation. Follow this workflow: +## Type System Patterns -1. **Define data structures** - structs, unions, and error sets first -2. **Define function signatures** - parameters, return types, and error unions -3. **Implement to satisfy types** - let the compiler guide completeness -4. **Validate at comptime** - catch invalid configurations during compilation - -### Make Illegal States Unrepresentable - -Use Zig's type system to prevent invalid states at compile time. - -**Tagged unions for mutually exclusive states:** +**Tagged unions for mutually exclusive states** — prevents invalid combinations that a struct with multiple nullable fields would allow: ```zig -// Good: only valid combinations possible const RequestState = union(enum) { idle, loading, success: []const u8, failure: anyerror, }; - -fn handleState(state: RequestState) void { - switch (state) { - .idle => {}, - .loading => showSpinner(), - .success => |data| render(data), - .failure => |err| showError(err), - } -} - -// Bad: allows invalid combinations -const RequestState = struct { - loading: bool, - data: ?[]const u8, - err: ?anyerror, -}; ``` -**Explicit error sets for failure modes:** +**Explicit error sets** — documents exactly what can fail; `anyerror` hides failure modes: ```zig -// Good: documents exactly what can fail -const ParseError = error{ - InvalidSyntax, - UnexpectedToken, - EndOfInput, -}; - -fn parse(input: []const u8) ParseError!Ast { - // implementation -} - -// Bad: anyerror hides failure modes -fn parse(input: []const u8) anyerror!Ast { - // implementation -} +const ParseError = error{ InvalidSyntax, 
UnexpectedToken, EndOfInput }; +fn parse(input: []const u8) ParseError!Ast { ... } ``` -**Distinct types for domain concepts:** +**Distinct types for domain IDs** — compiler prevents mixing up different ID types: ```zig -// Prevent mixing up IDs of different types const UserId = enum(u64) { _ }; const OrderId = enum(u64) { _ }; - -fn getUser(id: UserId) !User { - // Compiler prevents passing OrderId here -} - -fn createUserId(raw: u64) UserId { - return @enumFromInt(raw); -} ``` -**Comptime validation for invariants:** +**Comptime validation** — catch invalid configurations at compile time, not runtime: ```zig fn Buffer(comptime size: usize) type { - if (size == 0) { - @compileError("buffer size must be greater than 0"); - } - if (size > 1024 * 1024) { - @compileError("buffer size exceeds 1MB limit"); - } - return struct { - data: [size]u8 = undefined, - len: usize = 0, - }; -} -``` - -**Non-exhaustive enums for extensibility:** -```zig -// External enum that may gain variants -const Status = enum(u8) { - active = 1, - inactive = 2, - pending = 3, - _, -}; - -fn processStatus(status: Status) !void { - switch (status) { - .active => {}, - .inactive => {}, - .pending => {}, - _ => return error.UnknownStatus, - } + if (size == 0) @compileError("buffer size must be greater than 0"); + return struct { data: [size]u8 = undefined, len: usize = 0 }; } ``` -## Module Structure - -Larger cohesive files are idiomatic in Zig. Keep related code together: tests alongside implementation, comptime generics at file scope, public/private controlled by `pub`. Split only when a file handles genuinely separate concerns. The standard library demonstrates this pattern with files like `std/mem.zig` containing 2000+ lines of cohesive memory operations. - -## Instructions - -- Return errors with context using error unions (`!T`); every function returns a value or an error. Explicit error sets document failure modes. 
-- Use `errdefer` for cleanup on error paths; use `defer` for unconditional cleanup. This prevents resource leaks without try-finally boilerplate. -- Handle all branches in `switch` statements; include an `else` clause that returns an error or uses `unreachable` for truly impossible cases. -- Pass allocators explicitly to functions requiring dynamic memory; prefer `std.testing.allocator` in tests for leak detection. -- Prefer `const` over `var`; prefer slices over raw pointers for bounds safety. Immutability signals intent and enables optimizations. -- Avoid `anytype`; prefer explicit `comptime T: type` parameters. Explicit types document intent and produce clearer error messages. -- Use `std.log.scoped` for namespaced logging; define a module-level `log` constant for consistent scope across the file. -- Add or update tests for new logic; use `std.testing.allocator` to catch memory leaks automatically. - -## Examples - -Explicit failure for unimplemented logic: -```zig -fn buildWidget(widget_type: []const u8) !Widget { - return error.NotImplemented; -} -``` +## Memory Management -Propagate errors with try: -```zig -fn readConfig(path: []const u8) !Config { - const file = try std.fs.cwd().openFile(path, .{}); - defer file.close(); - const contents = try file.readToEndAlloc(allocator, max_size); - return parseConfig(contents); -} -``` +- Pass allocators explicitly to every function that allocates; no global allocator state. +- Place `defer resource.deinit()` immediately after acquisition — keeps cleanup co-located with creation. +- Use `errdefer` for cleanup on error paths; `defer` for unconditional cleanup. +- Use arena allocators for batch/temporary work; they free everything at once. +- Use `std.testing.allocator` in tests — reports leaks with stack traces. 
-Resource cleanup with errdefer: ```zig fn createResource(allocator: std.mem.Allocator) !*Resource { const resource = try allocator.create(Resource); - errdefer allocator.destroy(resource); - + errdefer allocator.destroy(resource); // runs only on error resource.* = try initializeResource(); return resource; } ``` -Exhaustive switch with explicit default: -```zig -fn processStatus(status: Status) ![]const u8 { - return switch (status) { - .active => "processing", - .inactive => "skipped", - _ => error.UnhandledStatus, - }; -} -``` - -Testing with memory leak detection: -```zig -const std = @import("std"); +## Key Conventions -test "widget creation" { - const allocator = std.testing.allocator; - var list: std.ArrayListUnmanaged(u32) = .empty; - defer list.deinit(allocator); - - try list.append(allocator, 42); - try std.testing.expectEqual(1, list.items.len); -} -``` - -## Memory Management - -- Pass allocators explicitly; never use global state for allocation. Functions declare their allocation needs in parameters. -- Use `defer` immediately after acquiring a resource. Place cleanup logic next to acquisition for clarity. -- Prefer arena allocators for temporary allocations; they free everything at once when the arena is destroyed. -- Use `std.testing.allocator` in tests; it reports leaks with stack traces showing allocation origins. 
- -### Examples - -Allocator as explicit parameter: -```zig -fn processData(allocator: std.mem.Allocator, input: []const u8) ![]u8 { - const result = try allocator.alloc(u8, input.len * 2); - errdefer allocator.free(result); - - // process input into result - return result; -} -``` - -Arena allocator for batch operations: -```zig -fn processBatch(items: []const Item) !void { - var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); - defer arena.deinit(); - const allocator = arena.allocator(); - - for (items) |item| { - const processed = try processItem(allocator, item); - try outputResult(processed); - } - // All allocations freed when arena deinits -} -``` - -## Logging - -- Use `std.log.scoped` to create namespaced loggers; each module should define its own scoped logger for filtering. -- Define a module-level `const log` at the top of the file; use it consistently throughout the module. -- Use appropriate log levels: `err` for failures, `warn` for suspicious conditions, `info` for state changes, `debug` for tracing. - -### Examples - -Scoped logger for a module: -```zig -const std = @import("std"); -const log = std.log.scoped(.widgets); - -pub fn createWidget(name: []const u8) !Widget { - log.debug("creating widget: {s}", .{name}); - const widget = try allocateWidget(name); - log.debug("created widget id={d}", .{widget.id}); - return widget; -} - -pub fn deleteWidget(id: u32) void { - log.info("deleting widget id={d}", .{id}); - // cleanup -} -``` - -Multiple scopes in a codebase: -```zig -// In src/db.zig -const log = std.log.scoped(.db); - -// In src/http.zig -const log = std.log.scoped(.http); - -// In src/auth.zig -const log = std.log.scoped(.auth); -``` - -## Comptime Patterns - -- Use `comptime` parameters for generic functions; type information is available at compile time with zero runtime cost. -- Prefer compile-time validation over runtime checks when possible. Catch errors during compilation rather than in production. 
-- Use `@compileError` for invalid configurations that should fail the build. - -### Examples - -Generic function with comptime type: -```zig -fn max(comptime T: type, a: T, b: T) T { - return if (a > b) a else b; -} -``` - -Compile-time validation: -```zig -fn createBuffer(comptime size: usize) [size]u8 { - if (size == 0) { - @compileError("buffer size must be greater than 0"); - } - return [_]u8{0} ** size; -} -``` - -## Avoiding anytype - -- Prefer `comptime T: type` over `anytype`; explicit type parameters document expected constraints and produce clearer errors. -- Use `anytype` only when the function genuinely accepts any type (like `std.debug.print`) or for callbacks/closures. -- When using `anytype`, add a doc comment describing the expected interface or constraints. - -### Examples - -Prefer explicit comptime type (good): -```zig -fn sum(comptime T: type, items: []const T) T { - var total: T = 0; - for (items) |item| { - total += item; - } - return total; -} -``` - -Avoid anytype when type is known (bad): -```zig -// Unclear what types are valid; error messages will be confusing -fn sum(items: anytype) @TypeOf(items[0]) { - // ... -} -``` - -Acceptable anytype for callbacks: -```zig -/// Calls `callback` for each item. Callback must accept (T) and return void. -fn forEach(comptime T: type, items: []const T, callback: anytype) void { - for (items) |item| { - callback(item); - } -} -``` - -Using @TypeOf when anytype is necessary: -```zig -fn debugPrint(value: anytype) void { - const T = @TypeOf(value); - if (@typeInfo(T) == .Pointer) { - std.debug.print("ptr: {*}\n", .{value}); - } else { - std.debug.print("val: {}\n", .{value}); - } -} -``` - -## Error Handling Patterns - -- Define specific error sets for functions; avoid `anyerror` when possible. Specific errors document failure modes. -- Use `catch` with a block for error recovery or logging; use `catch unreachable` only when errors are truly impossible. 
-- Merge error sets with `||` when combining operations that can fail in different ways. - -### Examples - -Specific error set: -```zig -const ConfigError = error{ - FileNotFound, - ParseError, - InvalidFormat, -}; - -fn loadConfig(path: []const u8) ConfigError!Config { - // implementation -} -``` - -Error handling with catch block: -```zig -const value = operation() catch |err| { - std.log.err("operation failed: {}", .{err}); - return error.OperationFailed; -}; -``` - -## Configuration - -- Load config from environment variables at startup; validate required values before use. Missing config should cause a clean exit with a descriptive message. -- Define a Config struct as single source of truth; avoid `std.posix.getenv` scattered throughout code. -- Use sensible defaults for development; require explicit values for production secrets. - -### Examples - -Typed config struct: -```zig -const std = @import("std"); - -pub const Config = struct { - port: u16, - database_url: []const u8, - api_key: []const u8, - env: []const u8, -}; - -pub fn loadConfig() !Config { - const db_url = std.posix.getenv("DATABASE_URL") orelse - return error.MissingDatabaseUrl; - const api_key = std.posix.getenv("API_KEY") orelse - return error.MissingApiKey; - const port_str = std.posix.getenv("PORT") orelse "3000"; - const port = std.fmt.parseInt(u16, port_str, 10) catch - return error.InvalidPort; - - return .{ - .port = port, - .database_url = db_url, - .api_key = api_key, - .env = std.posix.getenv("ENV") orelse "development", - }; -} -``` - -## Optionals - -- Use `orelse` to provide default values for optionals; use `.?` only when null is a program error. -- Prefer `if (optional) |value|` pattern for safe unwrapping with access to the value. 
- -### Examples - -Safe optional handling: -```zig -fn findWidget(id: u32) ?*Widget { - // lookup implementation -} - -fn processWidget(id: u32) !void { - const widget = findWidget(id) orelse return error.WidgetNotFound; - try widget.process(); -} -``` - -Optional with if unwrapping: -```zig -if (maybeValue) |value| { - try processValue(value); -} else { - std.log.warn("no value present", .{}); -} -``` +- Prefer `const` over `var`; prefer slices over raw pointers. +- Prefer `comptime T: type` over `anytype`; explicit types produce clearer errors. Use `anytype` only for genuinely polymorphic cases (callbacks, `std.debug.print`-style). +- Exhaustive `switch`: include an `else` returning an error or `unreachable` for truly impossible cases. +- Use `std.log.scoped(.module_name)` for namespaced logging; define a module-level `const log` constant. +- Larger cohesive files are idiomatic — tests alongside implementation, comptime generics at file scope. ## Advanced Topics -Reference these guides for specialized patterns: - -- **Building custom containers** (queues, stacks, trees): See [GENERICS.md](GENERICS.md) -- **Interfacing with C libraries** (raylib, SDL, curl, system APIs): See [C-INTEROP.md](C-INTEROP.md) +- **Generic containers** (queues, stacks, trees): See [GENERICS.md](GENERICS.md) +- **C library interop** (raylib, SDL, curl): See [C-INTEROP.md](C-INTEROP.md) - **Debugging memory leaks** (GPA, stack traces): See [DEBUGGING.md](DEBUGGING.md) ## Tooling -### zigdoc - Documentation Lookup - -CLI tool for browsing Zig std library and project dependency docs. 
- -**Install:** -```bash -git clone https://github.com/rockorager/zigdoc -cd zigdoc -zig build install -Doptimize=ReleaseFast --prefix $HOME/.local -``` - -**Usage:** +**zigdoc** — browse std library and dependency docs: ```bash -zigdoc std.ArrayList # std lib symbol -zigdoc std.mem.Allocator # nested symbol -zigdoc vaxis.Window # project dependency (if in build.zig) +zigdoc std.mem.Allocator # std lib symbol +zigdoc vaxis.Window # project dependency zigdoc @init # create AGENTS.md with API patterns ``` -### ziglint - Static Analysis - -Linter for Zig source code enforcing coding standards. - -**Install:** +**ziglint** — static analysis with `.ziglint.zon` config: ```bash -git clone https://github.com/rockorager/ziglint -cd ziglint -zig build install -Doptimize=ReleaseFast --prefix $HOME/.local -``` - -**Usage:** -```bash -ziglint # lint current directory (uses .ziglint.zon if present) -ziglint src build.zig # lint specific paths +ziglint # lint current directory ziglint --ignore Z001 # suppress specific rule ``` -**Configuration (`.ziglint.zon`):** -```zig -.{ - .paths = .{ "src", "build.zig" }, - .rules = .{ - .Z001 = .{ .enabled = false }, - .Z024 = .{ .max_length = 80 }, - }, -} -``` - -**Inline suppression:** -```zig -fn MyBadName() void {} // ziglint-ignore: Z001 -``` - ## References - Language Reference: https://ziglang.org/documentation/0.15.2/ - Standard Library: https://ziglang.org/documentation/0.15.2/std/ -- Code Samples: https://ziglang.org/learn/samples/ - Zig Guide: https://zig.guide/ diff --git a/.claude/skills/zmx/SKILL.md b/.claude/skills/zmx/SKILL.md new file mode 100644 index 0000000..d98b436 --- /dev/null +++ b/.claude/skills/zmx/SKILL.md @@ -0,0 +1,117 @@ +--- +name: zmx +description: Use when starting dev servers, watchers, tilt, or any process expected to outlive the conversation. Provides zmx session management patterns for long-lived processes. 
+--- + +# zmx Process Management + +## Session Rules + +- Check `zmx list --short` before creating sessions — duplicates cause port conflicts and confusing output +- Derive session name from `git rev-parse --show-toplevel` — hardcoded names collide when multiple agent instances run concurrently +- Use `zmx run` to send commands without attaching — `zmx attach` blocks the agent's shell and makes it unresponsive +- Use separate sessions with a common project prefix for multiple processes + +One project = one session prefix. Multiple processes = multiple sessions sharing the prefix. + +## Session Naming + +```bash +PROJECT=$(basename "$(git rev-parse --show-toplevel 2>/dev/null || pwd)") +``` + +All subsequent examples assume `PROJECT` is set. Session names follow `${PROJECT}-<name>`: +- `myapp-server`, `myapp-tests`, `myapp-tilt` + +## Starting Processes + +```bash +SESSION="${PROJECT}-server" + +# Idempotent: skip if already running +if ! 
zmx list --short 2>/dev/null | grep -q "^${SESSION}$"; then + zmx run "$SESSION" "$cmd" + fi +done +``` + +## Sending Commands + +```bash +# Run a command in a session (creates session if needed) +zmx run "${PROJECT}-main" 'cat README.md' + +# Pipe via stdin +echo "ls -lah" | zmx r "${PROJECT}-main" +``` + +## Monitoring Output + +```bash +zmx history "${PROJECT}-server" # full scrollback +zmx history "${PROJECT}-server" | tail -50 # last 50 lines +zmx history "${PROJECT}-server" | rg -i "error|fail" # check for errors +zmx history "${PROJECT}-server" | rg -i "listening|ready" # check for ready +``` + +## Waiting for Completion + +```bash +zmx wait "${PROJECT}-tests" # block until done +zmx wait "${PROJECT}-build" "${PROJECT}-lint" # wait for multiple +``` + +## Lifecycle + +```bash +zmx list # all sessions +zmx list --short # names only +zmx kill "${PROJECT}-server" # kill one session + +# Kill all project sessions +zmx list --short 2>/dev/null | grep "^${PROJECT}-" | while read -r s; do + zmx kill "$s" +done +``` + +## Isolation + +- Only kill sessions matching the current project prefix — other agent instances may have their own sessions running +- Always verify the session name before kill operations + +## When to Use zmx + +| Scenario | Use zmx? 
| +|----------|----------| +| `tilt up` | Yes, always | +| Dev server (`npm run dev`, `rails s`) | Yes | +| File watcher (`npm run watch`) | Yes | +| Test watcher (`npm run test:watch`) | Yes | +| Database server | Yes | +| One-shot build (`npm run build`) | No | +| Quick command (<10s) | No | +| Need stdout directly in conversation | No | + +## Polling for Readiness + +```bash +for i in {1..30}; do + if zmx history "${PROJECT}-server" 2>/dev/null | tail -20 | rg -q "listening|ready"; then + echo "Server ready" + break + fi + sleep 1 +done +``` diff --git a/.github/workflows/codex-plugin-scanner.yml b/.github/workflows/codex-plugin-scanner.yml new file mode 100644 index 0000000..0618c08 --- /dev/null +++ b/.github/workflows/codex-plugin-scanner.yml @@ -0,0 +1,24 @@ +name: Codex Plugin Quality Gate + +on: + push: + branches: [main] + pull_request: + branches: [main] + +concurrency: + group: codex-plugin-scanner-${{ github.ref }} + cancel-in-progress: true + +jobs: + scan: + runs-on: ubuntu-latest + timeout-minutes: 10 + permissions: + contents: read + steps: + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + - name: Codex plugin scanner + uses: hashgraph-online/hol-codex-plugin-scanner-action@b45d6b583afe05819b24edc8e6418c9ad2e1f1d0 # v1 + with: + plugin_dir: "." diff --git a/.gitignore b/.gitignore index 09af451..902e5c8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,5 @@ # Ralph loop state (managed by hook, not tracked in VCS) -.claude/ralph-loop.local.md +.rl/ # Python bytecode __pycache__/ diff --git a/CLAUDE.local.md b/CLAUDE.local.md index 4775860..f7a76b9 100644 --- a/CLAUDE.local.md +++ b/CLAUDE.local.md @@ -68,13 +68,25 @@ claude plugin update ## Publishing Plugin Updates -Keep plugin versions in sync in both locations: +**Any change to plugin source files requires a version bump.** Lefthook pre-commit and pre-push hooks enforce this automatically via `claude-code/scripts/check-plugin-versions.sh`. -1. 
`claude-code/plugins//.claude-plugin/plugin.json` -2. `claude-code/.claude-plugin/marketplace.json` +### Version bump checklist -Then refresh marketplace metadata: +1. Bump `version` in `claude-code/plugins//.claude-plugin/plugin.json` +2. Bump `version` for the same plugin in `claude-code/.claude-plugin/marketplace.json` +3. Both versions must match — the hook fails on mismatch +4. Stage `plugin.json` alongside your source changes — the hook warns if source files changed without it + +### After committing + +Refresh the local marketplace so the runtime picks up the new version: ```bash claude plugin marketplace update 0xbigboss-plugins ``` + +### Manual check + +```bash +claude-code/scripts/check-plugin-versions.sh +``` diff --git a/CLAUDE.md b/CLAUDE.md index 03baf5a..a592a19 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -8,54 +8,42 @@ Applies to agents. Follow these directives as system-level behavior. ## Core principles - Explore relevant code before proposing changes; understand context first. -- Work idiomatically and safely; align with project conventions and architecture (contributions integrate seamlessly). -- Keep changes minimal and focused; implement only what is requested or clearly necessary (avoid unrequested features, refactoring, or flexibility). -- Treat `## Agentic delivery flow (canonical)` as the process of record; if deviating, record a waiver with rationale. -- Fail fast with visible evidence; validate understanding with minimal repros/tests (quick feedback prevents wasted effort). -- Use available tools/documentation before coding; verify assumptions (evidence-based development catches errors early). +- Work idiomatically and safely; align with project conventions and architecture. +- Keep changes minimal and focused; implement only what is requested or clearly necessary. +- Fail fast with visible evidence; validate understanding with minimal repros/tests. +- Use available tools/documentation before coding; verify assumptions. 
- Verify changes with project tooling (tests, linters, builds) before claiming done. -- Document project context inline when needed; complete implementations or fail explicitly with descriptive errors (partial work masks bugs). -- Security: require explicit authorization before accessing secrets/keychains. +- Complete implementations or fail explicitly with descriptive errors; partial work masks bugs. - Extract configuration immediately; magic numbers, URLs, ports, timeouts, and feature flags belong in config, not code. -## Agentic delivery flow (canonical) -- Flow: `SPEC -> PLAN -> TDD -> DEV -> E2E -> REVIEW -> CI -> MERGE`. -- Command discovery order (`DEV`/`E2E`): repo task runner/scripts -> repo docs -> project defaults (`tilt up`, `silo up`) -> ask user. -- High-risk changes (approval required in `PLAN`): schema/data migrations, auth/security boundaries, public API/contract changes, infra/deploy/runtime config. -- Low-risk skip path: docs/comments/non-runtime changes may use `SPEC -> PLAN -> REVIEW -> CI -> MERGE`. -- Traceability rule: every change maps `REQ-*` -> tests -> commit/PR. -- Handoff contract (`REVIEW`, required): assumptions, changed files, commands run, results, unresolved risks. -- Phase gates: - - `SPEC` gate: IDs, invariants, non-goals, acceptance criteria present; risk tags required when high-risk items exist. Load `spec-best-practices` skill. File must be named `SPEC.md`, colocated with the code it describes. - - `PLAN` gate: task graph with files/types/tests and explicit risk classification. - - `TDD` gate: failing tests first; required test layers added (unit/integration/contract/property/regression as applicable). - - `DEV` gate: local environment boots; deterministic health checks pass (readiness, migrations, seed data, key APIs, timeout budgets). - - `E2E` gate: happy path and failure modes pass (timeouts, retries, auth edge cases, partial outages). 
- - `CI` gate: bounded auto-repair retries; flake policy enforced (retry cap + quarantine rule); fail hard on policy/security violations. - - `MERGE` gate: all required gates pass, or waiver recorded with rationale. +## Agentic delivery flow + +Agent owns `SPEC → PLAN → TDD → DEV → E2E`. Stop here. Review, CI, and merge are human decisions. + +- Command discovery order: repo task runner/scripts → repo docs → project defaults (`tilt up`, `silo up`) → ask user. +- High-risk changes (approval required in PLAN): schema/data migrations, auth/security boundaries, public API/contract changes, infra/deploy/runtime config. +- Low-risk skip path: docs/comments/non-runtime changes may use `SPEC → PLAN → DEV`. +- Traceability: every change maps REQ-* → tests → commit. +- If deviating from this flow, record a waiver with rationale. +- Gates: + - SPEC: IDs, invariants, non-goals, acceptance criteria. Risk tags when high-risk items exist. Load `spec-best-practices`. File named `SPEC.md`, colocated. + - PLAN: task graph with files/types/tests and risk classification. + - TDD: failing tests first. + - DEV: local environment boots; health checks pass. + - E2E: happy path and failure modes pass against live dev environment. ## Secret handling Treat secret safety as a hard requirement. -- Assume all chat content, tool inputs, and tool outputs are persisted in internal history; do not place secret values in them. +- Assume all chat content, tool inputs, and tool outputs are persisted; do not place secret values in them. - Never ask for or accept secrets in plain text via chat. - Never echo, print, or log secret values to stdout/stderr. - Never pass secrets as command arguments (`--token ...`) or inline env assignments (`TOKEN=... cmd`). -- Never write secrets to disk (`.env`, temp files, scripts, fixtures, shell profiles) unless explicitly authorized for an approved secure store. 
-- Use secret references and secure one-way piping from a secret manager directly into commands that read from stdin. -- Prefer commands that support `--*-stdin`; if a tool only accepts argv/env/file plaintext, stop and ask for an approved secure alternative. -- Redact suspected secrets immediately if they appear in output and notify the user. - -Preferred patterns: -- `op read | ` -- ` | docker login --password-stdin` - -Forbidden patterns: -- ` --token "$SECRET"` -- `export SECRET=...` -- `echo "secret" > .env` -- Any command that reveals a secret in chat, logs, or command output +- Never write secrets to disk unless explicitly authorized for an approved secure store. +- Pipe from secret manager to stdin: `op read | `. +- If a tool only accepts argv/env/file plaintext, stop and ask for an approved alternative. +- Redact suspected secrets immediately if they appear in output. ## Type-first development - Define types, interfaces, and data models before implementing logic. @@ -64,64 +52,29 @@ Forbidden patterns: - Schema changes drive implementation; if the types are right, the code follows. ## Functional style -- Prefer immutability: `const`, `frozen`, `readonly` types; mutate only when necessary for performance. -- Write pure functions; isolate side effects at system boundaries (I/O, network, state updates). -- Use `map`/`filter`/`reduce` and comprehensions over imperative loops where readable. -- Compose small functions over large stateful procedures; prefer pipelines over in-place mutation. -- Avoid shared mutable state; pass data explicitly rather than relying on side effects. +- Prefer immutability and pure functions; isolate side effects at system boundaries. +- Compose small functions; prefer pipelines over in-place mutation. ## Skills -Load all relevant best-practices skills immediately as your first action when working with supported languages or tools. Do not wait for the user to request skills. 
When multiple contexts apply, load multiple skills in parallel. +Load relevant best-practices skills immediately when working with supported languages or tools. Load multiple when contexts overlap (e.g., typescript + react for `.tsx` files). Do not wait for the user to request skills. | Context | Skill | |---------|-------| -| Python: `.py`, `pyproject.toml`, `requirements.txt` | python-best-practices | -| TypeScript: `.ts`, `.tsx`, `tsconfig.json` | typescript-best-practices | -| Electrobun: `electrobun.config.ts`, `electrobun/bun`, `electrobun/view`, Electrobun CLI commands | electrobun-best-practices | -| React: `.tsx`, `.jsx`, `@react` imports | react-best-practices | -| Go: `.go`, `go.mod` | go-best-practices | -| Zig: `.zig`, `build.zig`, `build.zig.zon` | zig-best-practices | -| Playwright: `.spec.ts`, `.test.ts` with `@playwright/test` | playwright-best-practices | -| Tilt: `Tiltfile`, tilt commands | tilt | -| Tamagui: `tamagui.config.ts`, `@tamagui` imports | tamagui-best-practices | -| Atlas: `atlas.hcl`, `.hcl` schema files, Atlas CLI commands | atlas-best-practices | -| Spec authoring: creating, reviewing, or updating `SPEC.md` files | spec-best-practices | -| Spec-derived test design: `*.spec.md`, `spec/*.md`, `SPEC.md` when designing tests | testing-best-practices | -| Spec alignment: spec file + implementation in context | specalign | -| Git: commits, branches, PRs, history rewriting | git-best-practices | - -### Multi-skill combinations - -Load all applicable skills together when contexts overlap: -- **TypeScript + React**: All React components (`.tsx`, `.jsx`) - always load both skills together -- **TypeScript + Electrobun**: Electrobun desktop apps (`electrobun.config.ts`, `electrobun/bun`, `electrobun/view`) - always load both skills together -- **TypeScript + React + Playwright**: React component E2E tests with `@playwright/test` -- **TypeScript + React + Tamagui**: React Native/web components with `@tamagui` imports -- **TypeScript + 
Playwright**: Non-React test files with `@playwright/test` imports -- **Python + Tilt**: Python services in a Tilt-managed dev environment -- **Go + Tilt**: Go services in a Tilt-managed dev environment -- **testing-best-practices + [language]**: Load testing skill alongside the project's language skill when designing tests from specs -- **tilt + tmux**: Always load both when running `tilt up` or any long-lived process in tmux -- **tilt + tiltup**: Always load both when starting tilt or fixing Tiltfile errors -- **spec-best-practices + specalign**: Load both when reviewing or updating existing specs against implementation -- **spec-best-practices + testing-best-practices**: Load both when deriving test strategy from a spec -- **spec-best-practices + /specout**: Load skill before running specout interview -- **specalign + testing-best-practices**: Load both when a spec file and its implementation are in context -- **e2e + playwright-best-practices**: Load both when running or fixing Playwright e2e tests -- **e2e + specalign**: Load both when e2e failures may indicate spec drift -- **git-best-practices + /commit**: Load skill when using the commit command or making any commits -- **git-best-practices + /rewrite-history**: Load skill when rewriting branch history before PR - -### When to invoke skills - -Invoke skills proactively: -- Reading code: understand expected patterns before analyzing -- Writing or modifying code: apply correct conventions during implementation -- Reviewing or debugging: identify violations against established patterns -- Exploring unfamiliar code: load the language skill to interpret idioms correctly - -Skills provide error handling conventions, code quality patterns, type-first development guidance, and review standards specific to each language or tool. 
+| Python (`.py`, `pyproject.toml`) | python-best-practices | +| TypeScript (`.ts`, `.tsx`, `tsconfig.json`) | typescript-best-practices | +| Electrobun (`electrobun.config.ts`, `electrobun/*`) | electrobun-best-practices | +| React (`.tsx`, `.jsx`, `@react` imports) | react-best-practices | +| Go (`.go`, `go.mod`) | go-best-practices | +| Zig (`.zig`, `build.zig`) | zig-best-practices | +| Playwright (`.spec.ts`, `.test.ts` with `@playwright/test`) | playwright-best-practices | +| Tilt (`Tiltfile`, tilt commands) | tilt | +| Tamagui (`tamagui.config.ts`, `@tamagui` imports) | tamagui-best-practices | +| Atlas (`atlas.hcl`, `.hcl` schema, Atlas CLI) | atlas-best-practices | +| SPEC.md authoring | spec-best-practices | +| Test design from specs | testing-best-practices | +| Spec vs implementation drift | specalign | +| Git operations | git-best-practices | ## Communication style - Concise teammate tone; plain text without emojis; brevity over perfect grammar. @@ -129,42 +82,34 @@ Skills provide error handling conventions, code quality patterns, type-first dev - Use brief bullets when it improves scanability; paths in backticks; code fences only when helpful. - Technical documentation in third person; instructions in second person; avoid first person. -## Error handling and completeness -- **Errors must be handled or returned to callers**; every error requires explicit handling at every level of the stack (universal principle across all languages). -- Fail loudly with clear messages on missing data or unsupported cases (silent failures compound into system-wide issues). -- Propagate errors up the call stack; transform exceptions into meaningful results or rethrow. -- Handle edge cases explicitly (empty inputs, nil/null, default branches). +## Code comments -## Idempotency and resilience -- Check state before changes; skip if already correct; prefer declarative over imperative. +Comment liberally. 
Every comment must explain intent, rationale, or non-obvious constraints — never restate what the code does. Good comments answer "why this approach?" and "what would break if this changed?" + +## Error handling +- Errors must be handled or returned to callers at every level of the stack. +- Fail loudly with clear messages; silent failures compound into system-wide issues. +- Handle edge cases explicitly (empty inputs, nil/null, default branches). - External calls need explicit timeouts; retries must be bounded with backoff. ## Test integrity -Tests verify correctness—they do not define the solution. Implement general-purpose solutions that solve the actual problem, not code that merely satisfies test cases. - -When tests fail, investigate root cause and fix the underlying issue. Do not: -- Hard-code values matching test assertions -- Add conditionals detecting test scenarios -- Weaken or remove assertions to avoid failures -- Change test expectations to match broken behavior -- Create workarounds or helper scripts that bypass the real problem - -If a test appears incorrect or the task seems infeasible, report the issue rather than gaming around it. Solutions should work correctly for all valid inputs and follow the principle that drove the test—not just its literal assertions. - -## Module structure and cohesion +Tests verify correctness — they do not define the solution. When tests fail, investigate root cause and fix the underlying issue. Do not hard-code values, weaken assertions, or game around tests. If a test appears incorrect, report the issue. -Organize code by single responsibility: each file/module handles one coherent concern. Split when a file handles genuinely separate concerns or different parts change for different reasons. Keep code together when related functionality shares types, helpers, or state. Prioritize cohesion and clear interfaces over arbitrary line counts; follow language-idiomatic conventions (see language skill files for specifics). 
+## Test realism +- Prefer integration tests over mocked unit tests for data flow and permissions. +- Mocks are acceptable for external services but not for your own data layer. +- If a test passes with mocks but would fail against the real system, the test is wrong. +- Before claiming done: "would this survive a manual walkthrough?" -## Refactoring rules +## Refactoring - Update all callers when changing interfaces; clean breaks over backward-compatibility shims. -- Fail on unexpected inputs; support legacy formats only when explicitly specified. - Prefer clean, complete migrations over gradual transitions. -- Commit to one implementation and delete superseded code; trust version control for history. +- Commit to one implementation and delete superseded code; trust version control. ## Implementation checklist - Functions implemented or explicitly error. -- TODOs accompanied by failing stubs that surface the incomplete work. +- TODOs accompanied by failing stubs. - Solutions work for all valid inputs; avoid hard-coded values that only satisfy test cases. - All paths handled; external calls checked for errors/timeouts. - Edge cases covered; switch/default cases present. diff --git a/codex-overrides/skills/canton-nodes/SKILL.md b/codex-overrides/skills/canton-nodes/SKILL.md new file mode 100644 index 0000000..e7fbcdc --- /dev/null +++ b/codex-overrides/skills/canton-nodes/SKILL.md @@ -0,0 +1,113 @@ +--- +name: canton-nodes +description: Canton validator node reference data. Use for participant IDs, database names, port availability, and architecture context. +--- + +# Canton Validator Nodes + +Reference data for Send's Canton validators. For connection commands, use the `sinfra` CLI. + +## Quick Access + +```bash +sinfra hosts --filter testnet # List testnet hosts +sinfra psql canton-testnet-docker --exec # Connect to postgres (with with-secrets) +sinfra grpc canton-testnet-docker health # Health check +``` + +See the `sinfra` skill for full CLI documentation. 
+ +## Participant Info + +| Environment | UID | User | +|-------------|-----|------| +| Devnet | `send-dev-1::122033c9...` | - | +| Testnet | `send-test-cantonwallet-1::1220f760...` | `tn-validator-waxuq421oyl8wdbbj3gwizlkycqpfsyl@clients` | +| Mainnet | `send-cantonwallet-1::1220f1b0...` | `cantonwallet_validator@clients` | + +## Port Availability + +| Port | Service | Devnet | Testnet | Mainnet | +|------|---------|--------|---------|---------| +| 5001 | Ledger API | closed | open | open | +| 5002 | Admin API | open | open | open | +| 5003 | Validator HTTP | closed | open | open | +| 7575 | JSON API | open | open | open | +| 8080 | Scan API | closed | open | open | +| 8090 | External Admin | closed | open | open | +| 8091 | External Ledger | closed | open | open | +| 45432 | PQS Postgres | - | - | open | + +## Postgres Databases + +| Environment | Host | Databases | +|-------------|------|-----------| +| Devnet | canton-devnet-docker | `participant-1`, `validator` | +| Testnet | canton-testnet-docker | `participant-0`, `participant-1`, `validator` | +| Mainnet | canton-mainnet-docker | `participant-3`, `participant-4`, `validator` | +| Mainnet PQS | canton-mainnet-docker:45432 | `pqs-app-provider-4` (via pqs-postgres) | + +## API URLs + +**Testnet:** +``` +http://canton-testnet-docker.tail6be6de.ts.net:{5001,5002,5003,7575,8080} +``` + +**Mainnet:** +``` +http://canton-mainnet-docker.tail6be6de.ts.net:{5001,5002,5003,7575,8080} +``` + +**Kubernetes (in-cluster):** +``` +http://canton-testnet-proxy.tailscale.svc.cluster.local:{5001,5002,5003,8080} +http://canton-mainnet-proxy.tailscale.svc.cluster.local:{5001,5002,5003,8080,45432} +``` + +### Scan API (port 8080) + +Nginx caching reverse proxy round-robining across 13 SV scan endpoints. GET cached 30s, POST cached 5s. Returns `X-Cache-Status` header (MISS/HIT). 
+ +```bash +# Health check +curl http://canton-mainnet-docker.tail6be6de.ts.net:8080/healthz + +# DSO info +curl http://canton-mainnet-docker.tail6be6de.ts.net:8080/api/scan/v0/dso + +# From K8s pod +curl http://canton-mainnet-proxy.tailscale.svc.cluster.local:8080/api/scan/v0/dso +``` + +Config on server: `/data/canton/{testnet,mainnet}/compose-scan-proxy.yaml` + +## Public Endpoints (Cloudflare Tunnels) + +| Endpoint | Env | API | Backend | +|----------|-----|-----|---------| +| `grpc-ta.cantonwallet.com` | testnet | Admin | `envoy-proxy:8090` | +| `grpc-tl.cantonwallet.com` | testnet | Ledger | `envoy-proxy:8091` | +| `json-api-testnet.cantonwallet.com` | testnet | JSON API | `participant:7575` | +| `grpc-ma.cantonwallet.com` | mainnet | Admin | `envoy-proxy:8090` | +| `grpc-ml.cantonwallet.com` | mainnet | Ledger | `envoy-proxy:8091` | +| `json-api-mainnet.cantonwallet.com` | mainnet | JSON API | `participant:7575` | +| `grpc-da.cantonwallet.com` | devnet | Admin | `envoy-proxy:8090` | +| `grpc-dl.cantonwallet.com` | devnet | Ledger | `envoy-proxy:8091` | + +gRPC convention: `grpc-{t|m|d}{a|l}.cantonwallet.com`. No validator gRPC routes via CF. + +All routes authenticated via CF Access service tokens. gRPC routes use Envoy for gRPC-Web conversion. JSON API credentials are in 1Password `api-gateway-secrets` items (`CF_ACCESS_CLIENT_ID`, `CF_ACCESS_CLIENT_SECRET`). gRPC credentials are in `grpc-tunnel` items. + +DNS and tunnel config: `terraform/infra/dns-cantonwallet.tf` + +## Architecture + +- Canton environments run as Docker containers with Tailscale sidecars +- Each environment exposes services on a unique Tailscale FQDN (`canton-testnet-docker`, etc.) 
+- Host machines (`send-canton01`, `send-canton02`) run multiple environment containers + +## Related + +- CLI: `sinfra` skill +- Tailscale egress: `kubernetes/infrastructure/swiss/tailscale/egress-canton.yaml` diff --git a/commands/handoff.md b/commands/handoff.md index 72ec974..62e8177 100644 --- a/commands/handoff.md +++ b/commands/handoff.md @@ -1,5 +1,5 @@ --- -allowed-tools: Bash(git:*), Bash(pwd:*), Bash(cat:*), Bash(basename:*), Bash(mkdir:*), Bash(date:*), Write(~/.claude/handoffs/**), Read(~/.claude/handoffs/**) +allowed-tools: Bash(git:*), Bash(pwd:*), Bash(cat:*), Bash(basename:*), Bash(mkdir:*), Bash(date:*), Write(~/.handoffs/**), Read(~/.handoffs/**) argument-hint: [optional focus area or additional notes] description: Generate concise handoff summary with context --- @@ -37,7 +37,7 @@ $ARGUMENTS ## Task -Write a handoff prompt to `~/.claude/handoffs/handoff-<repo>-<shortname>-<timestamp>.md` where: +Write a handoff prompt to `~/.handoffs/handoff-<repo>-<shortname>-<timestamp>.md` where: - `<repo>` is the repository name (or directory basename if not a git repo) - `<shortname>` is derived from the branch name, or use `main` if not in a git repo - `<timestamp>` is the current date/time as `YYYYMMDD-HHMM` (e.g., `20260303-1430`) @@ -128,10 +128,10 @@ Omit if no special constraints beyond normal development.] ### Output Method -1. Ensure directory exists: `mkdir -p ~/.claude/handoffs` +1. Ensure directory exists: `mkdir -p ~/.handoffs` -2. Write the handoff prompt to `~/.claude/handoffs/handoff-<repo>-<shortname>-<timestamp>.md` +2. Write the handoff prompt to `~/.handoffs/handoff-<repo>-<shortname>-<timestamp>.md` 3. Generate the timestamp using: `date +%Y%m%d-%H%M` -4. Confirm with the path: "Handoff saved to `~/.claude/handoffs/`" +4.
Confirm with the path: "Handoff saved to `~/.handoffs/`" diff --git a/commands/ralph.md b/commands/ralph.md deleted file mode 100644 index faae96c..0000000 --- a/commands/ralph.md +++ /dev/null @@ -1,72 +0,0 @@ ---- -description: Generate handoff and start Ralph loop in one command -argument-hint: [completion criteria] [--max-iterations N] [--max-reviews N] [--no-review] [--debug] ---- - -# Ralph: Handoff + Loop Combined - -Chain `/ralphoff` and `/ralph-reviewed:ralph-loop` into a single workflow. - -## Parse Arguments - -Arguments: $ARGUMENTS - -Split arguments into two groups: -- **HANDOFF_ARGS**: Everything before any `--` flags (passed to ralphoff as completion criteria) -- **LOOP_FLAGS**: Any `--max-iterations`, `--max-reviews`, `--completion-promise`, `--no-review`, `--debug` flags (passed to ralph-loop) - -Default loop flags if not specified: -- `--max-iterations 15` -- `--max-reviews 10` -- `--completion-promise "COMPLETE"` - -## Workflow - -### Step 1: Generate Handoff Context - -Invoke the `/ralphoff` skill with HANDOFF_ARGS. - -This will: -- Analyze the current session context -- Write a context file to `~/.claude/handoffs/ralph---.md` -- Prepare the task description with success criteria and verification loops - -**Important**: Note the exact filename created (e.g., `ralph-myrepo-feature-x-20260303-1430.md`). - ---- - -### MANDATORY: Step 2 - Start Ralph Loop - -**CRITICAL: After the handoff context is saved, you MUST continue with this step. The Ralph loop is NOT active until you invoke ralph-loop. Do not stop after the handoff.** - -Invoke `/ralph-reviewed:ralph-loop` with: -- The task prompt: `Read ~/.claude/handoffs/ and complete the task described there. Work through each step, verify with the "Done when" commands. 
Output COMPLETE when all verifications pass, or BLOCKED if stuck after 15 iterations.` -- The parsed LOOP_FLAGS (or defaults: `--max-iterations 15 --max-reviews 10 --completion-promise "COMPLETE"`) - ---- - -### MANDATORY: Step 3 - Verify Loop Started - -**CRITICAL: You MUST complete this step. Verify the loop state file was created.** - -1. **Verify the state file exists**: - ```bash - cat "$(git rev-parse --show-toplevel 2>/dev/null || pwd)/.claude/ralph-loop.local.md" | head -5 - ``` - -2. If the state file exists, the loop is active. Begin working on the task immediately. - -## Example Usage - -``` -/ralph # Use session context, default settings -/ralph fix all type errors # With specific completion criteria -/ralph --max-iterations 50 # Override iteration limit -/ralph --max-reviews 3 # Limit review cycles -/ralph complete the refactor --no-review # Skip Codex reviews -/ralph implement feature --debug # Enable debug logging -``` - -## Output - -After starting the loop, work begins immediately on the task from the handoff context. diff --git a/commands/ralphoff.md b/commands/ralphoff.md deleted file mode 100644 index 353d7ff..0000000 --- a/commands/ralphoff.md +++ /dev/null @@ -1,173 +0,0 @@ ---- -allowed-tools: Bash(git:*), Bash(pwd:*), Bash(cat:*), Bash(basename:*), Bash(mkdir:*), Bash(date:*), Write(~/.claude/handoffs/**), Read(~/.claude/handoffs/**) -argument-hint: [completion criteria] [--max-iterations N] [--max-reviews N] [--no-review] [--debug] -description: Generate Ralph-loop-ready handoff prompt ---- - -# Generate Ralph Loop Handoff Prompt - -Generate a prompt for handing off work to a Ralph Reviewed loop (`/ralph-reviewed:ralph-loop`). The receiving session runs in an iterative self-improvement loop with Codex review gates. The prompt must be self-contained, include clear success criteria, and support automatic verification. 
- -## Parse Arguments - -Arguments: $ARGUMENTS - -Split arguments into two groups: -- **HANDOFF_ARGS**: Everything before any `--` flags (used as completion criteria) -- **LOOP_FLAGS**: Any `--max-iterations`, `--max-reviews`, `--no-review`, `--debug` flags (passed to ralph-loop) - -Default loop flags if not specified: -- `--max-iterations 30` -- `--completion-promise "COMPLETE"` - -## Git Context - -**Working Directory**: !`pwd` - -**Repository**: !`git rev-parse --show-toplevel 2>/dev/null || echo "Not a git repository"` - -**Branch**: !`git branch --show-current 2>/dev/null || echo "detached/unknown"` - -**Uncommitted changes**: !`git diff --stat 2>/dev/null || echo "None"` - -**Staged changes**: !`git diff --cached --stat 2>/dev/null || echo "None"` - -**Recent commits (last 4 hours)**: !`git log --oneline -5 --since="4 hours ago" 2>/dev/null || echo "None"` - -## Session Context - -Review the conversation history from this session to understand: -- What task was requested and why -- What approach was taken -- Decisions made or tradeoffs discussed -- Current state: what's done, in progress, or blocked -- What verification exists (tests, linters, type checks, builds) -- Known issues or incomplete items - -## Additional Focus / Completion Criteria - -HANDOFF_ARGS (the completion criteria portion of $ARGUMENTS, excluding any `--` flags) - -## Task - -Write a Ralph-loop context file to `~/.claude/handoffs/ralph---.md` where: -- `` is the repository name -- `` is derived from the branch name -- `` is the current date/time as `YYYYMMDD-HHMM` - -Example: `ralph-myapp-sen-69-20260303-1430.md` - -### Core Principle - -**A ralph handoff is just a handoff with verification commands.** Apply the same principles as `/handoff`: describe what to type, be concrete, link don't summarize, keep it short. The ralph-loop runner handles iteration state, TODO.md tracking, BLOCKED escapes, and completion promises — don't duplicate that machinery in the handoff. 
- -### What belongs in the ralph handoff (task-specific) - -- What to build/fix/implement — concrete steps with file paths and function names -- How to verify it's done — exact shell commands -- Task-specific gotchas and fallback strategies -- Constraints and scope boundaries - -### What does NOT belong (handled by ralph-loop runner) - -- TODO.md format or iteration workflow instructions -- Generic "if stuck, document the blocker" guidance -- Completion promise syntax (`COMPLETE`) -- State tracking file management -- Generic BLOCKED escape conditions - -### Prompting Guidelines - -- **Be concrete over comprehensive** — file paths, function names, shell commands, specific values -- **Link, don't summarize** — "See `SPEC.md` for requirements" beats paraphrasing the spec -- **Include constraints** — "Only modify files under `src/`" and "Do NOT modify `packages/core/`" -- **Merge criteria and verification** — don't list success criteria separately from verification commands. One section: "Done when these commands all pass." -- **Front-load the task** — the agent should know what to do after reading the first 10 lines -- **Keep it proportional** — single-phase tasks: 60-100 lines. Multi-phase (3+): up to 200. Over 200 means you're probably summarizing things the agent can read themselves. - -### Output Structure - -Use plain markdown (not XML tags): - -```markdown -# [1-line task summary] - -[2-4 sentences: what exists, why, key decisions already made] - -## What to do - -### 1. [First concrete task] -[Details: file paths, function names, expected behavior, code snippets if helpful] - -### 2. [Second concrete task] -[Details] - -### 3. 
[Continue as needed] - -## Key files - -- `path/to/file.ts` — what it does / why it matters - -## Spec - -[OPTIONAL — link to spec file, don't repeat its contents] - -## Done when - -All of these pass: -```bash -command-to-build -command-to-test -command-to-check-scope -``` -[Plus any non-command criteria like "E2E tests exist for each waitFor state"] - -## Gotchas - -[OPTIONAL — things that will trip you up: -- "Build requires `DEVELOPER_DIR=...` prefix" -- "Pre-existing changes in git stash — pop after committing" -Keep to 2-5 bullets. Omit if nothing non-obvious.] - -## Constraints - -[OPTIONAL — hard boundaries: -- "Only modify files under `apps/gui-swift/`" -- "Do not modify `packages/core/`" -Omit if no special constraints.] - -## Fallbacks - -[OPTIONAL — task-specific escape hatches for if a phase is genuinely blocked: -- "Phase 6: If native screenshot module won't compile, keep JS-based capture" -- "If sandbox blocks module cache writes, pass `-Xcc -fmodules-cache-path=/tmp/mc`" -Only include if there are known risk areas. Omit for straightforward tasks.] -``` - -### Anti-Patterns to Avoid - -- **Duplicating ralph-loop runner instructions** — TODO.md format, iteration workflow, BLOCKED syntax, completion promise format. The runner handles all of this. -- **Separate success criteria and verification sections** — merge them into "Done when" -- **Generic "if stuck" instructions** — only include task-specific fallback strategies -- **Prose architecture summaries** — link to README or SPEC -- **Over 200 lines** — if it's this long, split into smaller tasks or link to existing docs - -### Output Method - -1. Ensure directory exists: `mkdir -p ~/.claude/handoffs` - -2. Write the Ralph-loop context file to `~/.claude/handoffs/ralph---.md` - -3. 
Confirm with the path: "Ralph-loop context saved to `~/.claude/handoffs/`" - -### Timestamp Generation - -Generate the timestamp using: `date +%Y%m%d-%H%M` - -### Wrapper Command Format - -When using this context file with `/ralph-reviewed:ralph-loop`, the command format is: - -``` -/ralph-reviewed:ralph-loop "Read ~/.claude/handoffs/ and complete the task described there. Follow the success criteria and verification loop. Output COMPLETE when all verifications pass, or BLOCKED if stuck after 15 iterations." --completion-promise "COMPLETE" -``` diff --git a/hooks/direnv-bash-env b/hooks/direnv-bash-env new file mode 100755 index 0000000..846a5e9 --- /dev/null +++ b/hooks/direnv-bash-env @@ -0,0 +1,21 @@ +#!/usr/bin/env bash +# BASH_ENV script for Claude Code: loads direnv environment automatically. +# Bash sources this file before executing non-interactive commands (bash -c "..."). +# Paired with a cd() override so directory changes mid-command also trigger direnv. + +# Guard against recursion: direnv spawns bash to evaluate .envrc, +# which would source BASH_ENV again, causing an infinite loop. 
+if [ -n "$_DIRENV_BASH_ENV_ACTIVE" ]; then + return 0 2>/dev/null || exit 0 +fi +export _DIRENV_BASH_ENV_ACTIVE=1 + +# Load direnv for the initial working directory +eval "$(direnv export bash 2>/dev/null)" || true + +# Override directory-changing builtins so direnv reloads on any dir change +_direnv_reload() { eval "$(direnv export bash 2>/dev/null)" || true; } + +cd() { builtin cd "$@" || return $?; _direnv_reload; } +pushd() { builtin pushd "$@" || return $?; _direnv_reload; } +popd() { builtin popd "$@" || return $?; _direnv_reload; } diff --git a/plugins/codex-reviewer/commands/review.md b/plugins/codex-reviewer/commands/review.md index ee55f6c..f639182 100644 --- a/plugins/codex-reviewer/commands/review.md +++ b/plugins/codex-reviewer/commands/review.md @@ -1,7 +1,7 @@ --- description: Start a Codex review gate - generates handoff context for the reviewer argument-hint: ["review focus"] [--max-cycles N] -allowed-tools: Bash(git:*), Bash(pwd:*), Bash(cat:*), Bash(head:*), Bash(grep:*), Bash(basename:*), Bash(mkdir:*), Bash(date:*), Write(**/.claude/codex-review.local.md), Read(~/.claude/handoffs/**), Read(**/.claude/codex-review.local.md) +allowed-tools: Bash(git:*), Bash(pwd:*), Bash(cat:*), Bash(head:*), Bash(grep:*), Bash(basename:*), Bash(mkdir:*), Bash(date:*), Write(**/.claude/codex-review.local.md), Read(~/.handoffs/**), Read(**/.claude/codex-review.local.md) --- # Start Codex Review Gate @@ -14,7 +14,7 @@ Arguments: $ARGUMENTS Parse the following from arguments: - **FOCUS**: Everything before the first `--` flag (the review focus) -- **--max-cycles**: Number (default: 10) - maximum review cycles before auto-approve +- **--max-cycles**: Number (default: 30) - maximum review cycles before auto-approve **Parsing rules:** 1. Text before any `--` flags is the review focus @@ -22,8 +22,8 @@ Parse the following from arguments: 3. 
If no focus text provided, use the default focus **Examples:** -- `/codex-reviewer:review` → default focus, max 10 cycles -- `/codex-reviewer:review "focus on security vulnerabilities"` → security review, max 10 cycles +- `/codex-reviewer:review` → default focus, max 30 cycles +- `/codex-reviewer:review "focus on security vulnerabilities"` → security review, max 30 cycles - `/codex-reviewer:review --max-cycles 3` → default focus, max 3 cycles - `/codex-reviewer:review "verify error handling" --max-cycles 10` → error handling review, max 10 cycles @@ -57,7 +57,7 @@ The review gate is already active. **ALLOWED when gate is active:** - Regenerate the handoff file (`/handoff`) to capture your latest work -- The handoff file at `~/.claude/handoffs/` can be updated freely +- The handoff file at `~/.handoffs/` can be updated freely **FORBIDDEN when gate is active:** - Writing to the state file (`.claude/codex-review.local.md`) @@ -92,7 +92,7 @@ Invoke the `/handoff` skill with the review focus. **Default focus (no arguments):** > Generate a handoff for a code reviewer who will verify the changes made in this session. Focus on what was changed, why, and how to verify correctness. -The handoff will be written to `~/.claude/handoffs/handoff--.md`. +The handoff will be written to `~/.handoffs/handoff--.md`. --- @@ -116,11 +116,11 @@ The handoff will be written to `~/.claude/handoffs/handoff--.md ```yaml --- active: true -handoff_path: "~/.claude/handoffs/handoff--.md" +handoff_path: "~/.handoffs/handoff--.md" task_description: null files_changed: ["file1.ts", "file2.ts"] review_count: 0 -max_review_cycles: +max_review_cycles: review_history: [] timestamp: "[current ISO timestamp]" debug: false @@ -133,7 +133,7 @@ Review gate active. Run `/codex-reviewer:cancel` to abort. 
**Important:** - Use the actual handoff path from Step 1 -- Use the `--max-cycles` value if provided, otherwise default to 10 +- Use the `--max-cycles` value if provided, otherwise default to 30 - The stop hook reads `handoff_path` at review time --- @@ -164,6 +164,6 @@ Review gate active. Run `/codex-reviewer:cancel` to abort. ## Notes - Codex review can take 5-20+ minutes depending on complexity -- Max 10 review cycles by default; use `--max-cycles N` to customize +- Max 30 review cycles by default; use `--max-cycles N` to customize - Use `/codex-reviewer:cancel` to abort the review gate - Debug logs at `~/.claude/codex/{session_id}/crash.log` diff --git a/plugins/codex-reviewer/hooks/codex-reviewer-stop-hook.ts b/plugins/codex-reviewer/hooks/codex-reviewer-stop-hook.ts index 24a6233..23352de 100755 --- a/plugins/codex-reviewer/hooks/codex-reviewer-stop-hook.ts +++ b/plugins/codex-reviewer/hooks/codex-reviewer-stop-hook.ts @@ -525,7 +525,7 @@ function parseStateFile(content: string): ReviewState | null { task_description: state.task_description ?? null, files_changed: state.files_changed ?? [], review_count: state.review_count ?? 0, - max_review_cycles: state.max_review_cycles ?? 10, + max_review_cycles: state.max_review_cycles ?? 30, review_history: state.review_history ?? [], timestamp: state.timestamp || new Date().toISOString(), debug: state.debug ?? 
false, diff --git a/plugins/ralph-reviewed/.claude-plugin/plugin.json b/plugins/ralph-reviewed/.claude-plugin/plugin.json index 4c832d3..cd82f74 100644 --- a/plugins/ralph-reviewed/.claude-plugin/plugin.json +++ b/plugins/ralph-reviewed/.claude-plugin/plugin.json @@ -5,6 +5,6 @@ "name": "Allen", "email": "bigboss@metalrodeo.xyz" }, - "version": "1.8.8", + "version": "3.0.0", "commands": "./commands/" } diff --git a/plugins/ralph-reviewed/QA.md b/plugins/ralph-reviewed/QA.md new file mode 100644 index 0000000..8ba820b --- /dev/null +++ b/plugins/ralph-reviewed/QA.md @@ -0,0 +1,124 @@ +# Ralph Reviewed QA + +End-to-end test of the ralph-reviewed plugin v3.0.0. Run this from a test repo to verify the full loop works — `rl` CLI, stop hook, Codex review gate, feedback cycle. + +## Prerequisites + +- Test repo at `/tmp/ralph-test` with a broken `math.ts` (see setup below) +- Plugin loaded via `--plugin-dir ~/code/dotfiles/claude-code/plugins/ralph-reviewed` +- `rl` CLI on PATH (auto-installed by `/ralph-reviewed:ralph-loop` on first use, or manually: `git clone https://github.com/0xbigboss/rl /tmp/rl && cd /tmp/rl && bun install && bun build src/cli.ts --compile --outfile ~/.local/bin/rl`) +- `codex` CLI installed and authenticated (for review gate tests) + +## Setup + +If the test repo doesn't exist, create it: + +```bash +rm -rf /tmp/ralph-test && mkdir -p /tmp/ralph-test && cd /tmp/ralph-test && git init -q && echo '# test' > README.md && git add -A && git commit -q -m "init" +cat > /tmp/ralph-test/math.ts <<'EOF' +// TODO: implement add, subtract, multiply, divide +export function add(a: number, b: number): number { + return 0; // broken +} + +export function divide(a: number, b: number): number { + return a / b; // no zero check +} +EOF +git -C /tmp/ralph-test add -A && git -C /tmp/ralph-test commit -q -m "add broken math module" +``` + +## Test: Full loop with review + +``` +/ralph-reviewed:ralph-loop "Fix math.ts: 1) add() should return a+b, 2) add subtract(a,b) 
and multiply(a,b), 3) divide() needs to throw on division by zero. Commit each fix separately. Use .rl/rl log for each phase." --max-iterations 10 --max-reviews 3 +``` + +### What to verify during the loop + +**rl discovery:** +- [ ] `rl` is found on PATH (via `command -v rl`) or falls back to `bunx @0xbigboss/rl` +- [ ] No references to `find ~/.claude/plugins` or embedded script paths + +**rl init:** +- [ ] `.rl/state.json` exists with lean schema: `active`, `iteration`, `max_iterations`, `timestamp`, `review_enabled`, `review_count`, `max_review_cycles`, `debug` — no `completion_promise`, `original_prompt`, `pending_feedback`, `review_history` +- [ ] `.rl/prompt.md` exists with the task text +- [ ] `.rl/rl` symlink exists and works (`.rl/rl status` returns output) +- [ ] `.rl/` is in `.git/info/exclude` + +**Agent work:** +- [ ] Agent uses `.rl/rl log phase` for each phase +- [ ] Agent uses `.rl/rl log commit` after each commit +- [ ] Agent uses `.rl/rl done` to signal completion (not bare `COMPLETE` text) +- [ ] Iteration headers from the stop hook show no denominators (e.g.
`Iteration 1` not `Iteration 1/10`) + +**Stop hook uses rl CLI:** +- [ ] Stop hook reads state via `rl status --json` (no local `parseStateFile`) +- [ ] Stop hook reads prompt via `rl prompt --json` (no local `readPrompt`) +- [ ] Stop hook updates state via `rl state set` (no local `serializeState`) +- [ ] Stop hook logs reviews via `rl log review` (no local `appendLog`) + +**Codex review gate (if review enabled):** +- [ ] Stop hook triggers Codex review after `.rl/rl done` +- [ ] Review output saved to `.rl/codex-review-*.txt` (not deleted) +- [ ] If rejected: feedback is plain text, fed back to agent, agent gets another iteration +- [ ] If approved: loop exits cleanly with approval message +- [ ] Reviewer does NOT flag `.rl/` directory, process compliance, or git hygiene +- [ ] Reviewer focuses on code correctness and task requirements + +**After loop ends:** +- [ ] `state.json` is deleted (cleanup on approve/max/blocked) +- [ ] `prompt.md`, `log.jsonl`, review outputs persist in `.rl/` +- [ ] `rl clean` removes the entire `.rl/` directory + +### What to watch for (bugs and edge cases) + +**rl CLI:** +- Does `rl init` fail if `.rl/` already exists from a previous run? +- Does `rl done` fail if `state.json` is missing? +- Does `rl log` fail if `.rl/` doesn't exist yet? +- Does the `.rl/rl` symlink survive across iterations? + +**Stop hook:** +- Does the hook correctly detect `completion_claimed` from `rl status --json`? +- Does the hook clear `completion_claimed` via `rl state set` on the next iteration? +- Does the `blocked_claimed` flag work the same way? +- If the agent runs `.rl/rl done` but then the review rejects, does the next iteration NOT think completion is still claimed? +- What happens if max iterations and max reviews are both hit simultaneously? + +**Codex review:** +- Does the reviewer actually run `.rl/rl prompt` to read the task? +- Does the reviewer use `cat .rl/log.jsonl` for context? 
+- Does the reviewer log its own findings via `.rl/rl log decision`? +- Is the review output in `.rl/` (not `/tmp/`)? +- Does the review prompt avoid triggering pedantic behavior (process compliance, delivery flow, git hygiene)? + +**Feedback cycle:** +- Is the reviewer's free-text feedback passed through to the agent cleanly? +- Does the agent get the original prompt re-injected on rejection? +- Does `getLastRejectFeedback` correctly find the last reject entry in log.jsonl? + +## Test: No-review mode + +``` +/ralph-reviewed:ralph-loop "Fix add() in math.ts to return a+b. Add subtract and multiply. Commit." --max-iterations 5 --no-review +``` + +- [ ] Loop runs without Codex +- [ ] `.rl/rl done` exits the loop immediately (no review gate) +- [ ] `.rl/rl done --blocked` exits with BLOCKED message + +## Test: rl clean + +After any test: + +```bash +rl clean +ls .rl/ # should fail — directory gone +``` + +## Cleanup + +```bash +rm -rf /tmp/ralph-test +``` diff --git a/plugins/ralph-reviewed/README.md b/plugins/ralph-reviewed/README.md index e6bd564..8a7bca8 100644 --- a/plugins/ralph-reviewed/README.md +++ b/plugins/ralph-reviewed/README.md @@ -73,23 +73,23 @@ Start an iterative loop with review gates. | Flag | Default | Description | |------|---------|-------------| -| `--max-iterations` | 50 | Max work iterations before auto-stop | +| `--max-iterations` | 30 | Max work iterations before auto-stop | | `--max-reviews` | max-iterations | Max review cycles before force-complete | -| `--completion-promise` | COMPLETE | Phrase that signals completion | | `--no-review` | false | Disable Codex review gate | -| `--debug` | false | Write debug logs to `/tmp/ralph-reviewed-{session_id}.log` | +| `--debug` | false | Write debug logs | + +**Completion:** The agent runs `.rl/rl done` when finished, or `.rl/rl done --blocked` if stuck. **Examples:** ```bash # Basic usage -/ralph-reviewed:ralph-loop "Build a REST API with CRUD for todos. Include tests. Output COMPLETE when done." 
+/ralph-reviewed:ralph-loop "Build a REST API with CRUD for todos. Include tests." # With options /ralph-reviewed:ralph-loop "Fix the auth bug in src/auth.ts" \ --max-iterations 20 \ - --max-reviews 2 \ - --completion-promise "FIXED" + --max-reviews 2 # Without review (original ralph behavior) /ralph-reviewed:ralph-loop "Refactor the utils module" --no-review @@ -118,7 +118,7 @@ Requirements: - Return 400 with error message on invalid input - Tests for all cases -When all tests pass, output COMPLETE +When all tests pass, run `.rl/rl done` ``` ### Include Verification Steps @@ -131,23 +131,21 @@ Verification: 2. Run `npm run lint` - no errors 3. Manual check: login flow works in dev -When verified, output FIXED +When verified, run `.rl/rl done` ``` ### Set Escape Conditions -The `BLOCKED` signal is a built-in escape hatch that terminates the loop immediately without triggering a Codex review. Use it when genuinely stuck: +`.rl/rl done --blocked` is the escape hatch — terminates the loop immediately without triggering a Codex review. Use it when genuinely stuck: ``` Implement the search feature. If blocked by external issues (missing deps, pre-existing bugs, etc.): - Document what's blocking and what you tried -- Output BLOCKED +- Run `.rl/rl done --blocked` ``` -Note: BLOCKED terminates the loop without review since there's nothing to review when blocked. - ## Review Gate When Claude outputs the completion promise, Codex CLI is invoked to review: @@ -222,7 +220,7 @@ This allows Codex to run build tools, linters, and tests during review. Without ### Loop State -State is stored in `.claude/ralph-loop.local.md` at the **git repository root**. This ensures the loop survives directory changes within the repo. The file tracks: +State is stored in `.rl/state.json` at the **git repository root**. This ensures the loop survives directory changes within the repo. A structured log is appended to `.rl/log.jsonl` for progression tracking. 
The state file tracks: - Current iteration count - Max iterations @@ -254,13 +252,13 @@ Do not edit this file manually. Use `/ralph-reviewed:cancel-ralph` to stop. **State file not found after directory change:** - Ensure you're in a git repository (`git rev-parse --show-toplevel`) -- State file is at repo root: `{GIT_ROOT}/.claude/ralph-loop.local.md` +- State file is at repo root: `{GIT_ROOT}/.rl/state.json` - Outside git repos, directory changes will break the loop **Reviews not happening:** - Check `codex` CLI is installed: `which codex` - Check `--no-review` is not set -- Check `.claude/ralph-loop.local.md` has `review_enabled: true` +- Check `.rl/state.json` has `"review_enabled": true` **Codex can't run tooling (EPERM errors, tsc/lint/test fails):** - By default, Codex runs in read-only sandbox mode diff --git a/plugins/ralph-reviewed/commands/cancel-ralph.md b/plugins/ralph-reviewed/commands/cancel-ralph.md index 790f72f..bd8ef22 100644 --- a/plugins/ralph-reviewed/commands/cancel-ralph.md +++ b/plugins/ralph-reviewed/commands/cancel-ralph.md @@ -1,44 +1,28 @@ --- description: Cancel active Ralph Reviewed loop -allowed-tools: Bash(git:*), Bash(cat:*), Bash(rm:*), Bash(test:*) +allowed-tools: Bash(rl:*), Bash(.rl/rl:*), Bash(git:*), Bash(rm:*), Bash(test:*) --- # Cancel Ralph Reviewed Loop Stop an active Ralph loop immediately. -## Find State File - -Get git repository root (state file is at repo root): -```bash -git rev-parse --show-toplevel 2>/dev/null || pwd -``` -Store this as GIT_ROOT. - ## Check for Active Loop ```bash -test -f {GIT_ROOT}/.claude/ralph-loop.local.md && echo "found" || echo "not_found" +rl status --json 2>/dev/null || .rl/rl status --json 2>/dev/null || echo '{"error": "no loop"}' ``` -## If Found +## If Active -1. Extract current iteration for reporting: - ```bash - cat {GIT_ROOT}/.claude/ralph-loop.local.md | grep "^iteration:" | cut -d' ' -f2 - ``` - -2. 
Extract review count: - ```bash - cat {GIT_ROOT}/.claude/ralph-loop.local.md | grep "^review_count:" | cut -d' ' -f2 - ``` +1. Note the iteration and review count from the status output. -3. Delete the state file: +2. Delete the state file to end the loop: ```bash - rm {GIT_ROOT}/.claude/ralph-loop.local.md + rm "$(git rev-parse --show-toplevel 2>/dev/null || pwd)/.rl/state.json" ``` -4. Report: +3. Report: ``` Cancelled Ralph Reviewed loop. - Was at iteration: {N} diff --git a/plugins/ralph-reviewed/commands/help.md b/plugins/ralph-reviewed/commands/help.md index 44cf8e5..061ffa4 100644 --- a/plugins/ralph-reviewed/commands/help.md +++ b/plugins/ralph-reviewed/commands/help.md @@ -26,14 +26,16 @@ Start an iterative loop with review gates. ``` **Options:** -- `--max-iterations ` - Max work iterations before auto-stop (default: 50) +- `--max-iterations ` - Max work iterations before auto-stop (default: 30) - `--max-reviews ` - Max review cycles before force-complete (default: --max-iterations) -- `--completion-promise ` - Phrase that signals completion (default: COMPLETE) - `--no-review` - Disable Codex review gate +- `--debug` - Enable debug logging + +**Completion:** The agent runs `.rl/rl done` when finished, or `.rl/rl done --blocked` if stuck. **Examples:** ``` -/ralph-reviewed:ralph-loop "Build a REST API with CRUD for todos. Include tests." --completion-promise "COMPLETE" --max-iterations 30 +/ralph-reviewed:ralph-loop "Build a REST API with CRUD for todos. Include tests." --max-iterations 30 /ralph-reviewed:ralph-loop "Fix the authentication bug in src/auth.ts. Tests must pass." --max-reviews 2 ``` @@ -50,7 +52,7 @@ Show this help message. 
**Loop won't stop:** - Use `/ralph-reviewed:cancel-ralph` to force stop -- Check that your completion promise matches exactly +- Ensure the agent ran `.rl/rl done` before stopping **Codex not reviewing:** - Ensure `codex` CLI is installed and authenticated diff --git a/plugins/ralph-reviewed/commands/ralph-loop.md b/plugins/ralph-reviewed/commands/ralph-loop.md index 8e3584f..3a922b6 100644 --- a/plugins/ralph-reviewed/commands/ralph-loop.md +++ b/plugins/ralph-reviewed/commands/ralph-loop.md @@ -1,7 +1,7 @@ --- description: Start Ralph Reviewed loop in current session -allowed-tools: Bash(git:*), Bash(mkdir:*), Bash(date:*), Bash(cat:*), Write(**/ralph-loop.local.md) -argument-hint: "task description" [--max-iterations N] [--max-reviews N] [--completion-promise TEXT] [--no-review] [--debug] +allowed-tools: Bash(rl:*), Bash(.rl/rl:*), Bash(bun:*), Bash(git:*), Bash(cat:*), Bash(command:*), Bash(mkdir:*) +argument-hint: "task description" [--max-iterations N] [--max-reviews N] [--no-review] [--debug] --- # Start Ralph Reviewed Loop @@ -14,81 +14,66 @@ Arguments: $ARGUMENTS Parse the following from arguments: - **PROMPT**: Everything before the first `--` flag (the task description) -- **--max-iterations**: Number (default: 50) +- **--max-iterations**: Number (default: 30) - **--max-reviews**: Number (optional, defaults to --max-iterations if not specified) -- **--completion-promise**: String (default: "COMPLETE") - **--no-review**: Boolean flag (default: false) -- **--debug**: Boolean flag (default: false) - writes debug logs to ~/.claude/ralphs/{session_id}/debug.log +- **--debug**: Boolean flag (default: false) ## Setup -1. Get git repository root (state file must be at repo root to survive directory changes): +1. Ensure the `rl` CLI is installed. If not on PATH, build from source: ```bash - git rev-parse --show-toplevel + command -v rl >/dev/null 2>&1 || ( + echo "Installing rl CLI..." 
&& + git clone https://github.com/0xbigboss/rl /tmp/rl-build 2>/dev/null && + cd /tmp/rl-build && + bun install --frozen-lockfile && + mkdir -p ~/.local/bin && + bun build src/cli.ts --compile --outfile ~/.local/bin/rl && + rm -rf /tmp/rl-build && + echo "rl installed to ~/.local/bin/rl" + ) + ``` + Verify it's available: + ```bash + rl --version ``` - Store this as GIT_ROOT. If not in a git repo, use current directory. -2. Create state directory at repo root: +2. Initialize the loop (creates `.rl/` with state.json, prompt.md, and `.rl/rl` wrapper): ```bash - mkdir -p {GIT_ROOT}/.claude + rl init "{PROMPT}" --max-iterations {MAX_ITERATIONS} --max-reviews {MAX_REVIEWS} {--no-review if set} {--debug if set} ``` -3. Generate timestamp: +3. Verify setup: ```bash - date -u +"%Y-%m-%dT%H:%M:%SZ" + .rl/rl status ``` -4. Write the state file to `{GIT_ROOT}/.claude/ralph-loop.local.md`: +All subsequent `rl` calls use `.rl/rl` (wrapper created by init). -```markdown ---- -active: true -iteration: 0 -max_iterations: {MAX_ITERATIONS} -completion_promise: "{COMPLETION_PROMISE}" -original_prompt: | - {PROMPT} -timestamp: "{TIMESTAMP}" -review_enabled: {true unless --no-review} -review_count: 0 -max_review_cycles: {MAX_REVIEWS} -pending_feedback: null -debug: {true if --debug, else false} ---- -``` +## Completion and Escape -## Confirmation Output +- **Done:** run `.rl/rl done` — triggers Codex review on next stop. +- **Blocked:** run `.rl/rl done --blocked` — terminates without review. -After creating the state file, output: +## Working Guidelines -``` -Ralph Reviewed loop started. +**Pacing.** Each iteration should produce thoughtful work. Researching, loading skills, and studying patterns IS productive — don't rush to `.rl/rl done`. -Task: {first 100 chars of PROMPT}... +**Churn breaker.** If a reviewer flags the same area twice, your next iteration must be research — load skills, read docs, study the codebase. No code fix until you understand why the previous fix was wrong. 
-Configuration: -- Max iterations: {MAX_ITERATIONS} -- Max review cycles: {MAX_REVIEWS} -- Completion promise: {COMPLETION_PROMISE} -- Review enabled: {yes/no} -- Debug: {yes/no} (logs to ~/.claude/ralphs/{session_id}/debug.log) +**Depth before breadth.** Complete each phase fully before starting the next. -The stop hook will now intercept exit attempts. When you believe the task is complete, output: +**Skill loading.** Check `.claude/skills/` for relevant skills before writing code. -{COMPLETION_PROMISE} +**Live verification.** Before claiming completion: run e2e/integration tests if they exist, boot the dev environment if available, or note the gap. Passing unit tests with a broken application is not done. -Your work will be reviewed by Codex before the loop can end. +**Log progress** with `.rl/rl`: +- `.rl/rl log phase "starting migration"` — new phase +- `.rl/rl log commit "summary"` — after commits +- `.rl/rl log decision "chose X because..."` — design decisions +- `.rl/rl log summary "status update"` — every ~5 iterations --- -Beginning work on task... -``` - -## Completion and Escape - -- When the task is done, output `{COMPLETION_PROMISE}` — triggers Codex review before the loop ends. -- If genuinely blocked (missing dependency, impossible constraint), document the blocker and output `BLOCKED` — terminates the loop without review. - -## Begin Working - -After setup, immediately begin working on the task described in PROMPT. The stop hook handles iteration logic automatically. +Begin working on the task. The stop hook handles iteration logic automatically. diff --git a/plugins/ralph-reviewed/hooks/ralph-reviewed-stop-hook.ts b/plugins/ralph-reviewed/hooks/ralph-reviewed-stop-hook.ts index 33b6498..b41833c 100755 --- a/plugins/ralph-reviewed/hooks/ralph-reviewed-stop-hook.ts +++ b/plugins/ralph-reviewed/hooks/ralph-reviewed-stop-hook.ts @@ -5,16 +5,15 @@ * Intercepts exit attempts during an active Ralph loop. 
* When completion is claimed, triggers Codex review gate. * - * NOTE: Ralph loops only work within git repositories. The state file is stored - * at the git repo root (.claude/ralph-loop.local.md) to ensure it survives - * directory changes within the repo. Outside of git repos, falls back to cwd - * but directory changes will break the loop. + * State, prompt, and log operations are delegated to the `rl` CLI + * (@0xbigboss/rl). The hook only handles stdin parsing, Codex review, + * and the block/allow decision. * * Flow: * 1. Check for active loop state file (at git repo root) * 2. If no loop, allow exit - * 3. Extract last assistant message from transcript - * 4. Check for completion promise + * 3. Read state via `rl status --json` + * 4. Check for completion/blocked flags * - Not found: increment iteration, block exit, re-feed prompt * - Found: trigger review gate * 5. Review gate: @@ -23,26 +22,24 @@ * - REJECT: inject feedback, block exit, continue */ -import { readFileSync, writeFileSync, existsSync, appendFileSync, unlinkSync, mkdirSync } from "node:fs"; -import { execSync, spawnSync } from "node:child_process"; +import { readFileSync, existsSync, appendFileSync, unlinkSync, mkdirSync } from "node:fs"; +import { spawnSync } from "node:child_process"; import { homedir } from "node:os"; +import { join } from "node:path"; // --- Version --- -// Update this when making changes to help diagnose cached code issues -const HOOK_VERSION = "2026-01-08T05:00:00Z"; -const HOOK_BUILD = "v1.8.4"; +const HOOK_VERSION = "2026-03-21T14:00:00Z"; +const HOOK_BUILD = "v3.0.0"; const STDIN_TIMEOUT_MS = 2000; // --- User Config --- -// User preferences stored in ~/.claude/codex.json -// Legacy fallback: ~/.claude/ralphs/config.json interface CodexConfig { sandbox?: "read-only" | "workspace-write" | "danger-full-access"; approval_policy?: "untrusted" | "on-failure" | "on-request" | "never"; bypass_sandbox?: boolean; extra_args?: string[]; - timeout_seconds?: number; // Timeout 
for Codex CLI call (default: 1200 = 20 min) + timeout_seconds?: number; } interface UserConfig { @@ -55,21 +52,18 @@ const DEFAULT_CONFIG: UserConfig = { approval_policy: "never", bypass_sandbox: false, extra_args: [], - timeout_seconds: 1200, // 20 minutes + timeout_seconds: 1200, }, }; let userConfig: UserConfig = DEFAULT_CONFIG; // --- Crash Reporting --- -// Session-specific logs stored in ~/.claude/ralphs/{session_id}/ -// Pre-session logs go to ~/.claude/ralphs/startup.log const ralphsDir = `${homedir()}/.claude/ralphs`; let sessionId = "unknown"; -let sessionLogDir = ralphsDir; // Updated when session ID is known -let crashLogPath = `${ralphsDir}/startup.log`; // Before we know session ID +let sessionLogDir = ralphsDir; +let crashLogPath = `${ralphsDir}/startup.log`; -// Ensure base ralphs directory exists try { mkdirSync(ralphsDir, { recursive: true }); } catch { /* ignore */ } @@ -77,9 +71,7 @@ try { // --- Config Loading --- function loadUserConfig(): UserConfig { - // Standard location: ~/.claude/codex.json const standardPath = `${homedir()}/.claude/codex.json`; - // Legacy fallback: ~/.claude/ralphs/config.json const legacyPath = `${ralphsDir}/config.json`; for (const configPath of [standardPath, legacyPath]) { @@ -87,7 +79,6 @@ function loadUserConfig(): UserConfig { if (existsSync(configPath)) { const content = readFileSync(configPath, "utf-8"); const parsed = JSON.parse(content) as Partial; - // Merge with defaults return { codex: { ...DEFAULT_CONFIG.codex, @@ -96,7 +87,6 @@ function loadUserConfig(): UserConfig { }; } } catch (e) { - // Log but don't fail - use defaults try { appendFileSync(`${ralphsDir}/startup.log`, `[${new Date().toISOString()}] Failed to load config from ${configPath}: ${e}\n`); } catch { /* ignore */ } @@ -105,7 +95,6 @@ function loadUserConfig(): UserConfig { return DEFAULT_CONFIG; } -// Load config at startup userConfig = loadUserConfig(); function crash(msg: string, error?: unknown) { @@ -122,7 +111,6 @@ function crash(msg: 
string, error?: unknown) { try { appendFileSync(crashLogPath, line); } catch { - // Last resort: stderr console.error(line); } } @@ -130,103 +118,65 @@ function crash(msg: string, error?: unknown) { function setSessionId(id: string) { sessionId = id; sessionLogDir = `${ralphsDir}/${id}`; - - // Create session-specific directory try { mkdirSync(sessionLogDir, { recursive: true }); } catch { /* ignore */ } - crashLogPath = `${sessionLogDir}/crash.log`; } -// Log startup immediately to help diagnose "operation aborted" errors crash(`Hook starting - version: ${HOOK_BUILD} (${HOOK_VERSION}), PID: ${process.pid}`); // Global error handlers +let stateFilePath: string | null = null; + process.on("uncaughtException", (err) => { crash("Uncaught exception", err); - // Clean up state file to avoid re-triggering loop if (stateFilePath) { - try { - unlinkSync(stateFilePath); - crash(`Cleaned up state file on uncaught exception: ${stateFilePath}`); - } catch { /* ignore cleanup errors */ } + try { unlinkSync(stateFilePath); crash(`Cleaned up state file on uncaught exception: ${stateFilePath}`); } catch { /* ignore */ } } - // Output empty JSON to avoid trapping user console.log(JSON.stringify({})); process.exit(1); }); process.on("unhandledRejection", (reason) => { crash("Unhandled rejection", reason); - // Clean up state file to avoid re-triggering loop if (stateFilePath) { - try { - unlinkSync(stateFilePath); - crash(`Cleaned up state file on unhandled rejection: ${stateFilePath}`); - } catch { /* ignore cleanup errors */ } + try { unlinkSync(stateFilePath); crash(`Cleaned up state file on unhandled rejection: ${stateFilePath}`); } catch { /* ignore */ } } - // Output empty JSON to avoid trapping user console.log(JSON.stringify({})); process.exit(1); }); -let debugLogPath = `${ralphsDir}/debug.log`; // Updated with session ID later +let debugLogPath = `${ralphsDir}/debug.log`; let debugEnabled = process.env.RALPH_DEBUG === "1"; -let stateFilePath: string | null = null; // Set 
in main() for error handler access function debug(msg: string) { - // Always log to crash log for traceability const timestamp = new Date().toISOString(); const line = `[${timestamp}] [${sessionId}] ${msg}\n`; - - // Always append to session crash log (for debugging crashes) try { appendFileSync(crashLogPath, `[DEBUG] ${line}`); } catch { /* ignore */ } - - // Only write to debug log if debug mode is enabled if (!debugEnabled) return; appendFileSync(debugLogPath, line); } -import { join } from "node:path"; // --- Git Utilities --- -/** - * Get the true root git repository, walking up through submodules. - * If cwd is inside a submodule, returns the top-level parent repo. - * Returns null if not in a git repo or git command fails. - */ function getGitRoot(cwd: string): string | null { try { let dir = cwd; - - // Walk up through submodule hierarchy to find true root while (true) { - // Check if we're in a submodule (has a parent superproject) const superResult = spawnSync("git", ["rev-parse", "--show-superproject-working-tree"], { - cwd: dir, - encoding: "utf-8", - timeout: 5000, + cwd: dir, encoding: "utf-8", timeout: 5000, }); - const superproject = superResult.status === 0 ? superResult.stdout.trim() : ""; - if (!superproject) { - // No parent superproject - this is the true root (or we're not in a submodule) const rootResult = spawnSync("git", ["rev-parse", "--show-toplevel"], { - cwd: dir, - encoding: "utf-8", - timeout: 5000, + cwd: dir, encoding: "utf-8", timeout: 5000, }); - if (rootResult.status === 0 && rootResult.stdout) { - return rootResult.stdout.trim(); - } + if (rootResult.status === 0 && rootResult.stdout) return rootResult.stdout.trim(); return null; } - - // Move up to the parent repo and check again (handles nested submodules) dir = superproject; } } catch { @@ -234,584 +184,234 @@ function getGitRoot(cwd: string): string | null { } } -/** - * Determine the state file path. - * Uses git repo root if available, otherwise falls back to cwd. 
- */ function getStateFilePath(cwd: string): string { const gitRoot = getGitRoot(cwd); - const baseDir = gitRoot || cwd; - return join(baseDir, ".claude", "ralph-loop.local.md"); -} - -// --- Types --- - -interface HookInput { - session_id: string; - transcript_path?: string; - cwd?: string; - permission_mode?: string; - hook_event_name: "Stop"; - stop_hook_active?: boolean; -} - -/** - * Hook output schema for Claude Code stop hooks. - * See: https://code.claude.com/docs/en/hooks.md - * - * - decision: "block" prevents stopping (omit to allow) - * - reason: Message shown to Claude when blocking - * - systemMessage: Optional message shown to user regardless of decision - */ -interface HookOutput { - decision?: "block"; - reason?: string; - systemMessage?: string; - continue?: boolean; - stopReason?: string; -} - -interface ReviewIssue { - id: number; - severity: "critical" | "major" | "minor"; - description: string; -} - -interface ResolvedIssue { - id: number; - verification: string; -} - -interface ReviewHistoryEntry { - cycle: number; - decision: "APPROVE" | "REJECT"; - issues: ReviewIssue[]; - resolved: ResolvedIssue[]; - notes: string | null; + return join(gitRoot || cwd, ".rl", "state.json"); } -interface LoopState { - active: boolean; - iteration: number; - max_iterations: number; - completion_promise: string; - original_prompt: string; - timestamp: string; - review_enabled: boolean; - review_count: number; - max_review_cycles: number; - pending_feedback: string | null; - review_history: ReviewHistoryEntry[]; - debug: boolean; -} +// --- rl CLI integration --- -interface TranscriptEntry { - type?: string; - message?: { - role: string; - content: Array<{ type: string; text?: string }>; - }; - // Legacy format fallback - role?: string; - content?: Array<{ type: string; text?: string }>; -} +let rlBinary: string | null = null; -// --- State File Parsing --- +function findRl(cwd: string): string { + if (rlBinary) return rlBinary; -/** - * Normalize a review 
history entry to handle old schema versions. - * Ensures all required fields exist with sensible defaults. - */ -function normalizeHistoryEntry(entry: unknown): ReviewHistoryEntry { - if (typeof entry !== "object" || entry === null) { - return { - cycle: 0, - decision: "REJECT", - issues: [], - resolved: [], - notes: null, - }; - } - - const obj = entry as Record; - - // Normalize issues array - const issues: ReviewIssue[] = []; - if (Array.isArray(obj.issues)) { - for (const issue of obj.issues) { - if (typeof issue === "object" && issue !== null) { - const i = issue as Record; - issues.push({ - id: typeof i.id === "number" ? i.id : 0, - severity: (["critical", "major", "minor"].includes(String(i.severity)) - ? String(i.severity) - : "minor") as "critical" | "major" | "minor", - description: typeof i.description === "string" ? i.description : "", - }); - } - } + // 1. Check if rl is on PATH + const which = spawnSync("which", ["rl"], { encoding: "utf-8" }); + if (which.status === 0 && which.stdout.trim()) { + rlBinary = "rl"; + return rlBinary; } - // Normalize resolved array - const resolved: ResolvedIssue[] = []; - if (Array.isArray(obj.resolved)) { - for (const r of obj.resolved) { - if (typeof r === "object" && r !== null) { - const res = r as Record; - resolved.push({ - id: typeof res.id === "number" ? res.id : 0, - verification: typeof res.verification === "string" ? res.verification : "", - }); - } - } + // 2. Use .rl/rl wrapper (created by rl init with absolute path fallback) + const wrapper = join(cwd, ".rl", "rl"); + if (existsSync(wrapper)) { + rlBinary = wrapper; + return rlBinary; } - return { - cycle: typeof obj.cycle === "number" ? obj.cycle : 0, - decision: obj.decision === "APPROVE" ? "APPROVE" : "REJECT", - issues, - resolved, - notes: typeof obj.notes === "string" ? obj.notes : null, - }; + // 3. 
Last resort: try rl anyway (may fail) + rlBinary = "rl"; + return rlBinary; } -function parseStateFile(content: string): LoopState | null { - // Extract YAML frontmatter - const match = content.match(/^---\n([\s\S]*?)\n---/); - if (!match) return null; - - const yaml = match[1]; - const state: Partial = {}; - - // Parse each field - const lines = yaml.split("\n"); - let inPrompt = false; - let promptLines: string[] = []; - - for (const line of lines) { - if (inPrompt) { - if (line.startsWith(" ")) { - promptLines.push(line.slice(2)); - continue; - } else { - inPrompt = false; - state.original_prompt = promptLines.join("\n").trim(); - } - } - - if (line.startsWith("active:")) { - state.active = line.includes("true"); - } else if (line.startsWith("iteration:")) { - state.iteration = parseInt(line.split(":")[1].trim(), 10); - } else if (line.startsWith("max_iterations:")) { - state.max_iterations = parseInt(line.split(":")[1].trim(), 10); - } else if (line.startsWith("completion_promise:")) { - state.completion_promise = line.split(":").slice(1).join(":").trim().replace(/^["']|["']$/g, ""); - } else if (line.startsWith("original_prompt:")) { - const inline = line.split(":").slice(1).join(":").trim(); - if (inline === "|") { - inPrompt = true; - promptLines = []; - } else { - state.original_prompt = inline.replace(/^["']|["']$/g, ""); - } - } else if (line.startsWith("timestamp:")) { - state.timestamp = line.split(":").slice(1).join(":").trim().replace(/^["']|["']$/g, ""); - } else if (line.startsWith("review_enabled:")) { - state.review_enabled = line.includes("true"); - } else if (line.startsWith("review_count:")) { - state.review_count = parseInt(line.split(":")[1].trim(), 10); - } else if (line.startsWith("max_review_cycles:")) { - state.max_review_cycles = parseInt(line.split(":")[1].trim(), 10); - } else if (line.startsWith("pending_feedback:")) { - const val = line.split(":").slice(1).join(":").trim(); - state.pending_feedback = val === "null" ? 
null : val.replace(/^["']|["']$/g, ""); - } else if (line.startsWith("debug:")) { - state.debug = line.includes("true"); - } else if (line.startsWith("review_history:")) { - const val = line.split(":").slice(1).join(":").trim(); - if (val && val !== "[]") { - try { - const parsed = JSON.parse(val); - // Normalize each entry to handle old schema versions - state.review_history = Array.isArray(parsed) - ? parsed.map((entry: unknown) => normalizeHistoryEntry(entry)) - : []; - } catch { - state.review_history = []; - } - } else { - state.review_history = []; - } - } +function callRl(args: string[], cwd: string): { ok: boolean; stdout: string } { + const rl = findRl(cwd); + const result = spawnSync(rl, args, { cwd, encoding: "utf-8", timeout: 10000 }); + if (result.status !== 0) { + crash(`${rl} ${args.join(" ")} failed: ${result.stderr || result.stdout}`); + return { ok: false, stdout: result.stdout || "" }; } + return { ok: true, stdout: result.stdout || "" }; +} - // Validate required fields - if ( - state.active === undefined || - state.iteration === undefined || - state.max_iterations === undefined || - !state.completion_promise || - !state.original_prompt - ) { +function rlStatusJson(cwd: string): Record | null { + const result = callRl(["status", "--json"], cwd); + if (!result.ok) return null; + try { + return JSON.parse(result.stdout) as Record; + } catch { + crash(`Failed to parse rl status output: ${result.stdout.slice(0, 200)}`); return null; } - - return { - active: state.active, - iteration: state.iteration, - max_iterations: state.max_iterations, - completion_promise: state.completion_promise, - original_prompt: state.original_prompt, - timestamp: state.timestamp || new Date().toISOString(), - review_enabled: state.review_enabled ?? true, - review_count: state.review_count ?? 0, - max_review_cycles: state.max_review_cycles ?? state.max_iterations, - pending_feedback: state.pending_feedback ?? null, - review_history: state.review_history ?? 
[], - debug: state.debug ?? false, - }; } -function serializeState(state: LoopState): string { - const promptIndented = state.original_prompt - .split("\n") - .map((line) => ` ${line}`) - .join("\n"); - - return `--- -active: ${state.active} -iteration: ${state.iteration} -max_iterations: ${state.max_iterations} -completion_promise: "${state.completion_promise}" -original_prompt: | -${promptIndented} -timestamp: "${state.timestamp}" -review_enabled: ${state.review_enabled} -review_count: ${state.review_count} -max_review_cycles: ${state.max_review_cycles} -pending_feedback: ${state.pending_feedback ? `"${state.pending_feedback.replace(/"/g, '\\"')}"` : "null"} -review_history: ${JSON.stringify(state.review_history)} -debug: ${state.debug} ---- -`; +function rlPrompt(cwd: string): string | null { + const result = callRl(["prompt", "--json"], cwd); + if (!result.ok) return null; + try { + const parsed = JSON.parse(result.stdout); + return typeof parsed === "string" ? parsed : null; + } catch { + return result.stdout.trim() || null; + } } // --- State File Cleanup --- -function cleanupStateFile(stateFilePath: string): void { +function cleanupStateFile(path: string): void { try { - if (existsSync(stateFilePath)) { - unlinkSync(stateFilePath); - crash(`State file deleted: ${stateFilePath}`); - debug(`[ralph-reviewed] Cleaned up state file: ${stateFilePath}`); + if (existsSync(path)) { + unlinkSync(path); + crash(`State file deleted: ${path}`); + debug(`[ralph-reviewed] Cleaned up state file: ${path}`); } } catch (e) { - crash(`Failed to delete state file: ${stateFilePath}`, e); - debug(`[ralph-reviewed] Failed to cleanup state file: ${e}`); + crash(`Failed to delete state file: ${path}`, e); } } -// --- Transcript Parsing --- - -function getLastAssistantMessage(transcriptPath: string): string | null { - if (!existsSync(transcriptPath)) return null; +// --- Last Reject Feedback --- +function getLastRejectFeedback(rlDir: string): string | null { try { - const content = 
readFileSync(transcriptPath, "utf-8"); - const lines = content.trim().split("\n").filter(Boolean); + const logFilePath = join(rlDir, "log.jsonl"); + if (!existsSync(logFilePath)) return null; + const content = readFileSync(logFilePath, "utf-8").trim(); + if (!content) return null; - // Find last assistant message (iterate backwards) + const lines = content.split("\n"); for (let i = lines.length - 1; i >= 0; i--) { try { - const entry: TranscriptEntry = JSON.parse(lines[i]); - - // Handle new format: { type: "assistant", message: { role, content } } - const role = entry.message?.role || entry.role; - const msgContent = entry.message?.content || entry.content; - - if (role === "assistant" && Array.isArray(msgContent)) { - const textParts = msgContent - .filter((c) => c.type === "text" && c.text) - .map((c) => c.text) - .join("\n"); - if (textParts) return textParts; - } - } catch { - continue; - } + const parsed = JSON.parse(lines[i]); + if (parsed.type !== "review") continue; + if (parsed.decision !== "reject") return null; + return typeof parsed.feedback === "string" ? 
parsed.feedback : null; + } catch { continue; } } - } catch { - return null; - } - + } catch { /* ignore */ } return null; } -// --- Codex Review --- +// --- Types --- -interface ReviewResult { - approved: boolean; - issues: ReviewIssue[]; - resolved: ResolvedIssue[]; - notes: string | null; +interface HookInput { + session_id: string; + transcript_path?: string; + cwd?: string; + permission_mode?: string; + hook_event_name: "Stop"; + stop_hook_active?: boolean; } -function formatIssuesForDisplay(issues: ReviewIssue[]): string { - return issues - .map((issue) => ` - [ISSUE-${issue.id}] ${issue.severity}: ${issue.description}`) - .join("\n"); +interface HookOutput { + decision?: "block"; + reason?: string; + systemMessage?: string; + continue?: boolean; + stopReason?: string; } -function formatResolvedForDisplay(resolved: ResolvedIssue[]): string { - return resolved - .map((r) => ` - [ISSUE-${r.id}] ✓ ${r.verification}`) - .join("\n"); +interface ReviewResult { + approved: boolean; + feedback: string; } -function buildReviewHistorySection(history: ReviewHistoryEntry[]): string { - if (history.length === 0) return ""; +// --- Codex Review --- - const sections = history.map((entry) => { - const parts: string[] = [`### Cycle ${entry.cycle}: ${entry.decision}`]; +function callCodexReview(reviewCount: number, cwd: string): ReviewResult { + crash(`callCodexReview() started - reviewCount=${reviewCount}, cwd=${cwd}`); - if (entry.resolved.length > 0) { - parts.push(`**Resolved:**\n${formatResolvedForDisplay(entry.resolved)}`); - } + const whichResult = spawnSync("which", ["codex"], { encoding: "utf-8" }); + if (whichResult.status !== 0) { + crash("Codex CLI not found, approving by default"); + return { approved: true, feedback: "" }; + } - if (entry.issues.length > 0) { - parts.push(`**Issues:**\n${formatIssuesForDisplay(entry.issues)}`); - } + const reviewPrompt = `# Code Review - if (entry.notes) { - parts.push(`**Notes:** ${entry.notes}`); - } +An agent worked on a task 
in an iterative loop and claims it's done. Review the work. - return parts.join("\n"); - }); +## Context - return `## Previous Reviews +The \`.rl/\` directory contains loop state and tools. Start here: +- \`.rl/rl prompt\` — read the original task assignment +- \`.rl/rl status\` — check loop state +- \`cat .rl/log.jsonl\` — read the event log (phases, commits, decisions the agent made) +- \`.rl/rl log decision "your review notes"\` — log your own findings -${sections.join("\n\n")} +## How to Review -`; -} +1. Read the task with \`.rl/rl prompt\` +2. Read the event log to understand what the agent did and why +3. Review the actual code — read changed files, check both committed and uncommitted work +4. If the task includes verification commands or tests, run them +5. Judge: does this implementation satisfy the original request? -function callCodexReview( - originalPrompt: string, - reviewHistory: ReviewHistoryEntry[], - reviewCount: number, - maxReviews: number, - cwd: string -): ReviewResult { - crash(`callCodexReview() started - reviewCount=${reviewCount}, cwd=${cwd}`); +Review the code, not the process. The agent may not have committed everything — that's fine. What matters is whether the work is correct and complete relative to the task. - // Check if codex is available - const whichResult = spawnSync("which", ["codex"], { encoding: "utf-8" }); - if (whichResult.status !== 0) { - crash("Codex CLI not found, approving by default"); - debug("Codex CLI not found, approving by default"); - return { approved: true, issues: [], resolved: [], notes: null }; - } - crash(`Codex found at: ${whichResult.stdout?.trim()}`); +\`.rl/\` is loop infrastructure. Do not flag it. 
- // Build review history section - const historySection = buildReviewHistorySection(reviewHistory); +## Verdict - // Build review prompt with formal issue format - const reviewPrompt = `# Code Review +End your response with exactly one of: +- \`APPROVE\` — the work satisfies the task +- \`REJECT\` — something is broken or a requirement is unmet -Review work completed by Claude in an iterative loop. Claude claims the task is complete. - -## Assignment -${originalPrompt} - -## Git Context -**Working Directory**: \`pwd\` -**Repository**: \`basename "$(git rev-parse --show-toplevel 2>/dev/null || pwd)"\` -**Branch**: \`git branch --show-current 2>/dev/null || echo "detached/unknown"\` -**Uncommitted changes**: \`git diff --stat 2>/dev/null || echo "None"\` -**Staged changes**: \`git diff --cached --stat 2>/dev/null || echo "None"\` -**Recent commits (last 4 hours)**: \`git log --oneline -5 --since="4 hours ago" 2>/dev/null || echo "None"\` - -${historySection}## Review Process -1. Understand the task (read referenced files as needed) -2. Review git changes (\`git diff\`, \`git diff --cached\`, \`git log\`, etc.) -3. Run verification commands from success criteria if applicable -4. 
Check ALL requirements - be thorough, not superficial - -## Output Format - -If approved: -\`\`\` -APPROVE -Optional notes for the record -\`\`\` - -If issues found: -\`\`\` -REJECT - -[ISSUE-1] How you verified this previous issue is now fixed - - -[ISSUE-1] severity: Description of the issue -[ISSUE-2] severity: Description of another issue - -Optional notes visible to future review cycles -\`\`\` - -- Severity levels: \`critical\` (blocking), \`major\` (significant), \`minor\` (nice to fix) -- Issue IDs must be unique across all cycles - continue numbering from previous reviews (don't restart at ISSUE-1) -- \`\` section: List any previous issues you verified as fixed (omit if none or first review) -- \`\` section: Optional, visible to future review cycles -- Be thorough - report ALL issues found - -Review ${reviewCount + 1}/${maxReviews}.`; - - // Use unique file paths based on timestamp to avoid collisions - const uniqueId = Date.now(); - const outputFile = `/tmp/codex-review-output-${uniqueId}.txt`; +If rejecting, explain what's wrong and what needs to change. Be specific and actionable. + +Review ${reviewCount + 1}.`; - crash(`Calling Codex with output file: ${outputFile}`); - crash(`Review prompt length: ${reviewPrompt.length} chars`); + const uniqueId = Date.now(); + const rlDirPath = stateFilePath ? 
join(stateFilePath, "..") : "/tmp"; + const outputFile = join(rlDirPath, `codex-review-${uniqueId}.txt`); try { - // Build args dynamically from user config const codexConfig = userConfig.codex || DEFAULT_CONFIG.codex!; - const codexArgs: string[] = [ - "exec", - "-", // read prompt from stdin - ]; + const codexArgs: string[] = ["exec", "-"]; - // Sandbox/approval settings (bypass_sandbox overrides both) if (codexConfig.bypass_sandbox) { codexArgs.push("--dangerously-bypass-approvals-and-sandbox"); } else { codexArgs.push("--sandbox", codexConfig.sandbox || "read-only"); - // Use -c config override style (exec doesn't have -a flag) codexArgs.push("-c", `approval_policy="${codexConfig.approval_policy || "never"}"`); } - // Output file (extra_args could override, but parsing would break) codexArgs.push("-o", outputFile); - // Extra user-provided args (validated as string array, appended last) - // Note: These can override earlier flags if user intends to customize behavior if (Array.isArray(codexConfig.extra_args)) { for (const arg of codexConfig.extra_args) { - if (typeof arg === "string") { - codexArgs.push(arg); - } + if (typeof arg === "string") codexArgs.push(arg); } } - // Convert timeout from seconds to milliseconds const timeoutMs = (codexConfig.timeout_seconds || 1200) * 1000; + crash(`Codex args: ${JSON.stringify(codexArgs)}, timeout: ${timeoutMs}ms`); - crash(`Codex config: ${JSON.stringify(codexConfig)}`); - crash(`Codex args: ${JSON.stringify(codexArgs)}`); - crash(`Codex timeout: ${timeoutMs}ms (${codexConfig.timeout_seconds || 1200}s)`); - - // NOTE: This timeout must be less than plugin.json hook timeout (1800s) const result = spawnSync("codex", codexArgs, { - cwd, - encoding: "utf-8", - timeout: timeoutMs, - maxBuffer: 16 * 1024 * 1024, - input: reviewPrompt, // pass prompt via stdin + cwd, encoding: "utf-8", timeout: timeoutMs, maxBuffer: 16 * 1024 * 1024, input: reviewPrompt, }); - crash(`Codex returned - status: ${result.status}, signal: 
${result.signal}, error: ${result.error}`); - if (result.stderr) { - crash(`Codex stderr: ${result.stderr.slice(0, 500)}`); - } - debug(`[ralph-reviewed] Codex exit code: ${result.status}, stderr: ${result.stderr?.slice(0, 200)}`); + crash(`Codex returned - status: ${result.status}, signal: ${result.signal}`); + if (result.stderr) crash(`Codex stderr: ${result.stderr.slice(0, 500)}`); - // Read output from file and clean up - let output = ""; + let codexOutput = ""; if (existsSync(outputFile)) { - output = readFileSync(outputFile, "utf-8"); - crash(`Codex output file contents: ${output.slice(0, 500)}`); - debug(`[ralph-reviewed] Codex output: ${output.slice(0, 500)}`); - try { unlinkSync(outputFile); } catch { /* ignore cleanup errors */ } + codexOutput = readFileSync(outputFile, "utf-8"); + crash(`Codex output: ${codexOutput.slice(0, 500)}`); } else { crash("No Codex output file created"); - debug(`[ralph-reviewed] No output file created`); } - // Parse verdict from the END of output to avoid matching echoed examples - // Find the last ... tag in the output - const reviewMatches = [...output.matchAll(/\s*(APPROVE|REJECT)\s*<\/review>/gi)]; - const lastReviewMatch = reviewMatches.length > 0 ? reviewMatches[reviewMatches.length - 1] : null; - const verdict = lastReviewMatch ? lastReviewMatch[1].toUpperCase() : null; + const reviewMatches = [...codexOutput.matchAll(/\s*(APPROVE|REJECT)\s*<\/review>/gi)]; + const verdict = reviewMatches.length > 0 + ? reviewMatches[reviewMatches.length - 1][1].toUpperCase() + : null; - crash(`Verdict parsing: found ${reviewMatches.length} review tags, verdict=${verdict}`); - - // Parse notes (present in both APPROVE and REJECT) - also use last match - const notesMatches = [...output.matchAll(/([\s\S]*?)<\/notes>/gi)]; - const lastNotesMatch = notesMatches.length > 0 ? notesMatches[notesMatches.length - 1] : null; - const notes = lastNotesMatch ? 
lastNotesMatch[1].trim() : null; - - // Parse response based on extracted verdict - if (verdict === "APPROVE") { - crash("Codex approved"); - return { approved: true, issues: [], resolved: [], notes }; - } + crash(`Verdict: ${verdict}`); + if (verdict === "APPROVE") return { approved: true, feedback: codexOutput }; if (verdict === "REJECT") { - // Parse issues - use last block - const issues: ReviewIssue[] = []; - const issuesMatches = [...output.matchAll(/([\s\S]*?)<\/issues>/gi)]; - const lastIssuesMatch = issuesMatches.length > 0 ? issuesMatches[issuesMatches.length - 1] : null; - if (lastIssuesMatch) { - // Use [\s\S]+? for multi-line descriptions, terminated by next issue or end - const issuePattern = /\[ISSUE-(\d+)\]\s*(critical|major|minor):\s*([\s\S]+?)(?=\[ISSUE-|\s*$)/gi; - let match; - while ((match = issuePattern.exec(lastIssuesMatch[1])) !== null) { - issues.push({ - id: parseInt(match[1], 10), - severity: match[2].toLowerCase() as "critical" | "major" | "minor", - description: match[3].trim(), - }); - } - } - - // Parse resolved - use last block - const resolved: ResolvedIssue[] = []; - const resolvedMatches = [...output.matchAll(/([\s\S]*?)<\/resolved>/gi)]; - const lastResolvedMatch = resolvedMatches.length > 0 ? resolvedMatches[resolvedMatches.length - 1] : null; - if (lastResolvedMatch) { - // Use [\s\S]+? for multi-line verifications - const resolvedPattern = /\[ISSUE-(\d+)\]\s*([\s\S]+?)(?=\[ISSUE-|\s*$)/gi; - let match; - while ((match = resolvedPattern.exec(lastResolvedMatch[1])) !== null) { - resolved.push({ - id: parseInt(match[1], 10), - verification: match[2].trim(), - }); - } - } - - // Handle REJECT with no parsed issues - auto-approve with warning to avoid deadlock - if (issues.length === 0) { - crash("REJECT verdict but no issues parsed - auto-approving with warning to avoid deadlock"); - debug("[ralph-reviewed] WARNING: Codex rejected but no issues could be parsed. 
Auto-approving to avoid deadlock."); - return { approved: true, issues: [], resolved: [], notes: notes ? `[AUTO-APPROVED: REJECT with unparseable issues] ${notes}` : "[AUTO-APPROVED: REJECT with unparseable issues]" }; - } - - crash(`Codex rejected with ${issues.length} issues, ${resolved.length} resolved`); - return { approved: false, issues, resolved, notes }; + const lastTag = codexOutput.lastIndexOf(""); + const feedback = lastTag >= 0 ? codexOutput.slice(0, lastTag).trim() : codexOutput.trim(); + return { approved: false, feedback }; } - // Unclear response - default to approve - crash("Unclear Codex response (no APPROVE/REJECT verdict found), approving by default"); - debug("Unclear Codex response, approving by default"); - return { approved: true, issues: [], resolved: [], notes: null }; + crash("No APPROVE/REJECT found, approving by default"); + return { approved: true, feedback: codexOutput }; } catch (e) { - crash("Codex review call threw exception", e); - debug(`Codex review failed: ${e}, approving by default`); - return { approved: true, issues: [], resolved: [], notes: null }; + crash("Codex review failed", e); + return { approved: true, feedback: "" }; } } @@ -839,29 +439,12 @@ async function readStdin(): Promise { resolved = true; cleanup(); resolve(data); - } catch { - // keep reading - } + } catch { /* keep reading */ } }; - const onData = (chunk: string | Buffer) => { - data += chunk.toString(); - tryResolve(); - }; - - const onEnd = () => { - if (resolved) return; - resolved = true; - cleanup(); - resolve(data); - }; - - const onError = (err: Error) => { - if (resolved) return; - resolved = true; - cleanup(); - reject(err); - }; + const onData = (chunk: string | Buffer) => { data += chunk.toString(); tryResolve(); }; + const onEnd = () => { if (resolved) return; resolved = true; cleanup(); resolve(data); }; + const onError = (err: Error) => { if (resolved) return; resolved = true; cleanup(); reject(err); }; const timer = setTimeout(() => { if 
(resolved) return; @@ -905,128 +488,100 @@ async function main() { input = JSON.parse(trimmed); } catch (parseErr) { crash("Failed to parse input JSON", parseErr); - crash(`Raw input was: ${trimmed.slice(0, 500)}`); throw parseErr; } } - // Switch to session-specific crash log immediately setSessionId(input.session_id || "unknown"); const cwd = input.cwd || process.env.CLAUDE_PROJECT_DIR || process.cwd(); - crash(`Input parsed: session_id=${input.session_id}, cwd=${cwd}, event=${input.hook_event_name}`); + crash(`Input parsed: session_id=${input.session_id}, cwd=${cwd}`); - // Use git repo root for state file to handle directory changes within repo const gitRoot = getGitRoot(cwd); stateFilePath = getStateFilePath(cwd); - - // Set session-specific file paths debugLogPath = `${sessionLogDir}/debug.log`; - crash(`State file: ${stateFilePath}, Git root: ${gitRoot || "none"}, cwd: ${cwd}, logs: ${sessionLogDir}`); + crash(`State file: ${stateFilePath}, Git root: ${gitRoot || "none"}, cwd: ${cwd}`); - // Check for active loop + // Fast gate: no state file means no active loop if (!existsSync(stateFilePath)) { crash("No state file found, approving exit"); output({}); return; } - crash("State file exists, reading..."); - // Parse state - const stateContent = readFileSync(stateFilePath, "utf-8"); - const state = parseStateFile(stateContent); + // Read full state via rl CLI (single call, cached for the hook invocation) + const rlCwd = gitRoot || cwd; + const state = rlStatusJson(rlCwd); if (!state) { - // Corrupt state file - clean up and exit - crash("Failed to parse state file, cleaning up"); + crash("Failed to read state via rl, cleaning up"); cleanupStateFile(stateFilePath); output({}); return; } if (!state.active) { - // Loop was deactivated - clean up stale file and exit crash("Loop inactive, cleaning up stale state file"); cleanupStateFile(stateFilePath); output({}); return; } - // Enable debug if set in state if (state.debug) { debugEnabled = true; 
debug(`[ralph-reviewed] Debug enabled via state file`); } - // Get last assistant message - const transcriptPath = input.transcript_path || ""; - const lastMessage = transcriptPath ? getLastAssistantMessage(transcriptPath) : null; - - // Debug logging - debug(`[ralph-reviewed] Iteration: ${state.iteration}, Transcript: ${transcriptPath || "none"}`); - debug(`[ralph-reviewed] Last message (truncated): ${lastMessage?.slice(-200) || "null"}`); - - // Check for completion promise - const promisePattern = new RegExp( - `\\s*${state.completion_promise}\\s*`, - "i" - ); - const completionClaimed = lastMessage && promisePattern.test(lastMessage); - debug(`[ralph-reviewed] Promise pattern: ${promisePattern}, Claimed: ${completionClaimed}`); + const iteration = state.iteration as number; + const maxIterations = state.max_iterations as number; + const reviewEnabled = state.review_enabled as boolean; + const reviewCount = state.review_count as number; + const maxReviewCycles = state.max_review_cycles as number; + const completionClaimed = state.completion_claimed === true; + const blockedClaimed = state.blocked_claimed === true; - // Check for BLOCKED signal (special termination without review) - const blockedPattern = /\s*BLOCKED\s*<\/promise>/i; - const blockedClaimed = lastMessage && blockedPattern.test(lastMessage); - debug(`[ralph-reviewed] Blocked pattern check: ${blockedClaimed}`); + debug(`[ralph-reviewed] Iteration: ${iteration}, done: ${completionClaimed}, blocked: ${blockedClaimed}`); if (blockedClaimed) { - // BLOCKED is a special termination signal - exit without Codex review crash("BLOCKED claimed - terminating loop without review"); - debug(`[ralph-reviewed] BLOCKED signal received. Terminating loop without review.`); cleanupStateFile(stateFilePath); output({ - systemMessage: `# Ralph Loop: BLOCKED - -**Iteration:** ${state.iteration}/${state.max_iterations} - -Task reported as blocked. 
Loop terminated without review.` + systemMessage: `# Ralph Loop: BLOCKED\n\n**Iteration:** ${iteration}\n\nTask reported as blocked. Loop terminated without review.` }); return; } if (!completionClaimed) { // Normal iteration - no completion claimed - state.iteration++; + const nextIteration = iteration + 1; - // Check max iterations - if (state.iteration >= state.max_iterations) { - // Max iterations reached - allow exit - debug(`[ralph-reviewed] Max iterations (${state.max_iterations}) reached, exiting loop`); + if (nextIteration >= maxIterations) { + debug(`[ralph-reviewed] Max iterations (${maxIterations}) reached, exiting loop`); cleanupStateFile(stateFilePath); - output({ - systemMessage: `# Ralph Loop: Max Iterations Reached - -**Iteration:** ${state.iteration}/${state.max_iterations} - -Loop ended without completion claim. Review the work and consider restarting if needed.` + output({ + systemMessage: `# Ralph Loop: Max Iterations Reached\n\n**Iteration:** ${nextIteration}\n\nLoop ended without completion claim. 
Review the work and consider restarting if needed.` }); return; } - // Update state file - writeFileSync(stateFilePath, serializeState(state)); + // Update iteration via rl + callRl(["state", "set", "iteration", String(nextIteration)], rlCwd); + + // Clear any stale completion/blocked flags + callRl(["state", "set", "completion_claimed", "false"], rlCwd); + callRl(["state", "set", "blocked_claimed", "false"], rlCwd); // Build continuation prompt - let prompt = `# Ralph Loop - Iteration ${state.iteration}/${state.max_iterations}\n\n`; + const originalPrompt = rlPrompt(rlCwd) || "(no prompt found)"; + let prompt = `# Ralph Loop \u2014 Iteration ${nextIteration}\n\n`; - if (state.pending_feedback) { - prompt += `## Review Feedback from Previous Attempt\n\n${state.pending_feedback}\n\nAddress the above feedback.\n\n---\n\n`; - // Clear pending feedback after injecting - state.pending_feedback = null; - writeFileSync(stateFilePath, serializeState(state)); + const rlDir = join(rlCwd, ".rl"); + const pendingFeedback = getLastRejectFeedback(rlDir); + if (pendingFeedback) { + prompt += `## Review Feedback from Previous Attempt\n\n${pendingFeedback}\n\nAddress the above feedback.\n\n---\n\n`; } - prompt += state.original_prompt; - prompt += `\n\nWhen complete, output: ${state.completion_promise}`; + prompt += originalPrompt; + prompt += `\n\nWhen complete, run: .rl/rl done`; output({ decision: "block", reason: prompt }); return; @@ -1035,150 +590,78 @@ Loop ended without completion claim. Review the work and consider restarting if // Completion claimed - enter review gate debug(`[ralph-reviewed] Completion claimed! 
Entering review gate...`); - if (!state.review_enabled) { - // Reviews disabled - allow exit + if (!reviewEnabled) { debug(`[ralph-reviewed] Reviews disabled, approving exit`); cleanupStateFile(stateFilePath); output({}); return; } - // Require git repository - Codex needs a trusted directory if (!gitRoot) { crash("Not in a git repository - BLOCKING (Codex requires git repo)"); output({ decision: "block", - reason: `# Review Gate Error: Not a Git Repository - -Codex requires a git repository to run. The current directory is not inside a git repo. - -**Current directory:** \`${cwd}\` - -**To fix:** Initialize a git repository with \`git init\`, or move the project into an existing git repo. - -**To escape this loop:** Run \`/ralph-reviewed:cancel-ralph\` to remove the loop, then exit normally.` + reason: `# Review Gate Error: Not a Git Repository\n\nCodex requires a git repository to run.\n\n**Current directory:** \`${cwd}\`\n\n**To fix:** Initialize a git repository with \`git init\`.\n\n**To escape this loop:** Run \`/ralph-reviewed:cancel-ralph\` to remove the loop, then exit normally.` }); return; } - // Perform Codex review debug(`[ralph-reviewed] Calling Codex for review...`); - const reviewResult = callCodexReview( - state.original_prompt, - state.review_history, - state.review_count, - state.max_review_cycles, - cwd - ); - - debug(`[ralph-reviewed] Review result: approved=${reviewResult.approved}, issues=${reviewResult.issues.length}`); - - // Record this review in history - const historyEntry: ReviewHistoryEntry = { - cycle: state.review_count + 1, - decision: reviewResult.approved ? 
"APPROVE" : "REJECT", - issues: reviewResult.issues, - resolved: reviewResult.resolved, - notes: reviewResult.notes, - }; - state.review_history.push(historyEntry); + const reviewResult = callCodexReview(reviewCount, gitRoot); + + debug(`[ralph-reviewed] Review result: approved=${reviewResult.approved}`); + + // Log review via rl + callRl(["log", "review", "--decision", reviewResult.approved ? "approve" : "reject", "--feedback", reviewResult.feedback], rlCwd); if (reviewResult.approved) { - // Approved - allow exit debug(`[ralph-reviewed] Codex approved! Exiting loop.`); cleanupStateFile(stateFilePath); - - // Build approval summary for user visibility - const notesLine = reviewResult.notes ? `\n**Reviewer notes:** ${reviewResult.notes}` : ""; - const approvalMessage = `# Ralph Loop: Codex APPROVED - -**Iteration:** ${state.iteration}/${state.max_iterations} -**Review cycle:** ${state.review_count + 1}/${state.max_review_cycles}${notesLine} - -The review gate has been cleared. Task completed successfully.`; - - output({ systemMessage: approvalMessage }); + output({ + systemMessage: `# Ralph Loop: Codex APPROVED\n\n**Iteration:** ${iteration} | **Review cycle:** ${reviewCount + 1}\n\nReview gate cleared.` + }); return; } - // Rejected - check review count - state.review_count++; + // Rejected + const newReviewCount = reviewCount + 1; - if (state.review_count >= state.max_review_cycles) { - // Max reviews reached - allow exit with warning - debug( - `[ralph-reviewed] Max review cycles (${state.max_review_cycles}) reached. Issues: ${reviewResult.issues.length}` - ); + if (newReviewCount >= maxReviewCycles) { + debug(`[ralph-reviewed] Max review cycles (${maxReviewCycles}) reached.`); cleanupStateFile(stateFilePath); - - // Build summary with remaining issues - const remainingIssues = reviewResult.issues.length > 0 - ? 
reviewResult.issues.map(i => `- [ISSUE-${i.id}] ${i.severity}: ${i.description}`).join("\n") - : "(no issues parsed)"; - output({ - systemMessage: `# Ralph Loop: Max Review Cycles Reached - -**Iteration:** ${state.iteration}/${state.max_iterations} -**Review cycle:** ${state.review_count}/${state.max_review_cycles} - -**Unresolved issues:** -${remainingIssues} - -Loop ended without Codex approval. Review remaining issues manually.` + systemMessage: `# Ralph Loop: Max Review Cycles Reached\n\n**Iteration:** ${iteration} | **Review cycle:** ${newReviewCount}\n\nLoop ended without approval. Review feedback manually.` }); return; } - // Format issues for Claude's feedback - const issuesList = reviewResult.issues - .map((issue) => `- [ISSUE-${issue.id}] ${issue.severity}: ${issue.description}`) - .join("\n"); + const nextIteration = iteration + 1; + callRl(["state", "set", "iteration", String(nextIteration)], rlCwd); + callRl(["state", "set", "review_count", String(newReviewCount)], rlCwd); + callRl(["state", "set", "completion_claimed", "false"], rlCwd); - const resolvedList = reviewResult.resolved.length > 0 - ? `\n\n**Resolved from previous cycle:**\n${reviewResult.resolved.map((r) => `- [ISSUE-${r.id}] ✓ ${r.verification}`).join("\n")}` - : ""; + const reviewPromptText = rlPrompt(rlCwd) || "(no prompt found)"; + const feedbackPrompt = `# Ralph Loop \u2014 Iteration ${nextIteration} - const notesSection = reviewResult.notes - ? 
`\n\n**Reviewer notes:** ${reviewResult.notes}` - : ""; - - // Store formatted feedback for state - state.pending_feedback = issuesList; - state.iteration++; // Increment iteration for the feedback round - writeFileSync(stateFilePath, serializeState(state)); - - // Build prompt with structured feedback - const feedbackPrompt = `# Ralph Loop - Iteration ${state.iteration}/${state.max_iterations} - -## Review Feedback (Cycle ${state.review_count}/${state.max_review_cycles}) +## Review Feedback (Cycle ${newReviewCount}) Your previous completion was reviewed and requires changes. -${resolvedList} -**Open Issues:** -${issuesList} -${notesSection} +${reviewResult.feedback} -Address ALL open issues above, then output ${state.completion_promise} when truly complete. +Address ALL open issues above, then run \`.rl/rl done\` when truly complete. --- -${state.original_prompt}`; +${reviewPromptText}`; output({ decision: "block", reason: feedbackPrompt }); } catch (e) { crash("main() caught exception", e); - debug(`Stop hook error: ${e}`); - // Clean up state file to avoid re-triggering loop if (stateFilePath) { - try { - unlinkSync(stateFilePath); - crash(`Cleaned up state file on main() exception: ${stateFilePath}`); - } catch { /* ignore cleanup errors */ } + try { unlinkSync(stateFilePath); crash(`Cleaned up state file on main() exception: ${stateFilePath}`); } catch { /* ignore */ } } - // On error, allow exit to avoid trapping user output({}); } crash("main() exiting normally"); @@ -1187,12 +670,8 @@ ${state.original_prompt}`; crash("About to call main()"); main().catch((e) => { crash("main() promise rejected", e); - // Clean up state file to avoid re-triggering loop if (stateFilePath) { - try { - unlinkSync(stateFilePath); - crash(`Cleaned up state file on main() rejection: ${stateFilePath}`); - } catch { /* ignore cleanup errors */ } + try { unlinkSync(stateFilePath); crash(`Cleaned up state file on main() rejection: ${stateFilePath}`); } catch { /* ignore */ } } 
console.log(JSON.stringify({})); process.exit(1); diff --git a/plugins/ralph-reviewed/qa.sh b/plugins/ralph-reviewed/qa.sh new file mode 100755 index 0000000..898eb7a --- /dev/null +++ b/plugins/ralph-reviewed/qa.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash +# Reset test environment and run QA suite for ralph-reviewed plugin +set -euo pipefail + +PLUGIN_DIR="$(cd "$(dirname "$0")" && pwd)" +TEST_DIR="/tmp/ralph-test" + +# Reset test repo +rm -rf "$TEST_DIR" +mkdir -p "$TEST_DIR" +cd "$TEST_DIR" +git init -q +echo '# test' > README.md +git add -A && git commit -q -m "init" + +cat > math.ts <<'EOF' +// TODO: implement add, subtract, multiply, divide +export function add(a: number, b: number): number { + return 0; // broken +} + +export function divide(a: number, b: number): number { + return a / b; // no zero check +} +EOF +git add -A && git commit -q -m "add broken math module" + +echo "test repo ready at $TEST_DIR" + +# Start claude with QA prompt +exec claude --plugin-dir "$PLUGIN_DIR" "Read $PLUGIN_DIR/QA.md and execute the full QA suite. Report any bugs, edge cases, or unexpected behavior." diff --git a/scripts/check-plugin-versions.sh b/scripts/check-plugin-versions.sh new file mode 100755 index 0000000..16d6095 --- /dev/null +++ b/scripts/check-plugin-versions.sh @@ -0,0 +1,86 @@ +#!/usr/bin/env bash +# check-plugin-versions.sh — Verify plugin.json and marketplace.json versions are in sync. +# Optionally accepts a file list (from lefthook) to warn about missing version bumps. +# +# Usage: +# check-plugin-versions.sh [changed-file ...] +# +# Exit codes: +# 0 All versions consistent +# 1 Version mismatch or missing version bump detected + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +CLAUDE_CODE_DIR="$(cd "$SCRIPT_DIR/.." && pwd)" +MARKETPLACE="$CLAUDE_CODE_DIR/.claude-plugin/marketplace.json" + +if [[ ! 
-f "$MARKETPLACE" ]]; then + echo "error: marketplace.json not found at $MARKETPLACE" >&2 + exit 1 +fi + +errors=0 + +# --- Check 1: Version consistency between marketplace.json and each plugin.json --- + +plugin_count=$(jq '.plugins | length' "$MARKETPLACE") + +for ((i = 0; i < plugin_count; i++)); do + name=$(jq -r ".plugins[$i].name" "$MARKETPLACE") + source=$(jq -r ".plugins[$i].source" "$MARKETPLACE") + marketplace_version=$(jq -r ".plugins[$i].version" "$MARKETPLACE") + + plugin_json="$CLAUDE_CODE_DIR/$source/.claude-plugin/plugin.json" + + if [[ ! -f "$plugin_json" ]]; then + echo "error: plugin.json not found for '$name' at $plugin_json" >&2 + errors=$((errors + 1)) + continue + fi + + plugin_version=$(jq -r '.version' "$plugin_json") + + if [[ "$marketplace_version" != "$plugin_version" ]]; then + echo "error: version mismatch for '$name'" >&2 + echo " marketplace.json: $marketplace_version" >&2 + echo " plugin.json: $plugin_version" >&2 + errors=$((errors + 1)) + fi +done + +# --- Check 2: Changed plugin source files without a plugin.json bump --- + +if [[ $# -gt 0 ]]; then + for ((i = 0; i < plugin_count; i++)); do + name=$(jq -r ".plugins[$i].name" "$MARKETPLACE") + source=$(jq -r ".plugins[$i].source" "$MARKETPLACE") + source_prefix="${source#./}" + + has_source_change=false + has_plugin_json_change=false + + for file in "$@"; do + if [[ "$file" == *"$source_prefix"* ]]; then + has_source_change=true + if [[ "$file" == *"$source_prefix/.claude-plugin/plugin.json" ]]; then + has_plugin_json_change=true + fi + fi + done + + if $has_source_change && ! $has_plugin_json_change; then + echo "warning: plugin '$name' has source changes but plugin.json version was not bumped" >&2 + errors=$((errors + 1)) + fi + done +fi + +if [[ $errors -gt 0 ]]; then + echo "" >&2 + echo "Plugin version check failed. Update versions in both:" >&2 + echo " 1. claude-code/plugins//.claude-plugin/plugin.json" >&2 + echo " 2. 
claude-code/.claude-plugin/marketplace.json" >&2 + echo "Then run: claude plugin marketplace update 0xbigboss-plugins" >&2 + exit 1 +fi diff --git a/settings/settings.json b/settings/settings.json index 671f50b..7819939 100644 --- a/settings/settings.json +++ b/settings/settings.json @@ -6,7 +6,8 @@ }, "permissions": { "allow": [ - "Edit(~/.claude/handoffs/**)", + "Edit(~/.handoffs/**)", + "Write(~/.handoffs/**)", "Skill(typescript-best-practices)", "Skill(react-best-practices)", @@ -90,10 +91,33 @@ "Bash(wc:*)" ], "additionalDirectories": [ - "~/.claude/handoffs" + "~/.handoffs" + ] + }, + "hooks": { + "Stop": [ + { + "hooks": [ + { + "type": "command", + "command": "sid=$(jq -r '.session_id // empty' 2>/dev/null); [ -n \"$sid\" ] && date '+%H:%M' > ~/.claude/.idle-since-\"$sid\"; find ~/.claude -maxdepth 1 -name '.idle-since-*' -mtime +7 -delete 2>/dev/null; echo '{}'", + "timeout": 5 + } + ] + } + ], + "UserPromptSubmit": [ + { + "hooks": [ + { + "type": "command", + "command": "sid=$(jq -r '.session_id // empty' 2>/dev/null); [ -n \"$sid\" ] && rm -f ~/.claude/.idle-since-\"$sid\"; echo '{}'", + "timeout": 5 + } + ] + } ] }, - "hooks": {}, "statusLine": { "type": "command", "command": "statusline" @@ -113,6 +137,9 @@ "silo@0xBigBoss-silo": true, "send-infra@send-infra-plugins": true }, + "env": { + "BASH_ENV": "/Users/allen/code/dotfiles/claude-code/hooks/direnv-bash-env" + }, "alwaysThinkingEnabled": true, "feedbackSurveyState": { "lastShownTime": 1754076652911 diff --git a/statusline/CLAUDE.md b/statusline/CLAUDE.md index b04241d..e36816e 100644 --- a/statusline/CLAUDE.md +++ b/statusline/CLAUDE.md @@ -30,7 +30,7 @@ zig build -Doptimize=ReleaseFast - Git metadata is read from the current workspace directory. 
- Review gate state files are read from: - - `{git_root}/.claude/ralph-loop.local.md` + - `{git_root}/.rl/state.json` - `{git_root}/.claude/codex-review.local.md` ## Guardrails diff --git a/statusline/src/main.zig b/statusline/src/main.zig index fac8496..0280d92 100644 --- a/statusline/src/main.zig +++ b/statusline/src/main.zig @@ -534,6 +534,31 @@ fn formatLinesChanged(input: StatuslineInput, writer: anytype) !bool { return true; } +/// Read idle-since file for this session and write the indicator directly. +/// Reads and formats in one call to avoid returning a dangling stack slice. +/// Returns true if indicator was written, false if not idle or file missing. +fn formatIdleSince(writer: anytype, session_id: ?[]const u8) !bool { + const sid = session_id orelse return false; + if (sid.len == 0) return false; + const home = std.posix.getenv("HOME") orelse return false; + var path_buf: [512]u8 = undefined; + const path = std.fmt.bufPrint(&path_buf, "{s}/.claude/.idle-since-{s}", .{ home, sid }) catch return false; + + const file = std.fs.cwd().openFile(path, .{}) catch return false; + defer file.close(); + + // File contains a short time string like "14:45\n" + var buf: [32]u8 = undefined; + const bytes_read = file.read(&buf) catch return false; + if (bytes_read == 0) return false; + + const trimmed = std.mem.trim(u8, buf[0..bytes_read], " \t\n\r"); + if (trimmed.len == 0) return false; + + try writer.print(" 💤{s}{s}{s}", .{ colors.light_gray, trimmed, colors.reset }); + return true; +} + /// Get the last segment of a path (e.g., "/foo/bar/baz" -> "baz") fn getLastPathSegment(path: []const u8) []const u8 { if (path.len == 0) return path; @@ -774,56 +799,48 @@ fn parseYamlInt(line: []const u8, key: []const u8) ?u32 { return std.fmt.parseInt(u32, value, 10) catch null; } -/// Parse Ralph state from file content string (YAML frontmatter) +/// Parse Ralph state from JSON content string /// Exposed for testing; returns default RalphState if parsing fails -/// Note: Only 
reads fields at the top of frontmatter; large fields like -/// review_history are ignored, so we don't need the full file content. fn parseRalphStateFromContent(content: []const u8) RalphState { var state = RalphState{}; - // Must start with --- - if (!std.mem.startsWith(u8, content, "---")) return state; - const after_first = content[3..]; - // Skip newline after first --- - const start_idx: usize = if (after_first.len > 0 and after_first[0] == '\n') 1 else 0; + // Use std.json to parse the JSON state file + const JsonState = struct { + active: ?bool = null, + iteration: ?u32 = null, + max_iterations: ?u32 = null, + review_enabled: ?bool = null, + review_count: ?u32 = null, + max_review_cycles: ?u32 = null, + }; - // Find closing --- if present, otherwise parse what we have - // (state files can be large due to review_history, but our fields are at the top) - const frontmatter = if (std.mem.indexOf(u8, after_first[start_idx..], "\n---")) |end_idx| - after_first[start_idx..][0..end_idx] - else - after_first[start_idx..]; + const parsed = std.json.parseFromSlice(JsonState, std.heap.page_allocator, content, .{ + .ignore_unknown_fields = true, + }) catch return state; + defer parsed.deinit(); - // Parse lines until we hit closing delimiter or exhaust content - var lines = std.mem.splitScalar(u8, frontmatter, '\n'); - while (lines.next()) |line| { - const trimmed = std.mem.trim(u8, line, " \t\r"); - // Stop if we hit the closing delimiter - if (std.mem.eql(u8, trimmed, "---")) break; - if (parseYamlBool(trimmed, "active:")) |v| state.active = v; - if (parseYamlInt(trimmed, "iteration:")) |v| state.iteration = v; - if (parseYamlInt(trimmed, "max_iterations:")) |v| state.max_iterations = v; - if (parseYamlBool(trimmed, "review_enabled:")) |v| state.review_enabled = v; - if (parseYamlInt(trimmed, "review_count:")) |v| state.review_count = v; - if (parseYamlInt(trimmed, "max_review_cycles:")) |v| state.max_review_cycles = v; - } + const v = parsed.value; + if (v.active) 
|a| state.active = a; + if (v.iteration) |i| state.iteration = i; + if (v.max_iterations) |m| state.max_iterations = m; + if (v.review_enabled) |r| state.review_enabled = r; + if (v.review_count) |r| state.review_count = r; + if (v.max_review_cycles) |m| state.max_review_cycles = m; return state; } /// Parse Ralph loop state from state file at git root fn parseRalphState(allocator: Allocator, git_root: []const u8) RalphState { - // Construct path: {git_root}/.claude/ralph-loop.local.md - const path = std.fmt.allocPrint(allocator, "{s}/.claude/ralph-loop.local.md", .{git_root}) catch return RalphState{}; + // Construct path: {git_root}/.rl/state.json + const path = std.fmt.allocPrint(allocator, "{s}/.rl/state.json", .{git_root}) catch return RalphState{}; defer allocator.free(path); - // Read only first 2KB - our fields (active, iteration, etc.) are at the top - // review_history can grow to 8KB+ but comes after our fields - // Using fixed buffer avoids allocation and handles any file size + // Read first 4KB - JSON state file should be well under this const file = std.fs.cwd().openFile(path, .{}) catch return RalphState{}; defer file.close(); - var buf: [2048]u8 = undefined; + var buf: [4096]u8 = undefined; const bytes_read = file.read(&buf) catch return RalphState{}; if (bytes_read == 0) return RalphState{}; @@ -1018,6 +1035,13 @@ pub fn main() !void { } } + // zmx session indicator + if (std.posix.getenv("ZMX_SESSION")) |zmx_session| { + if (zmx_session.len > 0) { + try writer.print(" {s}zmx:{s}{s}", .{ colors.gray, zmx_session, colors.reset }); + } + } + // Add model display with gauge if (input.model) |model| { if (model.display_name) |name| { @@ -1079,6 +1103,9 @@ pub fn main() !void { } } + // Idle-since indicator (visible only when agent is waiting for input) + _ = try formatIdleSince(writer, input.session_id); + // Output the complete statusline at once const output = output_stream.getWritten(); @@ -1752,17 +1779,9 @@ test "parseYamlInt function" { try 
std.testing.expect(parseYamlInt("other: 50", "iteration:") == null); } -test "parseRalphStateFromContent with valid frontmatter" { +test "parseRalphStateFromContent with valid JSON" { const content = - \\--- - \\active: true - \\iteration: 5 - \\max_iterations: 30 - \\review_enabled: true - \\review_count: 2 - \\max_review_cycles: 10 - \\--- - \\# Some markdown content + \\{"active":true,"iteration":5,"max_iterations":30,"review_enabled":true,"review_count":2,"max_review_cycles":10} ; const state = parseRalphStateFromContent(content); @@ -1776,10 +1795,7 @@ test "parseRalphStateFromContent with valid frontmatter" { test "parseRalphStateFromContent with partial fields" { const content = - \\--- - \\active: true - \\iteration: 3 - \\--- + \\{"active":true,"iteration":3} ; const state = parseRalphStateFromContent(content); @@ -1792,8 +1808,8 @@ test "parseRalphStateFromContent with partial fields" { try std.testing.expectEqual(@as(u32, 10), state.max_review_cycles); } -test "parseRalphStateFromContent with no frontmatter" { - const content = "# Just markdown, no frontmatter"; +test "parseRalphStateFromContent with invalid JSON" { + const content = "# Just markdown, not JSON"; const state = parseRalphStateFromContent(content); // Should return defaults @@ -1801,22 +1817,6 @@ test "parseRalphStateFromContent with no frontmatter" { try std.testing.expectEqual(@as(u32, 0), state.iteration); } -test "parseRalphStateFromContent with unclosed frontmatter" { - // Now we parse what we have even without closing delimiter - // (supports truncated reads of large state files) - const content = - \\--- - \\active: true - \\iteration: 5 - \\# Missing closing delimiter - ; - - const state = parseRalphStateFromContent(content); - // Should parse available fields even without closing --- - try std.testing.expect(state.active); - try std.testing.expectEqual(@as(u32, 5), state.iteration); -} - test "parseRalphStateFromContent with empty content" { const state = 
parseRalphStateFromContent(""); try std.testing.expect(!state.active); @@ -1824,13 +1824,7 @@ test "parseRalphStateFromContent with empty content" { test "parseRalphStateFromContent with extra fields ignored" { const content = - \\--- - \\active: true - \\iteration: 7 - \\unknown_field: some_value - \\completion_promise: "COMPLETE" - \\timestamp: "2025-01-01T00:00:00Z" - \\--- + \\{"active":true,"iteration":7,"unknown_field":"some_value","completion_promise":"COMPLETE","timestamp":"2025-01-01T00:00:00Z"} ; const state = parseRalphStateFromContent(content); @@ -1954,3 +1948,28 @@ test "parseCodexReviewStateFromContent with empty content" { const state = parseCodexReviewStateFromContent(""); try std.testing.expect(!state.active); } + +test "formatIdleSince returns false without session_id" { + var buf: [128]u8 = undefined; + var stream = std.io.fixedBufferStream(&buf); + const writer = stream.writer(); + + const result_null = try formatIdleSince(writer, null); + try std.testing.expect(!result_null); + try std.testing.expectEqual(@as(usize, 0), stream.getWritten().len); + + const result_empty = try formatIdleSince(writer, ""); + try std.testing.expect(!result_empty); + try std.testing.expectEqual(@as(usize, 0), stream.getWritten().len); +} + +test "formatIdleSince returns false for missing file" { + var buf: [128]u8 = undefined; + var stream = std.io.fixedBufferStream(&buf); + const writer = stream.writer(); + + // Nonexistent session ID -> file won't exist -> returns false + const result = try formatIdleSince(writer, "nonexistent-session-id-12345"); + try std.testing.expect(!result); + try std.testing.expectEqual(@as(usize, 0), stream.getWritten().len); +}