diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml new file mode 100644 index 0000000..aeeff1a --- /dev/null +++ b/.github/workflows/coverage.yml @@ -0,0 +1,79 @@ +name: πŸ“ˆ Coverage gate + +# Batch P1-B β€” enforces the >= 80 % coverage threshold declared in +# pyproject.toml [tool.coverage.report]. Runs on every PR + push to main. + +on: + push: + branches: [main] + pull_request: + workflow_dispatch: + +permissions: + contents: read + pull-requests: write + +concurrency: + group: coverage-${{ github.ref }} + cancel-in-progress: true + +jobs: + coverage: + name: πŸ§ͺ Pytest + coverage + runs-on: ubuntu-latest + timeout-minutes: 8 + + steps: + - name: πŸ“₯ Checkout code + uses: actions/checkout@v4 + + - name: 🐍 Install uv + uses: astral-sh/setup-uv@v3 + with: + enable-cache: true + cache-dependency-glob: | + pyproject.toml + uv.lock + + - name: 🐍 Set up Python 3.11 + run: uv python install 3.11 + + - name: πŸ“¦ Install project + dev extras + run: | + uv venv + uv pip install -e ".[dev]" + + - name: ⚑ Warm bytecode cache + run: uv run python -m compileall -q gitpilot tests || true + + - name: πŸ”Ž Strict type-check (Batch P1-C gated modules) + run: uv run mypy --config-file mypy.ini + + - name: πŸ§ͺ Run tests with coverage + env: + GITPILOT_LITE_MODE: "0" + PYTHONWARNINGS: "ignore::RuntimeWarning" + run: | + TMP_CFG="$(mktemp -d)" + GITPILOT_CONFIG_DIR="$TMP_CFG" uv run pytest \ + --cov \ + --cov-report=term-missing \ + --cov-report=xml \ + --cov-report=html + rm -rf "$TMP_CFG" + + - name: πŸ“€ Upload coverage HTML artefact + if: always() + uses: actions/upload-artifact@v4 + with: + name: coverage-html + path: htmlcov/ + retention-days: 7 + + - name: πŸ“€ Upload coverage XML artefact + if: always() + uses: actions/upload-artifact@v4 + with: + name: coverage-xml + path: coverage.xml + retention-days: 7 diff --git a/.github/workflows/supply-chain.yml b/.github/workflows/supply-chain.yml new file mode 100644 index 0000000..fad1e0a --- 
/dev/null +++ b/.github/workflows/supply-chain.yml @@ -0,0 +1,92 @@ +name: 🔐 Supply chain — SBOM + Sigstore + +# Batch P4-E. Additive workflow that produces a CycloneDX SBOM and +# Sigstore signatures for every Python distribution published in a +# GitHub Release. The existing ``release.yml`` workflow is untouched; +# this one runs in parallel after a release is created, then uploads +# the SBOM + signature blobs back to the same release. +# +# Dry-run support: pressing "Run workflow" with ``dry_run: true`` (the +# default) exercises the build and SBOM steps but skips Sigstore signing. +# Nothing is attached to a release; outputs are kept as a workflow artefact. + +on: + release: + types: [published] + workflow_dispatch: + inputs: + dry_run: + description: "Run the pipeline without uploading any artefact" + required: false + default: "true" + +permissions: + contents: write # upload SBOM + sig to the GitHub release + id-token: write # OIDC for Sigstore keyless signing + +concurrency: + group: supply-chain-${{ github.ref }} + cancel-in-progress: true + +jobs: + attestations: + name: 🧾 SBOM + 🔐 Sigstore + runs-on: ubuntu-latest + timeout-minutes: 10 + + steps: + - name: 📥 Checkout + uses: actions/checkout@v4 + + - name: 🐍 Install uv + uses: astral-sh/setup-uv@v3 + with: + enable-cache: true + + - name: 🐍 Set up Python 3.11 + run: uv python install 3.11 + + - name: 📦 Install project + dev extras + run: | + uv venv + uv pip install -e ".[dev]" + + - name: 🛠️ Build wheel + sdist + run: uv run python -m build --outdir dist/ + + - name: 🧾 Generate CycloneDX SBOM + run: | + mkdir -p artefacts + uv run python scripts/sbom_fallback.py > artefacts/sbom.json + uv run python - <<'PY' + import json + d = json.load(open("artefacts/sbom.json")) + assert d["bomFormat"] == "CycloneDX", d + print(f"SBOM ok: {len(d['components'])} components") + PY + + - name: 🔐 Sign distributions with Sigstore (keyless OIDC) + if: ${{ github.event_name == 'release' || github.event.inputs.dry_run == 'false' }} + uses: 
sigstore/gh-action-sigstore-python@v3.0.0 + with: + inputs: ./dist/*.whl ./dist/*.tar.gz + + - name: πŸ“€ Upload SBOM + signatures to the release + if: ${{ github.event_name == 'release' }} + uses: softprops/action-gh-release@v2 + with: + fail_on_unmatched_files: true + files: | + artefacts/sbom.json + dist/*.sigstore.json + dist/*.sigstore + + - name: πŸ“€ Upload artefacts (dry-run / debugging) + if: ${{ github.event_name != 'release' }} + uses: actions/upload-artifact@v4 + with: + name: supply-chain + path: | + artefacts/sbom.json + dist/* + retention-days: 7 diff --git a/.gitignore b/.gitignore index 60439c8..fde110f 100644 --- a/.gitignore +++ b/.gitignore @@ -105,6 +105,7 @@ Desktop.ini # ----------------- # uv will create .venv at project root by default (covered above) .uv/ +.uv-cache/ uv.lock.old # Local environment files diff --git a/CHANGELOG.md b/CHANGELOG.md index 555ede8..8e5e464 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,54 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Changed β€” `make run` now starts the MCP Context Forge stack by default + +**Heads-up for upgraders.** Until this release, `make run` started only the +GitPilot backend and frontend; the MCP stack was opt-in via `make run-mcp` +or `make run-all`. As of this release the happy path is: + +```bash +make install # uv + npm + MCP image cache +make run # MCP Context Forge + GitPilot backend + frontend +``` + +`make run` now: + +* depends on `run-mcp`, which itself depends on `install-mcp`; +* fails loudly when Docker / Docker Compose v2 / the daemon are missing + (with a clear hint pointing at `make run-bare`); +* polls `http://localhost:${MCP_FORGE_PORT:-4444}/health` after + `docker compose up -d`, so it only continues once the gateway is + actually reachable by the GitPilot backend and UI. + +**No-Docker escape hatch** β€” added `make run-bare`, which starts only the +GitPilot backend + frontend. 
The MCP Servers tab will show the gateway +as Unreachable, but the rest of the app is fully functional. Use this +on Hugging Face Spaces, CI smoke runs, and any minimal host. + +`make run-all` is preserved as the "force-restart the backend" path +(now equivalent to `stop-soft && run`). External tooling that called +it keeps working. + +### Other build / docs updates + +* `make install` is now opinionated as **runtime-only**: dev/test/build + tooling moves to `make install-dev`; docs tooling to + `make uv-install-docs`; a `make install-full` superset is available. + Existing CI that calls `make test` keeps working — the target now + uses `uv run --extra dev pytest` internally. +* Re-running `make install-mcp` is now incremental: existing clones skip + network fetch unless `MCP_UPDATE=1`; existing images skip rebuild + unless `MCP_BUILD=1`. +* Render deploy doc updated: build command is now + `pip install uv && uv sync --no-dev` (was `uv sync --all-extras`), + start command is `uv run --no-dev gitpilot serve ...`. Hosted users + that relied on dev tooling at runtime should keep the old commands or + switch to `--extra dev`. +* WSL-friendly `uv` defaults — `UV_LINK_MODE=copy` and + `UV_CACHE_DIR=.uv-cache` to avoid hardlink fallback warnings on + `/mnt/c` checkouts. + ### Added — MCP Context Forge integration (additive, opt-in) - **`gitpilot/mcp_plugin/`** — Context Forge plugin (forge_client, diff --git a/Makefile b/Makefile index 0ac3e4f..0b1e309 100644 --- a/Makefile +++ b/Makefile @@ -6,12 +6,21 @@ UV ?= uv PYTHON ?= python3.11 PORT ?= 8000 +# Keep uv's cache beside the project so WSL /mnt/c checkouts do not copy +# wheels from Linux home-dir cache across filesystems on every install. +UV_CACHE_DIR ?= .uv-cache +# WSL /mnt/c and some Docker/VM filesystems do not support uv hardlinks, +# causing the noisy "Failed to hardlink files" fallback warning. 
Use copy +# mode by default; override with `make install UV_LINK_MODE=hardlink` on +# native Linux/macOS filesystems if you want hardlinks. +UV_LINK_MODE ?= copy +UV_ENV := UV_CACHE_DIR=$(UV_CACHE_DIR) UV_LINK_MODE=$(UV_LINK_MODE) # Docker Compose command (prefer v2 over v1) DOCKER_COMPOSE := $(shell if command -v docker > /dev/null && docker compose version > /dev/null 2>&1; then echo "docker compose"; elif command -v docker-compose > /dev/null; then echo "docker-compose"; else echo "docker compose"; fi) -.PHONY: help install uv-install frontend-install frontend-build \ - dev run test lint fmt build publish-test publish clean stop \ +.PHONY: help install install-dev install-full uv-install uv-install-dev uv-install-docs frontend-install frontend-build \ + dev run run-bare test lint fmt build publish-test publish clean stop \ benchmark benchmark-quick benchmark-report \ vercel vercel-build vercel-deploy \ build-container run-container stop-container logs-container clean-container publish-container \ @@ -26,12 +35,17 @@ help: @echo "" @echo "GitPilot Make targets" @echo "---------------------" - @echo " make install Install backend (uv) + frontend (npm install)" - @echo " make uv-install Create/refresh Python env and install deps via uv" + @echo " make install Install runtime deps + frontend + MCP stack" + @echo " make install-dev Install developer/test tooling" + @echo " make install-full Install runtime + dev/docs tooling + MCP stack" + @echo " make uv-install Create/refresh Python env with runtime deps only" + @echo " make uv-install-dev Add developer/test tooling via uv" + @echo " make uv-install-docs Add documentation tooling via uv" @echo " make frontend-install Install frontend npm dependencies" @echo " make frontend-build Build React/Vite frontend into gitpilot/web" - @echo " make dev Alias for install" - @echo " make run Run GitPilot backend + frontend dev server" + @echo " make dev Alias for install-dev" + @echo " make run Run MCP stack + GitPilot 
backend/frontend" + @echo " make run-bare Run GitPilot backend + frontend WITHOUT MCP (no Docker required)" @echo " make stop Stop all processes on ports 8000 and 5173" @echo " make test Run tests with pytest via uv" @echo " make benchmark Run code generation benchmark (all tiers)" @@ -83,24 +97,55 @@ help: @echo " make gateway-register Register GitPilot agent in ContextForge" @echo "" -## High-level install: backend + frontend + MCP env (additive) +## High-level install: runtime backend + frontend + MCP stack. +## GitPilot uses the MCP stack by default, so keep MCP in the happy path while +## leaving heavyweight developer/docs tooling opt-in. install: uv-install frontend-install install-mcp - @echo "βœ… Backend (uv), frontend (npm) and MCP env ready." - @echo " Run 'make run' to start GitPilot, or 'make run-all' to also start the MCP stack." - -## Create / sync the environment with uv (all extras) + @echo "βœ… Backend runtime (uv), frontend (npm) and MCP env ready." + @echo " Run 'make run' to start MCP Context Forge + GitPilot." + @echo " No Docker? Use 'make run-bare' to start GitPilot without MCP." + @echo " Optional: 'make install-dev' for test/lint/build tooling." + +## Custom developer install: add dev/test/build tooling when you need it. +install-dev: uv-install-dev frontend-install + @echo "βœ… Developer tooling ready." + +## Full local workstation install: runtime + MCP + dev/docs tooling. +install-full: install + @echo "πŸ”§ Syncing Python environment with dev + docs tooling..." + @$(UV_ENV) $(UV) sync --extra dev --extra docs + @echo "βœ… Full local environment ready." + @echo " Run 'make run-all' to start GitPilot plus the MCP stack." + +## Create / sync the environment with uv (runtime dependencies only). uv-install: - @echo "πŸ”§ Syncing Python environment with uv (all extras)..." - @$(UV) sync --all-extras - @echo "βœ… Python environment ready." + @echo "πŸ”§ Syncing Python environment with uv (runtime deps only)..." 
+ @$(UV_ENV) $(UV) sync + @echo "βœ… Python runtime environment ready." @echo "⚑ Precompiling bytecode for faster startup (WSL/HF Spaces)..." - @$(UV) run python -m compileall -q -j 4 gitpilot/ 2>/dev/null || true + @$(UV_ENV) $(UV) run --no-dev python -m compileall -q -j 4 gitpilot/ 2>/dev/null || true @echo "βœ… Bytecode cache warmed." +## Add developer/test/build tooling without docs dependencies. +uv-install-dev: + @echo "πŸ”§ Syncing Python environment with dev/test tooling..." + @$(UV_ENV) $(UV) sync --extra dev + @echo "βœ… Python developer environment ready." + +## Add docs tooling only when building or serving documentation. +uv-install-docs: + @echo "πŸ”§ Syncing Python environment with docs tooling..." + @$(UV_ENV) $(UV) sync --extra docs + @echo "βœ… Python docs environment ready." + ## Install frontend dependencies frontend-install: @echo "πŸ“¦ Installing frontend dependencies (npm)..." - @cd frontend && npm install + @if [ -f frontend/package-lock.json ] && [ ! -d frontend/node_modules ]; then \ + cd frontend && npm ci --prefer-offline --no-audit --no-fund; \ + else \ + cd frontend && npm install --prefer-offline --no-audit --no-fund; \ + fi @echo "βœ… Frontend dependencies installed." ## Build the React/Vite frontend and copy dist -> gitpilot/web @@ -112,15 +157,26 @@ frontend-build: frontend-install @echo "βœ… Frontend build complete (gitpilot/web)." ## Developer convenience alias -dev: install +dev: install-dev -## Run GitPilot from the uv-managed environment (backend + frontend). -## Idempotent: if a GitPilot backend is already responding on :$(PORT) +## Run GitPilot from the uv-managed environment (MCP stack + backend + frontend). +## Idempotent: `run-mcp` starts/keeps Context Forge healthy first; if a +## GitPilot backend is already responding on :$(PORT) ## (because you ran `make run` earlier in another tab, or `make run-all` ## was re-invoked), we skip the backend boot and go straight to the ## frontend dev server. 
The port-in-use check only fires when the port ## is held by *something else*. -run: +## +## No Docker? Use `make run-bare` for the Docker-free path: it starts +## GitPilot backend + frontend without the MCP stack. The UI will show +## the gateway as Unreachable but everything else works. +run: run-mcp run-bare + +## Docker-free run path. Starts GitPilot backend + frontend without +## the MCP stack β€” useful on Hugging Face Spaces, CI smoke runs, and +## any environment where Docker is unavailable. The MCP Servers tab +## will show the gateway as Unreachable; clicking Sync is a no-op. +run-bare: @echo "πŸš€ Starting GitPilot on http://127.0.0.1:$(PORT)..." @# 1. Already a healthy GitPilot? β†’ skip backend boot, go straight to frontend. @if curl -sf http://127.0.0.1:$(PORT)/api/ping > /dev/null 2>&1; then \ @@ -135,7 +191,7 @@ run: exit 1; \ fi @trap 'kill 0' EXIT; \ - $(UV) run python -m gitpilot serve --host 127.0.0.1 --port $(PORT) --no-open & \ + $(UV_ENV) $(UV) run --no-dev python -m gitpilot serve --host 127.0.0.1 --port $(PORT) --no-open & \ BACKEND_PID=$$!; \ echo "⏳ Waiting for backend to be ready (up to 60s for WSL/first-start)..."; \ READY=0; \ @@ -157,7 +213,10 @@ run: echo "🎨 Starting frontend dev server on http://localhost:5173..."; \ cd frontend && npm run dev -- --open -## Stop all running processes (ports 8000 and 5173) +## Stop all running processes (ports 8000 and 5173) AND the MCP stack. +## Now that `make run` starts the MCP Context Forge stack by default, `make +## stop` is symmetric: it stops both GitPilot and Forge. `stop-mcp` is +## idempotent β€” running it when nothing is up is a clean no-op. stop: @echo "πŸ›‘ Attempting to stop processes on ports $(PORT) and 5173..." @@ -179,7 +238,9 @@ stop: echo "No process found on port 5173."; \ fi - @echo "βœ… Stop attempt complete." + @# Tear down the MCP stack started by `make run` (idempotent). + @$(MAKE) --no-print-directory stop-mcp + @echo "βœ… GitPilot + MCP stack stopped." 
## Soft-stop GitPilot WITHOUT sudo. Only kills processes the current user ## owns; never prompts for a password. Suitable for `make run-all` to call @@ -212,28 +273,100 @@ test: @echo "πŸ§ͺ Running tests with isolated GitPilot config..." @TMP_CFG="$$(mktemp -d)"; \ echo "Using GITPILOT_CONFIG_DIR=$$TMP_CFG"; \ - GITPILOT_CONFIG_DIR="$$TMP_CFG" GITPILOT_LITE_MODE=0 PYTHONWARNINGS="ignore::RuntimeWarning" $(UV) run pytest; \ + GITPILOT_CONFIG_DIR="$$TMP_CFG" GITPILOT_LITE_MODE=0 PYTHONWARNINGS="ignore::RuntimeWarning" $(UV_ENV) $(UV) run --extra dev pytest; \ STATUS=$$?; \ rm -rf "$$TMP_CFG"; \ exit $$STATUS test-fast: @echo "πŸ§ͺ Running tests (no isolation)..." - @$(UV) run pytest + @$(UV_ENV) $(UV) run --extra dev pytest + +## Coverage gate β€” Batch P1-B +## Enforces the >= 80 % threshold on the gated modules listed in +## pyproject.toml [tool.coverage.run] include. Use `make coverage` locally; +## CI runs the same command. `make coverage-full` reports the whole tree +## without enforcement, useful for spotting candidates to add to the gate. +coverage: + @echo "πŸ“ˆ Running coverage gate (gated modules only)..." + @TMP_CFG="$$(mktemp -d)"; \ + echo "Using GITPILOT_CONFIG_DIR=$$TMP_CFG"; \ + GITPILOT_CONFIG_DIR="$$TMP_CFG" GITPILOT_LITE_MODE=0 PYTHONWARNINGS="ignore::RuntimeWarning" \ + $(UV_ENV) $(UV) run --extra dev pytest --cov --cov-report=term-missing --cov-report=xml --cov-report=html; \ + STATUS=$$?; \ + rm -rf "$$TMP_CFG"; \ + exit $$STATUS + +coverage-html: coverage + @echo "πŸ“ˆ HTML report: htmlcov/index.html" + +coverage-full: + @echo "πŸ“ˆ Full-tree coverage report (informational, no gate)..." + @TMP_CFG="$$(mktemp -d)"; \ + GITPILOT_CONFIG_DIR="$$TMP_CFG" GITPILOT_LITE_MODE=0 PYTHONWARNINGS="ignore::RuntimeWarning" \ + $(UV_ENV) $(UV) run --extra dev pytest --cov=gitpilot --cov-report=term --no-cov-on-fail --cov-config=/dev/null; \ + rm -rf "$$TMP_CFG" + +## Type-check gate β€” Batch P1-C +## Strict mypy on the modules listed in mypy.ini. 
Run via `make typecheck`. +typecheck: + @echo "πŸ”Ž Running mypy --strict on gated modules..." + @$(UV_ENV) $(UV) run --extra dev mypy --config-file mypy.ini + +## Docs site β€” Batch P4-D +## mkdocs serve + mkdocs build (requires mkdocs-material; install with +## `pip install mkdocs mkdocs-material` or via uv). +docs-serve: + @echo "πŸ“š Serving docs at http://127.0.0.1:8001 ..." + @$(UV_ENV) $(UV) run --extra docs mkdocs serve -a 127.0.0.1:8001 + +docs-build: + @echo "πŸ“š Building static docs site -> site/ ..." + @$(UV_ENV) $(UV) run --extra docs mkdocs build --strict + +linkcheck: + @echo "πŸ”— Running in-repo markdown link checker..." + @$(UV_ENV) $(UV) run --extra dev pytest tests/test_docs_links.py -q + +## Supply chain β€” Batch P4-E +## Generate a CycloneDX SBOM for the installed Python deps. Output is +## artefacts/sbom.json. Run via `make sbom`. CI uploads it alongside +## the signed wheel. +sbom: + @echo "🧾 Generating CycloneDX SBOM..." + @mkdir -p artefacts + @$(UV_ENV) $(UV) run --extra dev python -m cyclonedx_py environment \ + --output-format json \ + --output-file artefacts/sbom.json \ + --PEP-639 || \ + (echo "Falling back to pip freeze SBOM..." && \ + $(UV_ENV) $(UV) run --extra dev python scripts/sbom_fallback.py > artefacts/sbom.json) + @echo "βœ… artefacts/sbom.json" + +sbom-verify: + @echo "🧾 Verifying artefacts/sbom.json shape..." + @$(UV_ENV) $(UV) run --no-dev python -c "import json,sys; d=json.load(open('artefacts/sbom.json')); \ + assert d.get('bomFormat')=='CycloneDX', 'Not a CycloneDX SBOM'; \ + print(f'OK: {len(d.get(\"components\", []))} components')" + +audit-npm: + @echo "πŸ›‘ npm audit (dev deps)..." + @npm --prefix frontend audit --omit=dev --audit-level=high || \ + (echo '⚠️ npm audit found issues; see report above.' && exit 1) ## Benchmark: code generation stress test benchmark: @echo "πŸ“Š Running code generation benchmark (all tiers)..." 
- @$(UV) run python tests/benchmark.py --model $${GITPILOT_OLLAMA_MODEL:-llama3} --timeout $${BENCHMARK_TIMEOUT:-300} + @$(UV_ENV) $(UV) run --extra dev python tests/benchmark.py --model $${GITPILOT_OLLAMA_MODEL:-llama3} --timeout $${BENCHMARK_TIMEOUT:-300} benchmark-quick: @echo "πŸ“Š Running quick benchmark (tier 1 only)..." - @$(UV) run python tests/benchmark.py --quick --model $${GITPILOT_OLLAMA_MODEL:-llama3} --timeout $${BENCHMARK_TIMEOUT:-120} + @$(UV_ENV) $(UV) run --extra dev python tests/benchmark.py --quick --model $${GITPILOT_OLLAMA_MODEL:-llama3} --timeout $${BENCHMARK_TIMEOUT:-120} benchmark-report: @echo "πŸ“Š Running benchmark with HTML dashboard..." @mkdir -p reports - @$(UV) run python tests/benchmark.py \ + @$(UV_ENV) $(UV) run --extra dev python tests/benchmark.py \ --model $${GITPILOT_OLLAMA_MODEL:-llama3} \ --timeout $${BENCHMARK_TIMEOUT:-300} \ --output reports/benchmark-results.json \ @@ -244,29 +377,29 @@ benchmark-report: ## Lint code lint: @echo "πŸ” Linting with ruff..." - @$(UV) run ruff check gitpilot + @$(UV_ENV) $(UV) run --extra dev ruff check gitpilot ## Format code fmt: @echo "🎨 Formatting with ruff..." - @$(UV) run ruff format gitpilot + @$(UV_ENV) $(UV) run --extra dev ruff format gitpilot ## Build wheel + sdist (includes built frontend) build: frontend-build @echo "πŸ“¦ Building distribution (wheel + sdist)..." - @$(UV) run $(PYTHON) -m build + @$(UV_ENV) $(UV) run --extra dev $(PYTHON) -m build @echo "βœ… Build artifacts are in ./dist" ## Upload to TestPyPI publish-test: @echo "🚚 Uploading to TestPyPI..." - @$(UV) run twine upload -r testpypi dist/* + @$(UV_ENV) $(UV) run --extra dev twine upload -r testpypi dist/* @echo "βœ… Uploaded to TestPyPI" ## Upload to PyPI publish: @echo "πŸš€ Uploading to PyPI..." 
- @$(UV) run twine upload dist/* + @$(UV_ENV) $(UV) run --extra dev twine upload dist/* @echo "βœ… Uploaded to PyPI" ## Clean build artifacts and caches (cross-platform) @@ -579,12 +712,11 @@ gateway-register: @cd deploy/a2a-mcp && chmod +x register_agent.sh && ./register_agent.sh # ============================================================================= -# MCP Context Forge stack (additive; default `make run` is unchanged) +# MCP Context Forge stack (additive services; default `make run` starts it) # ----------------------------------------------------------------------------- -# `make install` chains `install-mcp` automatically: on machines with Docker -# this pre-pulls images so `make run-mcp` is instant. On machines without -# Docker the script prints a friendly skip message and exits 0, keeping the -# baseline `install` flow unchanged. +# `make install` includes this target because GitPilot uses the MCP stack by +# default. The script is skip-safe and incremental: it only clones/builds what +# is missing unless MCP_UPDATE=1 or MCP_BUILD=1 is supplied. # ============================================================================= ## Pull MCP Context Forge stack images and seed .mcp.env (idempotent) @@ -592,35 +724,64 @@ install-mcp: @bash scripts/install-mcp.sh ## Bring up MCP Context Forge + 3 reference MCP servers (postgre, milvus, inspector) -run-mcp: +run-mcp: install-mcp @if [ ! -f .mcp.env ]; then \ echo "❌ .mcp.env missing. Run 'make install-mcp' first."; exit 1; \ fi + @if ! command -v docker >/dev/null 2>&1; then \ + echo "❌ Docker is required because 'make run' starts MCP Context Forge by default."; \ + echo " Install/start Docker Desktop, then rerun 'make run'."; \ + echo " Or run without MCP: make run-bare"; \ + exit 1; \ + fi + @if ! 
docker compose version >/dev/null 2>&1; then \ + echo "❌ Docker Compose v2 is required for the MCP stack."; \ + echo " Upgrade Docker Desktop or install the compose v2 plugin."; \ + echo " Or run without MCP: make run-bare"; \ + exit 1; \ + fi + @if ! docker info >/dev/null 2>&1; then \ + echo "❌ Docker daemon is not running; MCP Context Forge cannot start."; \ + echo " Start Docker Desktop, then rerun 'make run'."; \ + echo " Or run without MCP: make run-bare"; \ + exit 1; \ + fi @echo "πŸš€ Starting MCP Context Forge stack..." docker compose --env-file .mcp.env -f docker-compose.mcp.yml --profile mcp up -d - @echo "βœ… Forge: http://localhost:$${MCP_FORGE_PORT:-4444}" - @echo " Postgre: http://localhost:$${MCP_POSTGRE_PORT:-8080}" - @echo " Inspector: http://localhost:$${MCP_INSPECTOR_PORT:-8081}" - @echo " Milvus (opt-in): docker compose -f docker-compose.mcp.yml --profile milvus up -d" + @set -a; . ./.mcp.env; set +a; \ + forge_port="$${MCP_FORGE_PORT:-4444}"; \ + echo "⏳ Waiting for MCP Context Forge on http://localhost:$$forge_port/health..."; \ + ready=0; \ + for i in $$(seq 1 60); do \ + if curl -fsS "http://localhost:$$forge_port/health" >/dev/null 2>&1; then \ + echo "βœ… MCP Context Forge reachable after $$((i * 2))s."; \ + ready=1; \ + break; \ + fi; \ + sleep 2; \ + done; \ + if [ $$ready -ne 1 ]; then \ + echo "❌ MCP Context Forge did not become host-reachable on http://localhost:$$forge_port."; \ + echo " Tail logs with: make logs-mcp"; \ + exit 1; \ + fi + @set -a; . ./.mcp.env; set +a; \ + echo "βœ… Forge: http://localhost:$${MCP_FORGE_PORT:-4444}"; \ + echo " Postgre: http://localhost:$${MCP_POSTGRE_PORT:-8080}"; \ + echo " Inspector: http://localhost:$${MCP_INSPECTOR_PORT:-8081}"; \ + echo " Milvus (opt-in): docker compose --env-file .mcp.env -f docker-compose.mcp.yml --profile milvus up -d" @bash scripts/register-mcp-servers.sh ## Register the 3 MCP servers with Forge (idempotent; called by run-mcp). 
register-mcp-servers: @bash scripts/register-mcp-servers.sh -## One-shot: GitPilot core + MCP Context Forge stack. +## One-shot with a forced GitPilot backend restart. ## -## We deliberately do a soft-stop of any stale GitPilot backend BEFORE -## starting the new one. Reason: when run-all is invoked the user just -## pulled new code, edited config, or rebuilt an MCP image -- they -## expect the freshly-pulled code path to actually run, not the -## leftover backend from the previous attempt. The 'run' target's -## idempotent skip is great for the dev loop ('make run' twice in a -## row), but it has to be sidestepped here so we don't silently keep -## the old code path alive. -run-all: run-mcp - @$(MAKE) --no-print-directory stop-soft - @$(MAKE) --no-print-directory run +## `make run` now starts the MCP stack by default. Keep `run-all` as the +## explicit "fresh backend" path for users who just pulled code, changed +## config, or rebuilt MCP images and do not want to reuse an old backend. +run-all: stop-soft run ## Local-first: rebuild every MCP image from the cloned mcp-stack/ source ## (mirrors HomePilot's docker-compose.mcp.yml `build:` pattern), then run. diff --git a/README.md b/README.md index 82d4264..62c078c 100644 --- a/README.md +++ b/README.md @@ -368,14 +368,19 @@ The frontend deploys to Vercel. Set `VITE_BACKEND_URL` to your backend. ## Contributing ```bash -# Backend -cd gitpilot -pip install -e ".[dev]" -pytest +# Standard install: runtime backend + frontend + MCP stack +make install +# WSL note: the Makefile defaults uv to UV_LINK_MODE=copy to avoid +# hardlink fallback warnings on /mnt/c checkouts. For best install speed, +# clone the repo inside the native WSL filesystem (for example ~/workspace). + +# Developer/test tooling +make install-dev +make test -# Frontend +# Frontend only cd frontend -npm install +npm ci npm run dev # VS Code Extension @@ -402,4 +407,12 @@ Apache License 2.0. See [LICENSE](LICENSE). 
--- -**MCP Context Forge integration** — GitPilot now ships an opt-in MCP stack (Forge + PostgreSQL / Milvus / Inspector servers) wired into the agents like Claude Code's built-ins; `make run-all` brings everything up. See [INSTALL_MCP.md](./INSTALL_MCP.md) and [PRODUCTION_MCP.md](./PRODUCTION_MCP.md). +**MCP Context Forge integration** — GitPilot ships a default MCP stack (Forge + PostgreSQL / Milvus / Inspector servers) wired into the agents like Claude Code's built-ins; `make run` brings everything up. No Docker? Use `make run-bare` to start GitPilot core without MCP. See [docs/deploy/install-mcp.md](./docs/deploy/install-mcp.md) and [docs/deploy/production-mcp.md](./docs/deploy/production-mcp.md). + +--- + +## What's New + +> **Enterprise-ready foundation:** GitPilot now ships with safer defaults and production-grade controls, including thread-safe feature flags, strict typing, CI coverage enforcement, structured error handling, and a fast `gitpilot doctor` health check. All upgrades are additive, flag-gated, and disabled by default, so existing installations remain stable while teams can adopt new capabilities gradually. + +> **Performance, onboarding, and release confidence:** GitPilot now improves runtime efficiency with prompt caching, lazy tool loading, context memoisation, SSE streaming, and safe model warmup. First-time setup is easier with `gitpilot init --wizard`, which creates configuration files atomically with rollback protection and no secret exposure. The platform also adds a stable public API, deprecation handling, MkDocs documentation, broken-link checks, SBOM generation, npm auditing, and Sigstore-based release signing. diff --git a/docs/API_STABILITY.md b/docs/API_STABILITY.md new file mode 100644 index 0000000..6949420 --- /dev/null +++ b/docs/API_STABILITY.md @@ -0,0 +1,151 @@ +# GitPilot — Public API stability contract + +> **Status:** active. Applies from version `0.3.x` onward. 
+The package `gitpilot.public_api` is the **only** GitPilot import surface +that the project commits to keep stable across releases. Everything +else (every other module under `gitpilot.*`) is internal: signatures may +change, names may move, files may be deleted, all without notice. + +This document explains the contract and the deprecation process. + +--- + +## 1. What "stable" means + +For every name in `gitpilot.public_api.__all__`: + +| Guarantee | Detail | +|---|---| +| **The name keeps resolving** | imports never silently break | +| **The signature stays callable** | new optional parameters are fine; required ones are not added without a deprecation cycle | +| **Documented behaviour is preserved** | bug fixes are allowed, behaviour changes that contradict the docstring are not | +| **Removal goes through deprecation** | see §3 | + +Anything not in `__all__` may move, be renamed, or be deleted in any release — even a patch. + +--- + +## 2. What's on the surface today + +The list is the source of truth; this section is a human-friendly summary. 
+* **Feature flags** — `is_on`, `set_override`, `enabled_flags`, … +* **Context** — `AgentsLoader`, `MentionParser`, `ContextBudgetManager`, + `build_context_cached` +* **Tools** — `ToolPolicy`, `EditGuard`, `MCPGuard`, `classify_tool`, + `register_tool_category`, `prune_descriptors`, `MCPServerToggles`, + `MCPToggleRegistry`, `validate_tool_output` +* **Modes** — `Mode`, `ModeRegistry`, `activate_mode`, `ActiveModeContext` +* **Slash commands** — `SlashCommand`, `SlashCommandRegistry` +* **Checkpoints** — `CheckpointStore`, `CheckpointRecord`, + `ToolCallDescriptor` +* **Rules** — `Rule`, `RuleSet`, `compose_rules`, `load_rules` +* **Sandbox** — `get_sandbox`, `SandboxPolicy`, `SandboxResult`, + `NullSandbox`, `SubprocessSandbox`, `MatrixLabSandbox`, + `SandboxError`, `SandboxUnavailableError`, `SandboxRunError`, + `BACKEND_OFF`, `BACKEND_SUBPROCESS`, `BACKEND_MATRIXLAB` +* **Trust** — `TrustStore`, `TrustEntry`, `TrustStatus`, + `workspace_fingerprint` +* **Errors** — `GitPilotError`, `NotFoundError`, `UpstreamError`, + `ValidationError`, `wrap_errors_envelope`, `error_envelope`, + `error_envelope_response` +* **Doctor** — `doctor_run_checks`, `doctor_render_text`, + `doctor_render_json`, `CheckResult`, `DoctorReport` +* **Prompt cache (Phase 2)** — `build_system_blocks`, + `to_anthropic_kwargs`, `to_legacy_system_string`, `SystemPayload`, + `SystemBlock`, `PromptCacheProvider` +* **Streaming (Phase 2)** — `register_stream_routes`, + `AgentStreamRunner`, `StreamEvent`, `StreamMetrics`, + `format_sse_event`, `stream_fallback_adapter` +* **Context cache (Phase 2)** — `build_context_cached`, + `get_context_cache_stats`, `clear_context_cache`, `ContextCacheStats` +* **Warmup (Phase 2)** — `register_warmup`, `run_warmup_async`, + `run_warmup_now`, `WarmupResult` +* **Wizard (Phase 3)** — `run_wizard`, `WizardAnswers`, + `WizardResult`, `WizardError`, `WizardPrompter`, `ScriptedPrompter`, + `wizard_render_env`, `wizard_render_modes`, + 
`supported_provider_slugs`, `starter_mode_slugs` +* **Deprecation infra** — `deprecated_alias` + +The authoritative list is in `gitpilot/public_api/__init__.py`. +A CI test (`tests/test_public_api.py`) fails if any name in `__all__` +becomes unimportable. + +--- + +## 3. Deprecation process + +When a public name needs to go, this is the path: + +1. **Announce** in the release that introduces the deprecation: + ``` + parse_mentions → use expand_mentions instead. + Scheduled for removal in v2.0. + ``` +2. **Wrap** the symbol with `deprecated_alias` so the first call per + process emits a `DeprecationWarning`: + + ```python + from gitpilot._deprecation import deprecated_alias + parse_mentions = deprecated_alias( + "parse_mentions", expand_mentions, + replacement="gitpilot.public_api.expand_mentions", + removed_in="2.0", + ) + ``` + +3. **Keep** the symbol working for at least one minor release. +4. **Remove** only on the milestone version named in `removed_in`. + +The `deprecated_alias` helper enforces: + +* fixed-format warning text (`<name> is deprecated; use <replacement> instead + (will be removed in v<removed_in>)`) +* emit-once-per-process semantics (no log spam) +* a `__gitpilot_deprecated__` metadata attribute on the wrapper, so + documentation generators and migration tooling can find every + deprecated name without parsing source + +Callers that want to opt out of the noise can filter the category as +usual: + +```python +import warnings +warnings.filterwarnings( + "ignore", category=DeprecationWarning, module=r"gitpilot\..*", +) +``` + +--- + +## 4. SemVer mapping + +GitPilot follows semantic versioning for the `public_api` surface only: + +* **MAJOR** — a name is removed, or a required parameter is added. +* **MINOR** — a new name lands, a deprecation is announced, or a new + optional parameter is added. +* **PATCH** — bug fixes and behaviour preserved. + +Internal modules ignore SemVer entirely. + +--- + +## 5.
Suggested migration playbook for callers + +If you are integrating GitPilot inside another tool, do exactly two +things to stay future-proof: + +1. **Import only from `gitpilot.public_api`.** Reaching into + `gitpilot.session` or `gitpilot.agent_executor` is allowed but + not protected. +2. **Treat any `DeprecationWarning` from `gitpilot._deprecation` as + a hard build break.** CI: + + ```bash + pytest -W error::DeprecationWarning + ``` + +Following both ensures one GitPilot major-bump is the only place you +need to spend migration effort. diff --git a/docs/PHASE1.md b/docs/PHASE1.md new file mode 100644 index 0000000..a2c9983 --- /dev/null +++ b/docs/PHASE1.md @@ -0,0 +1,108 @@ +# Phase 1 β€” Foundations + +Every batch below is additive, flag-gated where applicable, and removable +in a single revert. Phase 1 ships no user-visible behaviour change; it +puts the rails in place so Phases 2–4 can land safely. + +## Status + +| Batch | Done | Notes | +|---|---|---| +| P1-A Β· Feature-flag service | βœ… | `gitpilot/flags.py`, 16 tests, RLock-safe | +| P1-B Β· Coverage gate β‰₯ 80 % | βœ… | gated allowlist in `pyproject.toml`; CI workflow at `.github/workflows/coverage.yml` | +| P1-C Β· `mypy --strict` foothold | βœ… | 15 modules + `gitpilot/public_api/__init__.py` | +| P1-D Β· Error envelope | βœ… | `wrap_errors_envelope` decorator; flag: `error_envelope` | +| P1-E Β· `gitpilot doctor` CLI | βœ… | 9 checks, runs in ≀ 100 ms offline; JSON via `--json` | + +Full test count: **1 109 passing** (1 035 prior + 74 new). +Gated coverage: **88.05 %**. +Strict mypy: **15 source files clean**. 
+ +## Quick reference + +### Feature flags +```bash +# Enable a flag for one process +GITPILOT_FLAGS="error_envelope=1,prompt_cache=0" gitpilot serve + +# Or persist for the workspace +echo '{"error_envelope": true}' > .gitpilot/flags.json +``` + +### Coverage +```bash +make coverage # gated modules, enforces >= 80 % +make coverage-full # informational, full tree +make coverage-html # writes htmlcov/index.html +``` + +### Types +```bash +make typecheck # mypy --strict on gated modules +``` + +### Error envelope +```python +from gitpilot.public_api import wrap_errors_envelope, NotFoundError + +@app.get("/widgets/{wid}") +@wrap_errors_envelope +async def get_widget(wid: str) -> dict: + if not exists(wid): + raise NotFoundError(f"widget {wid} not found", + hint="Check the widget ID with /widgets/list") + return load_widget(wid) +``` +With flag `error_envelope=1` the response on a 404 becomes: +```json +{ + "error": { + "code": "resource.not_found", + "message": "widget abc not found", + "hint": "Check the widget ID with /widgets/list", + "doc_url": "https://docs.gitpilot.dev/errors/resource-not-found" + }, + "trace_id": "8f3c…" +} +``` +With the flag off (legacy default) FastAPI's original 500/HTTPException +behaviour is preserved. + +### Doctor +```bash +gitpilot doctor # rich table, exit 0/1 +gitpilot doctor --offline # skip every network probe (~100 ms) +gitpilot doctor --json # machine-readable, for CI +python -m gitpilot.doctor --json # zero-Typer fallback +``` + +Checks run today: +1. Python β‰₯ 3.11 +2. node on PATH +3. uv on PATH +4. Workspace files (`AGENTS.md`, `.gitpilot/modes.yaml`) +5. `modes.yaml` parses +6. Sandbox backend reachable (subprocess / matrixlab / off) +7. MCP config parses +8. Model API credential present for the configured provider +9. 
Frontend bundle packaged + +### Public API surface + +```python +from gitpilot.public_api import ( + # flags + is_on, set_override, + # context + tools + ToolPolicy, ContextBudgetManager, AgentsLoader, MentionParser, + # sandbox + trust + get_sandbox, SandboxPolicy, TrustStore, + # error envelope + wrap_errors_envelope, GitPilotError, NotFoundError, + # doctor + doctor_run_checks, doctor_render_json, +) +``` +Anything outside this list is internal and may change. Older modules +(legacy `gitpilot.api`, agents, GitHub clients, …) are unchanged and +remain importable as before. diff --git a/docs/PHASE2.md b/docs/PHASE2.md new file mode 100644 index 0000000..fbf0253 --- /dev/null +++ b/docs/PHASE2.md @@ -0,0 +1,133 @@ +# Phase 2 β€” Performance + +Five additive batches that target perceived speed and per-turn cost +without changing any user-visible behaviour by default. Every code +path is reachable only when its feature flag is on; the flags ship +**off** so the merge is risk-free. + +## Status + +| Batch | Done | Flag | Notes | +|---|---|---|---| +| P2-A Β· Prompt cache builder | βœ… | `prompt_cache` | Anthropic-only ``cache_control: ephemeral`` markers | +| P2-B Β· Lazy MCP tool defs | βœ… | `lazy_tool_defs` | drops tools the mode policy forbids | +| P2-C Β· Context-pack memoisation | βœ… | `context_cache` | LRU keyed on workspace, mode, query, mtimes | +| P2-D Β· End-to-end SSE streaming | βœ… | `stream_v2`, `ui_stream_v2` | new `/chat/stream` route, legacy unchanged | +| P2-E Β· Model warmup | βœ… | `model_warmup` | 1-token startup ping with 3-second cap | + +Test count: **1 172 passing** (1 109 prior + 63 new). +Gated coverage: **88.79 %** across 19 modules. +Strict mypy: **20 source files clean**. 
+ +## Turning a flag on + +```bash +# Single env-var override, scoped to the process +GITPILOT_FLAGS="prompt_cache=1,lazy_tool_defs=1,context_cache=1,stream_v2=1,model_warmup=1" \ + gitpilot serve + +# Per-workspace persistence +cat > .gitpilot/flags.json <<'EOF' +{ + "prompt_cache": true, + "lazy_tool_defs": true, + "context_cache": true, + "stream_v2": true, + "model_warmup": true +} +EOF +``` + +## Bench DoD + +The plan asked for two measurable gates before flipping flags on in +production. Both checks are easy to wire into a smoke job: + +* **Input tokens ↓ β‰₯ 50 %** on a 20-turn benchmark with `prompt_cache=1`. + Measure with the digest emitted by ``SystemPayload.cache_prefix_digest`` + and your provider's input-token billing field. +* **p50 first-byte ↓ β‰₯ 40 %** on a fixed prompt with `stream_v2=1`. + The `done` event payload includes ``first_byte_ms`` so the benchmark + can record it directly. + +## Quick reference + +### Prompt cache + +```python +from gitpilot.public_api import build_system_blocks, to_anthropic_kwargs + +payload = build_system_blocks( + base_system="You are GitPilot.", + workspace=workspace_path, + mode_slug="coder", + tool_defs=list_tools_for_session(), + session_conventions=current_turn_notes, +) +kwargs = to_anthropic_kwargs(payload) # ``system=`` ready for the SDK +``` + +### Lazy MCP tool defs + +```python +from gitpilot.public_api import prune_descriptors, build_mcp_agent_tools +# Mode picker β†’ ToolPolicy β†’ bridge accepts policy= +crewai_tools = build_mcp_agent_tools(policy=active_mode.tool_policy()) +``` + +### Context cache + +```python +from gitpilot.public_api import build_context_cached, get_context_cache_stats +context = build_context_cached(workspace_path, query=user_query, mode_slug="coder") +print(get_context_cache_stats().hit_ratio) +``` + +### SSE streaming + +Server side (one-line registration, idempotent): + +```python +from gitpilot.public_api import register_stream_routes +register_stream_routes(app, 
adapter=my_adapter) +``` + +Client side (browser): + +```js +const es = new EventSource('/chat/stream', { withCredentials: true }); +es.addEventListener('assistant_chunk', (e) => render(JSON.parse(e.data).text)); +es.addEventListener('done', (e) => es.close()); +``` + +### Model warmup + +```python +from gitpilot.public_api import register_warmup +register_warmup(app) # noop when flag off; idempotent across reloads +``` + +## Rollback paths + +| Issue | Action | +|---|---| +| Anthropic cache markers break a provider | `GITPILOT_FLAGS="prompt_cache=0"` | +| Mode policy hides a tool we still need | `GITPILOT_FLAGS="lazy_tool_defs=0"` | +| Stale context served from the LRU | `GITPILOT_FLAGS="context_cache=0"` or call `clear_context_cache()` | +| Streaming UX flakier than batch | `GITPILOT_FLAGS="stream_v2=0"` (legacy routes still serve) | +| Warmup timeouts during boot storm | `GITPILOT_FLAGS="model_warmup=0"` | + +Each item is one env-var change; no redeploy required. + +## Backwards compatibility + +* No existing module deleted or rewritten. The few legacy files that + were touched (`gitpilot/api.py`, `gitpilot/cli.py`, `gitpilot/llm_provider.py`, + `gitpilot/agent_executor.py`, `gitpilot/mcp_tools_bridge.py`) received + **only additive changes**: new helpers, new optional arguments + defaulting to legacy behaviour, new co-methods. Every legacy entry + point keeps its signature. +* The 1 109 pre-existing tests continue to pass alongside the 63 new + ones. +* All new modules live behind feature flags that default off; turning + them on is one env-var change. diff --git a/docs/PHASE3_G.md b/docs/PHASE3_G.md new file mode 100644 index 0000000..87a8ce7 --- /dev/null +++ b/docs/PHASE3_G.md @@ -0,0 +1,77 @@ +# Phase 3 β€” Batch G Β· First-run wizard + +Replaces "read the 6 KB ``.env.template``" with a four-question walkthrough +that produces exactly the files a new user actually needs. 
+ +## What ships + +| Item | Where | +|---|---| +| Wizard module | `gitpilot/init_wizard.py` | +| CLI integration | `gitpilot init --wizard` (also `--provider`, `--mode`, `--api-key`, `--no-trust`, `--overwrite`) | +| Public API surface | `gitpilot.public_api.{run_wizard, WizardAnswers, WizardResult, …}` | +| Tests | `tests/test_init_wizard.py` (22 specs) | +| Flag | `init_wizard` (default off) | + +## Behaviour at a glance + +1. Pick a provider β€” Anthropic Claude, OpenAI, IBM watsonx, or Ollama. +2. Paste the API key (skipped for Ollama; input is hidden, never echoed). +3. Pick a starter mode β€” `coder`, `planner`, or `reviewer`. +4. Confirm workspace trust (writes a `TrustStore` entry). + +Outputs (all atomic): + +* `.env` β€” only the keys you actually picked (mode `0o600`). +* `.gitpilot/modes.yaml` β€” one starter mode wired with the right tool groups. +* `AGENTS.md` β€” via the existing `agents_md.run_init` helper. +* `~/.gitpilot/trusted.json` β€” trust entry for the workspace. + +## Industry-grade guarantees + +* **Atomic writes.** Every file is written to a sibling temp file, + `fsync`-ed, then renamed. An abort mid-run rolls back every + successful write so a retry starts from a clean slate. +* **Secret safety.** API keys are never echoed back to stdout, are + rejected if they contain control characters, and `.env` is set to + `0o600` on POSIX. +* **Idempotent.** Re-running the wizard with the same inputs produces + byte-identical files. Existing files are skipped unless + `--overwrite` is passed. +* **Non-interactive.** Every prompt has a CLI flag (`--provider`, + `--mode`, `--api-key`, `--no-trust`), so CI and provisioning scripts + can drive the same code path the human flow uses. +* **Flag-gated.** Without `init_wizard=1` the wizard refuses to run + and the user is pointed at the legacy `gitpilot init`. 
+ +## Try it + +```bash +# Interactive +GITPILOT_FLAGS="init_wizard=1" gitpilot init --wizard + +# Non-interactive (CI) +GITPILOT_FLAGS="init_wizard=1" gitpilot init --wizard \ + --provider anthropic \ + --api-key "$ANTHROPIC_API_KEY" \ + --mode coder \ + . +``` + +Expected output: + +``` +wrote ./.env +wrote ./.gitpilot/modes.yaml +wrote ./AGENTS.md +trusted workspace recorded in ~/.gitpilot/trusted.json +done in 7 ms +``` + +## Rollback + +* `GITPILOT_FLAGS="init_wizard=0"` β€” disables the new flow. The + legacy `gitpilot init` (just `.gitpilot/GITPILOT.md`) is unchanged + and remains the default. +* Single revert of this commit removes the wizard module, CLI flags, + and tests without disturbing any other batch. diff --git a/docs/PHASE4.md b/docs/PHASE4.md new file mode 100644 index 0000000..9e31413 --- /dev/null +++ b/docs/PHASE4.md @@ -0,0 +1,100 @@ +# Phase 4 β€” Quality safety net + +Three additive batches that lock the contract, tidy the docs, and harden +the release pipeline. Every change is reversible in a single revert. + +## Status + +| Batch | Done | Notes | +|---|---|---| +| P4-C Β· Public API stability layer | βœ… | `gitpilot/_deprecation.py`, `docs/API_STABILITY.md`, stronger `tests/test_public_api.py` | +| P4-D Β· README rewrite + docs site | βœ… | one-path README; legacy deployment docs moved to `docs/deploy/`; `mkdocs.yml` + `make docs-{serve,build}`; in-repo link checker | +| P4-E Β· Supply chain | βœ… | `make sbom` (CycloneDX 1.5), Sigstore-signing release workflow, `make audit-npm` baseline | + +Full test count: **1 266 passing** (1 194 prior + 72 new). +Gated coverage: **88.70 %** across 21 modules. +Strict mypy: **22 source files clean**. + +--- + +## P4-C β€” Public API stability + +* **`gitpilot/_deprecation.py`** β€” small helper exporting + `deprecated(...)` (decorator) and `deprecated_alias(...)` (factory). 
+ Both emit a single `DeprecationWarning` per process per symbol, + carry `__gitpilot_deprecated__` metadata for tooling, and follow a + fixed warning template (`"<name> is deprecated; use <replacement> instead + (will be removed in v<removed_in>)"`). +* **`docs/API_STABILITY.md`** — the written contract: what + `gitpilot.public_api` guarantees, the SemVer mapping, the migration + playbook (treat `DeprecationWarning` as a hard build break). +* **`tests/test_public_api.py`** now enforces three extra invariants: + every name resolves, every callable carries a non-trivial + docstring, every callable has resolvable type hints. + +No public symbol is currently scheduled for removal. The first real +deprecation will use: + +```python +from gitpilot._deprecation import deprecated_alias +parse_mentions = deprecated_alias( + "parse_mentions", expand_mentions, + replacement="gitpilot.public_api.expand_mentions", + removed_in="2.0", +) +``` + +## P4-D — README + docs site + +* **README** — one path, three commands. Everything heavier moves to + `docs/`. +* **`docs/deploy/`** — 10 legacy deployment docs moved verbatim + (history preserved via `git mv`): + + ``` + docker.md render.md render-detailed.md vercel.md vercel-setup.md + vercel-testing.md quick.md production.md production-mcp.md install-mcp.md + ``` + +* **`docs/contributing/`** — packaging + frontend reference. +* **`mkdocs.yml`** — material theme; `make docs-serve` runs locally, + `make docs-build --strict` is CI-ready. +* **`tests/test_docs_links.py`** — broken-link checker for in-repo + markdown. Failing test = "you moved a file without updating its + incoming links." Three real broken links were caught and fixed by + this batch. + +## P4-E — Supply chain + +* **`scripts/sbom_fallback.py`** — dependency-light CycloneDX 1.5 SBOM + generator. Walks `importlib.metadata` to produce a deterministic, + sorted, JSON SBOM that downstream consumers (Sigstore attestations, + vendor risk tools) can consume as-is.
+* **`make sbom`** / **`make sbom-verify`** — produces and validates + `artefacts/sbom.json` (192 components for the current dev env). +* **`make audit-npm`** — gates the frontend on `npm audit` at + `--audit-level=high`; baseline locked. +* **`.github/workflows/supply-chain.yml`** — separate workflow that + runs after a GitHub Release: + 1. builds wheel + sdist, + 2. generates SBOM, + 3. **signs every distribution with Sigstore via keyless OIDC** + (pinned to `sigstore/gh-action-sigstore-python@v3.0.0`), + 4. uploads SBOM + `.sigstore.json` signatures back to the release. + Workflow-dispatch dry-runs upload to an Actions artefact instead of + the release, so engineers can verify the chain without cutting a tag. +* **`tests/test_supply_chain.py`** — 12 assertions: SBOM is valid + CycloneDX 1.5, components are sorted + unique, every component + has `purl`/`name`/`version`; the workflow has the right OIDC + permissions, the right step order, the right Sigstore action pin, + and a dry-run path. + +## Rollback + +| Batch | One-line rollback | +|---|---| +| P4-C | `git rm gitpilot/_deprecation.py docs/API_STABILITY.md tests/test_deprecation.py` (or `git revert <sha>`) | +| P4-D | Single `git revert` restores the old README and `docs/deploy/` layout | +| P4-E | `rm .github/workflows/supply-chain.yml scripts/sbom_fallback.py tests/test_supply_chain.py` | + +Each batch is independent, so a partial revert is supported. diff --git a/docs/UPGRADES.md b/docs/UPGRADES.md new file mode 100644 index 0000000..b9e72b2 --- /dev/null +++ b/docs/UPGRADES.md @@ -0,0 +1,353 @@ +# GitPilot Upgrades — Context, Tools, Modes, Sandbox + +All changes in this document are **additive and non-destructive**. +Existing GitPilot installations keep working with no configuration; the +new features are opt-in. + +--- + +## 1. Persistent project context — `AGENTS.md` + +`AGENTS.md` at the workspace root is loaded into every session as a +high-priority context block.
It is the recommended place for project +conventions, directory map, stack notes, and workflow shortcuts. + +### Generate one + +```bash +gitpilot init # writes AGENTS.md if it does not exist +``` + +The starter document is produced by scanning the workspace (detects +Python, Node, Docker, Makefile targets, top-level layout). Edit it +freely afterwards. + +### Mode-specific overlays + +Place per-mode overrides in `.gitpilot/AGENTS.<mode>.md`. They are +loaded **after** the root file, so the most specific rules apply last. + +### Includes + +Any `AGENTS.md` may include other markdown files with a single line: + +```markdown +@./fragments/db-conventions.md +``` + +* relative or absolute paths are supported +* circular includes are detected and broken automatically +* total size is capped to protect the context budget + +--- + +## 2. `@`-mentions in chat + +The chat input recognises typed references: + +| Token | Expands to | +|---|---| +| `@./src/app.py` | the file's contents (size-capped) | +| `@glob:src/**/*.ts` | a list of matching paths | +| `@problems` | the diagnostics dumped to `.gitpilot/problems.json` | +| `@commit:<sha>` | `git show` of that commit | +| `@diff:<ref>` | `git diff <ref>` | +| `@selection` | the snippet sent from the editor | +| `@pr:<number>` | placeholder resolved by the API layer | + +Unknown tokens are reported but otherwise left alone — typing is +forgiving. + +--- + +## 3. Context budget + live token counter + +A new module (`gitpilot.context_budget`) tracks token usage per session +and condenses older history when the running total crosses a +configurable threshold. + +* Default budget: **200 000 tokens**, condense at **70 %**. +* Strategy: drop oversize tool outputs first, then summarise older + non-pinned turns into a single recap message, then keep the most + recent six turns verbatim. +* `ContextStats` exposes `{prompt_tokens, max_tokens, ratio, + condensations}` for surfacing a live counter in the web UI and + editor extension.
+ +Token estimation uses `tiktoken` when available and falls back to a +length-based heuristic. + +--- + +## 4. Tool categories + per-mode policy + +Every tool now belongs to one of six categories: + +``` +read edit command browser mcp mode +``` + +A mode may declare which categories it wants and add fine-grained +guards: + +```yaml +groups: + - read + - mcp: + allow: ["postgres.*"] + alwaysAllow: ["postgres.explain"] + disabledServers: ["github"] + - edit: + fileRegex: "^migrations/.*\\.sql$" +``` + +* `fileRegex` is enforced at edit time β€” a write outside the pattern + is rejected before any bytes hit disk. +* `alwaysAllow` lets specific MCP tools run without the per-call + approval prompt. + +Plugins can register their own categories with +`gitpilot.tool_groups.register_category(name, category)`. + +--- + +## 5. Per-MCP-tool toggles + tool-output validator + +`.gitpilot/mcp.json` (project) or `~/.gitpilot/mcp.json` (user) accept +per-server toggles: + +```json +{ + "servers": [ + { + "name": "github", + "enabledTools": ["search_code", "list_issues"], + "disabledTools": ["create_pr"], + "alwaysAllow": ["search_code"], + "disabled": false + } + ] +} +``` + +Disabled tools are removed from the model's tool descriptions β€” every +disabled tool is a small win on the prompt budget. Project file wins +on conflicts. + +Tool outputs pass through `validate_tool_output` before being injected +into history. Outputs with control characters are flagged; oversize +outputs are truncated. Both responses are returned as +`ToolOutputCheck`, so the caller can ask the user instead of poisoning +context. + +--- + +## 6. Custom modes + +A mode is a YAML record describing a persona, its instructions, the +tool categories it may use, and (optionally) MCP servers that live and +die with the mode. 
+ +```yaml +# .gitpilot/modes.yaml +customModes: + - slug: db-pilot + name: "DB Pilot" + description: Natural-language queries against staging Postgres + roleDefinition: | + You are a senior DBA. Always EXPLAIN before mutating. + whenToUse: | + Use for schema, queries, or migrations. + customInstructions: | + Refuse DROP / TRUNCATE without explicit confirmation. + groups: + - read + - mcp: + allow: ["postgres.query", "postgres.explain"] + alwaysAllow: ["postgres.explain"] + - edit: + fileRegex: "^migrations/.*\\.sql$" + mcpServers: + postgres: + command: uvx + args: [mcp-postgres-server] + env: { PG_URL: "${STAGING_PG_URL}" } + alwaysAllow: [postgres.explain] +``` + +Lookup order: + +1. `~/.gitpilot/modes.yaml` — user-global +2. `<workspace>/.gitpilot/modes.yaml` — project (wins on slug clash) + +`activate_mode(registry, "db-pilot")` returns an `ActiveModeContext` +bundle ready to plug into the executor: + +* `system_prompt_block` — for injection into the system prompt +* `tool_policy` — pass to the executor / approval layer +* `mcp_server_configs` — for the MCP client to spin up +* `extra_mcp_toggles` — apply via `MCPToggleRegistry` + +When a mode is exited, its mode-scoped MCP servers stop and their tool +definitions leave the prompt automatically. + +--- + +## 7. Slash commands as markdown + +Drop a file into `.gitpilot/commands/<name>.md` (project) or +`~/.gitpilot/commands/<name>.md` (user) to define a reusable command: + +```markdown +--- +description: Create a new API endpoint +argument-hint: <name> <method> +--- + +Create a new endpoint called $1 handling $2 requests. +Include error handling, tests, and OpenAPI docs. +``` + +* Filename → command name (lower-case, dash-separated). +* `$1`..`$9` are positional; `$ARGS` expands to the full arg string. +* Front-matter `description` powers the `/` menu. + +--- + +## 8. Checkpointing + +Before any mutating tool call, `CheckpointStore.snapshot` records: + +1. A git commit in a **shadow** repo at + `~/.gitpilot/history/<workspace-id>/snapshot`. +2.
The conversation transcript up to that point. +3. The exact tool call that was about to run. + +`store.restore(checkpoint_id)` rolls the workspace files back and +returns the saved transcript so the chat can resume from the same +state. The shadow repo never touches the project's `.git/` directory. + +```python +from gitpilot.checkpoints import CheckpointStore, ToolCallDescriptor + +store = CheckpointStore(workspace) +record = store.snapshot( + ToolCallDescriptor(name="write_local_file", target_path="src/app.py"), + transcript=conversation, +) +# …later… +restored = store.restore(record.id) +``` + +`store.prune(keep_last=50)` removes older checkpoints for housekeeping. + +--- + +## 9. Custom rules + +Rule files steer style and process without filling the chat with +boilerplate. Discovery (global → workspace, last wins): + +``` +~/.gitpilot/rules/*.md +~/.gitpilot/rules-<mode>/*.md +<workspace>/.gitpilotrules +<workspace>/.gitpilotrules-<mode> +<workspace>/.gitpilot/rules/*.md +<workspace>/.gitpilot/rules-<mode>/*.md +``` + +```python +from gitpilot.rules import compose_rules + +markdown, ruleset = compose_rules(workspace_path=ws, mode_slug="coder") +``` + +The returned block is bounded — over-budget rules are tail-trimmed so +the freshest instructions stay visible. + +--- + +## 10. Sandboxed tool execution + +A new `gitpilot.sandbox` module introduces pluggable execution +backends. By default GitPilot uses the **subprocess** backend (cwd +jailed to the workspace, secret env vars stripped, blocked-pattern +deny list).
For real containerised isolation, point GitPilot at a +[MatrixLab](https://github.com/agent-matrix/matrixlab) runner: + +```bash +export GITPILOT_SANDBOX=matrixlab +export GITPILOT_MATRIXLAB_URL=http://localhost:8000 # default +export GITPILOT_MATRIXLAB_TOKEN=<token> +``` + +```python +from gitpilot.sandbox import get_sandbox, SandboxPolicy + +sb = get_sandbox(policy=SandboxPolicy(workspace=ws, timeout_sec=120)) +result = await sb.run(["pytest", "-q"]) +print(result.stdout, result.exit_code, result.sandbox_id) +``` + +| Backend | Isolation | Setup | +|---|---|---| +| `off` | none (legacy host exec) | always available | +| `subprocess` (default) | cwd jail + env scrub + deny patterns | always available | +| `matrixlab` | ephemeral container, resource caps, no host FS | requires a running MatrixLab runner | + +Selection precedence: explicit argument → `GITPILOT_SANDBOX` env → +`settings.json` `tools.sandbox` → `subprocess`. An unknown backend +falls back to `subprocess` rather than running on the host. + +--- + +## 11. Trusted folders + +GitPilot now records a per-workspace trust decision in +`~/.gitpilot/trusted.json`: + +```python +from gitpilot.trusted_folders import TrustStore, TrustStatus + +store = TrustStore.default() +status = store.status(workspace) +if status is TrustStatus.UNKNOWN: + # Prompt the user, then: + store.trust(workspace, note="onboarded 2026-05") +elif status is TrustStatus.FINGERPRINT_MISMATCH: + # The workspace's structural files changed since we trusted it — + # ask the user to re-confirm before proceeding. + ... +``` + +The fingerprint covers a small set of structural files +(`package.json`, `pyproject.toml`, `Cargo.toml`, `Makefile`, +`AGENTS.md`, `.gitpilot/modes.yaml`, …) so wholesale folder swaps +invalidate trust automatically. + +--- + +## Backwards compatibility + +* No existing module was modified — every change ships as a new file + under `gitpilot/`.
+* All 956 pre-existing tests continue to pass; 79 new tests cover the + new modules (1035 total). +* Default behaviour is unchanged: a session that doesn't load + `AGENTS.md`, doesn't activate a custom mode, and doesn't ask for a + sandbox behaves exactly as before. + +--- + +## Quick adoption checklist + +1. `gitpilot init` β€” drop a starter `AGENTS.md` in the repo. +2. Add `.gitpilot/modes.yaml` with the modes your team uses. +3. Tighten `.gitpilot/mcp.json` β€” turn off tools you don't need; mark + read-only tools `alwaysAllow`. +4. Drop a few `.gitpilot/commands/*.md` for recurring prompts. +5. Set `GITPILOT_SANDBOX=matrixlab` (and point at a running MatrixLab + runner) for production-grade isolation of shell tools. +6. Wire the `ContextBudgetManager.stats()` output into the chat UI to + surface a live token counter. diff --git a/FRONTEND_CODE_REFERENCE.md b/docs/contributing/frontend-code-reference.md similarity index 100% rename from FRONTEND_CODE_REFERENCE.md rename to docs/contributing/frontend-code-reference.md diff --git a/PACKAGING.md b/docs/contributing/packaging.md similarity index 100% rename from PACKAGING.md rename to docs/contributing/packaging.md diff --git a/DEPLOYMENT_DOCKER.md b/docs/deploy/docker.md similarity index 99% rename from DEPLOYMENT_DOCKER.md rename to docs/deploy/docker.md index 5f45ea5..9fdbe97 100644 --- a/DEPLOYMENT_DOCKER.md +++ b/docs/deploy/docker.md @@ -388,7 +388,7 @@ Already implemented for frontend to minimize image size! 
## πŸ”— Related Documentation -- [Render Deployment](./DEPLOYMENT_RENDER.md) -- [Vercel Testing](./VERCEL_TESTING.md) +- [Render deployment](./render.md) +- [Vercel testing](./vercel-testing.md) - [Docker Documentation](https://docs.docker.com/) - [Docker Compose Documentation](https://docs.docker.com/compose/) diff --git a/docs/deploy/index.md b/docs/deploy/index.md new file mode 100644 index 0000000..9a3d8cc --- /dev/null +++ b/docs/deploy/index.md @@ -0,0 +1,38 @@ +# Deploying GitPilot + +GitPilot is a standard Python package + a FastAPI server + a static frontend. +Pick the path that matches your environment. + +## Hosted (one-click) + +* **[Render](render.md)** β€” Python + Docker, free tier available. +* **[Vercel](vercel.md)** β€” serverless frontend + API. +* **[Quick deploy](quick.md)** β€” opinionated 60-second deploy. + +## Self-hosted + +* **[Docker](docker.md)** β€” single-host docker-compose stack. +* **[Production](production.md)** β€” production-hardened defaults. +* **[Production with MCP](production-mcp.md)** β€” adds the MCP context-forge stack. +* **[Install MCP](install-mcp.md)** β€” install just the MCP layer separately. + +## Detailed guides + +* **[Render β€” detailed](render-detailed.md)** β€” every knob explained. +* **[Vercel setup](vercel-setup.md)** β€” initial configuration. +* **[Vercel testing](vercel-testing.md)** β€” smoke tests after deploy. + +## Recommended path + +For a brand-new project: + +1. `pip install gitcopilot` β€” try locally. +2. `gitpilot init --wizard` β€” generate the workspace artefacts. +3. Pick the deployment target that matches your team's existing + infrastructure (Docker if self-hosting, Render or Vercel if you + want managed). + +All deployment recipes assume you have set the appropriate provider +API key (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, …) in the platform's +secret store. See **[../API_STABILITY.md](../API_STABILITY.md)** for +the import surface your integration should rely on. 
diff --git a/INSTALL_MCP.md b/docs/deploy/install-mcp.md similarity index 81% rename from INSTALL_MCP.md rename to docs/deploy/install-mcp.md index 63b5360..2124265 100644 --- a/INSTALL_MCP.md +++ b/docs/deploy/install-mcp.md @@ -1,21 +1,25 @@ # Installing the MCP Context Forge stack -The GitPilot **MCP Context Forge** is an optional second environment +The GitPilot **MCP Context Forge** is a bundled sidecar environment that runs alongside GitPilot core. When it is up, GitPilot's agents can call out to external MCP servers (PostgreSQL schema discovery, Milvus vector search, MCP inspector, plus anything else you attach) **as first-class tools during code generation**, the way Claude Code uses its built-in toolbox. -The whole stack is **opt-in and additive**: +The stack is **installed by default and runtime-additive**: -* `make install` works exactly as before for everyone. On machines with - Docker it now *also* pre-pulls the Forge images. Without Docker, it - prints a friendly skip message and exits 0 β€” the baseline flow is - byte-identical. -* `make run` is unchanged. -* The new `make run-mcp`, `make run-all`, `make sync-mcp` etc. are the - only way to involve Forge. +* `make install` prepares GitPilot core, the frontend, and the MCP stack. + On machines without Docker, MCP preparation prints a friendly skip + message and exits 0 so the baseline app install still succeeds. +* Re-running `make install` is incremental: existing MCP checkouts skip + network fetches unless `MCP_UPDATE=1` is set, and existing Docker images + skip rebuilds unless `MCP_BUILD=1` is set. +* `make run` starts the MCP stack first, verifies the Forge health endpoint + is host-reachable, then starts GitPilot backend/frontend. Use `make run-all` + only when you also want to force-restart an already-running backend. +* No Docker? Use `make run-bare` to start GitPilot without the MCP stack; + the UI will show the gateway as Unreachable, but everything else works. 
--- @@ -23,7 +27,7 @@ The whole stack is **opt-in and additive**: ```bash make install # backend + frontend + (if Docker) MCP image cache -make run-all # GitPilot + Forge + 3 reference servers +make run # MCP Context Forge + GitPilot backend/frontend # In a browser: Settings → MCP Servers → click "Sync" ``` @@ -54,7 +58,8 @@ the same approach HomePilot uses for its MCP servers stack: clone each upstream repo into `./mcp-stack/` and let Compose build the image from its Dockerfile. Branches / refs are pinned via `.mcp.env` (`MCP_FORGE_REF`, `MCP_POSTGRE_REF`, `MCP_MILVUS_REF`, -`MCP_INSPECTOR_REF`) so each `make install-mcp` is reproducible. +`MCP_INSPECTOR_REF`). Re-run with `MCP_UPDATE=1` when you want to fetch +those pinned refs again. `./mcp-stack/` is git-ignored — it's a build-time scratch dir, not part of the repo. @@ -84,10 +89,11 @@ No existing service, route, test or build target is modified. | Target | What it does | Needs Docker? | |--------|--------------|---------------| | `make install` | uv + npm + `install-mcp` (skip-safe) | no | -| `make install-mcp` | Pull Forge images, seed `.mcp.env` if missing | yes (else no-op) | -| `make run` | Start GitPilot core (unchanged) | no | +| `make install-mcp` | Seed `.mcp.env`, clone missing MCP repos, build missing images | yes (else no-op) | +| `make run` | Start MCP stack, verify Forge, then start GitPilot core/frontend | yes for MCP | +| `make run-bare` | Start GitPilot core/frontend WITHOUT the MCP stack | no | | `make run-mcp` | Start Forge + 3 reference servers | yes | -| `make run-all` | `run-mcp` then `run` | yes | +| `make run-all` | Stop stale backend, then `run` | yes | | `make stop-mcp` | Stop the MCP stack (volumes preserved) | yes | | `make logs-mcp` | Tail logs from the MCP stack | yes | | `make sync-mcp` | Trigger `/api/mcp/sync` against running GitPilot | no (curl) | @@ -99,9 +105,10 @@ No existing service, route, test or build target is modified.
This is the bit that makes it feel like Claude Code: -1. `make run-mcp` brings up Forge with three pre-registered servers. -2. `make run` starts GitPilot. Its **MCP Servers** tab now shows the - gateway as **Connected** instead of *Unreachable*. +1. `make run` brings up Forge with three pre-registered servers, verifies + `http://localhost:4444/health`, and starts GitPilot. +2. Its **MCP Servers** tab now shows the gateway as **Connected** instead + of *Unreachable*. 3. Click **Sync**. GitPilot calls Forge's registry, mirrors every server into its local store, and shows a banner: `+3 added · 0 refreshed · 0 orphaned`. @@ -151,7 +158,7 @@ distinction. A custom server you add (real DNS / IP) is left untouched. | Reversible | `uninstall-mcp.sh` cleans containers + volumes + images | | Token never committed | `.mcp.env` auto-added to `.gitignore`; tokens generated locally | | Skip-safe on minimal hosts | `install-mcp.sh` exits 0 when Docker is absent | -| One-command happy path | `make install && make run-all` | +| One-command happy path | `make install && make run` | --- diff --git a/PRODUCTION_MCP.md b/docs/deploy/production-mcp.md similarity index 95% rename from PRODUCTION_MCP.md rename to docs/deploy/production-mcp.md index af25e6d..b608adc 100644 --- a/PRODUCTION_MCP.md +++ b/docs/deploy/production-mcp.md @@ -2,7 +2,7 @@ This document is the operator's guide to running GitPilot with the optional MCP Context Forge stack in production. It complements -[INSTALL_MCP.md](./INSTALL_MCP.md) (which targets developers). +[install-mcp.md](./install-mcp.md) (which targets developers). The stack is **strictly additive**: enabling it never changes how GitPilot core behaves. Disabling it (`GITPILOT_MCP_ENABLED=false`) is a @@ -14,7 +14,7 @@ single env-var flip that returns the system to its baseline shape.
```bash make install # uv + npm + MCP image cache (skip-safe without Docker) -make run-all # GitPilot core + Forge + 3 reference MCP servers +make run # Forge + 3 reference MCP servers + GitPilot core make smoke-mcp # post-deploy health sweep make sync-mcp # mirror Forge's registry into GitPilot's local store ``` @@ -69,10 +69,10 @@ All four MCP services live under the Compose **`mcp` profile** in ```bash git pull make install # idempotent; safe on already-running hosts -make run-all +make run ``` -The first `make run-all` builds four images (3-8 minutes on a warm +The first `make run` builds four images (3-8 minutes on a warm broadband link). Subsequent runs reuse the build cache. ### 2. Verify @@ -123,8 +123,8 @@ Claude Code sees its built-ins. ```bash git pull -make install-mcp # re-clones / fetches upstream repos to the pinned ref -make run-mcp # rebuilds + recreates only what changed +MCP_UPDATE=1 MCP_BUILD=1 make install-mcp # fetch pinned refs and force image rebuild +make run # starts updated MCP stack + GitPilot make smoke-mcp ``` @@ -161,7 +161,7 @@ make uninstall-mcp # prompts y/N; removes containers, volumes, images ## Pinning to release tags (post-publish) Once Docker Hub publish workflows have run (see -[`extensions/mcp_workflows/README.md`](./extensions/mcp_workflows/README.md)) +[`extensions/mcp_workflows/README.md`](../../extensions/mcp_workflows/README.md)) and tags exist for each image, you have two ways to pin to a known good release: diff --git a/PRODUCTION.md b/docs/deploy/production.md similarity index 100% rename from PRODUCTION.md rename to docs/deploy/production.md diff --git a/QUICK_DEPLOY.md b/docs/deploy/quick.md similarity index 97% rename from QUICK_DEPLOY.md rename to docs/deploy/quick.md index dc6d199..228c8ec 100644 --- a/QUICK_DEPLOY.md +++ b/docs/deploy/quick.md @@ -226,8 +226,8 @@ docker push your-username/gitpilot-backend:latest - [Render Docker Deployment](https://render.com/docs/deploy-an-image) - [Vercel Environment 
Variables](https://vercel.com/docs/environment-variables) - [Docker Hub](https://hub.docker.com/) -- [DEPLOYMENT_DOCKER.md](./DEPLOYMENT_DOCKER.md) - Full Docker guide -- [DEPLOYMENT_RENDER.md](./DEPLOYMENT_RENDER.md) - Render deployment details +- [docker.md](./docker.md) - Full Docker guide +- [render.md](./render.md) - Render deployment details --- diff --git a/RENDER_DEPLOYMENT.md b/docs/deploy/render-detailed.md similarity index 100% rename from RENDER_DEPLOYMENT.md rename to docs/deploy/render-detailed.md diff --git a/DEPLOYMENT_RENDER.md b/docs/deploy/render.md similarity index 97% rename from DEPLOYMENT_RENDER.md rename to docs/deploy/render.md index 9ef9896..c3119aa 100644 --- a/DEPLOYMENT_RENDER.md +++ b/docs/deploy/render.md @@ -45,8 +45,8 @@ This guide explains how to deploy GitPilot with: 3. **Configure**: - **Name**: `gitpilot-backend` - **Environment**: `Python 3` - - **Build Command**: `pip install uv && uv sync --all-extras` - - **Start Command**: `uv run gitpilot serve --host 0.0.0.0 --port $PORT` + - **Build Command**: `pip install uv && uv sync --no-dev` + - **Start Command**: `uv run --no-dev gitpilot serve --host 0.0.0.0 --port $PORT` - **Health Check Path**: `/api/health` 4. **Set environment variables** (same as above) 5. **Deploy** diff --git a/VERCEL_SETUP.md b/docs/deploy/vercel-setup.md similarity index 100% rename from VERCEL_SETUP.md rename to docs/deploy/vercel-setup.md diff --git a/VERCEL_TESTING.md b/docs/deploy/vercel-testing.md similarity index 100% rename from VERCEL_TESTING.md rename to docs/deploy/vercel-testing.md diff --git a/VERCEL_DEPLOYMENT.md b/docs/deploy/vercel.md similarity index 100% rename from VERCEL_DEPLOYMENT.md rename to docs/deploy/vercel.md diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..f52ce58 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,45 @@ +# GitPilot documentation + +**Open-source multi-agent AI coding assistant.** +Plan, code, test, and ship — with you in the loop.
+ +## Get started — three commands + +```bash +pip install gitcopilot +GITPILOT_FLAGS="init_wizard=1" gitpilot init --wizard +gitpilot serve +``` + +Open [http://localhost:8000](http://localhost:8000). + +## Sections + +* **[Quickstart](quickstart.md)** — install, configure a model, run the + first chat. +* **[API stability contract](API_STABILITY.md)** — what + `gitpilot.public_api` promises, deprecation policy, SemVer mapping. +* **[Deploy](deploy/)** — Docker, Render, Vercel, MCP stack, production. +* **[Contributing](contributing/packaging.md)** — packaging, frontend + reference, hacking on GitPilot itself. +* **Phase history** — [Phase 1](PHASE1.md), [Phase 2](PHASE2.md), + [Phase 3-G](PHASE3_G.md). +* **[Upgrade catalogue](UPGRADES.md)** — every feature introduced via + the Phase plan. + +## Why GitPilot? + +* **Four agents, not one.** Explorer reads, Planner drafts, Coder + writes, Reviewer audits. You see every step. +* **Any LLM.** Anthropic, OpenAI, watsonx, Ollama. Switch in + settings, no code change. +* **Safe by default.** Sandboxed shell, file-regex edit guards, + atomic checkpoints, trusted-folder gate. +* **Daily-driver speed.** Prompt cache, lazy tool defs, context-pack + LRU, SSE streaming, model warmup — every one flag-gated. +* **Stable contract.** Build on `gitpilot.public_api` and stay + unbroken through major bumps. + +## License + +Apache 2.0.
diff --git a/PATCH_NOTES.md b/docs/patch-notes.md similarity index 100% rename from PATCH_NOTES.md rename to docs/patch-notes.md diff --git a/QUICKSTART.md b/docs/quickstart.md similarity index 100% rename from QUICKSTART.md rename to docs/quickstart.md diff --git a/extensions/vscode/package-lock.json b/extensions/vscode/package-lock.json index d6d8096..17a3d3c 100644 --- a/extensions/vscode/package-lock.json +++ b/extensions/vscode/package-lock.json @@ -1,13 +1,13 @@ { "name": "gitpilot-vscode", - "version": "0.1.7", + "version": "0.2.6", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "gitpilot-vscode", - "version": "0.1.7", - "license": "MIT", + "version": "0.2.6", + "license": "Apache-2.0", "devDependencies": { "@types/node": "^20.19.39", "@types/vscode": "^1.110.0", diff --git a/extensions/vscode/package.json b/extensions/vscode/package.json index 6772ff8..ab9a14e 100644 --- a/extensions/vscode/package.json +++ b/extensions/vscode/package.json @@ -233,6 +233,36 @@ "category": "GitPilot", "icon": "$(settings-gear)" }, + { + "command": "gitpilot.runDoctor", + "title": "Run Doctor (environment check)", + "category": "GitPilot", + "icon": "$(check-all)" + }, + { + "command": "gitpilot.runInitWizard", + "title": "Run First-Run Wizard", + "category": "GitPilot", + "icon": "$(rocket)" + }, + { + "command": "gitpilot.openApiStability", + "title": "Open API Stability Contract", + "category": "GitPilot", + "icon": "$(book)" + }, + { + "command": "gitpilot.openPhaseRunbooks", + "title": "Open Phase Runbook…", + "category": "GitPilot", + "icon": "$(notebook)" + }, + { + "command": "gitpilot.showFeatureFlags", + "title": "Copy Feature Flag…", + "category": "GitPilot", + "icon": "$(beaker)" + }, { "command": "gitpilot.ollaBridgePair", "title": "Pair with OllaBridge Cloud", diff --git a/extensions/vscode/src/commands/phase4Commands.ts b/extensions/vscode/src/commands/phase4Commands.ts new file mode 100644 index 0000000..35f7cc5 --- /dev/null +++ 
b/extensions/vscode/src/commands/phase4Commands.ts @@ -0,0 +1,221 @@ +/** + * GitPilot — VS Code commands that surface the new backend features + * shipped in Phases 1–4 of the upgrade plan. + * + * Every command in this module is **additive**. None of them rewrite + * existing UI flows; they run the new CLI subcommands in an integrated + * terminal (so the user can see real output and abort with Ctrl-C), + * or open the new documentation in VS Code's preview pane. When the + * underlying CLI feature is gated behind a feature flag, the command + * sets `GITPILOT_FLAGS` on the spawned terminal so the user does not + * have to remember the env var. + * + * Commands added (each contributes one entry under `contributes.commands` + * in package.json): + * + * - gitpilot.runDoctor — `gitpilot doctor --offline` health check + * - gitpilot.runInitWizard — `gitpilot init --wizard` + * - gitpilot.openApiStability — opens docs/API_STABILITY.md + * - gitpilot.openPhaseRunbooks — quick-pick over docs/PHASE*.md + * - gitpilot.showFeatureFlags — quick-pick of toggleable flags + * + * The commands themselves are intentionally thin: they orchestrate VS + * Code APIs (terminals, quick picks, document previews) against the + * already-shipped CLI. No new backend endpoints are required. + */ + +import * as vscode from "vscode"; +import * as path from "path"; + +// --------------------------------------------------------------------------- +// Feature flags catalogued from the backend (Phase 1–3 batches). +// Keeping the list in TypeScript lets the quick-pick stay snappy without +// a round-trip to the backend; mismatches with the backend would surface +// as "flag has no effect" rather than an error.
+// --------------------------------------------------------------------------- + +const KNOWN_FLAGS: ReadonlyArray<{ name: string; description: string }> = [ + { name: "error_envelope", description: "Structured error responses (Phase 1-D)" }, + { name: "prompt_cache", description: "Anthropic prompt-cache markers (Phase 2-A)" }, + { name: "lazy_tool_defs", description: "Mode-policy-driven MCP tool pruning (Phase 2-B)" }, + { name: "context_cache", description: "LRU memoisation of context packs (Phase 2-C)" }, + { name: "stream_v2", description: "End-to-end SSE streaming route (Phase 2-D)" }, + { name: "ui_stream_v2", description: "UI consumer for stream_v2 (Phase 2-D)" }, + { name: "model_warmup", description: "1-token startup ping (Phase 2-E)" }, + { name: "init_wizard", description: "Interactive first-run wizard (Phase 3-G)" }, +]; + + +// --------------------------------------------------------------------------- +// Public registration entry point. Call once from extension.ts. +// --------------------------------------------------------------------------- + +export function registerPhase4Commands(context: vscode.ExtensionContext): void { + context.subscriptions.push( + vscode.commands.registerCommand("gitpilot.runDoctor", runDoctor), + vscode.commands.registerCommand("gitpilot.runInitWizard", runInitWizard), + vscode.commands.registerCommand("gitpilot.openApiStability", openApiStability), + vscode.commands.registerCommand("gitpilot.openPhaseRunbooks", openPhaseRunbooks), + vscode.commands.registerCommand("gitpilot.showFeatureFlags", showFeatureFlags), + ); +} + + +// --------------------------------------------------------------------------- +// gitpilot.runDoctor +// --------------------------------------------------------------------------- + +async function runDoctor(): Promise { + const terminal = ensureTerminal("GitPilot Β· Doctor"); + terminal.show(true); + // ``--offline`` keeps the check ≀ 100 ms when there is no network. 
+ terminal.sendText("gitpilot doctor --offline"); +} + + +// --------------------------------------------------------------------------- +// gitpilot.runInitWizard +// --------------------------------------------------------------------------- + +async function runInitWizard(): Promise { + const folder = currentFolderOrWarn(); + if (!folder) { + return; + } + + // The wizard refuses to run unless its flag is on. Setting the env + // var on the terminal removes one source of confusion for first-time + // users while keeping the flag globally off. + const env = { GITPILOT_FLAGS: "init_wizard=1" }; + const terminal = ensureTerminal("GitPilot Β· Init wizard", env); + terminal.show(true); + terminal.sendText(`gitpilot init --wizard ${quoteArg(folder)}`); +} + + +// --------------------------------------------------------------------------- +// gitpilot.openApiStability / gitpilot.openPhaseRunbooks +// --------------------------------------------------------------------------- + +async function openApiStability(): Promise { + await openDocFromRepo("docs/API_STABILITY.md"); +} + + +async function openPhaseRunbooks(): Promise { + const options: Array<{ label: string; doc: string }> = [ + { label: "Phase 1 β€” Foundations", doc: "docs/PHASE1.md" }, + { label: "Phase 2 β€” Performance", doc: "docs/PHASE2.md" }, + { label: "Phase 3-G β€” First-run wizard", doc: "docs/PHASE3_G.md" }, + { label: "Phase 4 β€” Quality safety net", doc: "docs/PHASE4.md" }, + { label: "Upgrade catalogue (all phases)", doc: "docs/UPGRADES.md" }, + { label: "Public API contract", doc: "docs/API_STABILITY.md" }, + ]; + const pick = await vscode.window.showQuickPick( + options.map((o) => ({ label: o.label, description: o.doc })), + { placeHolder: "Open a GitPilot phase runbook" }, + ); + if (pick) { + await openDocFromRepo(pick.description as string); + } +} + + +// --------------------------------------------------------------------------- +// gitpilot.showFeatureFlags +// 
--------------------------------------------------------------------------- + +async function showFeatureFlags(): Promise { + const items = KNOWN_FLAGS.map((f) => ({ + label: f.name, + description: f.description, + })); + const pick = await vscode.window.showQuickPick(items, { + placeHolder: "Pick a flag to copy a sample GITPILOT_FLAGS env var to your clipboard", + }); + if (!pick) { + return; + } + const value = `${pick.label}=1`; + await vscode.env.clipboard.writeText(`GITPILOT_FLAGS="${value}"`); + vscode.window.showInformationMessage( + `Copied to clipboard: GITPILOT_FLAGS="${value}". Restart \`gitpilot serve\` for it to apply.`, + ); +} + + +// --------------------------------------------------------------------------- +// Helpers β€” kept private to this module +// --------------------------------------------------------------------------- + +function ensureTerminal( + name: string, + env?: Record, +): vscode.Terminal { + const existing = vscode.window.terminals.find((t) => t.name === name); + if (existing) { + return existing; + } + return vscode.window.createTerminal({ name, env }); +} + + +function currentFolderOrWarn(): string | undefined { + const folders = vscode.workspace.workspaceFolders; + if (!folders || folders.length === 0) { + vscode.window.showWarningMessage( + "Open a folder in VS Code before running the GitPilot wizard.", + ); + return undefined; + } + return folders[0]!.uri.fsPath; +} + + +function quoteArg(arg: string): string { + // Cheap shell-escape β€” wraps the path in double quotes and escapes + // any embedded double quote. Sufficient for VS Code workspace + // paths on Linux / macOS / Windows. + return `"${arg.replace(/"/g, '\\"')}"`; +} + + +async function openDocFromRepo(relativePath: string): Promise { + const repoRoot = await findRepoRoot(); + if (!repoRoot) { + vscode.window.showWarningMessage( + "GitPilot docs are not part of the open workspace. 
" + + "Clone https://github.com/ruslanmv/gitpilot to access the runbooks.", + ); + return; + } + const docPath = path.join(repoRoot, relativePath); + const uri = vscode.Uri.file(docPath); + try { + await vscode.commands.executeCommand("markdown.showPreview", uri); + } catch { + // Fallback when the markdown preview extension is not available. + await vscode.window.showTextDocument(uri, { preview: true }); + } +} + + +async function findRepoRoot(): Promise { + const folders = vscode.workspace.workspaceFolders; + if (!folders) { + return undefined; + } + for (const folder of folders) { + const candidate = folder.uri.fsPath; + // Look for `docs/UPGRADES.md` as the canonical marker that we're + // inside a GitPilot clone (the file ships from Phase 1 onward). + try { + const probe = vscode.Uri.file(path.join(candidate, "docs", "UPGRADES.md")); + await vscode.workspace.fs.stat(probe); + return candidate; + } catch { + // not this folder + } + } + return undefined; +} diff --git a/extensions/vscode/src/extension.ts b/extensions/vscode/src/extension.ts index 0ce2c00..b9911c1 100644 --- a/extensions/vscode/src/extension.ts +++ b/extensions/vscode/src/extension.ts @@ -49,6 +49,7 @@ import { registerSetupCommands } from "./commands/setupCommands"; import { registerProviderCommands } from "./commands/providerCommands"; import { registerSessionCommands } from "./commands/sessionCommands"; import { registerChatCommandsV2 } from "./commands/chatCommands"; +import { registerPhase4Commands } from "./commands/phase4Commands"; import { StateStore } from "./core/stateStore"; import { GitPilotEvents } from "./core/events"; @@ -1849,6 +1850,8 @@ export function activate(context: vscode.ExtensionContext): void { registerProviderCommands(context, stateStore, settingsClient); registerSessionCommands(context, stateStore, sessionCoordinator); registerChatCommandsV2(context, stateStore, chatClientV2); + // Phase 1–4 backend feature commands (doctor, wizard, runbooks, flags). 
+ registerPhase4Commands(context); registerCommand("gitpilot.showAgentFlow", () => { AgentFlowPanel.show(client, context.extensionUri); diff --git a/frontend/App.jsx b/frontend/App.jsx index d3b491a..693b5d2 100644 --- a/frontend/App.jsx +++ b/frontend/App.jsx @@ -561,6 +561,57 @@ export default function App() { }) => { if (!repoKey || !branch) return; + // Clear the session-keyed chat cache's ``plan`` AND append the + // completion message synchronously, before any branch change can + // trigger ChatPanel's session-sync effect. Two bugs need to be + // fixed in the same write: + // + // 1. Stale plan: without clearing, the sync effect re-reads the + // old approved plan and restores the Approve & execute / Reject + // plan buttons, enabling accidental double-execution. + // + // 2. Wiped completion: in hard-switch mode the sync effect runs + // BEFORE the persistence effect (declared earlier in + // ChatPanel), so it overwrites local ``messages`` with + // ``sessionChatState.messages`` β€” which doesn't yet contain + // completionMsg. The user's "Answer / Execution Log" block + // then vanishes from the session view. + // + // By appending normalizedCompletion here, sessionChatState already + // carries the completion when the sync effect reads it. No + // duplicate is introduced: local ``messages`` already has the same + // entry, so the subsequent persistence pass is a no-op write. + if (activeSessionId) { + const normalizedCompletion = + completionMsg && + (completionMsg.answer || completionMsg.content || completionMsg.executionLog) + ? 
{ + from: completionMsg.from || "ai", + role: completionMsg.role || "assistant", + answer: completionMsg.answer, + content: completionMsg.content, + executionLog: completionMsg.executionLog, + diff: completionMsg.diff, + } + : null; + setChatBySession((prev) => { + const existing = prev[activeSessionId]; + if (!existing) return prev; + const noPlanChange = existing.plan == null; + if (noPlanChange && !normalizedCompletion) return prev; + return { + ...prev, + [activeSessionId]: { + ...existing, + messages: normalizedCompletion + ? [...(existing.messages || []), normalizedCompletion] + : existing.messages, + plan: null, + }, + }; + }); + } + setRepoStateByKey((prev) => { const cur = prev[repoKey] || { diff --git a/frontend/components/AdminTabs/mcp/GatewayHeader.jsx b/frontend/components/AdminTabs/mcp/GatewayHeader.jsx index ee9590d..902dcfe 100644 --- a/frontend/components/AdminTabs/mcp/GatewayHeader.jsx +++ b/frontend/components/AdminTabs/mcp/GatewayHeader.jsx @@ -109,7 +109,7 @@ export default function GatewayHeader({ title={ reachable ? "Pull the server registry from MCP Context Forge" - : "Gateway unreachable β€” start MCP Context Forge first (make run-mcp)" + : "Gateway unreachable β€” start MCP Context Forge first (make run)" } style={{ padding: "6px 12px", diff --git a/frontend/components/AssistantMessage.jsx b/frontend/components/AssistantMessage.jsx index cb24b5c..9ec8c00 100644 --- a/frontend/components/AssistantMessage.jsx +++ b/frontend/components/AssistantMessage.jsx @@ -1,7 +1,13 @@ import React from "react"; import PlanView from "./PlanView.jsx"; -export default function AssistantMessage({ answer, plan, executionLog }) { +export default function AssistantMessage({ answer, plan, executionLog, planStatus }) { + // ``planStatus`` is optional metadata about the lifecycle of the plan + // attached to this message: "executed" | "rejected" | null. 
It drives + // the badge next to the Action Plan header so the user can tell at a + // glance, in chat history, whether a previous plan was approved or + // dismissed. Defaults to null (no badge) to keep the legacy render + // path untouched. const styles = { container: { marginBottom: "20px", @@ -89,8 +95,48 @@ export default function AssistantMessage({ answer, plan, executionLog }) { {/* Action Plan section β€” only when there are file changes */} {plan && hasFileActions && (
-
-

Action Plan

+
+

Action Plan

+ {planStatus === "executed" && ( + + βœ“ Executed + + )} + {planStatus === "rejected" && ( + + βœ• Rejected + + )}
diff --git a/frontend/components/ChatPanel.jsx b/frontend/components/ChatPanel.jsx index 60b889a..c66d274 100644 --- a/frontend/components/ChatPanel.jsx +++ b/frontend/components/ChatPanel.jsx @@ -1,6 +1,8 @@ // frontend/components/ChatPanel.jsx import React, { useEffect, useRef, useState } from "react"; import AssistantMessage from "./AssistantMessage.jsx"; +import ThinkingIndicator from "./ThinkingIndicator.jsx"; +import ContextMeter from "./ContextMeter.jsx"; import DiffStats from "./DiffStats.jsx"; import DiffViewer from "./DiffViewer.jsx"; import CreatePRButton from "./CreatePRButton.jsx"; @@ -109,7 +111,12 @@ export default function ChatPanel({ setLoadingPlan(false); // Consolidate streaming events into a chat message (use ref to - // avoid stale closure β€” streamingEvents state would be stale here) + // avoid stale closure β€” streamingEvents state would be stale here). + // + // We also commit the FINAL consolidated text to the backend session + // here. Previously this branch never called persistMessage, so the + // assistant turn looked correct in the live view but vanished on the + // next session reload β€” the canonical "streaming truncation" symptom. const events = streamingEventsRef.current; if (events.length > 0) { const textParts = events @@ -123,6 +130,7 @@ export default function ChatPanel({ content: textParts.join(""), }; setMessages((prev) => [...prev, consolidated]); + persistMessage(sessionId, "assistant", consolidated.content); } setStreamingEvents([]); } @@ -211,17 +219,45 @@ export default function ChatPanel({ // HANDLERS // --------------------------------------------------------------------------- // --------------------------------------------------------------------------- - // Persist a message to the backend session (fire-and-forget) + // Persist a message to the backend session (fire-and-forget). 
+ // + // The fourth argument carries the *structured* payload of the assistant + // response β€” the Action Plan, the Execution Log, diff stats, etc. The + // backend stores it on Message.metadata; on session reload App.jsx + // spreads metadata back into the local message via normalizeBackendMessage, + // so the same AssistantMessage renderer can re-draw the Plan / Steps / + // Create buttons identically to the live view. + // + // Before this fix the structured payload was dropped at persist time β€” + // the session reloaded as raw text, and the UI degraded to a plain + // paragraph. This is the canonical "state loss during hydration" bug. // --------------------------------------------------------------------------- - const persistMessage = (sid, role, content) => { + const persistMessage = (sid, role, content, metadata = null) => { if (!sid) return; + const body = { role, content }; + if (metadata && typeof metadata === "object" && Object.keys(metadata).length > 0) { + body.metadata = metadata; + } fetch(`/api/sessions/${sid}/message`, { method: "POST", headers: getHeaders(), - body: JSON.stringify({ role, content }), + body: JSON.stringify(body), }).catch(() => {}); // best-effort }; + // Pick the structured fields a message can carry across a reload. + // Keep this in one place so every call-site stores the same shape and + // the renderer never has to guess. + const pickAssistantMetadata = (m) => { + if (!m || typeof m !== "object") return null; + const meta = {}; + if (m.plan) meta.plan = m.plan; + if (m.executionLog) meta.executionLog = m.executionLog; + if (m.diff) meta.diff = m.diff; + if (m.actions) meta.actions = m.actions; + return Object.keys(meta).length > 0 ? 
meta : null; + }; + const send = async () => { if (!repo || !goal.trim()) return; @@ -313,27 +349,61 @@ export default function ChatPanel({ throw new Error(detail || "Failed to generate plan"); } - setPlan(data); + // Guard: a plan with no executable file actions is not a plan we + // can approve. This happens when the planner/explorer agents + // refused (tool-loop hallucination or a real safety refusal) and + // CrewAI returned a schema-valid but empty payload. Without + // this guard the Approve & execute / Reject plan buttons would + // render against a payload that can't actually be executed. + const planSteps = Array.isArray(data?.steps) + ? data.steps + : Array.isArray(data?.plan?.steps) + ? data.plan.steps + : []; + const hasExecutableFiles = planSteps.some( + (s) => + Array.isArray(s?.files) && + s.files.some((f) => ["CREATE", "MODIFY", "DELETE"].includes(f?.action)), + ); // Extract summary from nested plan structure or top-level const summary = data.plan?.summary || data.summary || data.message || "Here is the proposed plan for your request."; - // Assistant response (Answer + Action Plan) - setMessages((prev) => [ - ...prev, - { + if (hasExecutableFiles) { + setPlan(data); + const assistantMsg = { from: "ai", role: "assistant", answer: summary, content: summary, plan: data, - }, - ]); - - // Persist assistant response to backend session - persistMessage(sid, "assistant", summary); + }; + setMessages((prev) => [...prev, assistantMsg]); + persistMessage(sid, "assistant", summary, pickAssistantMetadata(assistantMsg)); + } else { + // No executable steps β€” surface a clear failure to the user + // instead of half-rendering a plan card and dangling buttons. + // The most common cause is the explorer/planner agent loop + // (CrewAI same-input limiter blocks repeat tool calls, the + // agent panics and "refuses"). Encourage a retry rather than + // letting the user click Approve on nothing. 
+ setPlan(null); + const failureText = + "I couldn't produce a plan for that request. The agent may have " + + "got stuck reading the same file twice. Try rephrasing, or " + + "switch to a stronger model in Settings β†’ Provider."; + const failureMsg = { + from: "ai", + role: "system", + content: failureText, + }; + setMessages((prev) => [...prev, failureMsg]); + persistMessage(sid, "system", failureText); + setStatus("No executable plan produced."); + return; + } } catch (err) { const msg = String(err?.message || err); console.error(err); @@ -347,6 +417,36 @@ export default function ChatPanel({ } }; + // --------------------------------------------------------------------------- + // Reject the active plan β€” minimal first cut. + // + // Industry rule we follow from the start: never write to disk on a path the + // user did not approve. Rejecting is the cheapest expression of that β€” + // discard the proposed plan locally, leave the workspace untouched, record + // the rejection in chat history so the user sees it after a session reload. + // + // No backend endpoint is needed yet because plans are not persisted as + // first-class objects today; they ride along on the assistant message's + // metadata. When we later add per-plan state tracking, this handler will + // also POST /api/chat/plan/{id}/reject β€” leaving that for a follow-up. + // --------------------------------------------------------------------------- + const rejectPlan = () => { + if (!plan || executing) return; + setPlan(null); + setStatus("Plan rejected. No files were changed."); + + const rejectionMsg = { + from: "ai", + role: "system", + content: "Plan rejected. 
No files were changed.", + }; + setMessages((prev) => [...prev, rejectionMsg]); + + if (sessionId) { + persistMessage(sessionId, "system", rejectionMsg.content); + } + }; + const execute = async () => { if (!repo || !plan) return; @@ -385,11 +485,23 @@ export default function ChatPanel({ answer: data.message || "Execution completed.", content: data.message || "Execution completed.", executionLog: data.executionLog, + diff: data.diff, }; // Show completion immediately (keeps old "Execution Log" section) setMessages((prev) => [...prev, completionMsg]); + // Persist the execution log + diff alongside the message text so + // the History view re-renders the green "Execution Log" panel and + // the "View diff" affordance. Without this, reloading the session + // shows just the one-line "Execution completed." summary. + persistMessage( + sessionId, + "assistant", + completionMsg.content, + pickAssistantMetadata(completionMsg), + ); + // Clear active plan UI setPlan(null); @@ -571,13 +683,39 @@ export default function ChatPanel({ ); } - // Assistant message (Answer / Plan / Execution Log) + // Assistant message (Answer / Plan / Execution Log). + // + // Lifecycle audit signal: if this message carries a plan, look + // ahead in the timeline for any subsequent message that + // records an execution log (=> the plan was approved+executed) + // or a system "Plan rejected" entry (=> the plan was + // rejected). The status is rendered as a small green/grey + // badge next to the Action Plan header so users can tell at a + // glance β€” in history β€” whether a previous plan was acted on. + let planStatus = null; + if (m.plan) { + const after = messages.slice(idx + 1); + if (after.some((later) => later.executionLog)) { + planStatus = "executed"; + } else if ( + after.some( + (later) => + later.role === "system" && + typeof later.content === "string" && + later.content.includes("Plan rejected"), + ) + ) { + planStatus = "rejected"; + } + } + return (
{/* Diff stats indicator (Claude-Code-on-Web parity) */} {m.diff && ( @@ -597,10 +735,34 @@ export default function ChatPanel({
)} + {/* Enterprise Pulse — agentic thinking state shown after the user + hits Send and before the first streamed/planned chunk arrives. + Falls back gracefully to nothing once streamingEvents start + flowing in (StreamingMessage takes over the live feedback). */} {loadingPlan && streamingEvents.length === 0 && ( -
- Thinking... -
+ + )} + + {/* Live execution status — visible in the chat timeline while + ``executing`` is true, sits between the Action Plan card and + where the Execution Log (green panel in AssistantMessage) + will land once the backend returns. Removes the "did the + app freeze?" feeling caused by only the bottom button + saying "Executing…". + + Reuses the ThinkingIndicator with execution-specific labels. + When the executor finishes, ``setExecuting(false)`` removes + this bubble and the completionMsg lands in the timeline as + a normal assistant message with its green Execution Log + block — already rendered by AssistantMessage today. */} + {executing && ( + )} {!messages.length && !plan && !loadingPlan && streamingEvents.length === 0 && ( @@ -699,14 +861,44 @@ export default function ChatPanel({ {loadingPlan ? "Planning..." : wsConnected ? "Send" : "Generate plan"} - + {/* Approve & execute — visible only while a plan is awaiting + approval, or while an execution is already in flight (so + the user sees the "Executing…" label, not a missing + button). Previously this was always rendered with + ``disabled={!plan}``, which meant after a successful + execute() the button stayed on screen as a dimmed ghost + and a second click could trigger a duplicate run — + causing the executor to re-write the same file with the + same content (~50 s of wasted LLM time per accidental + click). Hiding the button entirely once ``plan`` is + null makes the bug impossible. */} + {(plan || executing) && ( + )} + + {/* Reject plan — same visibility window as Approve. */} + {plan && !executing && !loadingPlan && ( + )} {/* Create PR button (Claude-Code-on-Web parity) */} {isOnSessionBranch && ( @@ -720,17 +912,20 @@ export default function ChatPanel({ )}
- {/* WebSocket connection indicator */} - {sessionId && ( -
- - - {wsConnected ? "Live" : "Connecting..."} - -
- )} + {/* WebSocket connection indicator + context-window meter */} +
+ + {sessionId && ( + + + {wsConnected ? "Live" : "Connecting..."} + + )} + + +
{/* Diff Viewer overlay */} diff --git a/frontend/components/ContextMeter.jsx b/frontend/components/ContextMeter.jsx new file mode 100644 index 0000000..acd60ea --- /dev/null +++ b/frontend/components/ContextMeter.jsx @@ -0,0 +1,410 @@ +// frontend/components/ContextMeter.jsx +// +// Small bottom-right control that shows the active LLM's context-window +// utilisation. Collapsed: a single β“˜ icon (no number β€” keeps the UI +// quiet during normal use). Expanded: a compact popover with the +// breakdown, topology line, and a manual refresh button. +// +// Refresh model: lazy β€” fetched only when the popover opens, plus the +// explicit ↻ button. Zero idle traffic. +// +// Token-count estimate flag: when the backend reports is_estimate=true +// (Ollama / OllaBridge β€” no real tokenizer available) every number is +// prefixed with β‰ˆ so the imprecision is visible. +// +// Colours: GitPilot orange #D95C3D for β‰₯60% (warning), red #B91C1C for +// β‰₯85% (saturated). No new dependencies; inline styles + a scoped +// + + + + {open && ( +
+

Context window

+ + {loading && !data && ( +
Loading…
+ )} + {error && error !== "disabled" && ( +
+ Couldn't load: {error} +
+ )} + + {data && ( + <> +
+ Provider + {data.provider} + Model + {data.model || "—"} + Topology + {data.topology} +
+ +
+ + + {prefix} + {fmt(data.used)} / {fmt(data.context_window)}{" "} + ({percent.toFixed(1)}%) + +
+ + + + + + + +
+ + + + {percent >= 85 && ( +
= 95 ? "1" : "0"}> + Context near saturation. Consider: +
    +
  • Resetting the conversation
  • +
  • Switching to a larger-context model
  • +
  • Reducing repository scope
  • +
+
+ )} + +
+ {estimate ? "Token counts are estimated" : "Token counts via tiktoken"} + +
+ + )} +
+ )} + + ); +} diff --git a/frontend/components/ThinkingIndicator.jsx b/frontend/components/ThinkingIndicator.jsx new file mode 100644 index 0000000..92b6e3d --- /dev/null +++ b/frontend/components/ThinkingIndicator.jsx @@ -0,0 +1,151 @@ +// frontend/components/ThinkingIndicator.jsx +// +// Compact, enterprise-grade thinking state. Sits inline in the chat +// timeline as a small assistant-style bubble: +// +// ● Reading repository... Β· Β· Β· +// +// Design goals (from the bug report): +// * Calm, precise, technical β€” no large card, no big glow, no +// all-caps "THINKING" label. +// * Sits inline next to other chat messages; ~36 px tall, auto width. +// * Tiny pulsing brand-orange dot as the only accent (no rings, +// no progress sweep, no nested animated panels). +// * Muted text, sentence case, task-specific labels that rotate +// ("Reading repository", "Building plan", "Checking context", +// "Preparing response"). +// * Three tiny fading dots on the right as a generic "still working" +// signal. +// +// Implementation constraints (this codebase, not the proposal's): +// * No Tailwind β€” uses plain inline-style objects. +// * No framer-motion β€” uses CSS @keyframes in one scoped +
+ ); +} diff --git a/frontend/styles.css b/frontend/styles.css index 0fd9111..03c11e9 100644 --- a/frontend/styles.css +++ b/frontend/styles.css @@ -1333,6 +1333,19 @@ body { border: 1px solid #272832; } +/* Compact thinking bubble β€” defensive isolation so the global + .chat-message-ai span rule (which gives every span a chunky + 10Γ—14 padded pill with a dark background) cannot leak into the + thinking indicator's tiny inline-styled dots and label. */ +.gitpilot-thinking-indicator, +.gitpilot-thinking-indicator span { + background: transparent; + border: none; + padding: 0; + max-width: none; + line-height: 1.4; +} + .chat-empty-state { display: flex; flex-direction: column; diff --git a/gitpilot/_deprecation.py b/gitpilot/_deprecation.py new file mode 100644 index 0000000..c21b0d3 --- /dev/null +++ b/gitpilot/_deprecation.py @@ -0,0 +1,119 @@ +# gitpilot/_deprecation.py +"""Deprecation helpers used by :mod:`gitpilot.public_api` β€” Batch P4-C. + +This module is intentionally internal (leading underscore) and tiny. +It provides one decorator and one alias factory so that every +deprecated symbol on the stable surface behaves the same way: + +* a single :class:`DeprecationWarning` is emitted at the first call + through that symbol (per process, to avoid log spam) +* the warning text follows a fixed template: + ``" is deprecated; use instead (will be removed in vX.Y)"`` +* original behaviour is preserved β€” no breaking change to callers + +Use it from the public-API package like this:: + + from gitpilot._deprecation import deprecated_alias + + parse_mentions = deprecated_alias( + "parse_mentions", expand_mentions, + replacement="gitpilot.public_api.expand_mentions", + removed_in="2.0", + ) + +The corresponding entry in :doc:`API_STABILITY.md` documents the +removal milestone. 
+""" +from __future__ import annotations + +import functools +import threading +import warnings +from typing import Any, Callable, TypeVar + +F = TypeVar("F", bound=Callable[..., Any]) + + +_WARNED: set[str] = set() +_LOCK = threading.RLock() + + +def _emit_once(key: str, message: str, stacklevel: int = 3) -> None: + """Emit ``DeprecationWarning(message)`` at most once per key.""" + with _LOCK: + if key in _WARNED: + return + _WARNED.add(key) + warnings.warn(message, DeprecationWarning, stacklevel=stacklevel) + + +def deprecated( + *, + replacement: str, + removed_in: str, + legacy_name: str | None = None, +) -> Callable[[F], F]: + """Decorator: emit a :class:`DeprecationWarning` on first call. + + Parameters + ---------- + replacement + Dotted path the caller should use instead, e.g. + ``"gitpilot.public_api.run_wizard"``. + removed_in + Version that will drop the symbol, e.g. ``"2.0"``. Surfaces in + the warning text so users can plan the migration. + legacy_name + Override for the symbol's display name; defaults to the + wrapped function's ``__qualname__``. + """ + + def _wrap(fn: F) -> F: + name = legacy_name or fn.__qualname__ + + @functools.wraps(fn) + def _wrapper(*args: Any, **kwargs: Any) -> Any: + _emit_once( + key=f"call:{name}", + message=( + f"{name} is deprecated; use {replacement} instead " + f"(will be removed in v{removed_in})" + ), + ) + return fn(*args, **kwargs) + + # Surface the deprecation metadata for tooling / docs generation. + _wrapper.__gitpilot_deprecated__ = { # type: ignore[attr-defined] + "legacy_name": name, + "replacement": replacement, + "removed_in": removed_in, + } + return _wrapper # type: ignore[return-value] + + return _wrap + + +def deprecated_alias( + legacy_name: str, + target: F, + *, + replacement: str, + removed_in: str, +) -> F: + """Build a deprecated alias that delegates to ``target``. 
+ + Use this when you keep two names for the same callable for + backwards compatibility β€” the alias warns on use; the canonical + name does not. + """ + return deprecated( + replacement=replacement, + removed_in=removed_in, + legacy_name=legacy_name, + )(target) + + +def reset_deprecation_log_for_tests() -> None: + """Forget every emit-once key. Test-only.""" + with _LOCK: + _WARNED.clear() diff --git a/gitpilot/agent_executor.py b/gitpilot/agent_executor.py index b4a5d20..3998968 100644 --- a/gitpilot/agent_executor.py +++ b/gitpilot/agent_executor.py @@ -342,3 +342,75 @@ def _parse_test_counts(output: str) -> tuple[int, int, int]: passed = output.count("\nok") return passed, failed, skipped + + + # --------------------------------------------------------------------- + # Batch P2-D β€” additive streaming co-method. + # + # Adapts the legacy ``execute(...)`` to the :mod:`gitpilot.streaming` + # adapter contract. Yields :class:`StreamEvent` objects so the SSE + # route can flush each as it arrives. Behaviour falls back to a + # single ``assistant_chunk`` when the underlying executor has nothing + # to stream (e.g. folder-only sessions). No legacy method is + # modified. + # --------------------------------------------------------------------- + async def run_streaming(self, payload): + """Yield ``StreamEvent`` instances for the request *payload*. + + Recognised keys (every key is optional; sensible defaults apply): + + * ``user_message`` (str) β€” the user's request + * ``repo_full_name`` (str) β€” ``owner/repo`` for GitHub sessions + * ``branch`` (str), ``token`` (str), ``mode`` (str) + + The method itself does not import ``gitpilot.streaming`` at + module top-level so the agent executor stays usable in + contexts where the streaming layer isn't wired (CLI, tests). 
+ """ + from .streaming import StreamEvent # local import β€” keep agent_executor lean + + user_message = str(payload.get("user_message", "")) + repo_full_name = str(payload.get("repo_full_name", "")) + branch = payload.get("branch") + token = payload.get("token") + mode = payload.get("mode", "auto") + + yield StreamEvent( + event="agent_event", + data={"type": "executor_started", "mode": mode}, + ) + + try: + result = await self.execute( + user_message=user_message, + repo_full_name=repo_full_name, + branch=branch, + token=token, + mode=mode, + ) + except Exception as exc: # noqa: BLE001 β€” boundary adapter + yield StreamEvent( + event="error", + data={"code": "executor.failed", "message": str(exc)[:240]}, + ) + return + + if result is None: + yield StreamEvent( + event="assistant_chunk", + data={"text": "(no plan produced β€” streaming fallback)"}, + ) + return + + plan_text = result.get("summary") if isinstance(result, dict) else None + if not plan_text and isinstance(result, dict): + plan_text = "\n".join( + str(step.get("title") or step) for step in (result.get("steps") or [])[:5] + ) + if plan_text: + yield StreamEvent( + event="assistant_chunk", + data={"text": plan_text}, + ) + + yield StreamEvent(event="agent_event", data={"type": "executor_finished"}) diff --git a/gitpilot/agent_tools.py b/gitpilot/agent_tools.py index 71572af..e0a34ea 100644 --- a/gitpilot/agent_tools.py +++ b/gitpilot/agent_tools.py @@ -20,17 +20,43 @@ def _sanitize_tool_arg(value: Any, fallback_key: str = "description") -> str: instead of: "README.md" - This helper unwraps the dict and returns a plain string. + Worst case: the LLM copies the schema verbatim with a literal + ``"None"`` value (because the tool exposes ``description: None``): + {"description": "None", "type": "str"} + + This helper unwraps every variant we have seen in production and + returns a plain string. Raises ``ValueError`` only when the value + cannot be recovered (e.g. 
the LLM passed a list or an empty dict) + so the caller can surface a clear error instead of querying + GitHub with a stringified Python dict. """ if isinstance(value, str): return value if isinstance(value, dict): - # Try common keys the LLM might stuff the value into - for key in (fallback_key, "description", "value", "default", "title"): - if key in value and isinstance(value[key], str) and value[key]: - return value[key] - # Last resort: stringify - return str(next(iter(value.values()), "")) + # 1. Try the most likely human-supplied keys. + for key in (fallback_key, "description", "value", "default", "title", "path"): + v = value.get(key) + if isinstance(v, str) and v and v.lower() != "none": + return v + # 2. Any other string field on the dict that isn't the schema + # ``type`` marker. + for key, v in value.items(): + if key in {"type", "anyOf", "format"}: + continue + if isinstance(v, str) and v and v.lower() != "none": + return v + raise ValueError( + f"tool argument arrived as a schema-shaped dict with no " + f"usable value (got keys: {sorted(value.keys())!r}). " + f"Pass the parameter as a plain string." + ) + if value is None: + raise ValueError("tool argument is required but received None") + if isinstance(value, (list, tuple, set)): + raise ValueError( + f"tool argument expected a string, got a {type(value).__name__}; " + f"pass a single value, not a sequence." + ) return str(value) # Global context for current repository @@ -173,8 +199,14 @@ def get_directory_structure() -> str: @tool("Read file content") -def read_file(file_path: str) -> str: - """Reads the content of a specific file.""" +def read_file(file_path: Any) -> str: + """Read the content of a file from the active repository. + + file_path: the file's path relative to the repository root, e.g. + "README.md" or "src/main.py". Pass a plain string β€” do **not** pass + a dict like ``{"description": "...", "type": "str"}`` (that is the + parameter's schema, not its value). 
+ """ file_path = _sanitize_tool_arg(file_path) try: owner, repo, token, branch = get_repo_context() @@ -216,8 +248,14 @@ def get_repository_summary() -> str: # --------------------------------------------------------------------------- @tool("Write or update a file in the repository") -def write_file(file_path: str, content: str, commit_message: str) -> str: - """Creates or updates a file in the repository. Provide the full file content.""" +def write_file(file_path: Any, content: Any, commit_message: Any) -> str: + """Create or update a file in the repository. + + file_path: path relative to the repo root (plain string, e.g. + ``"src/main.py"``). content: the full new file content (plain + string). commit_message: a short imperative commit summary. Do + **not** wrap any of these in a ``{description, type}`` schema dict. + """ file_path = _sanitize_tool_arg(file_path) content = _sanitize_tool_arg(content, fallback_key="value") commit_message = _sanitize_tool_arg(commit_message, fallback_key="value") @@ -241,8 +279,13 @@ def write_file(file_path: str, content: str, commit_message: str) -> str: @tool("Delete a file from the repository") -def delete_repo_file(file_path: str, commit_message: str) -> str: - """Deletes a file from the repository.""" +def delete_repo_file(file_path: Any, commit_message: Any) -> str: + """Delete a file from the repository. + + file_path: the path relative to the repo root (plain string, e.g. + ``"docs/old.md"``). commit_message: a short imperative commit + summary. Both are plain strings β€” never wrap them in a schema dict. 
+ """ file_path = _sanitize_tool_arg(file_path) commit_message = _sanitize_tool_arg(commit_message, fallback_key="value") try: diff --git a/gitpilot/agentic.py b/gitpilot/agentic.py index 2aed66b..5c30ff9 100644 --- a/gitpilot/agentic.py +++ b/gitpilot/agentic.py @@ -6,7 +6,7 @@ from textwrap import dedent from typing import Any, Dict, List, Literal, Optional -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, ValidationError as _PydanticValidationError from .agent_router import AgentType, RequestCategory, WorkflowPlan, route as route_request from .context_pack import build_context_pack from .topology_registry import ( @@ -207,6 +207,56 @@ class PlanResult(BaseModel): steps: List[PlanStep] +# --------------------------------------------------------------------------- +# Markdown-fence stripper for agent file-content output. +# +# The Code Writer agent's system prompt asks it to return ONLY the file +# content, no markdown code blocks. In practice every small LLM and +# even some large ones wrap the output in ``` ... ``` (and sometimes +# ~~~ ... ~~~). This helper removes that wrapper before the content +# is written to disk, including a few real-world variants the previous +# inline logic missed: +# +# * tilde fences ``~~~python ... ~~~`` +# * fenced block with a leading language tag (``` ```python ... ``` ```) +# * leading or trailing whitespace / blank lines outside the fence +# * fenced block embedded in explanatory prose +# ("Here is the file:\n```python\n...\n```\nLet me know if…") +# +# The fallback is the input unchanged β€” if no clear single fenced block +# is found, we leave the content alone (better to commit slightly +# wrapped content than to corrupt it by guessing). 
+# --------------------------------------------------------------------------- + +_FENCE_BLOCK_RE = __import__("re").compile( + r"(?P<f>```|~~~)[^\n]*\n(?P<body>.*?)\n[ \t]*(?P=f)\s*$", + __import__("re").DOTALL | __import__("re").MULTILINE, +) + + +def _strip_markdown_fences(content: str) -> str: + """Strip a wrapping markdown code fence from agent-produced file + content. Returns the bare body when a clean fence pair is found; + returns the input unchanged otherwise.""" + if not isinstance(content, str) or not content: + return content + stripped = content.strip() + + # Fast path: the whole payload is one fenced block with nothing + # before it. Walk every fence occurrence and pick the largest body + # — this gives the right answer when the agent prepends a sentence + # like "Here is the file:". + best_body: str | None = None + for match in _FENCE_BLOCK_RE.finditer(stripped): + body = match.group("body") + if best_body is None or len(body) > len(best_body): + best_body = body + if best_body is not None: + return best_body + + return stripped + + async def generate_plan( goal: str, repo_full_name: str, @@ -305,7 +355,25 @@ def _explore(): # Propagate context to thread for CrewAI execution ctx = contextvars.copy_context() - exploration_result = await _guarded_agent_call(ctx, _explore, label="explore_repo") + try: + exploration_result = await _guarded_agent_call(ctx, _explore, label="explore_repo") + except _PydanticValidationError as exc: + # Same failure mode as the planner-side validation error: the + # explorer's Final Answer didn't match the expected schema, so + # CrewAI's converter blew up before we could even ask the + # planner anything. Surface the same friendly message — the + # underlying agent-quality issue is identical.
+ logger.warning( + "[GitPilot] Explorer emitted output that failed schema " + "validation: %s", + (exc.errors()[0].get("msg") if exc.errors() else "(no detail)"), + ) + raise RuntimeError( + "The repository explorer did not return a usable result. " + "This usually means the LLM lost its instruction format " + "(common with smaller / quantised models). Re-run the " + "request, or switch to a stronger LLM via Settings → Provider." + ) from exc exploration_report = exploration_result.raw if hasattr(exploration_result, "raw") else str(exploration_result) logger.info("[GitPilot] Exploration complete. Report length: %s chars", len(exploration_report)) @@ -445,17 +513,137 @@ def _plan(): return plan_crew.kickoff(inputs={"goal": goal}) ctx = contextvars.copy_context() - result = await _guarded_agent_call(ctx, _plan, label="generate_plan") + try: + result = await _guarded_agent_call(ctx, _plan, label="generate_plan") + except _PydanticValidationError as exc: + # CrewAI tried to coerce the planner's Final Answer into the + # ``PlanResult`` schema and failed. We have seen two real + # production payloads cause this: + # + # 1. The agent emitted a ReAct-format "Thought / Action / + # Action Input" block instead of JSON (its instruction + # formatting collapsed). CrewAI's converter still tries + # to find a ``{...}`` substring, lands on ``Input: {}``, + # validates that, and Pydantic complains: + # "3 validation errors for PlanResult: goal / summary + # / steps - Field required" + # + # 2. The agent returned plain refusal prose with an empty + # ``{}`` somewhere in it. + # + # Both cases are agent-quality failures, not user errors. + # Translate to the same friendly RuntimeError surface the + # refusal path already uses so the UI shows "couldn't produce + # a plan" rather than a 500 with a Pydantic traceback. + logger.warning( + "[GitPilot] Planner emitted output that failed PlanResult " + "validation (%d error%s).
First error: %s", + len(exc.errors()), + "" if len(exc.errors()) == 1 else "s", + (exc.errors()[0].get("msg") if exc.errors() else "(no detail)"), + ) + raise RuntimeError( + "The planner did not return a valid plan structure. This " + "usually means the LLM lost its instruction format mid-task " + "(common with smaller / quantised models). Re-run the " + "request, or switch to a stronger LLM via Settings β†’ Provider." + ) from exc + + # ------------------------------------------------------------------ + # Post-hoc guards β€” catch the failure mode where the planner LLM + # returns either a refusal or a hallucinated stock plan that has + # nothing to do with the user's repository. + # ------------------------------------------------------------------ + from .plan_guards import ( + PlanHallucinationError, + assess_plan, + detect_refusal, + enrich_plan_with_reads, + ) + + refusal = detect_refusal(result) + if refusal is not None: + logger.warning( + "[GitPilot] Planner returned a refusal-shaped response (%r); " + "treating as failure rather than rendering a hallucinated plan.", + refusal, + ) + raise RuntimeError( + "The planner refused to produce a plan. This usually means " + "the explorer could not read repository content. Re-run the " + "request, or switch to a stronger LLM via Settings β†’ Provider." + ) if hasattr(result, "pydantic") and result.pydantic: plan = result.pydantic logger.info("[GitPilot] Plan created with %s steps (ref=%s)", len(plan.steps), active_ref) + + # Cross-check the plan against the real repo file list. Suspicious + # placeholder-shaped paths combined with a 0% hit-rate on + # MODIFY/DELETE actions strongly suggests the planner hallucinated + # a generic stock plan rather than working from the actual repo. 
+ try: + repo_files: list[str] = [] + tools_cache = _tools() + owner, repo, token, branch = await _resolve_repo_target(tools_cache) + if owner and repo: + ctx_summary = await tools_cache["get_repository_context_summary"]( + owner, repo, token=token, branch=branch, + ) + repo_files = list(ctx_summary.get("all_files", []) or []) + except Exception: + logger.debug("[GitPilot] could not fetch repo file list for plausibility check", exc_info=True) + repo_files = [] + + if repo_files: + # Small / quantised LLMs (llama3:8b is the canonical case) + # consistently drop READ entries from plan steps even when + # the step's description clearly says "Read the content of + # README.md". Enrich the plan before the plausibility + # check so the Action Plan card surfaces the complete set + # of files the agent will touch β€” both the READ inputs and + # the CREATE / MODIFY / DELETE outputs. + added_reads = enrich_plan_with_reads(plan, repo_files) + if added_reads: + logger.info( + "[GitPilot] Auto-injected %d READ entr%s based on plan " + "step descriptions (small-model READ-drop mitigation).", + added_reads, "y" if added_reads == 1 else "ies", + ) + + assessment = assess_plan(plan, repo_files) + if assessment.hallucinated: + logger.warning( + "[GitPilot] Plausibility check failed (suspicious=%s, hit_ratio=%.2f); " + "treating plan as hallucinated.", + len(assessment.suspicious_paths), assessment.hit_ratio, + ) + raise PlanHallucinationError( + "The planner produced paths that do not match this " + "repository. Re-run the request, or switch to a " + "stronger LLM via Settings β†’ Provider.", + assessment=assessment, + ) + return plan logger.warning("[GitPilot] Unexpected planning result type: %r", type(result)) return result +async def _resolve_repo_target(tools_cache: dict) -> tuple[str, str, str | None, str | None]: + """Best-effort lookup of (owner, repo, token, branch) for the active + planning session. 
Returns empty strings when the context is not + available β€” callers must tolerate that and skip the plausibility + check rather than fail.""" + try: + from .agent_tools import get_repo_context + owner, repo, token, branch = get_repo_context() + return owner, repo, token, branch + except Exception: + return "", "", None, None + + # ============================================================================ # Lite Mode β€” Simplified single-agent for small LLMs (< 7B parameters) # ============================================================================ @@ -830,13 +1018,7 @@ def _create(): ctx = contextvars.copy_context() content = await _guarded_agent_call(ctx, _create, label="create_file") - content = content.strip() - if content.startswith("```"): - lines = content.split("\n") - if lines[-1].strip() == "```": - content = "\n".join(lines[1:-1]) - else: - content = "\n".join(lines[1:]) + content = _strip_markdown_fences(content) await put_file(owner, repo, file.path, content, f"GitPilot Lite: Create {file.path}", token=token, branch=branch_name) @@ -1014,14 +1196,7 @@ def _create(): ctx = contextvars.copy_context() content = await _guarded_agent_call(ctx, _create, label="exec_create_file") - - content = content.strip() - if content.startswith("```"): - lines = content.split("\n") - if lines[-1].strip() == "```": - content = "\n".join(lines[1:-1]) - else: - content = "\n".join(lines[1:]) + content = _strip_markdown_fences(content) await put_file( owner, diff --git a/gitpilot/agents_md.py b/gitpilot/agents_md.py new file mode 100644 index 0000000..a3e04c2 --- /dev/null +++ b/gitpilot/agents_md.py @@ -0,0 +1,314 @@ +# gitpilot/agents_md.py +"""Persistent project context file β€” ``AGENTS.md`` + ``/init``. + +Industry-convention `AGENTS.md` lives at the workspace root and is loaded +into every session as a high-priority context block. This module is +purely additive β€” when no ``AGENTS.md`` exists the rest of GitPilot +behaves exactly as before. 
+ +Three responsibilities: + +1. Render a starter ``AGENTS.md`` from a workspace scan (``/init``). +2. Load the active ``AGENTS.md`` and its mode-specific siblings under + ``.gitpilot/AGENTS..md`` for prompt injection. +3. Expand inline ``@./other.md`` includes with circular-import detection. +""" +from __future__ import annotations + +import logging +import os +import re +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Dict, List, Optional, Set, Tuple, TypedDict + + +class _IncludeInfo(TypedDict): + remaining_budget: int + include_count: int + truncated: bool + +logger = logging.getLogger(__name__) + +AGENTS_MD = "AGENTS.md" +GITPILOT_DIR = ".gitpilot" + +MAX_AGENTS_MD_BYTES = 32_000 +MAX_INCLUDE_DEPTH = 5 +MAX_INCLUDES_TOTAL = 32 + +_INCLUDE_RE = re.compile(r"^@(\./|\.\./|/)([^\s]+)\s*$", re.MULTILINE) + + +@dataclass +class AgentsDoc: + """Loaded AGENTS.md with includes resolved.""" + + path: Path + content: str + includes: List[Path] = field(default_factory=list) + truncated: bool = False + circular: List[str] = field(default_factory=list) + + @property + def is_empty(self) -> bool: + return not self.content.strip() + + +class AgentsLoader: + """Locate and load AGENTS.md (root + optional mode-specific).""" + + def __init__(self, workspace_path: Path) -> None: + self.workspace_path = workspace_path.resolve() + + # ------------------------------------------------------------------ + # Discovery + # ------------------------------------------------------------------ + def root_path(self) -> Path: + return self.workspace_path / AGENTS_MD + + def mode_path(self, mode_slug: str) -> Path: + safe = re.sub(r"[^a-zA-Z0-9_.-]", "", mode_slug) + return self.workspace_path / GITPILOT_DIR / f"AGENTS.{safe}.md" + + # ------------------------------------------------------------------ + # Loading + include expansion + # ------------------------------------------------------------------ + def load(self, mode_slug: Optional[str] = None) 
-> AgentsDoc: + candidates: List[Path] = [] + if mode_slug: + mp = self.mode_path(mode_slug) + if mp.exists(): + candidates.append(mp) + root = self.root_path() + if root.exists(): + candidates.append(root) + + if not candidates: + return AgentsDoc(path=root, content="") + + # If both exist, mode-specific is appended after the root so the + # mode overrides apply last in the system prompt. + rendered_parts: List[str] = [] + seen: Set[Path] = set() + circular: List[str] = [] + includes: List[Path] = [] + truncated = False + budget = MAX_AGENTS_MD_BYTES + include_count = 0 + + for cand in reversed(candidates): # root first, mode last + text, info = self._expand_includes( + cand, depth=0, seen=seen, circular=circular, includes=includes, + remaining_budget=budget, include_count=include_count, + ) + rendered_parts.append(text) + budget = info["remaining_budget"] + include_count = info["include_count"] + truncated = truncated or info["truncated"] + if budget <= 0: + truncated = True + break + + return AgentsDoc( + path=candidates[0], + content="\n\n".join(p for p in rendered_parts if p), + includes=includes, + truncated=truncated, + circular=circular, + ) + + def _expand_includes( + self, + path: Path, + *, + depth: int, + seen: Set[Path], + circular: List[str], + includes: List[Path], + remaining_budget: int, + include_count: int, + ) -> Tuple[str, _IncludeInfo]: + resolved = path.resolve() + if resolved in seen: + circular.append(str(resolved)) + return "", {"remaining_budget": remaining_budget, "include_count": include_count, "truncated": False} + if depth > MAX_INCLUDE_DEPTH or include_count >= MAX_INCLUDES_TOTAL: + return "", {"remaining_budget": remaining_budget, "include_count": include_count, "truncated": True} + + if not str(resolved).startswith(str(self.workspace_path)): + return "", {"remaining_budget": remaining_budget, "include_count": include_count, "truncated": False} + + seen.add(resolved) + try: + raw = resolved.read_text(encoding="utf-8") + except 
Exception as e: + logger.debug("could not read %s: %s", resolved, e) + return "", {"remaining_budget": remaining_budget, "include_count": include_count, "truncated": False} + + out_parts: List[str] = [] + truncated = False + cursor = 0 + for m in _INCLUDE_RE.finditer(raw): + out_parts.append(raw[cursor : m.start()]) + cursor = m.end() + include_token = m.group(1) + m.group(2) + target = (resolved.parent / include_token).resolve() if not include_token.startswith("/") else Path(include_token).resolve() + includes.append(target) + include_count += 1 + child_text, child_info = self._expand_includes( + target, + depth=depth + 1, + seen=seen, + circular=circular, + includes=includes, + remaining_budget=remaining_budget, + include_count=include_count, + ) + out_parts.append(child_text) + remaining_budget = child_info["remaining_budget"] + include_count = child_info["include_count"] + truncated = truncated or child_info["truncated"] + out_parts.append(raw[cursor:]) + + body = "".join(out_parts) + if len(body) > remaining_budget: + body = body[:remaining_budget] + truncated = True + remaining_budget -= len(body) + + return body, {"remaining_budget": remaining_budget, "include_count": include_count, "truncated": truncated} + + +# ---------------------------------------------------------------------- +# /init implementation +# ---------------------------------------------------------------------- + +@dataclass +class InitReport: + """Summary returned by ``/init``.""" + + created: bool + path: Path + sections: List[str] = field(default_factory=list) + skipped_reason: Optional[str] = None + + +def _scan_workspace(workspace_path: Path) -> Dict[str, Any]: + """Extract a low-cost fingerprint of the project for the starter doc.""" + info: Dict[str, Any] = {} + info["python"] = (workspace_path / "pyproject.toml").exists() or any(workspace_path.glob("*.py")) + info["node"] = (workspace_path / "package.json").exists() + info["docker"] = (workspace_path / "Dockerfile").exists() or 
any(workspace_path.glob("Dockerfile*")) + info["compose"] = any(workspace_path.glob("docker-compose*.y*ml")) + info["has_tests"] = (workspace_path / "tests").exists() or (workspace_path / "test").exists() + info["has_makefile"] = (workspace_path / "Makefile").exists() + info["readme"] = next((p.name for p in workspace_path.glob("README*")), None) + # Cheap top-level layout + top: List[str] = [] + for child in sorted(workspace_path.iterdir()): + if child.name.startswith("."): + continue + top.append(child.name + ("/" if child.is_dir() else "")) + if len(top) >= 30: + break + info["top_level"] = top + return info + + +_STARTER_TEMPLATE = """# AGENTS.md + +> Persistent project context loaded into every GitPilot session. +> Edit freely β€” agents will follow these notes. + +## Project Overview +{overview} + +## Directory Layout +{layout} + +## Stack +{stack} + +## Workflows +{workflows} + +## Conventions +- Keep changes small and reversible. +- Run the test suite before committing. +- Write docstrings for any new public function. + +## Mode-Specific Notes +Place per-mode overrides in `.gitpilot/AGENTS..md` (for example +`.gitpilot/AGENTS.coder.md`). Use `@./relative/path.md` on its own line to +include another markdown file. +""" + + +def run_init( + workspace_path: Path, + *, + overwrite: bool = False, +) -> InitReport: + """Generate a starter ``AGENTS.md`` for the workspace. 
Idempotent.""" + workspace_path = workspace_path.resolve() + target = workspace_path / AGENTS_MD + if target.exists() and not overwrite: + return InitReport(created=False, path=target, skipped_reason="exists") + + info = _scan_workspace(workspace_path) + + stack_bits: List[str] = [] + if info.get("python"): + stack_bits.append("Python") + if info.get("node"): + stack_bits.append("Node.js") + if info.get("docker"): + stack_bits.append("Docker") + if info.get("compose"): + stack_bits.append("docker-compose") + stack = ", ".join(stack_bits) or "_unknown β€” describe here_" + + workflows: List[str] = [] + if info.get("has_makefile"): + workflows.append("- `make install`, `make test`, `make run`") + if info.get("node"): + workflows.append("- `npm install`, `npm test`") + if info.get("python"): + workflows.append("- `pip install -e .` and `pytest`") + workflows_md = "\n".join(workflows) or "_describe build/test/run commands here_" + + layout = "\n".join(f"- `{e}`" for e in info.get("top_level", [])) or "_workspace empty_" + overview = ( + f"This project has a `{info.get('readme')}` at its root β€” refer to it for " + "purpose and high-level usage." 
@app.get("/api/context/usage")
async def api_context_usage(session_id: Optional[str] = Query(None)):
    """Report how much of the active model's context window is in use.

    The response is the ``to_dict()`` form of whatever ``build_usage``
    returns for a four-row breakdown: conversation messages, system prompt,
    repo context (always 0 here) and tool schemas.

    Args:
        session_id: Optional session whose persisted messages should be
            counted. When omitted, or when the session cannot be loaded,
            the ``messages`` row is 0.

    Raises:
        HTTPException: 404 when the context-meter flag is switched off.
    """
    from . import flags
    from .context_meter import (
        FLAG_CONTEXT_METER,
        build_usage,
        count_messages_tokens,
        count_system_prompt_tokens,
        count_tool_schema_tokens,
    )

    meter_enabled = flags.is_on(FLAG_CONTEXT_METER, default=True)
    if not meter_enabled:
        raise HTTPException(status_code=404, detail="Context meter is disabled")

    settings: AppSettings = get_settings()
    lite = _is_lite_mode_active()

    # Tool groups are imported lazily and only outside lite mode; any failure
    # simply leaves the counts at whatever was gathered so far.
    n_tools = 0
    schema_groups: list[list[object]] = []
    if not lite:
        try:
            from .agentic import _tools

            registry = _tools()
            for group_name in (
                "REPOSITORY_TOOLS",
                "WRITE_TOOLS",
                "ISSUE_TOOLS",
                "PR_TOOLS",
                "SEARCH_TOOLS",
                "LOCAL_TOOLS",
            ):
                members = registry.get(group_name) or []
                schema_groups.append(list(members))
                n_tools += len(members)
        except Exception as exc:  # pragma: no cover - defensive
            logger.debug("[context-meter] tool count unavailable: %s", exc)

    if schema_groups:
        schema_tokens = count_tool_schema_tokens(schema_groups)
    else:
        schema_tokens = 0
    prompt_tokens = count_system_prompt_tokens(lite_mode=lite)

    # Conversation size is only known when a session id was supplied; a
    # session that fails to load is logged at debug level and counted as 0.
    conversation_tokens = 0
    if session_id:
        try:
            loaded = _session_mgr.load(session_id)
            conversation_tokens = count_messages_tokens(loaded.messages)
        except Exception as exc:
            logger.debug(
                "[context-meter] session %s not loadable: %s", session_id, exc
            )

    # ``repo_context`` is hard-wired to 0: nothing in this handler computes it.
    snapshot = build_usage(
        settings,
        breakdown={
            "messages": conversation_tokens,
            "system_prompt": prompt_tokens,
            "repo_context": 0,
            "tool_schemas": schema_tokens,
        },
        tool_count=n_tools,
        lite_mode=lite,
    )
    return snapshot.to_dict()
_plan_parse_markers = ( "validation error for planresult", "json_invalid", "invalid json: key must be a string", + "did not return a valid plan structure", + "did not return a usable result", + "the planner refused to produce a plan", + "the planner produced paths that do not match", ) if any(marker in error_msg.lower() for marker in _plan_parse_markers): logger.warning( @@ -1212,10 +1314,32 @@ async def api_chat_plan(req: ChatPlanRequest, authorization: Optional[str] = Hea "[GitPilot] Lite planner fallback also failed after parse error: %s", lite_exc, ) - raise - - # Re-raise anything else - raise + # Surface a clear 502 with actionable guidance rather + # than leaking the raw RuntimeError as a generic 500. + raise HTTPException( + status_code=502, + detail=( + "The planner couldn't produce a usable plan even " + "with the simplified Lite-mode fallback. This is " + "almost always a small-model issue β€” the LLM is " + "looping on tool calls or losing its instruction " + "format mid-task. Solutions:\n" + "β€’ Switch to a larger Ollama model (llama3.1:8b β†’ " + "llama3.1:70b, qwen2.5:14b+, mistral)\n" + "β€’ Use a cloud provider (OpenAI, Claude) for " + "complex multi-step tasks\n" + "β€’ Try simplifying the request (one file at a time)" + ), + ) from lite_exc + + # Anything else β€” surface a clean 500 with a clear message + # so the UI's existing error handler renders something + # actionable instead of a bare "Internal Server Error". + logger.exception("[GitPilot] /api/chat/plan failed: %s", error_msg) + raise HTTPException( + status_code=500, + detail=error_msg or "Plan generation failed.", + ) from exc @app.post("/api/chat/execute") diff --git a/gitpilot/checkpoints.py b/gitpilot/checkpoints.py new file mode 100644 index 0000000..04725b0 --- /dev/null +++ b/gitpilot/checkpoints.py @@ -0,0 +1,275 @@ +# gitpilot/checkpoints.py +"""Project checkpointing via a shadow git repository. + +A checkpoint is a three-part snapshot taken before a mutating tool +call: + +1. 
class CheckpointStore:
    """Manage checkpoints for a single workspace.

    Each workspace gets its own history directory (keyed by a hash of the
    resolved workspace path) holding a ``snapshot`` git repository and a
    ``meta`` directory with one sub-directory per checkpoint.
    """

    def __init__(self, workspace_path: Path, history_root: Optional[Path] = None) -> None:
        """Bind the store to ``workspace_path``; nothing is created on disk yet.

        Args:
            workspace_path: Workspace to snapshot; stored resolved.
            history_root: Parent directory for all histories; defaults to
                the module-level ``HISTORY_ROOT``.
        """
        self.workspace_path = workspace_path.resolve()
        root = history_root or HISTORY_ROOT
        self.history_dir = root / _workspace_hash(self.workspace_path)

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------
    def init(self) -> None:
        """Create the history layout and the shadow git repo if missing."""
        self.history_dir.mkdir(parents=True, exist_ok=True)
        snap = self.history_dir / SNAP_DIR
        snap.mkdir(exist_ok=True)
        if not (snap / ".git").exists():
            self._git(snap, "init", "-q")
            # A local identity so commits work on machines without a
            # global git user configuration.
            self._git(snap, "config", "user.email", "checkpoints@gitpilot.local")
            self._git(snap, "config", "user.name", "GitPilot Checkpoints")
        (self.history_dir / META_DIR).mkdir(exist_ok=True)

    # ------------------------------------------------------------------
    # Snapshot / restore
    # ------------------------------------------------------------------
    def snapshot(
        self,
        descriptor: ToolCallDescriptor,
        transcript: Optional[List[Dict[str, Any]]] = None,
    ) -> CheckpointRecord:
        """Capture the workspace + transcript + tool call descriptor.

        Args:
            descriptor: The tool call about to run.
            transcript: Conversation so far; stored as ``[]`` when omitted.

        Returns:
            The record that was also written to ``record.json``. Its
            ``commit_sha`` is ``None`` when the git commit failed (the
            failure is logged, not raised).
        """
        self.init()
        snap = self.history_dir / SNAP_DIR
        files_changed = _mirror_workspace(self.workspace_path, snap)
        ts = time.time()

        # IDs only have one-second resolution, so two checkpoints for the
        # same tool/target within a second would share a directory and the
        # later one would overwrite the earlier one's metadata. Disambiguate
        # with a numeric suffix instead.
        meta_root = self.history_dir / META_DIR
        base_id = _format_id(ts, descriptor)
        ckpt_id = base_id
        attempt = 1
        while (meta_root / ckpt_id).exists():
            attempt += 1
            ckpt_id = f"{base_id}-{attempt}"

        meta_dir = meta_root / ckpt_id
        meta_dir.mkdir(parents=True, exist_ok=True)
        (meta_dir / TRANSCRIPT_FILE).write_text(
            json.dumps(transcript or [], indent=2), encoding="utf-8"
        )
        (meta_dir / DESCRIPTOR_FILE).write_text(
            json.dumps(descriptor.to_dict(), indent=2), encoding="utf-8"
        )
        commit_sha: Optional[str] = None
        try:
            self._git(snap, "add", "-A")
            # --allow-empty: a checkpoint is recorded even if no file changed.
            self._git(snap, "commit", "-q", "--allow-empty", "-m", ckpt_id)
            commit_sha = self._git(snap, "rev-parse", "HEAD", capture=True).strip() or None
        except Exception as e:
            logger.warning("checkpoint commit failed: %s", e)
        record = CheckpointRecord(
            id=ckpt_id,
            timestamp=ts,
            tool_name=descriptor.name,
            target_path=descriptor.target_path,
            note=descriptor.note,
            files_changed=files_changed,
            commit_sha=commit_sha,
        )
        (meta_dir / "record.json").write_text(
            json.dumps(record.to_dict(), indent=2), encoding="utf-8"
        )
        return record

    def list(self) -> List[CheckpointRecord]:
        """Return stored records, ordered by directory name descending.

        Entries without a ``record.json`` are skipped; entries whose record
        cannot be parsed are skipped and logged at debug level.
        """
        out: List[CheckpointRecord] = []
        meta_root = self.history_dir / META_DIR
        if not meta_root.exists():
            return out
        for child in sorted(meta_root.iterdir(), reverse=True):
            record_file = child / "record.json"
            if not record_file.exists():
                continue
            try:
                data = json.loads(record_file.read_text(encoding="utf-8"))
                out.append(CheckpointRecord(**data))
            except Exception as e:
                logger.debug("could not load checkpoint %s: %s", child, e)
        return out

    def restore(self, checkpoint_id: str) -> Dict[str, Any]:
        """Restore files for ``checkpoint_id`` and return the transcript.

        Returns:
            A dict with ``record``, ``transcript`` (``[]`` if absent) and
            ``tool_call`` (``{}`` if absent).

        Raises:
            FileNotFoundError: If the ID is empty, is not a single path
                component, names no stored checkpoint, or the checkpoint
                has no ``record.json``.
        """
        # The ID is joined into a filesystem path below, so it must be one
        # plain path component; otherwise "../x" would reach outside the
        # metadata directory.
        if (
            not checkpoint_id
            or checkpoint_id in {".", ".."}
            or Path(checkpoint_id).name != checkpoint_id
        ):
            raise FileNotFoundError(f"unknown checkpoint: {checkpoint_id}")
        meta_dir = self.history_dir / META_DIR / checkpoint_id
        if not meta_dir.exists():
            raise FileNotFoundError(f"unknown checkpoint: {checkpoint_id}")
        snap = self.history_dir / SNAP_DIR
        record_path = meta_dir / "record.json"
        if not record_path.exists():
            raise FileNotFoundError("missing record.json")
        record = json.loads(record_path.read_text(encoding="utf-8"))
        sha = record.get("commit_sha")
        if sha:
            try:
                self._git(snap, "checkout", "-q", sha, "--", ".")
            except Exception as e:
                # NOTE(review): after a failed checkout (or when no sha was
                # recorded) the copy below uses whatever the snapshot dir
                # currently holds, which may not be this checkpoint's state.
                # Kept as best-effort; confirm whether aborting is preferable.
                logger.warning("checkout of %s failed: %s", sha, e)
        # Mirror snapshot files back into the workspace (additive only:
        # files the user created since are never deleted).
        _restore_workspace(snap, self.workspace_path)
        transcript_path = meta_dir / TRANSCRIPT_FILE
        descriptor_path = meta_dir / DESCRIPTOR_FILE
        return {
            "record": record,
            "transcript": json.loads(transcript_path.read_text(encoding="utf-8"))
            if transcript_path.exists() else [],
            "tool_call": json.loads(descriptor_path.read_text(encoding="utf-8"))
            if descriptor_path.exists() else {},
        }

    # ------------------------------------------------------------------
    # Maintenance
    # ------------------------------------------------------------------
    def prune(self, keep_last: int = 50) -> int:
        """Delete metadata for all but the first ``keep_last`` listed records.

        Only the per-checkpoint metadata directories are removed; the
        commits in the shadow repository are left untouched.

        Returns:
            The number of metadata directories removed.
        """
        records = self.list()
        if len(records) <= keep_last:
            return 0
        removed = 0
        for record in records[keep_last:]:
            target = self.history_dir / META_DIR / record.id
            if target.exists():
                shutil.rmtree(target, ignore_errors=True)
                removed += 1
        return removed

    # ------------------------------------------------------------------
    # Internals
    # ------------------------------------------------------------------
    def _git(self, cwd: Path, *args: str, capture: bool = False) -> str:
        """Run ``git <args>`` in ``cwd`` with a 30 second timeout.

        Returns:
            The command's stdout when ``capture`` is true, else ``""``.

        Raises:
            RuntimeError: If git exits non-zero; the message is its stderr.
        """
        proc = subprocess.run(
            ["git", *args],
            cwd=str(cwd),
            check=False,
            capture_output=True,
            text=True,
            timeout=30,
        )
        if proc.returncode != 0:
            raise RuntimeError(proc.stderr.strip() or f"git {args[0]} failed")
        return proc.stdout if capture else ""
return f"{iso}-{tool}{suffix}"[:120] + + +def _mirror_workspace(src: Path, dst: Path) -> int: + """Copy ``src`` into ``dst`` (overwriting), skipping ignored paths.""" + count = 0 + # Wipe existing snapshot content (but keep its .git/). + for entry in list(dst.iterdir()): + if entry.name == ".git": + continue + if entry.is_dir(): + shutil.rmtree(entry, ignore_errors=True) + else: + try: + entry.unlink() + except OSError: + pass + for path in src.rglob("*"): + rel = path.relative_to(src) + if any(part in _DEFAULT_IGNORES for part in rel.parts): + continue + target = dst / rel + if path.is_dir(): + target.mkdir(parents=True, exist_ok=True) + continue + try: + target.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(path, target) + count += 1 + except OSError: + continue + return count + + +def _restore_workspace(src: Path, dst: Path) -> None: + for path in src.rglob("*"): + rel = path.relative_to(src) + if rel.parts and rel.parts[0] == ".git": + continue + target = dst / rel + if path.is_dir(): + target.mkdir(parents=True, exist_ok=True) + continue + try: + target.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(path, target) + except OSError: + continue diff --git a/gitpilot/cli.py b/gitpilot/cli.py index ce0e569..a0c860a 100644 --- a/gitpilot/cli.py +++ b/gitpilot/cli.py @@ -147,14 +147,106 @@ def _run_server(host: str, port: int, reload: bool = False): ) +def _maybe_bootstrap_workspace(workspace: Path) -> None: + """Silently run the first-run wizard when the workspace is fresh. + + Triggers only when *all* of these are true: + + - ``.env`` does not exist + - ``.gitpilot/`` does not exist + - ``AGENTS.md`` does not exist + + Picks a sensible non-interactive default for the model provider: + + - if ``OPENAI_API_KEY`` / ``ANTHROPIC_API_KEY`` / ``WATSONX_API_KEY`` + is already set in the environment, use that provider; + - otherwise default to Ollama (which needs no key) so the user + can keep going without picking up extra credentials. 
+ + Errors are logged and swallowed β€” bootstrapping must never block + ``gitpilot serve``. + """ + try: + env_file = workspace / ".env" + gitpilot_dir = workspace / ".gitpilot" + agents_md = workspace / "AGENTS.md" + if env_file.exists() or gitpilot_dir.exists() or agents_md.exists(): + return # workspace already configured, leave it alone + + # Pick a provider that won't fail on missing credentials. + provider = "ollama" + api_key = None + for env_var, name in ( + ("ANTHROPIC_API_KEY", "anthropic"), + ("OPENAI_API_KEY", "openai"), + ("WATSONX_API_KEY", "watsonx"), + ): + value = os.environ.get(env_var) + if value: + provider = name + api_key = value + break + + # Turn the flag on locally; the wizard rejects calls otherwise. + from . import flags as _flags + from .init_wizard import ( + FLAG_INIT_WIZARD, + WizardAnswers, + run_wizard, + ) + + previous = _flags.is_on(FLAG_INIT_WIZARD) + _flags.set_override(FLAG_INIT_WIZARD, True) + try: + result = run_wizard( + workspace, + presets=WizardAnswers( + provider=provider, + api_key=api_key, + mode_slug="coder", + workspace_trust=True, + ), + ) + finally: + _flags.set_override(FLAG_INIT_WIZARD, previous) + + if result.aborted: + return + console.print( + f"[green]βœ“[/green] First-run bootstrap: wrote " + f"{len(result.files_written)} file(s), provider={provider} " + f"(re-run with --skip-init to disable)." + ) + except Exception: + # Never block serve startup on a bootstrap hiccup. 
+ import logging + logging.getLogger(__name__).debug("workspace bootstrap failed", exc_info=True) + + @cli.command() def serve( host: str = typer.Option("127.0.0.1", "--host", "-h", help="Host to bind"), port: int = typer.Option(8000, "--port", "-p", help="Port to bind"), reload: bool = typer.Option(False, "--reload", help="Enable auto-reload"), open_browser: bool = typer.Option(True, "--open/--no-open", help="Open browser"), + skip_init: bool = typer.Option( + False, "--skip-init", + help="Do not auto-run the first-run wizard when the workspace is fresh.", + ), ): - """Start the GitPilot server with web UI.""" + """Start the GitPilot server with web UI. + + First-run convenience: when the current workspace has no ``.env``, + no ``.gitpilot/`` directory, and no ``AGENTS.md``, we silently + bootstrap a minimal config with sensible defaults (Ollama if no + provider env var is set; otherwise the matching provider). The + user gets a two-command onboarding β€” ``pip install`` then + ``gitpilot serve`` β€” without giving up the explicit-flag flow. + Pass ``--skip-init`` to opt out. + """ + if not skip_init: + _maybe_bootstrap_workspace(Path.cwd()) + # Check if port is already in use (prevent double-start) import socket with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: @@ -262,6 +354,23 @@ def version(): console.print(f"GitPilot [cyan]v{__version__}[/cyan]") +# --------------------------------------------------------------------------- +# Batch P1-E β€” `gitpilot doctor` health-check sub-command. Additive. +# Removal is a one-line revert. 
@cli.command("doctor", help="Run install / environment health checks.")
def doctor_command(
    workspace: Path = typer.Option(Path.cwd(), "--workspace", "-w", help="Workspace directory"),
    offline: bool = typer.Option(False, "--offline", help="Skip every network probe"),
    json_out: bool = typer.Option(False, "--json", help="Emit machine-readable JSON"),
) -> None:
    """Run the doctor checks, print the report, and exit with its status.

    The report is rendered as JSON when ``--json`` is given and as text
    otherwise. The command always terminates through ``typer.Exit`` using
    the report's ``exit_code``.
    """
    # Imported here so the doctor module is only loaded for this command.
    from .doctor import render_json, render_text, run_checks

    outcome = run_checks(workspace, offline=offline)
    if json_out:
        rendered = render_json(outcome)
    else:
        rendered = render_text(outcome)
    console.print(rendered)
    raise typer.Exit(code=outcome.exit_code)
Pass ``--wizard`` for the + Batch P3-G first-run flow that also writes a provider-aware ``.env``, + a starter ``.gitpilot/modes.yaml``, and a trust entry. Provider / + mode / key can be pre-supplied for CI use; missing prompts are + asked interactively. + """ from pathlib import Path as StdPath from .memory import MemoryManager workspace = StdPath(path).resolve() + + if wizard: + from . import flags as _flags + from .init_wizard import ( + FLAG_INIT_WIZARD, + WizardAnswers, + WizardError, + run_wizard, + ) + if not _flags.is_on(FLAG_INIT_WIZARD): + console.print( + "[yellow]The init_wizard flag is off.[/yellow] " + "Enable it with [bold]GITPILOT_FLAGS=\"init_wizard=1\"[/bold] " + "and re-run, or omit --wizard for the legacy init." + ) + raise typer.Exit(code=2) + presets = WizardAnswers( + provider=provider or "anthropic", + api_key=api_key, + mode_slug=mode_slug or "coder", + workspace_trust=not no_trust, + overwrite_env=overwrite, + overwrite_modes=overwrite, + overwrite_agents_md=overwrite, + ) + # Force non-interactive mode only when all required answers are present. + try: + result = run_wizard(workspace, presets=presets) + except WizardError as err: + console.print(f"[red]Wizard error:[/red] {err}") + raise typer.Exit(code=1) from err + + # Render the outcome. Secrets are never printed. 
+ for written in result.files_written: + console.print(f"[green]wrote[/green] {written}") + for skipped, why in result.files_skipped: + console.print(f"[yellow]skipped[/yellow] {skipped} ({why})") + if result.trust_recorded: + console.print("[green]trusted[/green] workspace recorded in ~/.gitpilot/trusted.json") + if result.aborted: + console.print(f"[red]aborted[/red] {result.reason}") + raise typer.Exit(code=1) + console.print(f"[dim]done in {result.duration_ms} ms[/dim]") + return + mgr = MemoryManager(workspace) md_path = mgr.init_project() console.print(f"[green]Initialized:[/green] {md_path}") diff --git a/gitpilot/context_budget.py b/gitpilot/context_budget.py new file mode 100644 index 0000000..3ce7417 --- /dev/null +++ b/gitpilot/context_budget.py @@ -0,0 +1,246 @@ +# gitpilot/context_budget.py +"""Conversation context budgeting and auto-condensation. + +Strategy (additive β€” opt-in via :class:`BudgetPolicy` or the global default): + +* Maintain a running token total per session. +* When the total crosses ``condense_at`` (default 70 % of ``max_tokens``) + fold the oldest non-essential messages into a single summary block, + preserving: + - system instructions + - tool definitions + - the AGENTS.md block + - the last N turns +* Drop oversize tool outputs first β€” they're the cheapest to lose and the + costliest to keep. +* Provide a stable :class:`ContextStats` snapshot that the API surfaces as + ``{prompt_tokens, max_tokens, ratio}`` so the web UI can render a live + token counter. + +The token estimator is best-effort: it uses ``tiktoken`` when available +and falls back to a ``len(text) / 4`` heuristic. Counts do not need to +be exact β€” they only steer condensation timing. 
# Optional tokenizer: set once at import time, stays None when tiktoken is
# missing or its encoding cannot be obtained.
_TIKTOKEN: Any = None
try:  # pragma: no cover - depends on environment
    import tiktoken

    _TIKTOKEN = tiktoken.get_encoding("cl100k_base")
except Exception:  # pragma: no cover - tiktoken optional
    _TIKTOKEN = None


def estimate_tokens(text: str) -> int:
    """Approximate how many tokens ``text`` occupies.

    Empty input counts as 0. When the tiktoken encoder is available its
    count is used; if it is missing or fails, the estimate is one token
    per four characters, never less than 1.
    """
    if text:
        if _TIKTOKEN is not None:
            try:
                return len(_TIKTOKEN.encode(text))
            except Exception:
                # Fall through to the length-based estimate below.
                pass
        approx = len(text) // 4
        return approx if approx > 1 else 1
    return 0
class ContextBudgetManager:
    """Track token usage and condense history when the budget is tight."""

    def __init__(
        self,
        policy: Optional[BudgetPolicy] = None,
        summariser: Optional[SummariseFn] = None,
    ) -> None:
        """Start with an empty history.

        Args:
            policy: Budget thresholds; a default ``BudgetPolicy`` if omitted.
            summariser: Callable turning a list of messages into summary
                text; the module's default summariser if omitted.
        """
        self.policy = policy or BudgetPolicy()
        self._summariser = summariser or _default_summariser
        self._messages: List[Message] = []
        self._condensations = 0

    # ------------------------------------------------------------------
    # Mutation API
    # ------------------------------------------------------------------
    def add(self, message: Message) -> None:
        """Append one message to the history."""
        self._messages.append(message)

    def add_text(self, role: Role, content: str, **kwargs: Any) -> None:
        """Build a ``Message`` from the arguments and append it."""
        self.add(Message(role=role, content=content, **kwargs))

    def extend(self, messages: List[Message]) -> None:
        """Append several messages, preserving their order."""
        self._messages.extend(messages)

    def clear(self) -> None:
        """Drop all messages and reset the condensation counter."""
        self._messages.clear()
        self._condensations = 0

    # ------------------------------------------------------------------
    # Inspection
    # ------------------------------------------------------------------
    def total_tokens(self) -> int:
        """Sum of the ``tokens`` field over all stored messages."""
        return sum(m.tokens for m in self._messages)

    def stats(self) -> ContextStats:
        """Return a usage snapshot; ``ratio`` is 0.0 when ``max_tokens`` is 0."""
        total = self.total_tokens()
        return ContextStats(
            prompt_tokens=total,
            max_tokens=self.policy.max_tokens,
            ratio=total / self.policy.max_tokens if self.policy.max_tokens else 0.0,
            condensations=self._condensations,
        )

    def messages(self) -> List[Message]:
        """Return a shallow copy of the history list."""
        return list(self._messages)

    # ------------------------------------------------------------------
    # Condensation
    # ------------------------------------------------------------------
    def needs_condense(self) -> bool:
        """True once the total reaches the policy's ``condense_at`` threshold."""
        return self.total_tokens() >= self.policy.condense_at

    def condense(self) -> int:
        """Fold older non-essential messages into a single summary entry.

        Oversize, non-pinned tool outputs are replaced by a short marker
        first. If that is not enough, every non-pinned message except the
        last ``keep_recent_turns`` is replaced by one pinned summary message,
        placed after the existing pinned messages.

        Returns:
            The number of tokens removed. When nothing eligible is found the
            call is a true no-op: the history, the running total and the
            condensation counter are all left unchanged.
        """
        if not self._messages:
            return 0

        before = self.total_tokens()
        changed = False

        # 1. Drop oversize tool outputs first.
        for m in self._messages:
            if (
                m.role == "tool"
                and m.importance != "pinned"
                and m.tokens >= self.policy.large_tool_output_tokens
            ):
                replacement = "_tool output dropped to free context budget_"
                m.content = replacement
                m.tokens = estimate_tokens(replacement)
                m.meta = {**m.meta, "condensed": "1"}
                changed = True

        if self.total_tokens() < self.policy.condense_at:
            # Count a condensation only if step 1 actually changed something;
            # otherwise the counter shown in the UI would tick on no-ops.
            if changed:
                self._condensations += 1
            return before - self.total_tokens()

        # 2. Split keep-recent vs. condensable.
        pinned: List[Message] = [m for m in self._messages if m.importance == "pinned"]
        rest: List[Message] = [m for m in self._messages if m.importance != "pinned"]
        keep_n = max(0, self.policy.keep_recent_turns)
        condensable = rest[:-keep_n] if keep_n else rest
        kept_recent = rest[-keep_n:] if keep_n else []

        if not condensable:
            if changed:
                self._condensations += 1
            return before - self.total_tokens()

        summary_text = self._summariser(condensable)
        summary_msg = Message(
            role="system",
            content=f"## {self.policy.summary_label}\n\n{summary_text}",
            importance="pinned",
            meta={"summary": "1"},
        )

        self._messages = pinned + [summary_msg] + kept_recent
        self._condensations += 1
        return before - self.total_tokens()

    def maybe_condense(self) -> int:
        """Condense iff the budget is over the threshold; return tokens removed."""
        if self.needs_condense():
            return self.condense()
        return 0
b/gitpilot/context_cache.py @@ -0,0 +1,257 @@ +# gitpilot/context_cache.py +"""In-process LRU memoisation for the workspace context pack. + +Batch P2-C β€” additive. :func:`gitpilot.context_pack.build_context_pack` +re-scans the workspace on every turn, which is the right behaviour for +correctness but wasteful when nothing has changed: most turns reuse +the same conventions, the same active use case, and the same vault +chunks. + +``build_cached`` wraps the original builder with an LRU keyed on the +workspace path, the active mode slug, the query string, and a digest +of the *mtimes* of the files that contribute to the pack. Because the +key incorporates mtimes, edits to the relevant files invalidate the +cache automatically. Callers must not edit files via the cache layer +itself β€” touching ``AGENTS.md`` or ``.gitpilot/*`` is enough. + +Behaviour matrix +---------------- +* ``context_cache`` flag off (default) β†’ straight passthrough to + :func:`gitpilot.context_pack.build_context_pack`. Zero new state. +* Flag on β†’ memoised; cache size capped to keep memory bounded. + +The cache is *strict per workspace*: cross-workspace contamination is +impossible because the workspace path is part of the key. +""" +from __future__ import annotations + +import hashlib +import logging +import threading +import time +from collections import OrderedDict +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Dict, Iterable, Optional, Tuple + +from . import flags + +logger = logging.getLogger(__name__) + +FLAG_CONTEXT_CACHE = "context_cache" +DEFAULT_CACHE_SIZE = 32 + +# Files that contribute to the cache key β€” touching any of these +# invalidates the entry on the next call. +_FINGERPRINT_FILES: Tuple[str, ...] = ( + "AGENTS.md", + ".gitpilot/AGENTS.md", + ".gitpilot/GITPILOT.md", + ".gitpilot/modes.yaml", + ".gitpilotrules", +) +_FINGERPRINT_DIRS: Tuple[str, ...] 
@dataclass
class CacheStats:
    """Point-in-time counters describing the in-process cache."""

    size: int      # entries currently stored
    capacity: int  # maximum number of entries
    hits: int      # lookups served from the cache
    misses: int    # lookups that found nothing

    @property
    def hit_ratio(self) -> float:
        """Fraction of lookups that were hits; ``0.0`` before any lookup."""
        lookups = self.hits + self.misses
        if not lookups:
            return 0.0
        return self.hits / lookups

    def to_dict(self) -> Dict[str, Any]:
        """Return the counters plus ``hit_ratio`` rounded to four decimals."""
        payload: Dict[str, Any] = {
            name: getattr(self, name)
            for name in ("size", "capacity", "hits", "misses")
        }
        payload["hit_ratio"] = round(self.hit_ratio, 4)
        return payload
def set_capacity(capacity: int) -> None:
    """Resize the global cache, keeping the most recently used entries.

    The cache object is replaced immediately. ``capacity`` is clamped to at
    least 1 (the same floor ``_LRUCache`` applies) *before* it is used to
    slice the old entries: a negative value would otherwise turn
    ``[-capacity:]`` into a slice from the front and could discard the one
    entry the clamped cache is able to hold. Hit/miss counters carry over.
    """
    global _CACHE
    capacity = max(1, int(capacity))
    old_cache = _CACHE
    new_cache = _LRUCache(capacity=capacity)
    with old_cache._lock:
        # The OrderedDict is ordered oldest -> newest, so the tail holds the
        # most recently used entries.
        for key, value in list(old_cache._store.items())[-capacity:]:
            new_cache.put(key, value)
        new_cache._hits = old_cache._hits
        new_cache._misses = old_cache._misses
        _CACHE = new_cache
def _mtimes_digest(workspace_path: Path) -> str:
    """Fingerprint the files that feed the context pack.

    Hashes ``(path, mtime_ns, size)`` for every existing file named in
    ``_FINGERPRINT_FILES`` and for every file found under the directories in
    ``_FINGERPRINT_DIRS`` (walked via ``_walk_files`` and visited in sorted
    order so the result is deterministic). Entries that are missing or that
    cannot be stat'ed are skipped.

    Returns:
        The first 32 hex characters of the SHA-256 digest.
    """
    h = hashlib.sha256()

    def _feed(label: str, mtime_ns: int, size: int) -> None:
        # NUL-terminate each field so adjacent values cannot run together.
        for part in (label, str(mtime_ns), str(size)):
            h.update(part.encode("utf-8"))
            h.update(b"\0")

    for rel in _FINGERPRINT_FILES:
        path = workspace_path / rel
        try:
            if not path.is_file():
                continue
            stat = path.stat()
        except OSError:
            continue
        _feed(rel, stat.st_mtime_ns, stat.st_size)

    for rel in _FINGERPRINT_DIRS:
        directory = workspace_path / rel
        if not directory.is_dir():
            continue
        for child in sorted(_walk_files(directory)):
            try:
                stat = child.stat()
            except OSError:
                continue
            # Size is included here as well, matching the top-level files, so
            # a rewrite that keeps the same mtime still changes the digest.
            _feed(str(child), stat.st_mtime_ns, stat.st_size)

    return h.hexdigest()[:32]
def now() -> float:
    """Return the current ``time.monotonic()`` reading, in seconds.

    This is a monotonic clock, not wall-clock time: only the difference
    between two readings is meaningful. Intended for tests that need
    timestamps that never go backwards.
    """
    return time.monotonic()
+""" +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Dict, Iterable, Mapping, Optional + +from .context_budget import _TIKTOKEN, estimate_tokens +from .settings import AppSettings, LLMProvider + +FLAG_CONTEXT_METER = "context_meter" + +# ---------------------------------------------------------------------- +# Context-window catalogue +# ---------------------------------------------------------------------- +# Conservative values β€” when in doubt round DOWN. We'd rather show a +# user "94% full" against a 7 800-token estimate than claim "47% full" +# against a 16 000 number the provider won't actually honour. + +_DEFAULT_CONTEXT_WINDOW = 8_192 + +_OPENAI_WINDOWS: Mapping[str, int] = { + "gpt-4o": 128_000, + "gpt-4o-mini": 128_000, + "gpt-4-turbo": 128_000, + "gpt-4": 8_192, + "gpt-3.5-turbo": 16_385, + "o1": 200_000, + "o1-mini": 128_000, + "o3-mini": 200_000, +} + +_CLAUDE_WINDOWS: Mapping[str, int] = { + "claude-opus-4-7": 200_000, + "claude-sonnet-4-6": 200_000, + "claude-sonnet-4-5": 200_000, + "claude-haiku-4-5": 200_000, + "claude-3-7-sonnet": 200_000, + "claude-3-5-sonnet": 200_000, + "claude-3-5-haiku": 200_000, + "claude-3-opus": 200_000, + "claude-3-sonnet": 200_000, + "claude-3-haiku": 200_000, +} + +_WATSONX_WINDOWS: Mapping[str, int] = { + "meta-llama/llama-3-3-70b-instruct": 131_072, + "meta-llama/llama-3-1-70b-instruct": 131_072, + "meta-llama/llama-3-1-8b-instruct": 131_072, + "ibm/granite-3-8b-instruct": 4_096, +} + +# Ollama / OllaBridge β€” keyed on the *family* prefix. Anything not +# matched falls back to the conservative 8 k default. These are the +# advertised values; users running with a smaller ``num_ctx`` will see +# the bar fill faster than expected, which is the safe direction. 
+_OLLAMA_FAMILY_WINDOWS: Mapping[str, int] = { + "llama3": 8_192, + "llama3.1": 131_072, + "llama3.2": 131_072, + "llama2": 4_096, + "qwen2.5": 32_768, + "qwen2": 32_768, + "mistral": 32_768, + "mixtral": 32_768, + "phi3": 4_096, + "phi": 2_048, + "gemma2": 8_192, + "gemma": 8_192, + "codellama": 16_384, + "deepseek-coder": 16_384, +} + + +def _ollama_window(model: str) -> int: + """Look up the context window for an Ollama model tag (e.g. ``llama3:8b``).""" + family = model.split(":", 1)[0].lower() + if family in _OLLAMA_FAMILY_WINDOWS: + return _OLLAMA_FAMILY_WINDOWS[family] + # Try a prefix match for variants like "llama3.1:8b-instruct". + for prefix, window in _OLLAMA_FAMILY_WINDOWS.items(): + if family.startswith(prefix): + return window + return _DEFAULT_CONTEXT_WINDOW + + +# ---------------------------------------------------------------------- +# Public dataclass +# ---------------------------------------------------------------------- + +@dataclass +class ContextUsage: + """Snapshot of the active model's context-window utilisation.""" + + provider: str + model: str + context_window: int + used: int + reserved_response: int + topology: str + tool_count: int + breakdown: Dict[str, int] = field(default_factory=dict) + is_estimate: bool = False + """True when token counts come from the chars/4 heuristic rather than + a real tokenizer. 
def resolve_context_window(settings: AppSettings) -> int:
    """Return the advertised context-window size for the active model.

    The model id is taken from :func:`resolve_provider_model`, so the window
    always describes the same model the UI displays, including the fallback
    model used when the configured value is empty. Reading the raw settings
    fields instead would report the 8 k default for an unset OpenAI model
    while the popover shows ``gpt-4o-mini``, and would hand an unset
    Ollama/OllaBridge model straight to ``_ollama_window``.

    Unknown models fall back to ``_DEFAULT_CONTEXT_WINDOW`` (200 000 for
    Anthropic, whose catalogue entries all share that size).
    """
    provider = settings.provider
    _, model = resolve_provider_model(settings)

    if provider == LLMProvider.openai:
        return _OPENAI_WINDOWS.get(model, _DEFAULT_CONTEXT_WINDOW)
    if provider == LLMProvider.claude:
        return _CLAUDE_WINDOWS.get(model, 200_000)
    if provider == LLMProvider.watsonx:
        return _WATSONX_WINDOWS.get(model, _DEFAULT_CONTEXT_WINDOW)
    if provider in (LLMProvider.ollama, LLMProvider.ollabridge):
        # An empty model id (possible for OllaBridge) has no family to match.
        return _ollama_window(model) if model else _DEFAULT_CONTEXT_WINDOW
    return _DEFAULT_CONTEXT_WINDOW
def sum_tokens(texts: Iterable[str]) -> int:
    """Add up :func:`count_tokens` over ``texts``, skipping falsy entries
    (empty strings, ``None``)."""
    total = 0
    for text in texts:
        if not text:
            continue
        total += count_tokens(text)
    return total
" + "You never make changes yourself, only create detailed plans." +) + +_PLANNER_ROLE = "Repository Refactor Planner" +_PLANNER_GOAL = ( + "Design safe, step-by-step refactor plans based on ACTUAL repository state " + "discovered during exploration" +) + +_EXPLORER_ROLE = "Repository Explorer" +_EXPLORER_GOAL = ( + "Thoroughly explore the repository structure, identify key files, and report findings" +) +_EXPLORER_BACKSTORY = ( + "You are a meticulous code archaeologist. You use the available tools to " + "list files, read content, and build a complete picture of the repository " + "before any change is planned." +) + +_LITE_ROLE = "GitPilot Lite" +_LITE_GOAL = "Help the user with their repository" +_LITE_BACKSTORY = "You are a helpful coding assistant. Be concise." + + +def system_prompt_text(*, lite_mode: bool) -> str: + """Return the persona text that the active topology will inject into + every LLM call. Used for the ``system_prompt`` breakdown row.""" + if lite_mode: + return " ".join((_LITE_ROLE, _LITE_GOAL, _LITE_BACKSTORY)) + return " ".join( + ( + _EXPLORER_ROLE, + _EXPLORER_GOAL, + _EXPLORER_BACKSTORY, + _PLANNER_ROLE, + _PLANNER_GOAL, + _PLANNER_BACKSTORY, + ) + ) + + +def count_system_prompt_tokens(*, lite_mode: bool) -> int: + return count_tokens(system_prompt_text(lite_mode=lite_mode)) + + +def count_messages_tokens(messages: Iterable[object]) -> int: + """Sum estimated tokens over an iterable of message-like objects. + + Accepts any object exposing a ``.content`` attribute (matches the + :class:`gitpilot.session.Message` dataclass) or a ``"content"`` + mapping key. Other shapes are ignored, which is the safe default + for partially-typed history records. 
def count_tool_schema_tokens(tool_lists: Iterable[Iterable[object]]) -> int:
    """Estimate the tokens spent describing tools to the model.

    For every tool in every supplied group, the tool's ``name``,
    ``description`` and argument schema are joined into one string and
    counted with :func:`count_tokens`. Missing attributes count as empty
    text; empty or ``None`` groups are skipped.

    The schema text is the JSON dump of ``args_schema.model_json_schema()``
    when that method exists (empty if the call or the dump raises), and
    ``str(args_schema)`` otherwise.
    """
    import json as _json

    def _schema_text(schema: object) -> str:
        if schema is None:
            return ""
        dump_schema = getattr(schema, "model_json_schema", None)
        if not callable(dump_schema):
            return str(schema)
        try:
            return _json.dumps(dump_schema())
        except Exception:  # pragma: no cover - defensive
            return ""

    running = 0
    for group in tool_lists:
        if not group:
            continue
        for tool in group:
            name = getattr(tool, "name", "") or ""
            description = getattr(tool, "description", "") or ""
            schema_text = _schema_text(getattr(tool, "args_schema", None))
            running += count_tokens(f"{name} {description} {schema_text}")
    return running
def build_usage(
    settings: AppSettings,
    *,
    breakdown: Mapping[str, int],
    tool_count: int,
    lite_mode: bool,
    extra_tools: int = 0,
    reserved_response: Optional[int] = None,
) -> ContextUsage:
    """Combine caller-supplied token counts into a :class:`ContextUsage`.

    No tokens are counted here. ``used`` is the sum of the truthy values in
    ``breakdown``; provider, model and window are resolved from ``settings``;
    ``reserved_response`` defaults to ``RESERVED_RESPONSE_TOKENS`` when
    ``None``. The reported ``tool_count`` is ``tool_count + extra_tools``.
    """
    provider, model = resolve_provider_model(settings)
    window = resolve_context_window(settings)

    if reserved_response is None:
        reserved = RESERVED_RESPONSE_TOKENS
    else:
        reserved = reserved_response

    used = 0
    for value in breakdown.values():
        if value:
            used += int(value)

    topology = describe_topology(
        lite_mode=lite_mode,
        tool_count=tool_count,
        extra_tools=extra_tools,
    )
    estimated = not has_real_tokenizer(settings)

    return ContextUsage(
        provider=provider,
        model=model,
        context_window=window,
        used=used,
        reserved_response=reserved,
        topology=topology,
        tool_count=tool_count + extra_tools,
        breakdown=dict(breakdown),
        is_estimate=estimated,
    )
@dataclass
class DoctorReport:
    """Everything one ``doctor`` run produced: per-check results plus timing."""

    results: List[CheckResult] = field(default_factory=list)
    duration_ms: int = 0
    offline: bool = False

    @property
    def worst_level(self) -> str:
        """Most severe level among the results (``"green"`` when empty).

        Levels outside green/amber/red rank like green; on ties the earliest
        result wins.
        """
        severity = {"green": 0, "amber": 1, "red": 2}
        worst: Optional[str] = None
        worst_rank = -1
        for result in self.results:
            rank = severity.get(result.level, 0)
            # Strict ">" keeps the first result among equally severe ones.
            if worst is None or rank > worst_rank:
                worst, worst_rank = result.level, rank
        return "green" if worst is None else worst

    @property
    def exit_code(self) -> int:
        """Process exit status: 0 for green/amber, 1 for anything else."""
        return 0 if self.worst_level in ("green", "amber") else 1

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serialisable view, including the derived fields."""
        serialised = [result.to_dict() for result in self.results]
        return {
            "results": serialised,
            "duration_ms": self.duration_ms,
            "offline": self.offline,
            "worst_level": self.worst_level,
            "exit_code": self.exit_code,
        }
def _tool_version(name: str, path: str) -> "tuple[Optional[str], Optional[CheckResult]]":
    """Run ``<path> --version`` with a 2-second timeout.

    Returns ``(version, None)`` on success, where ``version`` is the stripped
    stdout or ``"unknown"`` when stdout is empty. Returns ``(None, result)``
    with an amber :class:`CheckResult` when the command cannot be run or
    exits with a non-zero status.
    """
    try:
        out = subprocess.run(
            [path, "--version"], capture_output=True, text=True, timeout=2, check=False
        )
    except Exception as exc:  # pragma: no cover - defensive
        return None, CheckResult(name, "amber", f"{name} failed to run: {exc}")
    if out.returncode != 0:
        # The binary is on PATH but broken; do not report it as healthy.
        return None, CheckResult(
            name, "amber",
            f"{name} --version exited with status {out.returncode}",
            detail=(out.stderr or "").strip() or None,
        )
    return (out.stdout.strip() or "unknown"), None


def check_node() -> CheckResult:
    """Report whether ``node`` is on PATH and answers ``--version``.

    Node is optional, so every failure is amber rather than red.
    """
    path = shutil.which("node")
    if not path:
        return CheckResult(
            "node", "amber",
            "node not found on PATH",
            hint="Optional for the frontend. Install via nvm or your package manager.",
        )
    version, failure = _tool_version("node", path)
    if failure is not None:
        return failure
    return CheckResult("node", "green", f"node {version}")


def check_uv() -> CheckResult:
    """Report whether ``uv`` is on PATH and answers ``--version``.

    ``uv`` is optional, so every failure is amber rather than red.
    """
    path = shutil.which("uv")
    if not path:
        return CheckResult(
            "uv", "amber",
            "uv not found on PATH",
            hint="Optional but recommended. Install via `pip install uv` or the official installer.",
        )
    version, failure = _tool_version("uv", path)
    if failure is not None:
        return failure
    # The summary is the version output as printed by ``uv --version``.
    return CheckResult("uv", "green", version)
def check_modes_parses(workspace: Path) -> CheckResult:
    """Check that ``<workspace>/.gitpilot/modes.yaml`` loads.

    Amber when the file is absent, red (with the exception text as
    ``detail``) when ``ModeRegistry.load`` or its import raises, green with
    the value returned by ``load`` otherwise.
    """
    modes_file = workspace / ".gitpilot" / "modes.yaml"
    if not modes_file.exists():
        return CheckResult("modes.yaml", "amber", "no modes.yaml in this workspace")

    try:
        from gitpilot.modes import ModeRegistry  # local import to keep doctor light

        loaded = ModeRegistry().load(workspace_path=workspace)
    except Exception as exc:
        return CheckResult(
            "modes.yaml", "red",
            "modes.yaml did not parse",
            hint="Open the file and check for YAML syntax errors.",
            detail=str(exc),
        )
    return CheckResult("modes.yaml", "green", f"parsed {loaded} mode(s)")
def check_model_credentials() -> CheckResult:
    """Check that credentials for the configured LLM provider are present.

    Reads ``GITPILOT_LLM_PROVIDER`` (trimmed, case-insensitive):

    * unset: green if any key from ``_API_KEY_ENVS`` is set, else amber;
    * ``ollama``: green, no key needed;
    * otherwise: green when the provider's key variable is set, red if not.
    """
    provider = (os.environ.get("GITPILOT_LLM_PROVIDER") or "").strip().lower()
    if not provider:
        # Best-effort: check whether any known env var is set.
        present = [name for name, env in _API_KEY_ENVS.items() if os.environ.get(env)]
        if present:
            return CheckResult("model", "green", f"credential(s) present: {', '.join(present)}")
        return CheckResult(
            "model", "amber",
            "no GITPILOT_LLM_PROVIDER set and no provider API key in env",
            hint="Set GITPILOT_LLM_PROVIDER and the matching API key, or use ollama locally.",
        )
    if provider == "ollama":
        return CheckResult("model", "green", "provider=ollama (no API key needed)")

    # NOTE(review): the credential tables key Anthropic as "anthropic", but the
    # provider is presumably also spelled "claude" elsewhere in the project;
    # accept both so a set ANTHROPIC_API_KEY is recognised. Confirm against the
    # values GITPILOT_LLM_PROVIDER actually accepts.
    table_key = {"claude": "anthropic"}.get(provider, provider)
    env = _API_KEY_ENVS.get(table_key)
    if env and os.environ.get(env):
        return CheckResult("model", "green", f"provider={provider} ({env} set)")
    return CheckResult(
        "model", "red",
        f"provider={provider} but credential is missing",
        hint=_API_KEY_HINTS.get(table_key, f"Set the API key env var for {provider}"),
    )
def render_json(report: DoctorReport) -> str:
    """Serialise ``report.to_dict()`` as JSON indented by two spaces (for CI)."""
    payload = report.to_dict()
    return json.dumps(payload, indent=2)
# ``eq=False``: a plain ``@dataclass`` generates ``__eq__`` and therefore sets
# ``__hash__`` to ``None``, which would make raised errors unhashable and
# compare them by field values. Exceptions keep identity semantics instead.
@dataclass(eq=False)
class GitPilotError(Exception):
    """Base error carrying structured fields for the envelope.

    ``code`` should be a dotted, stable identifier (``sandbox.unreachable``)
    that the UI can branch on; ``message`` is human-readable; ``hint``
    suggests a remedy; ``doc_url`` deep-links to documentation;
    ``status_code`` is the HTTP status used when the error is rendered as a
    response (defaults to 500).

    ``str(err)`` and ``err.args`` carry ``message`` only.
    """

    code: str
    message: str
    hint: Optional[str] = None
    doc_url: Optional[str] = None
    status_code: int = 500

    def __post_init__(self) -> None:
        # The dataclass ``__init__`` does not call ``Exception.__init__``;
        # do it here so ``args`` / ``str()`` reflect the message.
        super().__init__(self.message)

    def to_envelope(self, trace_id: Optional[str] = None) -> Dict[str, Any]:
        """Return the envelope dict for this error via :func:`error_envelope`."""
        return error_envelope(self, trace_id=trace_id)
the canonical error payload.""" + if isinstance(err, GitPilotError): + body: Dict[str, Any] = { + "code": err.code, + "message": err.message, + } + if err.hint: + body["hint"] = err.hint + if err.doc_url: + body["doc_url"] = err.doc_url + else: + body = { + "code": fallback_code, + "message": str(err) or err.__class__.__name__, + "hint": "Re-run with GITPILOT_DEBUG=1 for a traceback in the server log.", + "doc_url": f"{DEFAULT_DOC_BASE}/internal-unexpected", + } + return { + "error": body, + "trace_id": trace_id or _new_trace_id(), + } + + +def error_envelope_response(err: BaseException, *, trace_id: Optional[str] = None) -> Any: + """Return a FastAPI ``JSONResponse`` carrying the envelope. + + Imports the FastAPI types lazily so the module remains importable in + contexts where FastAPI isn't installed (CLI, tests). + """ + from fastapi.responses import JSONResponse + + status = err.status_code if isinstance(err, GitPilotError) else 500 + return JSONResponse(status_code=status, content=error_envelope(err, trace_id=trace_id)) + + +# ---------------------------------------------------------------------- +# Endpoint decorator +# ---------------------------------------------------------------------- + +def wrap_errors_envelope(func: F) -> F: + """Decorate an async FastAPI handler to emit the envelope. + + When the ``error_envelope`` flag is **off** the wrapper re-raises so + the legacy FastAPI behaviour (default ``{detail: …}`` body or + framework traceback) applies. When the flag is **on** every + uncaught exception is translated into the structured payload. + + The decorator is a no-op for handlers that return normally. 
+ """ + + @functools.wraps(func) + async def wrapper(*args: Any, **kwargs: Any) -> Any: + try: + return await func(*args, **kwargs) + except GitPilotError as err: + if flags.is_on(FLAG_ERROR_ENVELOPE): + trace_id = _new_trace_id() + logger.warning( + "GitPilotError code=%s trace_id=%s msg=%s", + err.code, trace_id, err.message, + ) + return error_envelope_response(err, trace_id=trace_id) + raise + except Exception as err: # noqa: BLE001 β€” top-of-stack adapter + if flags.is_on(FLAG_ERROR_ENVELOPE): + trace_id = _new_trace_id() + logger.exception( + "unhandled exception in %s (trace_id=%s)", func.__name__, trace_id, + ) + return error_envelope_response(err, trace_id=trace_id) + raise + + return cast(F, wrapper) + + +# ---------------------------------------------------------------------- +# Helpers +# ---------------------------------------------------------------------- + +def _new_trace_id() -> str: + return uuid.uuid4().hex[:16] + + +def render_traceback_for_log(err: BaseException) -> str: + """Return a short traceback suitable for structured logs.""" + return "".join(traceback.format_exception(type(err), err, err.__traceback__)).strip() diff --git a/gitpilot/flags.py b/gitpilot/flags.py new file mode 100644 index 0000000..4a7f172 --- /dev/null +++ b/gitpilot/flags.py @@ -0,0 +1,161 @@ +# gitpilot/flags.py +"""Feature-flag service β€” single source of truth for opt-in code paths. + +Lookup precedence (first hit wins): explicit override β†’ ``GITPILOT_FLAGS`` +env (``name=1,other=0``) β†’ ``/.gitpilot/flags.json`` β†’ +``~/.gitpilot/flags.json`` β†’ call-site default. Lazy, cached, RLock-safe, +zero third-party deps. 
+""" +from __future__ import annotations + +import json +import logging +import os +import threading +from pathlib import Path +from typing import Any, Dict, Iterator, Mapping, Optional + +logger = logging.getLogger(__name__) + +ENV_VAR = "GITPILOT_FLAGS" +PROJECT_FLAGS_REL = Path(".gitpilot") / "flags.json" +USER_FLAGS_PATH = Path.home() / ".gitpilot" / "flags.json" + +_TRUE = {"1", "true", "yes", "on", "y", "t"} +_FALSE = {"0", "false", "no", "off", "n", "f"} + +_lock = threading.RLock() +_overrides: Dict[str, bool] = {} +_cache: Optional[Dict[str, bool]] = None +_workspace: Optional[Path] = None + + +def _coerce(value: Any) -> Optional[bool]: + if isinstance(value, bool): + return value + if isinstance(value, (int, float)): + return bool(value) + if isinstance(value, str): + v = value.strip().lower() + if v in _TRUE: + return True + if v in _FALSE: + return False + return None + + +def _parse_env(raw: str) -> Dict[str, bool]: + out: Dict[str, bool] = {} + for piece in raw.split(","): + piece = piece.strip() + if not piece: + continue + if "=" in piece: + name, _, value = piece.partition("=") + parsed = _coerce(value) + else: + name, parsed = piece, True + name = name.strip() + if not name or parsed is None: + continue + out[name] = parsed + return out + + +def _load_file(path: Path) -> Dict[str, bool]: + if not path.exists(): + return {} + try: + data = json.loads(path.read_text(encoding="utf-8")) + except Exception as exc: # pragma: no cover - logged, returns empty + logger.warning("could not parse %s: %s", path, exc) + return {} + if not isinstance(data, Mapping): + return {} + out: Dict[str, bool] = {} + for key, value in data.items(): + parsed = _coerce(value) + if parsed is not None and isinstance(key, str): + out[key.strip()] = parsed + return out + + +def _build_cache() -> Dict[str, bool]: + merged: Dict[str, bool] = {} + merged.update(_load_file(USER_FLAGS_PATH)) + if _workspace is not None: + merged.update(_load_file(_workspace / PROJECT_FLAGS_REL)) + 
def set_workspace(workspace: Optional[Path]) -> None:
    """Register the active workspace so its ``.gitpilot/flags.json`` loads.

    The path is stored resolved; ``None`` clears the registration. Either
    way the merged flag cache is invalidated so the next lookup rebuilds it.
    """
    global _workspace
    with _lock:
        if workspace is None:
            _workspace = None
        else:
            _workspace = workspace.resolve()
        _invalidate()
def iter_known(defaults: Mapping[str, bool]) -> Iterator[tuple[str, bool, bool]]:
    """Yield ``(name, current, default)`` for every flag in *defaults*.

    ``current`` comes from one snapshot of the merged flag map taken when
    iteration starts; flags missing from the snapshot report their default.
    """
    merged = enabled_flags()
    yield from (
        (flag, merged.get(flag, fallback), fallback)
        for flag, fallback in defaults.items()
    )
+* **Non-interactive friendly** β€” every prompt can be pre-answered via + the :class:`WizardAnswers` dataclass so the wizard runs in CI and + scripts without TTY access. +* **Flag-gated** β€” public entry points consult ``init_wizard``. With + the flag off the function refuses to run, leaving manual ``init`` + intact. +""" +from __future__ import annotations + +import dataclasses +import logging +import os +import re +import stat +import tempfile +import time +from contextlib import contextmanager +from dataclasses import dataclass, field +from pathlib import Path +from typing import ( + Any, + Callable, + Dict, + Iterable, + Iterator, + List, + Mapping, + Optional, + Tuple, +) + +from . import flags +from .agents_md import run_init as run_agents_md_init +from .trusted_folders import TrustStore + +logger = logging.getLogger(__name__) + +FLAG_INIT_WIZARD = "init_wizard" +SECRET_REDACTED = "***" + + +# ---------------------------------------------------------------------- +# Catalog of providers +# ---------------------------------------------------------------------- + +@dataclass(frozen=True) +class _ProviderSpec: + slug: str # canonical lowercase id + label: str # display name + env_key: Optional[str] # secret env var (None for local providers) + default_model: str + notes: str + + @property + def needs_key(self) -> bool: + return self.env_key is not None + + +SUPPORTED_PROVIDERS: Tuple[_ProviderSpec, ...] 
def provider_by_slug(slug: str) -> Optional[_ProviderSpec]:
    """Return the supported provider whose slug matches *slug*, else ``None``.

    The lookup trims surrounding whitespace and ignores case.
    """
    wanted = slug.strip().lower()
    return next(
        (spec for spec in SUPPORTED_PROVIDERS if spec.slug == wanted),
        None,
    )
@dataclass
class WizardResult:
    """Outcome of one wizard run."""

    workspace: Path
    files_written: List[Path] = field(default_factory=list)
    # Each entry pairs the skipped path with the reason it was skipped.
    files_skipped: List[Tuple[Path, str]] = field(default_factory=list)
    trust_recorded: bool = False
    provider: str = ""
    mode_slug: str = ""
    duration_ms: int = 0
    aborted: bool = False
    reason: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict view of the result with every path stringified."""
        written = list(map(str, self.files_written))
        skipped = [(str(path), why) for path, why in self.files_skipped]
        return dict(
            workspace=str(self.workspace),
            files_written=written,
            files_skipped=skipped,
            trust_recorded=self.trust_recorded,
            provider=self.provider,
            mode_slug=self.mode_slug,
            duration_ms=self.duration_ms,
            aborted=self.aborted,
            reason=self.reason,
        )
@dataclass
class ScriptedPrompter(Prompter):
    """Prompter driven by a list of pre-recorded answers. Test-only.

    Every prompt consumes the next entry of ``answers`` in order; running
    past the end raises :class:`WizardError`. Messages passed to
    :meth:`echo` are collected in ``echoed`` rather than printed.
    """

    answers: List[Any]
    echoed: List[str] = field(default_factory=list)
    # Index of the next answer to hand out.
    _cursor: int = 0

    def _pop(self) -> Any:
        """Return the next scripted answer and advance the cursor."""
        if self._cursor >= len(self.answers):
            raise WizardError("scripted prompter ran out of answers")
        value = self.answers[self._cursor]
        self._cursor += 1
        return value

    def text(self, message: str, *, default: Optional[str] = None) -> str:
        return str(self._pop())

    def secret(self, message: str) -> str:
        return str(self._pop())

    def select(self, message: str, options: List[str], *, default: int = 0) -> int:
        """Resolve the next answer to an index into *options*.

        An integer answer is taken as the index and must lie within
        ``range(len(options))``; any other answer is matched against the
        option text, ignoring case and surrounding whitespace of the answer.

        Raises:
            WizardError: the index is out of range or no option matches.
        """
        value = self._pop()
        if isinstance(value, int):
            # Reject out-of-range indices here: a negative one would silently
            # wrap around in the caller and an oversized one would surface
            # as a bare IndexError far from the faulty script.
            if not 0 <= value < len(options):
                raise WizardError(
                    f"scripted option index {value!r} out of range for {options}"
                )
            return value
        wanted = str(value).strip().lower()
        for i, opt in enumerate(options):
            if opt.lower() == wanted:
                return i
        raise WizardError(f"scripted option {value!r} not in {options}")

    def confirm(self, message: str, *, default: bool = True) -> bool:
        return bool(self._pop())

    def echo(self, message: str = "") -> None:
        self.echoed.append(message)
def run_wizard(
    workspace: Path,
    *,
    prompter: Optional[Prompter] = None,
    presets: Optional[WizardAnswers] = None,
    trust_store: Optional[TrustStore] = None,
    enabled: Optional[bool] = None,
) -> WizardResult:
    """Run the full wizard end-to-end and return a :class:`WizardResult`.

    Args:
        workspace: Directory to initialise; created if missing once the
            answers have been collected and rendered.
        prompter: Source of interactive answers. Only consulted when
            *presets* do not already cover every prompt; defaults to the
            Typer-backed prompter in that case.
        presets: Pre-supplied answers. Complete presets skip all prompts.
        trust_store: Store used to record workspace trust; defaults to
            ``TrustStore.default()``.
        enabled: Explicit override for the ``init_wizard`` feature flag.

    Returns:
        A result describing written / skipped files. ``aborted`` is set,
        with ``reason``, when presets name an unknown provider or mode, when
        the user interrupts, or when a write fails; in the last two cases
        every file written by this run is rolled back first.

    Raises:
        WizardError: the feature flag is off, or answer validation fails
            before any file is written.
    """
    flag_on = enabled if enabled is not None else flags.is_on(FLAG_INIT_WIZARD)
    if not flag_on:
        raise WizardError(
            "init_wizard flag is off; run `gitpilot init` for the legacy flow."
        )

    start = time.monotonic()
    workspace = workspace.resolve()
    result = WizardResult(workspace=workspace)

    def _abort(reason: str) -> WizardResult:
        # Shared exit for clean aborts that happen before any write.
        result.aborted = True
        result.reason = reason
        result.duration_ms = int((time.monotonic() - start) * 1000)
        return result

    # Phase 0 - validate any presets that we *can* validate before
    # touching prompts. A typed-but-unknown provider/mode in the
    # presets is a clean abort, not a fall-through to prompts.
    if presets is not None:
        if provider_by_slug(presets.provider) is None:
            return _abort(f"unsupported provider: {presets.provider!r}")
        if mode_by_slug(presets.mode_slug) is None:
            return _abort(f"unsupported mode: {presets.mode_slug!r}")

    # Phase 1 - collect (no writes yet). The default prompter is built only
    # when prompts are actually needed, so a fully preset run does not
    # construct the Typer-backed prompter at all.
    try:
        if presets is not None and _is_complete(presets):
            answers = presets
        else:
            answers = collect_answers(
                prompter=prompter or _TyperPrompter(), presets=presets,
            )
    except KeyboardInterrupt:
        return _abort("user aborted")

    result.provider = answers.provider
    result.mode_slug = answers.mode_slug

    # Phase 2 - render in-memory artefacts (may raise WizardError; still no
    # filesystem changes at this point).
    env_text = _render_env(answers)
    modes_text = _render_modes(answers)

    # The workspace directory is created only now, so the aborts above leave
    # the filesystem untouched.
    workspace.mkdir(parents=True, exist_ok=True)

    # Phase 3 - write atomically (rollback any partial writes on failure)
    rollback_handlers: List[Callable[[], None]] = []
    try:
        env_path = workspace / ".env"
        if env_path.exists() and not answers.overwrite_env:
            result.files_skipped.append((env_path, "exists"))
        else:
            _atomic_write(env_path, env_text, mode=0o600,
                          rollback=rollback_handlers)
            result.files_written.append(env_path)

        gitpilot_dir = workspace / ".gitpilot"
        gitpilot_dir.mkdir(exist_ok=True)
        modes_path = gitpilot_dir / "modes.yaml"
        if modes_path.exists() and not answers.overwrite_modes:
            result.files_skipped.append((modes_path, "exists"))
        else:
            _atomic_write(modes_path, modes_text, mode=0o644,
                          rollback=rollback_handlers)
            result.files_written.append(modes_path)

        agents_md_path = workspace / "AGENTS.md"
        if agents_md_path.exists() and not answers.overwrite_agents_md:
            result.files_skipped.append((agents_md_path, "exists"))
        else:
            # Snapshot a file we are about to regenerate so a rollback can
            # put the user's original back instead of deleting it.
            previous_agents: Optional[bytes] = (
                agents_md_path.read_bytes() if agents_md_path.is_file() else None
            )
            report = run_agents_md_init(workspace, overwrite=answers.overwrite_agents_md)
            if report.created:
                result.files_written.append(agents_md_path)

                def _agents_rollback(
                    p: Path = agents_md_path,
                    prior: Optional[bytes] = previous_agents,
                ) -> None:
                    if prior is None:
                        _unlink_quiet(p)
                    else:
                        p.write_bytes(prior)

                rollback_handlers.append(_agents_rollback)
            else:
                result.files_skipped.append((agents_md_path, report.skipped_reason or "exists"))

        if answers.workspace_trust:
            store = trust_store or TrustStore.default()
            store.trust(workspace, note="set up via wizard")
            result.trust_recorded = True

    except (Exception, KeyboardInterrupt) as exc:
        # Atomic rollback - undo any successful writes so the user can
        # safely re-run. Ctrl-C is handled here too: it is not an
        # ``Exception`` and would otherwise skip the rollback. We record the
        # failure rather than re-raise so the WizardResult always describes
        # what happened.
        for fn in reversed(rollback_handlers):
            try:
                fn()
            except Exception:
                logger.exception("rollback handler failed")
        result.aborted = True
        result.files_written = []
        if isinstance(exc, KeyboardInterrupt):
            result.reason = "user aborted"
        else:
            result.reason = str(exc) or exc.__class__.__name__
            logger.exception("wizard failed")

    result.duration_ms = int((time.monotonic() - start) * 1000)
    return result
---------------------------------------------------------------------- + +_FORBIDDEN_ENV_CHARS = re.compile(r"[\r\n\x00]") + + +def _validate_env_value(value: str) -> None: + """Reject newlines and NULs so the secret can't break out of the file.""" + if _FORBIDDEN_ENV_CHARS.search(value): + raise WizardError("API key contains forbidden control characters") + + +def _atomic_write( + path: Path, + text: str, + *, + mode: int = 0o644, + rollback: List[Callable[[], None]], +) -> None: + """Write *text* to *path* atomically. + + The file is written to a sibling temp file in the same directory, + fsynced for durability, then renamed over the target. A rollback + handler that deletes the renamed file is appended to *rollback* + so the wizard can undo all writes on a later failure. + """ + path.parent.mkdir(parents=True, exist_ok=True) + fd, tmp_name = tempfile.mkstemp( + prefix=f".{path.name}.", suffix=".tmp", dir=str(path.parent), + ) + tmp_path = Path(tmp_name) + try: + with os.fdopen(fd, "w", encoding="utf-8") as handle: + handle.write(text) + handle.flush() + try: + os.fsync(handle.fileno()) + except OSError: + pass + os.chmod(tmp_path, mode) + os.replace(tmp_path, path) + except Exception: + _unlink_quiet(tmp_path) + raise + + def _undo(p: Path = path) -> None: + _unlink_quiet(p) + + rollback.append(_undo) + + +def _unlink_quiet(path: Path) -> None: + try: + path.unlink() + except FileNotFoundError: + return + except OSError: + logger.exception("could not unlink %s", path) + + +def _is_complete(answers: WizardAnswers) -> bool: + """True if presets cover every prompt β€” wizard runs non-interactively.""" + spec = provider_by_slug(answers.provider) + if spec is None: + return False + if spec.needs_key and not answers.api_key: + return False + return mode_by_slug(answers.mode_slug) is not None + + +# ---------------------------------------------------------------------- +# Rendering helpers exported for tests +# 
def _module_main(argv: Optional[List[str]] = None) -> int:  # pragma: no cover - manual
    """Non-interactive entry point for ``python -m gitpilot.init_wizard``.

    Builds :class:`WizardAnswers` from the command line, forces the
    ``init_wizard`` flag on, runs the wizard and prints the result as JSON.

    For providers that need a key, ``--api-key`` may be omitted when the
    provider's environment variable is set; if neither is available the
    command exits with a usage error instead of failing inside the
    (empty) scripted prompter.

    Returns:
        ``0`` on success, ``1`` when the wizard aborted or rejected the
        answers.
    """
    import argparse
    import json
    import sys

    parser = argparse.ArgumentParser(prog="gitpilot.init_wizard")
    parser.add_argument("--workspace", type=Path, default=Path.cwd())
    parser.add_argument("--provider", default="anthropic")
    parser.add_argument(
        "--api-key", default=None,
        help="provider API key; defaults to the provider's environment variable",
    )
    parser.add_argument("--mode", default="coder")
    parser.add_argument("--no-trust", action="store_true")
    args = parser.parse_args(argv)

    api_key = args.api_key
    spec = provider_by_slug(args.provider)
    # Unknown providers are left to run_wizard, which reports them as an
    # aborted result.
    if spec is not None and spec.needs_key:
        if api_key is None:
            # Lets callers keep the secret out of argv / shell history.
            api_key = os.environ.get(spec.env_key or "") or None
        if not api_key:
            parser.error(
                f"--api-key (or the {spec.env_key} environment variable) "
                f"is required for provider {spec.slug!r}"
            )

    presets = WizardAnswers(
        provider=args.provider,
        api_key=api_key,
        mode_slug=args.mode,
        workspace_trust=not args.no_trust,
    )
    flags.set_override(FLAG_INIT_WIZARD, True)
    try:
        result = run_wizard(args.workspace, presets=presets,
                            prompter=ScriptedPrompter(answers=[]))
    except WizardError as exc:
        # Validation failures are user errors: report them without a traceback.
        print(f"error: {exc}", file=sys.stderr)
        return 1
    print(json.dumps(result.to_dict(), indent=2))
    return 0 if not result.aborted else 1
@@ -225,6 +225,48 @@ def build_llm() -> Any: raise ValueError(f"Unsupported provider: {provider}") +# --------------------------------------------------------------------------- +# Batch P2-A β€” structured system-prompt builder. +# +# This helper is purely additive: it composes a :class:`SystemPayload` with +# cacheable / non-cacheable segments via :mod:`gitpilot.prompt_cache`. The +# legacy code path (callers that feed a flat ``system`` string into +# ``build_llm()`` results) is untouched β€” they keep working with no behaviour +# change. Callers that want the cache markers should adopt this helper +# incrementally. +# --------------------------------------------------------------------------- +def build_system_blocks( + *, + base_system: str = "", + workspace: Any = None, + mode_slug: Any = None, + tool_defs: Any = None, + session_conventions: str = "", +) -> Any: + """Return the structured system payload for the active provider. + + The active provider is read from settings; the prompt-cache markers + are emitted only when both ``prompt_cache`` is on and the provider + is Anthropic. For every other provider the payload still carries + the same content and a stable ordering, just without cache markers. 
+ """ + from .prompt_cache import build_system_blocks as _build # local import + + try: + provider = get_settings().provider.value # type: ignore[union-attr] + except Exception: + provider = None + + return _build( + base_system=base_system, + workspace=workspace, + mode_slug=mode_slug, + tool_defs=tool_defs, + session_conventions=session_conventions, + provider=provider, + ) + + def validate_provider_config(settings) -> tuple[bool, list[str]]: """Validate provider configuration and return (is_valid, errors).""" errors = [] diff --git a/gitpilot/mcp_toggles.py b/gitpilot/mcp_toggles.py new file mode 100644 index 0000000..27614d0 --- /dev/null +++ b/gitpilot/mcp_toggles.py @@ -0,0 +1,184 @@ +# gitpilot/mcp_toggles.py +"""Per-server MCP tool toggles and ``alwaysAllow`` semantics. + +Additive overlay on :mod:`gitpilot.mcp_client`. The existing client is +left untouched; callers that want fine-grained control wrap their server +configs with :class:`MCPServerToggles` and ask :meth:`filter_tools` / +:meth:`is_always_allowed` before exposing a tool to the model. + +Project file:: + + .gitpilot/mcp.json + + { + "servers": [ + { + "name": "github", + "transport": "stdio", + "command": "uvx", "args": ["mcp-github"], + "enabledTools": ["search_code", "list_issues"], + "disabledTools": [], + "alwaysAllow": ["search_code"], + "disabled": false + } + ] + } + +User overrides at ``~/.gitpilot/mcp.json`` are merged underneath the +project file, with the project taking precedence on name conflicts. 
+""" +from __future__ import annotations + +import fnmatch +import json +import logging +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Dict, Iterable, List, Optional, Set + +logger = logging.getLogger(__name__) + +GLOBAL_MCP_PATH = Path.home() / ".gitpilot" / "mcp.json" +PROJECT_MCP_REL = Path(".gitpilot") / "mcp.json" + + +@dataclass +class MCPServerToggles: + """Configurable visibility for an MCP server's tools.""" + + name: str + enabled_tools: Set[str] = field(default_factory=set) # empty == all + disabled_tools: Set[str] = field(default_factory=set) + always_allow: Set[str] = field(default_factory=set) + disabled: bool = False + + def is_tool_enabled(self, tool_name: str) -> bool: + if self.disabled: + return False + if _glob_in_set(tool_name, self.disabled_tools): + return False + if not self.enabled_tools: + return True + return _glob_in_set(tool_name, self.enabled_tools) + + def is_always_allowed(self, tool_name: str) -> bool: + return _glob_in_set(tool_name, self.always_allow) + + def filter_tools(self, tools: Iterable[Any]) -> List[Any]: + """Filter a list of tool descriptors by name. + + Each ``tool`` must expose a ``.name`` attribute (the + :class:`gitpilot.mcp_client.MCPTool` dataclass already does). 
+ """ + return [t for t in tools if self.is_tool_enabled(getattr(t, "name", ""))] + + def to_dict(self) -> Dict[str, Any]: + return { + "name": self.name, + "enabledTools": sorted(self.enabled_tools), + "disabledTools": sorted(self.disabled_tools), + "alwaysAllow": sorted(self.always_allow), + "disabled": self.disabled, + } + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "MCPServerToggles": + return cls( + name=data.get("name", ""), + enabled_tools=set(data.get("enabledTools", [])), + disabled_tools=set(data.get("disabledTools", [])), + always_allow=set(data.get("alwaysAllow", [])), + disabled=bool(data.get("disabled", False)), + ) + + +@dataclass +class MCPToggleRegistry: + """Aggregate toggles loaded from global + project config files.""" + + by_server: Dict[str, MCPServerToggles] = field(default_factory=dict) + + def get(self, server: str) -> MCPServerToggles: + return self.by_server.get(server) or MCPServerToggles(name=server) + + def is_tool_enabled(self, server: str, tool: str) -> bool: + return self.get(server).is_tool_enabled(tool) + + def is_always_allowed(self, server: str, tool: str) -> bool: + return self.get(server).is_always_allowed(tool) + + def register(self, toggles: MCPServerToggles) -> None: + self.by_server[toggles.name] = toggles + + @classmethod + def load(cls, workspace_path: Optional[Path] = None) -> "MCPToggleRegistry": + reg = cls() + # Global first… + reg._merge_from(GLOBAL_MCP_PATH) + # …then project (overrides on name conflicts). 
+ if workspace_path is not None: + reg._merge_from(workspace_path / PROJECT_MCP_REL) + return reg + + def _merge_from(self, path: Path) -> None: + if not path.exists(): + return + try: + data = json.loads(path.read_text(encoding="utf-8")) + except Exception as e: + logger.warning("could not read %s: %s", path, e) + return + servers = data.get("servers", []) if isinstance(data, dict) else data + if not isinstance(servers, list): + return + for entry in servers: + if not isinstance(entry, dict) or not entry.get("name"): + continue + toggles = MCPServerToggles.from_dict(entry) + self.by_server[toggles.name] = toggles + + +# ---------------------------------------------------------------------- +# Output validator (defends against context poisoning via tool replies) +# ---------------------------------------------------------------------- + +@dataclass +class ToolOutputCheck: + """Result of a tool-output sanity check.""" + + ok: bool + reason: Optional[str] = None + sanitised: Optional[str] = None + + +def validate_tool_output( + raw: str, + *, + max_bytes: int = 256_000, + forbid_control_chars: bool = True, +) -> ToolOutputCheck: + """Validate the text a tool wants to inject into context history. + + The check is conservative: oversize outputs are truncated rather + than rejected (truncation is recorded via ``sanitised``), but + obviously contaminated payloads (NUL bytes, bell, etc.) are flagged + so the caller can ask the user instead of poisoning the prompt. 
+ """ + if raw is None: + return ToolOutputCheck(ok=True, sanitised="") + text = str(raw) + if forbid_control_chars: + bad = [c for c in text if ord(c) < 0x09 or (0x0B <= ord(c) <= 0x1F and c not in "\r")] + if bad: + return ToolOutputCheck(ok=False, reason=f"control characters ({len(bad)})") + if len(text.encode("utf-8", errors="replace")) > max_bytes: + return ToolOutputCheck( + ok=True, + reason="truncated", + sanitised=text[: max_bytes // 2] + "\n…\n[truncated]\n", + ) + return ToolOutputCheck(ok=True) + + +def _glob_in_set(name: str, patterns: Iterable[str]) -> bool: + return any(fnmatch.fnmatchcase(name, p) for p in patterns) diff --git a/gitpilot/mcp_tools_bridge.py b/gitpilot/mcp_tools_bridge.py index 864e768..24d8df5 100644 --- a/gitpilot/mcp_tools_bridge.py +++ b/gitpilot/mcp_tools_bridge.py @@ -253,11 +253,21 @@ def build_mcp_agent_tools( store: MCPStore | None = None, include_mutation: bool = False, max_tools: int | None = None, + policy: Any = None, ) -> list[Any]: """Build the live list of CrewAI tools backed by enabled MCP tools. Returns an empty list if MCP is disabled, no servers are enabled, or CrewAI is not importable. Never raises. + + Batch P2-B β€” accepts an optional ``policy`` (a + :class:`gitpilot.tool_groups.ToolPolicy`). When the + ``lazy_tool_defs`` flag is on and ``policy`` is supplied, the + descriptors are filtered through + :func:`gitpilot.tool_def_pruner.prune_descriptors` *before* they + enter the LLM tool definitions; smaller tool list β†’ smaller + prompt. When ``policy`` is ``None`` or the flag is off, behaviour + is identical to the legacy code path. """ s = store or MCPStore() snap = s.load() @@ -273,6 +283,16 @@ def build_mcp_agent_tools( ) descriptors = descriptors[:cap] + if policy is not None: + # Lazy import to keep this module decoupled from the pruner. 
+ from .tool_def_pruner import prune_descriptors as _prune + descriptors, report = _prune(descriptors, policy=policy) + if report.dropped: + logger.info( + "mcp-bridge: lazy_tool_defs pruned %d/%d descriptor(s) (%s)", + report.dropped, report.dropped + report.kept, report.reason_counts, + ) + try: from crewai.tools import tool as crewai_tool except Exception: diff --git a/gitpilot/mentions.py b/gitpilot/mentions.py new file mode 100644 index 0000000..c119b9b --- /dev/null +++ b/gitpilot/mentions.py @@ -0,0 +1,232 @@ +# gitpilot/mentions.py +"""@-mention parser for chat input β€” additive context expander. + +Recognised tokens (additive, non-destructive β€” unknown tokens are left as-is):: + + @/abs/path β€” single file (path under workspace) + @./rel/path β€” relative path resolved against workspace + @glob:src/**/*.ts β€” file glob expanded under workspace + @problems β€” current diagnostics (read from .gitpilot/problems.json + if present, otherwise empty) + @commit: β€” `git show ` summary + @diff: β€” `git diff ` summary + @selection β€” selection sent from the editor (falls back to + the GITPILOT_SELECTION env var) + @pr: β€” placeholder block; resolved by API layer + +The parser is intentionally pure-Python and side-effect-free except for +shelling out to git when a commit/diff mention is encountered. All output +is size-capped so a noisy mention can never blow the prompt budget. +""" +from __future__ import annotations + +import json +import logging +import os +import re +import subprocess +from dataclasses import dataclass, field +from pathlib import Path +from typing import List, Optional + +logger = logging.getLogger(__name__) + +MAX_FILE_BYTES = 16_000 +MAX_GLOB_FILES = 12 +MAX_DIAGNOSTICS = 50 +MAX_GIT_OUTPUT = 8_000 + +# A mention starts with @ and runs until whitespace OR the next @ that is +# clearly the start of a fresh token (preceded by whitespace). We scan +# greedily on the leading @ then stop at whitespace. +_MENTION_RE = re.compile(r"(? 
str: + """Render expansions as a single markdown block, or '' if none.""" + if not self.expansions: + return "" + parts = ["## Mentions"] + for exp in self.expansions: + head = f"### `@{exp.token}` ({exp.kind})" + if exp.error: + parts.append(f"{head}\n\n_error: {exp.error}_") + else: + parts.append(f"{head}\n\n{exp.body}") + return "\n\n".join(parts) + + +class MentionParser: + """Parse and expand @-mentions in a chat message.""" + + def __init__( + self, + workspace_path: Path, + *, + max_file_bytes: int = MAX_FILE_BYTES, + max_glob_files: int = MAX_GLOB_FILES, + ) -> None: + self.workspace_path = workspace_path.resolve() + self.max_file_bytes = max_file_bytes + self.max_glob_files = max_glob_files + + # ------------------------------------------------------------------ + # Public API + # ------------------------------------------------------------------ + def parse(self, message: str) -> MentionResult: + if not message: + return MentionResult(cleaned_message=message) + + expansions: List[ExpandedMention] = [] + for match in _MENTION_RE.finditer(message): + token = match.group(1) + expansions.append(self._expand_token(token)) + + return MentionResult(cleaned_message=message, expansions=expansions) + + # ------------------------------------------------------------------ + # Token dispatch + # ------------------------------------------------------------------ + def _expand_token(self, token: str) -> ExpandedMention: + try: + if token == "problems": + return self._expand_problems(token) + if token == "selection": + return self._expand_selection(token) + if token.startswith("glob:"): + return self._expand_glob(token, token[5:]) + if token.startswith("commit:"): + return self._expand_commit(token, token[7:]) + if token.startswith("diff:"): + return self._expand_diff(token, token[5:]) + if token.startswith("pr:"): + return ExpandedMention( + token=token, + kind="pr", + body=f"_PR reference `{token[3:]}` will be resolved by the API layer._", + ) + # Path-like: @/..., 
@./..., @../..., @name/path + if token.startswith(("/", "./", "../")) or "/" in token or token.endswith( + (".py", ".ts", ".tsx", ".js", ".md", ".json", ".yaml", ".yml") + ): + return self._expand_file(token, token) + return ExpandedMention(token=token, kind="unknown", body="", error="unrecognised token") + except Exception as exc: # pragma: no cover - defensive + logger.debug("mention %s failed", token, exc_info=True) + return ExpandedMention(token=token, kind="unknown", body="", error=str(exc)) + + # ------------------------------------------------------------------ + # Expanders + # ------------------------------------------------------------------ + def _resolve_under_workspace(self, raw: str) -> Path: + if raw.startswith("/"): + # Allow absolute paths but only if they live under the workspace. + p = Path(raw).resolve() + else: + p = (self.workspace_path / raw.lstrip("./")).resolve() + if not str(p).startswith(str(self.workspace_path)): + raise PermissionError(f"path escapes workspace: {raw}") + return p + + def _expand_file(self, token: str, raw: str) -> ExpandedMention: + path = self._resolve_under_workspace(raw) + if not path.exists() or not path.is_file(): + return ExpandedMention(token=token, kind="file", body="", error="not found") + data = path.read_bytes()[: self.max_file_bytes] + text = data.decode("utf-8", errors="replace") + rel = path.relative_to(self.workspace_path) + body = f"```{_guess_lang(path)} title={rel}\n{text}\n```" + return ExpandedMention(token=token, kind="file", body=body) + + def _expand_glob(self, token: str, pattern: str) -> ExpandedMention: + files = sorted(self.workspace_path.glob(pattern))[: self.max_glob_files] + if not files: + return ExpandedMention(token=token, kind="glob", body="", error="no matches") + rel = [str(p.relative_to(self.workspace_path)) for p in files] + body = "Matched files:\n" + "\n".join(f"- `{r}`" for r in rel) + return ExpandedMention(token=token, kind="glob", body=body) + + def _expand_problems(self, 
token: str) -> ExpandedMention: + path = self.workspace_path / ".gitpilot" / "problems.json" + if not path.exists(): + return ExpandedMention(token=token, kind="problems", body="_no diagnostics file present_") + try: + items = json.loads(path.read_text())[:MAX_DIAGNOSTICS] + except Exception as e: + return ExpandedMention(token=token, kind="problems", body="", error=str(e)) + lines = [] + for it in items: + sev = it.get("severity", "info") + file_ = it.get("file", "?") + line = it.get("line", "?") + msg = it.get("message", "") + lines.append(f"- [{sev}] {file_}:{line} β€” {msg}") + return ExpandedMention(token=token, kind="problems", body="\n".join(lines) or "_no diagnostics_") + + def _expand_selection(self, token: str) -> ExpandedMention: + text = os.environ.get("GITPILOT_SELECTION", "") + if not text: + return ExpandedMention(token=token, kind="selection", body="", error="no selection") + return ExpandedMention(token=token, kind="selection", body=f"```\n{text[:self.max_file_bytes]}\n```") + + def _expand_commit(self, token: str, sha: str) -> ExpandedMention: + out = self._git("show", "--stat", "--patch", sha) + if out is None: + return ExpandedMention(token=token, kind="commit", body="", error="git failed") + return ExpandedMention(token=token, kind="commit", body=f"```diff\n{out[:MAX_GIT_OUTPUT]}\n```") + + def _expand_diff(self, token: str, rng: str) -> ExpandedMention: + out = self._git("diff", "--stat", "--patch", rng) + if out is None: + return ExpandedMention(token=token, kind="diff", body="", error="git failed") + return ExpandedMention(token=token, kind="diff", body=f"```diff\n{out[:MAX_GIT_OUTPUT]}\n```") + + def _git(self, *args: str) -> Optional[str]: + try: + proc = subprocess.run( + ["git", *args], + cwd=str(self.workspace_path), + capture_output=True, + text=True, + timeout=15, + check=False, + ) + if proc.returncode != 0: + return None + return proc.stdout + except Exception: + return None + + +_LANG_BY_EXT = { + ".py": "python", ".ts": "ts", 
".tsx": "tsx", ".js": "js", ".jsx": "jsx", + ".rs": "rust", ".go": "go", ".java": "java", ".rb": "ruby", + ".md": "md", ".json": "json", ".yaml": "yaml", ".yml": "yaml", + ".toml": "toml", ".sql": "sql", ".sh": "bash", +} + + +def _guess_lang(path: Path) -> str: + return _LANG_BY_EXT.get(path.suffix.lower(), "") + + +def expand(message: str, workspace_path: Path) -> MentionResult: + """Module-level convenience wrapper.""" + return MentionParser(workspace_path).parse(message) diff --git a/gitpilot/modes.py b/gitpilot/modes.py new file mode 100644 index 0000000..7328dc1 --- /dev/null +++ b/gitpilot/modes.py @@ -0,0 +1,493 @@ +# gitpilot/modes.py +"""Custom modes β€” declarative YAML personas with bound tool policies. + +A mode is a YAML record describing GitPilot's behaviour for a session. +Schema is intentionally minimal so a developer can add a new mode (and +attach new MCP servers to it) in a few lines. + +Files searched, in this order:: + + ~/.gitpilot/modes.yaml β€” user-global modes + .gitpilot/modes.yaml β€” project modes (project wins on slug clash) + +Example:: + + customModes: + - slug: db-pilot + name: "DB Pilot" + description: "Natural-language queries against staging Postgres" + roleDefinition: | + You are a senior DBA. Always EXPLAIN before mutating. + whenToUse: | + User asks about schema, queries, or migrations. + groups: + - read + - mcp: + allow: ["postgres.query", "postgres.explain"] + alwaysAllow: ["postgres.explain"] + - edit: + fileRegex: "^migrations/.*\\.sql$" + customInstructions: | + Refuse DROP / TRUNCATE without explicit confirmation. + mcpServers: + postgres: + command: uvx + args: [mcp-postgres-server] + env: { PG_URL: "${STAGING_PG_URL}" } + alwaysAllow: [postgres.explain] + +Nothing in :mod:`gitpilot.modes` mutates the legacy code path β€” callers +opt in by instantiating :class:`ModeRegistry` and asking for the +:class:`Mode` they want to activate. 
+""" +from __future__ import annotations + +import json +import logging +import os +import re +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple + +from .tool_groups import ToolPolicy + +logger = logging.getLogger(__name__) + +USER_MODES_FILE = Path.home() / ".gitpilot" / "modes.yaml" +PROJECT_MODES_REL = Path(".gitpilot") / "modes.yaml" + + +# ---------------------------------------------------------------------- +# Data +# ---------------------------------------------------------------------- + +@dataclass +class ModeMCPServer: + """An MCP server declared inline by a mode.""" + + name: str + command: Optional[str] = None + args: List[str] = field(default_factory=list) + env: Dict[str, str] = field(default_factory=dict) + url: Optional[str] = None + http_url: Optional[str] = None + headers: Dict[str, str] = field(default_factory=dict) + always_allow: List[str] = field(default_factory=list) + enabled_tools: List[str] = field(default_factory=list) + + def to_mcp_client_dict(self) -> Dict[str, Any]: + """Render as the dict shape :class:`MCPServerConfig` accepts.""" + transport = "stdio" + if self.http_url: + transport = "http" + elif self.url: + transport = "sse" + return { + "name": self.name, + "transport": transport, + "command": self.command, + "args": self.args, + "env": self.env, + "url": self.url or self.http_url, + "headers": self.headers, + } + + +@dataclass +class Mode: + """A declarative GitPilot mode.""" + + slug: str + name: str + description: str = "" + role_definition: str = "" + when_to_use: str = "" + custom_instructions: str = "" + groups: List[Any] = field(default_factory=list) + mcp_servers: Dict[str, ModeMCPServer] = field(default_factory=dict) + source: str = "" # "user" | "project" + + def tool_policy(self) -> ToolPolicy: + return ToolPolicy.from_mode_groups(self.groups) + + def system_prompt_block(self) -> str: + parts: List[str] = [] + if self.role_definition: + 
parts.append(f"## Role\n{self.role_definition.strip()}") + if self.when_to_use: + parts.append(f"## When to use this mode\n{self.when_to_use.strip()}") + if self.custom_instructions: + parts.append(f"## Mode instructions\n{self.custom_instructions.strip()}") + return "\n\n".join(parts) + + +# ---------------------------------------------------------------------- +# Registry / loader +# ---------------------------------------------------------------------- + +class ModeRegistry: + """Discover modes from user + project YAML files.""" + + def __init__(self) -> None: + self._modes: Dict[str, Mode] = {} + + # ----- public --------------------------------------------------- + def load(self, workspace_path: Optional[Path] = None) -> int: + count = 0 + count += self._load_file(USER_MODES_FILE, source="user") + if workspace_path is not None: + count += self._load_file(workspace_path / PROJECT_MODES_REL, source="project") + return count + + def register(self, mode: Mode) -> None: + self._modes[mode.slug] = mode + + def get(self, slug: str) -> Optional[Mode]: + return self._modes.get(slug) + + def all(self) -> List[Mode]: + return list(self._modes.values()) + + def listing(self) -> List[Dict[str, str]]: + return [ + { + "slug": m.slug, + "name": m.name, + "description": m.description, + "source": m.source, + } + for m in self._modes.values() + ] + + # ----- loading -------------------------------------------------- + def _load_file(self, path: Path, *, source: str) -> int: + if not path.exists(): + return 0 + try: + data = _load_yaml_or_json(path.read_text(encoding="utf-8")) + except Exception as e: + logger.warning("could not parse modes file %s: %s", path, e) + return 0 + modes = data.get("customModes") if isinstance(data, dict) else None + if not isinstance(modes, list): + return 0 + count = 0 + for entry in modes: + if not isinstance(entry, dict): + continue + slug = entry.get("slug") + if not slug: + continue + mode = _build_mode(entry, source=source) + self._modes[slug] 
= mode # project loaded second, wins + count += 1 + return count + + +def _build_mode(entry: Dict[str, Any], *, source: str) -> Mode: + mcp_servers: Dict[str, ModeMCPServer] = {} + raw_servers = entry.get("mcpServers") or {} + if isinstance(raw_servers, dict): + for name, cfg in raw_servers.items(): + if not isinstance(cfg, dict): + continue + mcp_servers[name] = ModeMCPServer( + name=name, + command=cfg.get("command"), + args=list(cfg.get("args", [])), + env={k: _expand_env(v) for k, v in (cfg.get("env") or {}).items()}, + url=cfg.get("url"), + http_url=cfg.get("httpURL") or cfg.get("http_url"), + headers={**(cfg.get("headers") or {})}, + always_allow=list(cfg.get("alwaysAllow", [])), + enabled_tools=list(cfg.get("enabledTools", [])), + ) + return Mode( + slug=str(entry["slug"]), + name=str(entry.get("name", entry["slug"])), + description=str(entry.get("description", "")), + role_definition=str(entry.get("roleDefinition", "")), + when_to_use=str(entry.get("whenToUse", "")), + custom_instructions=str(entry.get("customInstructions", "")), + groups=list(entry.get("groups", [])), + mcp_servers=mcp_servers, + source=source, + ) + + +# ---------------------------------------------------------------------- +# Session lifecycle helper +# ---------------------------------------------------------------------- + +@dataclass +class ActiveModeContext: + """Bundle of artefacts derived from the active mode for a session. 
+ + Returned by :func:`activate_mode` so the caller can: + + * inject ``system_prompt_block`` into the agent system prompt + * pass ``tool_policy`` to the executor + * spin up the MCP servers listed in ``mcp_server_configs`` + (each dict is ready for :class:`gitpilot.mcp_client.MCPServerConfig.from_dict`) + """ + + mode: Mode + system_prompt_block: str + tool_policy: ToolPolicy + mcp_server_configs: List[Dict[str, Any]] + extra_mcp_toggles: List[Tuple[str, List[str], List[str]]] # (server, allow, alwaysAllow) + + +def activate_mode(registry: ModeRegistry, slug: str) -> Optional[ActiveModeContext]: + """Resolve a mode by slug and return the bundle to apply. + + Returns ``None`` for an unknown slug β€” callers should fall back to + the legacy unconfigured behaviour. + """ + mode = registry.get(slug) + if mode is None: + return None + server_configs = [s.to_mcp_client_dict() for s in mode.mcp_servers.values()] + extras = [ + (s.name, list(s.enabled_tools), list(s.always_allow)) + for s in mode.mcp_servers.values() + ] + return ActiveModeContext( + mode=mode, + system_prompt_block=mode.system_prompt_block(), + tool_policy=mode.tool_policy(), + mcp_server_configs=server_configs, + extra_mcp_toggles=extras, + ) + + +# ---------------------------------------------------------------------- +# Minimal YAML loader (no PyYAML dependency) +# ---------------------------------------------------------------------- + +def _expand_env(value: Any) -> str: + if isinstance(value, str): + return os.path.expandvars(value) + return str(value) + + +def _load_yaml_or_json(text: str) -> Dict[str, Any]: + """Parse YAML or JSON text. Prefers ``yaml`` when installed. + + Falls back to ``json`` for ``.yaml`` files that happen to be JSON + and to a tiny in-tree YAML subset otherwise. The subset supports + the shape used by ``modes.yaml``: nested mappings, lists, and + folded/block scalars. 
+ """ + try: + import yaml + + loaded = yaml.safe_load(text) + if isinstance(loaded, dict): + return loaded + return {} + except ImportError: + pass + # Fast path: JSON masquerading as YAML. + stripped = text.strip() + if stripped.startswith("{"): + try: + parsed_json = json.loads(stripped) + if isinstance(parsed_json, dict): + return parsed_json + except Exception: + pass + return _tiny_yaml(text) + + +# --- in-tree minimal YAML parser --------------------------------------- +# Supports: scalars, lists ("- foo"), nested maps via indentation, block +# scalars ("|" and ">-"), and inline ``{a: 1, b: 2}`` / ``[a, b]`` flows. +# Sufficient for ``modes.yaml`` examples shipped with GitPilot. + +_BLOCK_SCALAR_RE = re.compile(r"^(?P[^:#\s][^:]*):\s*(?P