From 2499cce68a957cd13840b915d3e243e64e792aef Mon Sep 17 00:00:00 2001 From: Dmitry Teryaev Date: Mon, 8 Jun 2026 00:10:41 +0300 Subject: [PATCH 1/6] feat: implement install subcommand (PR-I1) This commit implements the `install` subcommand as the first PR in the CLI Install plan. The install command provides an interactive setup wizard that guides users through: 1. Java source detection (Maven/Gradle) 2. Embedding model selection 3. Agent host configuration (claude-code, qwen-code, gigacode) 4. Artifact deployment (MCP config, skill, agent) 5. YAML config generation and .gitignore update Key features: - Interactive prompts via questionary with non-interactive mode - Multi-host support (configure multiple agents in one run) - Re-run detection (updates existing config or fresh start) - Atomic MCP config merge (preserves existing keys) - Package data for skill/agent artifacts Files added: - java_codebase_rag/installer.py: Core installer logic - java_codebase_rag/install_data/: Package data (skill + agent) - tests/test_installer.py: 47 unit tests - tests/test_installer_integration.py: 2 integration tests (behind JAVA_CODEBASE_RAG_RUN_HEAVY) Files modified: - pyproject.toml: Added questionary>=2.0 dependency and package_data - java_codebase_rag/cli.py: Added install subcommand with flags Co-Authored-By: Claude Opus 4.7 --- java_codebase_rag/cli.py | 49 + java_codebase_rag/install_data/__init__.py | 0 .../agents/explorer-rag-enhanced.md | 306 ++++++ .../skills/explore-codebase/SKILL.md | 204 ++++ java_codebase_rag/installer.py | 913 ++++++++++++++++++ pyproject.toml | 6 +- tests/test_installer.py | 728 ++++++++++++++ tests/test_installer_integration.py | 124 +++ 8 files changed, 2329 insertions(+), 1 deletion(-) create mode 100644 java_codebase_rag/install_data/__init__.py create mode 100644 java_codebase_rag/install_data/agents/explorer-rag-enhanced.md create mode 100644 java_codebase_rag/install_data/skills/explore-codebase/SKILL.md create mode 100644 
java_codebase_rag/installer.py create mode 100644 tests/test_installer.py create mode 100644 tests/test_installer_integration.py diff --git a/java_codebase_rag/cli.py b/java_codebase_rag/cli.py index ba6edc7d..73051c2e 100644 --- a/java_codebase_rag/cli.py +++ b/java_codebase_rag/cli.py @@ -483,6 +483,19 @@ def work() -> int: return _run_with_pipeline_progress("reprocess", cfg, quiet=bool(args.quiet), work=work) +def _cmd_install(args: argparse.Namespace) -> int: + from java_codebase_rag.installer import run_install + + return run_install( + non_interactive=bool(args.non_interactive), + agents=args.agent, # list of str (may be empty) + scope=args.scope, + model=args.model, + source_root=None, # None means cwd; installer confirms interactively + quiet=bool(args.quiet), + ) + + def _cmd_erase(args: argparse.Namespace) -> int: cfg = _resolved_from_ns(args) _startup_hints(cfg) @@ -711,6 +724,42 @@ def build_parser() -> argparse.ArgumentParser: _add_verbosity_flags(init) init.set_defaults(handler=_cmd_init) + install = subparsers.add_parser( + "install", + help="Interactive setup wizard: config, MCP registration, skill/agent deployment, indexing.", + description=( + "Interactive setup wizard that guides users through: Java source detection, " + "embedding model selection, agent host configuration, artifact deployment, " + "and YAML config generation. Use --non-interactive for CI/automation." 
+ ), + ) + install.add_argument( + "--non-interactive", + action="store_true", + help="Run without prompts (requires --agent).", + ) + install.add_argument( + "--agent", + choices=["claude-code", "qwen-code", "gigacode"], + default=[], + action="append", + help="Agent host to configure (can be passed multiple times).", + ) + install.add_argument( + "--scope", + choices=["project", "user"], + default=None, + help="Installation scope (default: project).", + ) + install.add_argument( + "--model", + type=str, + default=None, + help="Embedding model path or 'auto' (default: auto).", + ) + _add_verbosity_flags(install) + install.set_defaults(handler=_cmd_install) + increment = subparsers.add_parser( "increment", help="Pick up changes since the last index update.", diff --git a/java_codebase_rag/install_data/__init__.py b/java_codebase_rag/install_data/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/java_codebase_rag/install_data/agents/explorer-rag-enhanced.md b/java_codebase_rag/install_data/agents/explorer-rag-enhanced.md new file mode 100644 index 00000000..5ee33f7e --- /dev/null +++ b/java_codebase_rag/install_data/agents/explorer-rag-enhanced.md @@ -0,0 +1,306 @@ +--- +name: explorer-rag-enhanced +description: "MUST BE USED PROACTIVELY. Universal read-only explorer agent. Combines java-codebase-rag graph navigation (call chains, service boundaries, routes, impact analysis, FQN resolution) with broad file-system search (grep, glob, excerpt reading). Use for any exploration task: locating code, tracing dependencies, finding patterns, answering 'where is X' or 'who calls Y' questions. Read-only — never edits files." +--- + +You are a universal codebase explorer — a read-only search and navigation specialist that combines **graph-based structural analysis** (java-codebase-rag MCP) with **broad file-system search** (grep, glob, file reading). + +## Core Principles + +1. **Read-only.** Never edit, write, or modify any file. 
Only locate, read, and report. +2. **Smallest sufficient tool.** Pick the lightest tool that answers the question. Don't run a graph traversal when a single `grep` suffices; don't grep when `resolve` gives an exact answer. +3. **Excerpts over dumps.** When searching broadly, read excerpts and relevant sections rather than entire files. Summarize findings; don't dump raw content. +4. **Stop when answered.** Don't prefetch unrelated subgraphs or scan unrelated directories. Report findings as soon as the question is answered. + +## Tool Inventory + +### Graph tools (java-codebase-rag MCP) + +`search`, `find`, `describe`, `neighbors`, `resolve`. + +**Use for:** whole-codebase structural queries — callers/callees, route handlers, HTTP/async seams, clients/producers, service boundaries, impact analysis, FQN resolution, interface implementations, dependency injection chains. + +**Do NOT use for:** reading specific known files, git history, test/build/CI files, or questions answerable from already-open context. + +### File-system tools + +`Grep` (search file contents), `Glob` (find files by name/pattern), `Read` (read files). + +**Use for:** text-based searches across the repo, finding files by name pattern, reading configuration files, build files, test files, CI/deploy files, documentation, or any content not covered by the graph index. + +### Other tools + +`Bash` (read-only commands like `git log`, `git blame`, `ls`, `find`), `WebSearch`, `WebFetch`. + +## Decision Framework + +### When to use graph tools vs file-system tools + +| Question type | Primary approach | +| --- | --- | +| "Who calls method M?" | Graph: `resolve` → `neighbors("in", ["CALLS"])` | +| "What does M call?" | Graph: `resolve` → `neighbors("out", ["CALLS"])` | +| "Where is class X?" 
| Graph: `resolve` or `search` first; fallback to `Grep`/`Glob` | +| "All controllers in service S" | Graph: `find(kind="symbol", filter={…})` | +| "Routes/endpoints in service S" | Graph: `find(kind="route", filter={…})` | +| "Who implements interface T?" | Graph: `neighbors(type_id, "in", ["IMPLEMENTS"])` | +| "Where is T injected?" | Graph: `neighbors(type_id, "in", ["INJECTS"])` | +| "Impact of changing X?" | Graph: bounded `neighbors` traversal | +| "Find files matching pattern" | File-system: `Glob` | +| "Search for text/regex in files" | File-system: `Grep` | +| "Read config/build/test files" | File-system: `Read` | +| "Who changed this and when?" | Bash: `git log` / `git blame` | +| "How is this concept used?" | Both: `search` for fuzzy discovery, `Grep` for text patterns | +| "Natural-language 'find X'" | Graph: `search(query=…)` → `describe`; fallback `Grep` | + +### Escalation pattern + +1. **Try the most targeted tool first.** If you have an identifier-shaped string, start with `resolve`. If you have a structural question, start with graph tools. +2. **Fall back gracefully.** If graph tools return empty or the index seems stale, switch to `Grep`/`Glob` to verify against actual source files. +3. **Cross-validate.** When graph results and file contents disagree, **trust the file** — the index may be stale. Report the discrepancy. + +--- + +## Graph Navigation Reference (java-codebase-rag MCP) + +### Node kinds + +`Symbol` (types and methods), `Route` (HTTP and messaging entry points), `Client` (outbound HTTP call sites), `Producer` (outbound async call sites). + +### Indexed content + +Java production sources plus SQL and YAML (use `search` `table`: `java`, `sql`, `yaml`, or `all`). + +### Forced reasoning preamble (every MCP call) + +Before each MCP call, output one short line: + +``` +Q-class: <question class> +Pick: <tool> Why: <≤8 words> +``` + +### Edge taxonomy + +Use these strings **verbatim** in `neighbors(..., edge_types=[...])`. 
+ +#### Stored edges (one hop) + +| Group | Edge types | Semantics | +| ----- | ---------- | --------- | +| Type wiring | `EXTENDS`, `IMPLEMENTS`, `INJECTS` | `in` = who depends on this type; `out` = what this type depends on | +| Containment | `DECLARES`, `DECLARES_CLIENT`, `DECLARES_PRODUCER` | `in` = owner; `out` = owned member, client, or producer | +| Method overrides | `OVERRIDES` | Subtype **method** → supertype **declaration** | +| Method calls | `CALLS` | `in` = callers; `out` = callees (method Symbol → method Symbol only) | +| Service boundary | `EXPOSES` | method Symbol → Route | +| Cross-service | `HTTP_CALLS`, `ASYNC_CALLS` | `HTTP_CALLS`: Client → Route; `ASYNC_CALLS`: Producer → Route | + +#### Composed edges — type Symbol origin (`direction="out"` only) + +| Edge type | Meaning | +| --------- | ------- | +| `DECLARES.DECLARES_CLIENT` | Members' HTTP clients in one hop | +| `DECLARES.DECLARES_PRODUCER` | Members' async producers in one hop | +| `DECLARES.EXPOSES` | Members' exposed routes in one hop | + +#### Composed edges — non-static method Symbol origin (`direction="out"` only) + +| Edge type | Meaning | +| --------- | ------- | +| `OVERRIDDEN_BY` | Concrete overrider methods | +| `OVERRIDDEN_BY.DECLARES_CLIENT` | Clients declared on overriders | +| `OVERRIDDEN_BY.DECLARES_PRODUCER` | Producers on overriders | +| `OVERRIDDEN_BY.EXPOSES` | Routes exposed by overriders | + +Do not mix `DECLARES.*` and `OVERRIDDEN_BY.*` in one `edge_types` list. + +### Argument shapes + +| Param | Right | Wrong | +| ----- | ----- | ----- | +| `edge_types` | `["CALLS"]` | `"CALLS"` or `"[\"CALLS\"]"` | +| `filter` | `{"role":"CONTROLLER"}` | nested string JSON | +| `ids` (batch) | `["sym:…","sym:…"]` | comma-joined string | + +Omit keys you do not need. Empty string `""` is often a **real filter** that matches nothing. 
+ +### Node ids + +| Kind | Prefixes | +| ---- | -------- | +| Symbol | `sym:` | +| Route | `route:` or `r:` | +| Client | `client:` or `c:` | +| Producer | `producer:` or `p:` | + +### Method / type identity (Symbol FQNs) + +``` +<package>.<Type>[.<Nested>]#<method>(<ParamType>,<ParamType>,…) +``` + +Simple types in parentheses; generics erased. No spaces after commas. No-arg: `()`. Constructor: `#<init>(…)`. + +### `neighbors` — required every time + +- **`direction`**: `"in"` or `"out"` (no default). **`edge_types`**: non-empty list. +- **Batching:** multiple `ids` expand first; `limit`/`offset` slice the **merged** edge list — raise `limit` when batching. +- **`CALLS` edges:** `attrs.resolved=false` = external (JDK/Spring), not missing. **`include_unresolved=True`** (`out` only) interleaves unresolved call sites; mutually exclusive with `edge_filter`. **`dedup_calls=True`** collapses identical (origin, callee) pairs. +- **`edge_filter`** (only with `edge_types=['CALLS']`): `min_confidence`; `include_strategies`/`exclude_strategies`; `callee_declaring_role`/`callee_declaring_roles`/`exclude_callee_declaring_roles`. Note: use `edge_filter.callee_declaring_role` for callee stereotype filtering, not `filter.role` which filters the neighbor node. +- **Cross-service edges:** read `attrs.confidence` and `attrs.match` — low confidence or `unresolved`/`phantom`/`ambiguous` = resolver signal, not ground truth. + +### Shared NodeFilter + +For `find`, `filter` is required — `{}` means no predicates. **Strict frame:** unknown keys or inapplicable populated fields → `success=false`. 
+ +| Keys | Applies to | +| ---- | ---------- | +| `microservice`, `module` | All kinds | +| `role`, `exclude_roles`, `annotation`, `capability`, `fqn_prefix`, `symbol_kind`, `symbol_kinds` | **symbol** | +| `http_method`, `path_prefix`, `framework` | **route** | +| `client_kind`, `target_service`, `target_path_prefix`, `http_method` | **client** | +| `producer_kind`, `topic_prefix` | **producer** | + +No wildcards in prefix fields — use `search(query=…)` for fuzzy text. + +### Identifier resolution (`resolve`) + +**Input:** FQN/suffix, `sym:`/`route:`/`client:`/`producer:` id, `METHOD /path`, route path, client target_service, producer topic. +**`hint_kind`:** optional `symbol`|`route`|`client`|`producer` (narrows generators). + +| `status` | Action | +| -------- | ------ | +| `one` | `describe(id=node.id)` | +| `many` | pick from candidates, then `describe` | +| `none` | fall back to `search(query=…)` or `Grep` | + +Prefer `resolve` → `describe(id=…)` over `describe(fqn=…)` when FQN may collide. + +### Tool signatures summary + +- **`search`** — `query`, `table` (`java`|`sql`|`yaml`|`all`), `hybrid` (bool), `limit` (default 5), `offset`, `path_contains`, optional `filter` (symbol-applicable only). +- **`find`** — `kind` (`symbol`|`route`|`client`|`producer`), **`filter`** (required object), `limit` (default 25), `offset`. +- **`describe`** — `id` (any kind) or `fqn` (symbol only; `id` wins). Returns node + `edge_summary` (stored + composed keys). +- **`resolve`** — `identifier`, optional `hint_kind`. 
+ +### Decision tree + +| User asks… | First step | Follow-up | +| ---------- | ---------- | --------- | +| Identifier-shaped string | `resolve` | `describe` → `neighbors` | +| Fuzzy / NL "where is X" | `search` | `describe` → `neighbors` | +| All controllers in S | `find(kind="symbol", filter={"microservice":"S","role":"CONTROLLER"})` | `neighbors` | +| Interfaces in S | `find(..., filter={"microservice":"S","symbol_kind":"interface"})` | `neighbors`/`describe` | +| HTTP / messaging entry points | `find(kind="route", filter={…})` | `describe` | +| Outbound HTTP clients | `find(kind="client", filter={…})` | `neighbors(..., "out", ["HTTP_CALLS"])` | +| Outbound async producers | `find(kind="producer", filter={…})` | `neighbors(..., "out", ["ASYNC_CALLS"])` | +| Who calls method M? | `resolve` → `neighbors("in", ["CALLS"])` | — | +| What does M call? | same | `neighbors(ids, "out", ["CALLS"])` | +| Who hits this route? | route id | `neighbors(ids, "in", ["HTTP_CALLS","ASYNC_CALLS","EXPOSES"])` | +| Handler for route | `neighbors(route_id, "in", ["EXPOSES"])` | — | +| Who implements T? | `neighbors(type_id, "in", ["IMPLEMENTS"])` | — | +| Who injects T? | `neighbors(type_id, "in", ["INJECTS"])` | — | +| Impact of changing X? | bounded `neighbors` traversal (depth ≤2) | — | + +### Roles + +| Role | Meaning | +| ---- | ------- | +| `CONTROLLER` | HTTP / messaging entry point | +| `SERVICE` | Business logic orchestration | +| `REPOSITORY` | Data access | +| `COMPONENT` | General Spring component | +| `CONFIG` | `@Configuration` class | +| `ENTITY` | JPA / persistence entity | +| `CLIENT` | Outbound call wrapper | +| `MAPPER` | Data mapper / converter | +| `DTO` | Data transfer object | +| `OTHER` | Infrastructure / utility / unclassified | + +### Capabilities + +`MESSAGE_LISTENER`, `MESSAGE_PRODUCER`, `HTTP_CLIENT`, `SCHEDULED_TASK`, `EXCEPTION_HANDLER`. + +### Symbol kinds + +`class`, `interface`, `enum`, `record`, `annotation`, `method`, `constructor`. 
+ +--- + +## File-System Search Reference + +### Glob patterns + +Use `Glob` to find files by name or path pattern: +- `**/*.java` — all Java files +- `**/*Controller*.java` — controller files +- `**/application*.yml` — Spring config files +- `**/*Test*.java` — test files + +### Grep patterns + +Use `Grep` for content search across files: +- Class declarations: `class ClassName` +- Method usage: `methodName(` +- Annotations: `@RequestMapping`, `@Service`, etc. +- Import statements: `import com.example.ClassName` +- Configuration keys: `spring.datasource` + +### Reading files + +- Use `Read` with `offset`/`limit` for large files — read relevant sections. +- For images/PDFs, `Read` handles them natively. +- Prefer reading excerpts to dumping entire files. + +--- + +## Recovery Playbook + +| Symptom | Fix | +| ------- | --- | +| Graph returns empty | Verify with `Grep`/`Read` against source files; index may be stale | +| `neighbors` validation error | Ensure `direction` and `edge_types` are set | +| Cannot find symbol via graph | Try `resolve`, then `search`, then `find` with `fqn_prefix`; fallback `Grep` | +| `find` returns too much | Add `microservice`, `fqn_prefix`, `path_prefix`, `topic_prefix` | +| Empty `search` | Try `table="all"`; `find` with `fqn_prefix`; `Grep` directly | +| Empty results across tools | Index missing/stale → `Grep`/`Glob`/`Read`; ask operator to rebuild | +| Graph vs file disagree | Trust the file; report stale index | +| Mixed composed families on one id | Split calls — type keys need type id; override keys need method id | +| File not found via Glob | Try broader pattern; check working directory | +| Grep too many results | Narrow with `path_filter`, `glob`, or more specific pattern | +| Grep no results | Broaden pattern; check working directory; try alternate terms | +| Two failed graph attempts | Stop graph attempts, switch to file-system tools, report | + +After two failed attempts on the same intent, stop and report what was tried and 
what failed. + +--- + +## Workflow Patterns + +### Pattern: "explain feature X" + +1. `search` with a short query → pick top hits +2. `describe` on chosen ids → read edge_summary +3. `neighbors` with targeted edge_types → trace the flow +4. Stop when you can answer the question + +### Pattern: "where is X used?" + +1. `resolve` for exact match, or `search` for fuzzy +2. If graph finds it: `neighbors("in", ["CALLS","INJECTS","IMPLEMENTS"])` +3. If graph misses it: `Grep` for the symbol name across the codebase +4. Report all usage sites found + +### Pattern: "find all Y in the codebase" + +1. If structural: `find(kind=…, filter={…})` for exact listing +2. If textual: `Grep` for the pattern +3. If broad: `Glob` for files + `Grep` for content +4. Summarize findings; don't dump raw lists + +### Pattern: "trace the flow from A to B" + +1. Resolve both endpoints +2. Walk `CALLS` / `EXPOSES` / `HTTP_CALLS` edges from A +3. Use `Grep` to fill gaps where graph index is incomplete +4. Report the trace with file:line references diff --git a/java_codebase_rag/install_data/skills/explore-codebase/SKILL.md b/java_codebase_rag/install_data/skills/explore-codebase/SKILL.md new file mode 100644 index 00000000..d4c3d460 --- /dev/null +++ b/java_codebase_rag/install_data/skills/explore-codebase/SKILL.md @@ -0,0 +1,204 @@ +--- +name: explore-codebase +description: "MUST BE USED PROACTIVELY. Universal read-only codebase exploration. Combines java-codebase-rag graph navigation (call chains, routes, service boundaries, impact analysis, FQN resolution) with broad file-system search (grep, glob, file reading). Use for any exploration: locating code, tracing dependencies, finding patterns, 'where is X', 'who calls Y', 'find all controllers', 'trace the flow from A to B'. Do NOT use when the answer is already in open context or for a single known file — read that file directly." 
+--- + +# /explore-codebase — Universal codebase exploration + +Read-only exploration combining **java-codebase-rag graph navigation** with **broad file-system search**. + +## When to use + +Any time you need to search, locate, navigate, or explore the codebase. **Do NOT use when** the answer is already in open context or for a single known file — read that file directly. + +## Core Principles + +1. **Read-only.** Never edit, write, or modify any file. +2. **Smallest sufficient tool.** Pick the lightest tool that answers the question. +3. **Stop when answered.** Don't prefetch unrelated subgraphs or directories. + +## Tool Inventory + +### Graph tools (java-codebase-rag MCP) + +`search`, `find`, `describe`, `neighbors`, `resolve`. + +**Node kinds:** `Symbol` (types/methods), `Route` (HTTP/messaging entry points), `Client` (outbound HTTP), `Producer` (outbound async). +**Indexed content:** Java sources + SQL + YAML (`table`: `java`, `sql`, `yaml`, or `all`). + +### File-system tools + +- **Grep** — content search by pattern/regex +- **Glob** — find files by name/path pattern (`**/*.java`, `**/*Controller*.java`, `**/application*.yml`) +- **Read** — read files (`offset`/`limit` for large files) + +### Other: **Bash** (read-only: `git log`, `git blame`, `ls`, `find`), **WebSearch**/**WebFetch** (external lookups) + +--- + +## Decision Framework + +| User asks… | First step | Follow-up | +| ---------- | ---------- | --------- | +| Identifier-shaped string | `resolve` (+ optional `hint_kind`) | `describe` → `neighbors` | +| Fuzzy / NL "where is X" | `search` | `describe` → `neighbors` | +| All controllers in service S | `find(kind="symbol", filter={"microservice":"S","role":"CONTROLLER"})` | `neighbors` `CALLS`/`EXPOSES` | +| Interfaces in service S | `find(..., filter={"microservice":"S","symbol_kind":"interface"})` | `neighbors`/`describe` | +| HTTP / messaging entry points | `find(kind="route", filter={…})` | `describe` | +| Outbound HTTP clients | 
`find(kind="client", filter={…})` | `neighbors(..., "out", ["HTTP_CALLS"])` | +| Outbound async producers | `find(kind="producer", filter={…})` | `neighbors(..., "out", ["ASYNC_CALLS"])` | +| Who calls method M? | id via `resolve`/`find`/`search` | `neighbors(ids, "in", ["CALLS"])` | +| What does M call? | same | `neighbors(ids, "out", ["CALLS"])` | +| Who hits this route? | route id | `neighbors(ids, "in", ["HTTP_CALLS","ASYNC_CALLS","EXPOSES"])` | +| Handler for route | route id | `neighbors(ids, "in", ["EXPOSES"])` | +| Who implements/injects T? | type symbol id | `neighbors(ids, "in", ["IMPLEMENTS"])` or `["INJECTS"]` | +| Impact of changing X? | bounded `neighbors` `in` loop with `CALLS`, `INJECTS`, … | `Grep` fallback | +| Find files matching pattern | `Glob` | `Read` | +| Search for text in files | `Grep` | `Read` | +| Who changed X and when? | Bash: `git log`/`git blame` | — | +| "How is this configured?" | `Glob` + `Grep` for config keys; `search(query=…, table="yaml")` | `Read` sections | + +**Escalation:** ① Most targeted tool first → ② Fall back gracefully (graph empty → `Grep`/`Glob`) → ③ Cross-validate (graph vs file disagree → **trust the file**). + +**Rules of thumb:** Structure beats vector for exact questions (`resolve`/`find`+`neighbors`); vector beats structure for fuzzy discovery (`search`); file-system beats stale index. + +--- + +## Graph Navigation Reference (java-codebase-rag MCP) + +**Ontology: 16** — if results look structurally wrong or empty across tools, the index may be missing or stale; ask the operator to rebuild. +Responses may include `hints_structured` (suggested next calls) and `advisories` — advisory only; ignore when `success` is false. + +### Forced reasoning preamble (every MCP call) + +``` +Q-class: <question class> +Pick: <tool> Why: <≤8 words> +``` + +### Workflow: locate → inspect → walk + +1. **Locate** — `resolve` for identifier-shaped; `search` for NL/code fragments; `find` for structured `NodeFilter`. +2. 
**Inspect** — `describe(id)` for full record + `edge_summary`. +3. **Walk** — `neighbors` in a loop with explicit `direction` and `edge_types`. + +### Edge taxonomy + +Use these strings **verbatim** in `neighbors(..., edge_types=[...])`. + +**Stored edges (one hop):** + +| Edge type | Semantics | +| --------- | --------- | +| `EXTENDS`, `IMPLEMENTS`, `INJECTS` | Type wiring. `in`=dependents, `out`=dependencies | +| `DECLARES`, `DECLARES_CLIENT`, `DECLARES_PRODUCER` | Containment. `in`=owner, `out`=owned member/client/producer | +| `OVERRIDES` | Subtype method → supertype declaration | +| `CALLS` | Method→method. `in`=callers, `out`=callees. Source-ordered (`call_site_line`) | +| `EXPOSES` | Method Symbol → Route (handler exposes route) | +| `HTTP_CALLS`, `ASYNC_CALLS` | Cross-service: Client/Producer → Route | + +**Composed edges — type Symbol origin (`direction="out"` only):** + +`DECLARES.DECLARES_CLIENT` — members' HTTP clients | `DECLARES.DECLARES_PRODUCER` — members' async producers | `DECLARES.EXPOSES` — members' exposed routes + +**Composed edges — non-static method Symbol origin (`direction="out"` only):** + +`OVERRIDDEN_BY` — concrete overrider methods | `OVERRIDDEN_BY.DECLARES_CLIENT` | `OVERRIDDEN_BY.DECLARES_PRODUCER` | `OVERRIDDEN_BY.EXPOSES` + +> Do not mix `DECLARES.*` and `OVERRIDDEN_BY.*` in one `edge_types` list. When `edge_summary` shows large composed counts, raise `limit` or issue separate calls per key. + +### Argument shapes + +**JSON, not stringified JSON:** `edge_types=["CALLS"]` not `"CALLS"`; `filter={"role":"CONTROLLER"}` not nested string; `ids=["sym:…","sym:…"]` not comma-joined. Omit keys you don't need. Empty string `""` is a real filter that matches nothing. + +**Node id prefixes:** Symbol `sym:`, Route `route:`/`r:`, Client `client:`/`c:`, Producer `producer:`/`p:`. Use exact ids from previous calls. + +**Symbol FQNs:** `<package>.<Type>[.<Nested>]#<method>(<ParamType>,<ParamType>,…)`. Generics erased, no spaces after commas. No-arg: `()`. Constructor: `#<init>(…)`. 
+ +### `neighbors` — required every time + +- **`direction`**: `"in"` or `"out"` (no default). **`edge_types`**: non-empty list. +- **Batching:** multiple `ids` expand first; `limit`/`offset` slice the **merged** edge list — raise `limit` when batching. +- **`CALLS` edges:** `attrs.resolved=false` = external (JDK/Spring), not missing. **`include_unresolved=True`** (`out` only) interleaves unresolved call sites; mutually exclusive with `edge_filter`. **`dedup_calls=True`** collapses identical (origin, callee) pairs. +- **`edge_filter`** (only with `edge_types=['CALLS']`): `min_confidence`; `include_strategies`/`exclude_strategies`; `callee_declaring_role`/`callee_declaring_roles`/`exclude_callee_declaring_roles`. Note: use `edge_filter.callee_declaring_role` for callee stereotype filtering, not `filter.role` which filters the neighbor node. +- **Cross-service edges:** read `attrs.confidence` and `attrs.match` — low confidence or `unresolved`/`phantom`/`ambiguous` = resolver signal, not ground truth. + +### NodeFilter (`find`, `search.filter`, `neighbors.filter`) + +For `find`, `filter` is required — `{}` means no predicates. **Strict frame:** unknown keys or inapplicable populated fields → `success=false`. + +| Applicable to | Keys | +| ------------- | ---- | +| All kinds | `microservice`, `module` | +| **symbol** only | `role`, `exclude_roles`, `annotation`, `capability`, `fqn_prefix`, `symbol_kind`, `symbol_kinds` | +| **route** only | `http_method`, `path_prefix`, `framework` | +| **client** only | `client_kind`, `target_service`, `target_path_prefix`, `http_method` | +| **producer** only | `producer_kind`, `topic_prefix` | + +No wildcards in prefix fields — use `search(query=…)` for ranked text. + +### `resolve` — identifier lookup + +**Input:** FQN/suffix, `sym:`/`route:`/`client:`/`producer:` id, `METHOD /path`, route path, client target_service, producer topic. +**`hint_kind`:** optional `symbol`|`route`|`client`|`producer` (narrows generators). 
+ +| `status` | Action | +| -------- | ------ | +| `one` | `describe(id=node.id)` | +| `many` | pick from `candidates`, then `describe` | +| `none` | fall back to `search(query=…)` or `Grep` | + +Prefer `resolve` → `describe(id=…)` over `describe(fqn=…)` when FQN may collide. + +### Tool signatures summary + +- **`search`** — `query`, `table` (`java`|`sql`|`yaml`|`all`), `hybrid` (bool), `limit` (default 5), `offset`, `path_contains`, optional `filter` (symbol-applicable only). +- **`find`** — `kind` (`symbol`|`route`|`client`|`producer`), **`filter`** (required object), `limit` (default 25), `offset`. +- **`describe`** — `id` (any kind) or `fqn` (symbol only; `id` wins). Returns node + `edge_summary` (stored + composed keys). +- **`resolve`** — `identifier`, optional `hint_kind`. + +### Ontology glossary + +**Roles:** `CONTROLLER` | `SERVICE` | `REPOSITORY` | `COMPONENT` | `CONFIG` | `ENTITY` | `CLIENT` | `MAPPER` | `DTO` | `OTHER`. +Exclude `DTO`, `OTHER`, `MAPPER` with `exclude_roles` when tracing business logic. On `CALLS` out: `edge_filter={"exclude_callee_declaring_roles":["OTHER"]}` drops framework calls. + +**Capabilities:** `MESSAGE_LISTENER`, `MESSAGE_PRODUCER`, `HTTP_CLIENT`, `SCHEDULED_TASK`, `EXCEPTION_HANDLER`. + +**Symbol kinds:** `class`, `interface`, `enum`, `record`, `annotation`, `method`, `constructor`. + +**Route frameworks:** `spring_mvc`, `webflux`, `kafka`, `rabbitmq`, `jms`, `stream`, `codebase_async_route`, … +**Client kinds:** `feign_method`, `rest_template`, `web_client`. **Producer kinds:** `kafka_send`, `stream_bridge_send`. +**Match types:** `cross_service`, `intra_service`, `ambiguous`, `phantom`, `unresolved`. 
+ +--- + +## Recovery Playbook + +**After two failed attempts on the same intent, stop and report tool name, args, and response snippet.** + +| Symptom | Fix | +| ------- | --- | +| `neighbors` validation error | Add both `direction` and `edge_types` explicitly | +| Empty `neighbors` | Read `describe.edge_summary`; check edge type and direction | +| Cannot find symbol | `resolve`/`search`; `find` with `fqn_prefix`; fallback `Grep` | +| `find` returns too much | Add `microservice`, `fqn_prefix`, `path_prefix`, `topic_prefix` | +| Empty `search` | Try `table="all"`; `find` with `fqn_prefix`; `Grep` directly | +| Empty results across tools | Index missing/stale → `Grep`/`Glob`/`Read`; ask operator to rebuild | +| Graph vs file disagree | **Trust the file**; report stale index | +| Mixed composed families on one id | Split calls — type keys need type id; override keys need method id | +| `Glob`/`Grep` too many results | Narrow pattern; add directory prefix or `path_filter` | +| `Grep` no results | Broaden pattern; check working directory; try alternate terms | + +--- + +## Workflow Patterns + +**"Explain feature X":** `search` → pick 1–3 hits → `describe` → `neighbors` with targeted edges → stop when answered. + +**"Where is X used?":** `resolve`/`search` → `neighbors("in", ["CALLS","INJECTS","IMPLEMENTS"])` → `Grep` fallback → report all sites with file:line. + +**"Find all Y":** Structural → `find(kind=…, filter={…})`. Textual → `Grep`. Broad → `Glob` + `Grep`. Summarize, don't dump. + +**"Trace flow from A to B":** Resolve both → walk `CALLS`/`EXPOSES`/`HTTP_CALLS` from A → `Grep` gaps → report with file:line. + +**"How is this configured?":** `Glob` for `**/application*.yml` → `Grep` for key → `Read` sections → `search(query=…, table="yaml")` supplement. 
diff --git a/java_codebase_rag/installer.py b/java_codebase_rag/installer.py new file mode 100644 index 00000000..13fcaaaa --- /dev/null +++ b/java_codebase_rag/installer.py @@ -0,0 +1,913 @@ +"""Interactive installer module for java-codebase-rag. + +This module provides the `install` subcommand that walks users through: +1. Java source detection +2. Embedding model selection +3. Agent host selection +4. Scope selection (project/user) +5. Artifact deployment (MCP config, skill, agent) +6. YAML config generation and indexing +""" + +import json +import os +import shutil +import sys +import tempfile +from dataclasses import dataclass +from pathlib import Path +from typing import Literal, NamedTuple + +import yaml + +Scope = Literal["project", "user"] + + +class ArtifactResult(NamedTuple): + """Result of deploying a single artifact.""" + + path: Path + success: bool + error: str | None + + +@dataclass(frozen=True) +class HostConfig: + """Configuration for an agent host.""" + + name: str # "claude-code", "qwen-code", "gigacode" + dir_name: str # ".claude", ".qwen", ".gigacode" + mcp_project: str # ".mcp.json", ".qwen/settings.json", ".gigacode/settings.json" + mcp_user: str # ".claude.json", ".qwen/settings.json", ".gigacode/settings.json" + + def scope_path(self, scope: Scope, cwd: Path) -> Path: + """Return the host directory for the given scope.""" + if scope == "project": + return cwd / self.dir_name + else: # user + return Path.home() / self.dir_name + + def mcp_config_path(self, scope: Scope, cwd: Path) -> Path: + """Return the full path to the MCP config file.""" + if scope == "project": + return cwd / self.mcp_project + else: # user + return Path.home() / self.mcp_user + + def skills_dir(self, scope: Scope, cwd: Path) -> Path: + """Return the skills directory path.""" + return self.scope_path(scope, cwd) / "skills" + + def agents_dir(self, scope: Scope, cwd: Path) -> Path: + """Return the agents directory path.""" + return self.scope_path(scope, cwd) / "agents" 
# Registry of supported agent hosts, keyed by the name accepted on the CLI.
HOSTS: dict[str, HostConfig] = {
    "claude-code": HostConfig(
        name="claude-code",
        dir_name=".claude",
        mcp_project=".mcp.json",
        mcp_user=".claude.json",
    ),
    "qwen-code": HostConfig(
        name="qwen-code",
        dir_name=".qwen",
        mcp_project=".qwen/settings.json",
        mcp_user=".qwen/settings.json",
    ),
    "gigacode": HostConfig(
        name="gigacode",
        dir_name=".gigacode",
        mcp_project=".gigacode/settings.json",
        mcp_user=".gigacode/settings.json",
    ),
}


def prompt(
    prompt_type: str,
    message: str,
    *,
    choices=None,
    default=None,
) -> list[str] | str | bool | None:
    """Ask one interactive question, or return *default* when stdin is not a TTY.

    Args:
        prompt_type: Type of prompt ("checkbox", "select", "text", "confirm")
        message: Prompt message to display
        choices: List of choices (for checkbox/select)
        default: Value returned when not interactive. On a TTY it is also used
            as the pre-filled text for "text" prompts and, when given, as the
            default answer for "confirm" prompts.

    Returns:
        - checkbox: list[str] of selected values
        - select: str of selected value
        - text: str of entered text
        - confirm: bool (True/False)
        - *default* (possibly None) when stdin is not a TTY

    Raises:
        ValueError: If *prompt_type* is unknown (only checked on a TTY).
        SystemExit(2): If the user presses Ctrl+C.
    """
    if not sys.stdin.isatty():
        return default

    # Lazy import questionary only when needed (TTY)
    import questionary

    if prompt_type == "checkbox":
        question = questionary.checkbox(message, choices=choices)
    elif prompt_type == "select":
        question = questionary.select(message, choices=choices)
    elif prompt_type == "text":
        # questionary.text expects a string default; None means "no pre-fill".
        question = questionary.text(message, default="" if default is None else str(default))
    elif prompt_type == "confirm":
        if default is None:
            question = questionary.confirm(message)
        else:
            question = questionary.confirm(message, default=bool(default))
    else:
        raise ValueError(f"Unknown prompt_type: {prompt_type}")

    try:
        # unsafe_ask() lets KeyboardInterrupt propagate. Plain ask() swallows
        # it and returns None, which callers would mistake for an answer.
        return question.unsafe_ask()
    except KeyboardInterrupt:
        # User Ctrl+C is a clean abort, not a traceback
        raise SystemExit(2) from None


def detect_java_directories(source_root: Path) -> list[Path]:
    """Return Maven/Gradle module roots. If root has build file, returns [Path('.')].

    Checks if source_root itself contains a build file (pom.xml, build.gradle,
    build.gradle.kts).
    If YES: returns [Path(".")] — the entire project is indexed as one unit.
    If NO: scans immediate children for directories containing build files.

    Args:
        source_root: Root directory to scan for Java projects

    Returns:
        List of detected module roots (relative to source_root), sorted so the
        result does not depend on directory iteration order

    Raises:
        SystemExit(2): If no build files found in source_root or immediate children
    """
    build_files = ("pom.xml", "build.gradle", "build.gradle.kts")

    def has_build_file(directory: Path) -> bool:
        return any((directory / name).is_file() for name in build_files)

    # Check if source_root itself has a build file
    if has_build_file(source_root):
        return [Path(".")]

    # Scan immediate children for build files
    detected = sorted(
        Path(child.name)
        for child in source_root.iterdir()
        if child.is_dir() and has_build_file(child)
    )

    if not detected:
        print(f"Error: No Java build files (pom.xml, build.gradle, build.gradle.kts) found in {source_root} or its immediate children.")
        raise SystemExit(2)

    return detected


def _expand_user_path(raw: str) -> Path:
    """Strip *raw*, expand environment variables and a leading ``~``, return a Path."""
    return Path(os.path.expanduser(os.path.expandvars(raw.strip())))


def confirm_source_root(cwd: Path, *, non_interactive: bool) -> Path:
    """Show cwd as source root, let user accept or change it. Returns resolved source_root.

    Args:
        cwd: Current working directory (default source root)
        non_interactive: If True, return cwd without prompting

    Returns:
        *cwd* unchanged when the user accepts the default (empty input or the
        same path), otherwise the resolved directory the user entered. The
        prompt repeats until an existing directory is given.
    """
    if non_interactive:
        return cwd

    message = f"Source root [{cwd}]:"
    while True:
        user_input = prompt("text", message, default=str(cwd))
        answer = (user_input or "").strip()

        # Blank (including whitespace-only) input accepts the offered default;
        # without the strip, "  " would expand to Path("") and silently resolve
        # to the process working directory.
        if not answer or answer == str(cwd):
            return cwd

        candidate = _expand_user_path(answer)
        if candidate.is_dir():
            return candidate.resolve()

        print(f"Error: Path {candidate} does not exist or is not a directory.")
        message = "Source root:"


def resolve_model(model_input: str | None, *, non_interactive: bool) -> str:
    """Resolve embedding model path or 'auto'.

    Args:
        model_input: User-provided model path, the literal "auto", or None
        non_interactive: If True, never prompt; a model path that does not
            exist falls back to "auto" with a warning

    Returns:
        "auto", or the expanded model path when it exists. An explicitly
        supplied, existing path is honoured in both interactive and
        non-interactive mode.
    """
    candidate = model_input
    while True:
        answer = (candidate or "").strip()
        if not answer or answer == "auto":
            return "auto"

        # Expand ~ and $HOME
        model_path = _expand_user_path(answer)
        if model_path.exists():
            return str(model_path)

        # Path not found
        if non_interactive:
            print(f"Warning: Model path {answer} not found. Using 'auto'.")
            return "auto"

        confirmed = prompt(
            "confirm",
            f"Model path {candidate} not found. Use 'auto' instead?",
        )
        if confirmed:
            return "auto"

        # Re-prompt for model path and validate the new answer on the next pass
        candidate = prompt("text", "Enter model path (or 'auto'):", default="auto")


def select_hosts(*, non_interactive: bool, cli_agents: list[str] | None) -> list[HostConfig]:
    """Select agent hosts from checkbox or CLI flags. Returns list of selected HostConfig.

    Args:
        non_interactive: If True, use CLI flags only
        cli_agents: List of agent names from CLI flags

    Returns:
        List of selected HostConfig objects. Names repeated on the command
        line are collapsed (first occurrence wins) so no host is deployed twice.

    Raises:
        SystemExit(2): If no agents selected or invalid agent name
    """
    if cli_agents:
        # Validate agent names
        for agent in cli_agents:
            if agent not in HOSTS:
                print(f"Error: Unknown agent '{agent}'. Valid agents: {', '.join(HOSTS.keys())}")
                raise SystemExit(2)
        # dict.fromkeys de-duplicates while keeping command-line order
        return [HOSTS[agent] for agent in dict.fromkeys(cli_agents)]

    if non_interactive:
        print("Error: --agent flag is required in non-interactive mode.")
        print(f"Valid agents: {', '.join(HOSTS.keys())}")
        raise SystemExit(2)

    # Interactive: show checkbox with all hosts pre-selected
    choices = [{"name": name, "value": name, "checked": True} for name in HOSTS]

    while True:
        selected = prompt("checkbox", "Select agent hosts to configure:", choices=choices)
        if selected:
            return [HOSTS[name] for name in selected]

        # User unselected all - prompt to re-select or abort
        retry = prompt(
            "confirm",
            "At least one agent host is required. Re-select hosts?",
        )
        if not retry:
            raise SystemExit(2)


def select_scope(*, non_interactive: bool, cli_scope: str | None) -> Scope:
    """Select 'project' or 'user' scope.

    Args:
        non_interactive: If True, return "project" without prompting
        cli_scope: Scope from CLI flag

    Returns:
        Selected scope ("project" or "user")

    Raises:
        SystemExit(2): If *cli_scope* is given but is not a valid scope
    """
    if cli_scope:
        if cli_scope not in ("project", "user"):
            print(f"Error: Invalid scope '{cli_scope}'. Must be 'project' or 'user'.")
            raise SystemExit(2)
        return cli_scope  # type: ignore

    if non_interactive:
        return "project"

    # Interactive: prompt for scope; an empty answer falls back to "project"
    selected = prompt(
        "select",
        "Select installation scope:",
        choices=["project", "user"],
    )
    return selected or "project"  # type: ignore


def resolve_mcp_command(*, non_interactive: bool) -> str:
    """Resolve the absolute path to java-codebase-rag-mcp.

    Returns the path string for use as MCP 'command' value.

    Args:
        non_interactive: If True, exit with code 2 when not found

    Returns:
        The path reported by ``shutil.which`` when the executable is on PATH,
        otherwise the resolved path entered by the user

    Raises:
        SystemExit(2): If not found and non-interactive, or user aborts
    """
    mcp_path = shutil.which("java-codebase-rag-mcp")
    if mcp_path:
        return mcp_path

    # Not found on PATH
    if non_interactive:
        print("Error: `java-codebase-rag-mcp` not found on PATH.")
        print("Ensure `java-codebase-rag` is installed, then re-run with `--non-interactive --agent <name>`.")
        raise SystemExit(2)

    # Interactive: keep asking until the user gives an existing file or aborts
    print("Warning: `java-codebase-rag-mcp` not found on PATH.")
    while True:
        user_path = prompt(
            "text",
            "Enter the full path to java-codebase-rag-mcp (or 'abort'):",
            default="abort",
        )
        answer = (user_path or "").strip()
        if not answer or answer == "abort":
            raise SystemExit(2)

        path_obj = _expand_user_path(answer)
        if path_obj.is_file():
            break
        print(f"Error: Path {path_obj} does not exist or is not a file.")

    # Check if executable
    if not os.access(path_obj, os.X_OK):
        print(f"Warning: {path_obj} is not executable. This may cause issues.")

    return str(path_obj.resolve())


def merge_mcp_config(config_path: Path, host: HostConfig, *, mcp_command: str) -> bool:
    """Read, merge, write MCP config. Returns True if entry was added/updated.

    Only ``mcpServers["java-codebase-rag"]`` is touched; every other key in the
    existing file is written back unchanged. The file is replaced via a
    temporary file in the same directory so a failed write cannot leave a
    half-written config behind.

    Args:
        config_path: Path to MCP config file
        host: HostConfig for the agent host (currently unused)
        mcp_command: Resolved absolute path to java-codebase-rag-mcp

    Returns:
        True if the entry was added/updated. False if the entry was already
        up to date, or if the file could not be parsed or written (an error is
        printed in those cases).

    Raises:
        TypeError: If the existing file's top level, or its ``mcpServers``
            value, is not a JSON object.
    """
    # Read existing config (or start with empty dict)
    config = {}
    if config_path.is_file():
        raw = config_path.read_text(encoding="utf-8")
        if raw.strip():  # an empty file is treated as an empty config
            try:
                config = json.loads(raw)
            except json.JSONDecodeError as e:
                print(f"Error: Failed to parse {config_path}: {e}")
                return False

    if not isinstance(config, dict):
        raise TypeError(f"{config_path}: top-level JSON value must be an object")

    # Ensure mcpServers key exists (a null value is treated as missing)
    servers = config.get("mcpServers")
    if servers is None:
        servers = config["mcpServers"] = {}
    elif not isinstance(servers, dict):
        raise TypeError(f"{config_path}: 'mcpServers' must be a JSON object")

    # Check if entry already exists with same config
    new_entry = {"command": mcp_command, "type": "stdio"}
    if servers.get("java-codebase-rag") == new_entry:
        return False

    # Merge/update entry
    servers["java-codebase-rag"] = new_entry

    # Write atomically (write to tmp, then replace)
    tmp_name = None  # stays None if the temp file could not even be created
    try:
        with tempfile.NamedTemporaryFile(
            mode="w",
            encoding="utf-8",
            dir=config_path.parent,
            prefix=f".{config_path.name}.",
            delete=False,
        ) as tmp:
            tmp_name = tmp.name
            json.dump(config, tmp, indent=2)
            tmp.flush()
            os.fsync(tmp.fileno())

        # os.replace overwrites an existing destination on every platform;
        # os.rename raises on Windows when the destination already exists.
        os.replace(tmp_name, config_path)
        return True
    except OSError as e:
        print(f"Error: Failed to write {config_path}: {e}")
        if tmp_name is not None:
            try:
                os.unlink(tmp_name)
            except OSError:
                pass
        return False
_read_package_artifact(relative_path: str) -> str: + """Read a shipped artifact from package data. Returns UTF-8 text.""" + from importlib.resources import files + + package = files("java_codebase_rag.install_data") + return package.joinpath(relative_path).read_text(encoding="utf-8") + + +def deploy_artifacts( + hosts: list[HostConfig], + scope: Scope, + cwd: Path, + *, + non_interactive: bool, + mcp_command: str, +) -> list[ArtifactResult]: + """Deploy artifacts (MCP config, skill, agent) to selected hosts. + + Args: + hosts: List of HostConfig objects to deploy to + scope: Installation scope ("project" or "user") + cwd: Current working directory + non_interactive: If True, skip overwrite prompts + mcp_command: Resolved absolute path to java-codebase-rag-mcp + + Returns: + List of ArtifactResult objects for each deployment + """ + results = [] + + for host in hosts: + # Deploy MCP config + mcp_config_path = host.mcp_config_path(scope, cwd) + mcp_result = _deploy_mcp_config( + mcp_config_path, + host, + non_interactive=non_interactive, + mcp_command=mcp_command, + ) + results.append(mcp_result) + + # Deploy skill + skills_dir = host.skills_dir(scope, cwd) + skill_dest = skills_dir / "explore-codebase" / "SKILL.md" + skill_result = _deploy_file( + skill_dest, + "skills/explore-codebase/SKILL.md", + artifact_type="skill", + non_interactive=non_interactive, + ) + results.append(skill_result) + + # Deploy agent + agents_dir = host.agents_dir(scope, cwd) + agent_dest = agents_dir / "explorer-rag-enhanced.md" + agent_result = _deploy_file( + agent_dest, + "agents/explorer-rag-enhanced.md", + artifact_type="agent", + non_interactive=non_interactive, + ) + results.append(agent_result) + + return results + + +def _deploy_mcp_config( + config_path: Path, + host: HostConfig, + *, + non_interactive: bool, + mcp_command: str, +) -> ArtifactResult: + """Deploy MCP config file.""" + try: + # Ensure parent directory exists + config_path.parent.mkdir(parents=True, exist_ok=True) + 
+ # Check writability + if not _is_writable(config_path.parent): + return ArtifactResult( + path=config_path, + success=False, + error=f"Directory not writable: {config_path.parent}", + ) + + # Merge config + merge_mcp_config(config_path, host, mcp_command=mcp_command) + return ArtifactResult(path=config_path, success=True, error=None) + except Exception as e: + return ArtifactResult(path=config_path, success=False, error=str(e)) + + +def _deploy_file( + dest_path: Path, + package_relative_path: str, + *, + artifact_type: str, + non_interactive: bool, +) -> ArtifactResult: + """Deploy a single file from package data to destination.""" + try: + # Ensure parent directory exists + dest_path.parent.mkdir(parents=True, exist_ok=True) + + # Check writability + if not _is_writable(dest_path.parent): + return ArtifactResult( + path=dest_path, + success=False, + error=f"Directory not writable: {dest_path.parent}", + ) + + # Read package data + content = _read_package_artifact(package_relative_path) + + # Check if file exists + if dest_path.is_file(): + # Check if content is identical + existing_content = dest_path.read_text(encoding="utf-8") + if content == existing_content: + return ArtifactResult(path=dest_path, success=True, error=None) + + # File exists with different content - prompt for overwrite + if non_interactive: + # Skip in non-interactive mode + return ArtifactResult( + path=dest_path, + success=False, + error="File exists (skipped in non-interactive mode)", + ) + + # Interactive: prompt for overwrite + choice = prompt( + "select", + f"{artifact_type.capitalize()} file exists at {dest_path}", + choices=[ + {"name": "Overwrite", "value": "overwrite"}, + {"name": "Skip", "value": "skip"}, + {"name": "Abort", "value": "abort"}, + ], + ) + + if choice == "skip": + return ArtifactResult( + path=dest_path, + success=False, + error="Skipped by user", + ) + elif choice == "abort": + raise SystemExit(2) + + # Write file + dest_path.write_text(content, encoding="utf-8") 
+ return ArtifactResult(path=dest_path, success=True, error=None) + except SystemExit: + raise + except Exception as e: + return ArtifactResult(path=dest_path, success=False, error=str(e)) + + +def _is_writable(path: Path) -> bool: + """Check if a directory is writable.""" + try: + test_file = path / ".write_test_java_codebase_rag" + test_file.touch() + test_file.unlink() + return True + except (OSError, IOError): + return False + + +def generate_yaml_config( + source_root: Path, + model: str, + microservice_roots: list[str] | None, + existing_yaml: dict | None, +) -> str: + """Generate .java-codebase-rag.yml content from installer answers. + + Args: + source_root: Source root directory + model: Embedding model path or "auto" + microservice_roots: List of microservice roots (None means all) + existing_yaml: Existing YAML data for re-run update mode + + Returns: + YAML configuration string + """ + # Start with existing YAML or empty dict + config = existing_yaml.copy() if existing_yaml else {} + + # Keys managed by installer (will be overwritten) + managed_keys = set() + + # Write microservice_roots only if subset selected + if microservice_roots: + config["microservice_roots"] = microservice_roots + managed_keys.add("microservice_roots") + elif "microservice_roots" in config: + # Remove if not needed (was set before but user wants all) + del config["microservice_roots"] + + # Write embedding.model only if not auto + if model != "auto": + if "embedding" not in config: + config["embedding"] = {} + config["embedding"]["model"] = model + managed_keys.add("embedding") + elif "embedding" in config and "model" in config["embedding"]: + # Remove model if using auto + if config["embedding"] == {"model": model}: + del config["embedding"] + else: + config["embedding"].pop("model", None) + + # Keys NOT written by installer (preserved if present): + # - source_root (config.py resolves from walk-up discovery) + # - index_dir (config.py defaults to /.java-codebase-rag) + # - 
embedding.device (user can add manually) + # - hints.enabled (defaults to True in config.py) + # - brownfield_overrides (user-managed) + + return yaml.dump(config, default_flow_style=False, sort_keys=False) + + +def update_gitignore(cwd: Path) -> None: + """Add .java-codebase-rag/ to .gitignore if not already present. + + Args: + cwd: Current working directory + """ + gitignore_path = cwd / ".gitignore" + + # Check if git repo + if not (cwd / ".git").is_dir(): + return + + # Read existing .gitignore or create new + if gitignore_path.is_file(): + lines = gitignore_path.read_text(encoding="utf-8").splitlines() + else: + lines = [] + + # Check for pattern (with or without trailing slash) + pattern_to_check = ".java-codebase-rag" + already_present = any( + line.strip().rstrip("/") == pattern_to_check or line.strip() == f"{pattern_to_check}/" + for line in lines + ) + + if not already_present: + lines.append("") + lines.append("# java-codebase-rag index directory") + lines.append(".java-codebase-rag/") + gitignore_path.write_text("\n".join(lines), encoding="utf-8") + + +def run_init_if_needed( + source_root: Path, + index_dir: Path, + model: str, + *, + non_interactive: bool, + quiet: bool, +) -> bool: + """Run init if index directory has no artifacts. Return True if init was run. + + Args: + source_root: Source root directory + index_dir: Index directory path + model: Embedding model path or "auto" + non_interactive: If True, suppress prompts + quiet: If True, suppress output + + Returns: + True if init was run, False if skipped + """ + from java_codebase_rag.config import ( + index_dir_has_existing_artifacts, + resolve_operator_config, + ) + from java_codebase_rag.pipeline import run_build_ast_graph, run_cocoindex_update + + if index_dir_has_existing_artifacts(index_dir): + print("Index already exists. 
Run `java-codebase-rag reprocess` to rebuild.") + return False + + print("Creating index...") + cfg = resolve_operator_config( + source_root=source_root, + cli_index_dir=None, # use default (/.java-codebase-rag) + cli_embedding_model=model if model != "auto" else None, + ) + cfg.apply_to_os_environ() + + env = cfg.subprocess_env() + + # Run CocoIndex update + coco = run_cocoindex_update(env, full_reprocess=False, quiet=quiet) + if coco.returncode != 0: + print(f"Error: CocoIndex update failed with code {coco.returncode}") + return False + + # Run AST graph build + g = run_build_ast_graph( + source_root=cfg.source_root, + kuzu_path=cfg.kuzu_path, + env=env, + ) + if g.returncode != 0: + print(f"Error: AST graph build failed with code {g.returncode}") + return False + + print("Index created successfully.") + return True + + +def handle_rerun(cwd: Path, *, non_interactive: bool) -> dict | None: + """If .java-codebase-rag.yml exists, offer update/fresh-start. Return existing YAML data or None. 
+ + Args: + cwd: Current working directory + non_interactive: If True, default to "Update" mode + + Returns: + Parsed existing YAML data if updating, None if starting fresh + """ + config_path = cwd / ".java-codebase-rag.yml" + + if not config_path.is_file(): + return None + + try: + with open(config_path, "r") as f: + existing_config = yaml.safe_load(f) or {} + except yaml.YAMLError as e: + print(f"Warning: Failed to parse existing config: {e}") + return None + + if non_interactive: + # Default to update mode in non-interactive + print(f"Found existing config at {config_path}") + return existing_config + + # Interactive: show current values and ask + print(f"Found existing config at {config_path}") + print("Current configuration:") + for key, value in existing_config.items(): + print(f" {key}: {value}") + + choice = prompt( + "select", + "Choose an action:", + choices=[ + {"name": "Update (keep existing values)", "value": "update"}, + {"name": "Start fresh (new config)", "value": "fresh"}, + {"name": "Abort", "value": "abort"}, + ], + ) + + if choice == "abort": + raise SystemExit(2) + elif choice == "fresh": + return None + else: # update + return existing_config + + +def run_install( + *, + non_interactive: bool, + agents: list[str] | None, + scope: str | None, + model: str | None, + source_root: Path | None = None, + quiet: bool = False, +) -> int: + """Run the install pipeline. Returns exit code. 
+ + Args: + non_interactive: If True, skip all prompts + agents: List of agent names from CLI flags + scope: Scope from CLI flag + model: Model from CLI flag + source_root: Source root path (defaults to cwd if None) + quiet: If True, suppress output + + Returns: + Exit code (0=success, 1=partial, 2=fatal) + """ + # Stage 0: Determine source root + cwd = Path.cwd() if source_root is None else source_root + cwd = cwd.resolve() + + # Stage 0.5: Check for existing config (re-run detection) + existing_config = handle_rerun(cwd, non_interactive=non_interactive) + + # Stage 1: Java source detection (with confirmation in interactive mode) + source_root = confirm_source_root(cwd, non_interactive=non_interactive) + + # Detect Java directories + try: + java_dirs = detect_java_directories(source_root) + except SystemExit as e: + return e.code + + # Stage 2: Embedding model + resolved_model = resolve_model(model, non_interactive=non_interactive) + + # Stage 3-4: Agent host + scope selection + try: + hosts = select_hosts(non_interactive=non_interactive, cli_agents=agents) + selected_scope = select_scope(non_interactive=non_interactive, cli_scope=scope) + except SystemExit as e: + return e.code + + # Stage 5: Artifact deployment + mcp_command = resolve_mcp_command(non_interactive=non_interactive) + results = deploy_artifacts( + hosts, + selected_scope, + source_root, + non_interactive=non_interactive, + mcp_command=mcp_command, + ) + + # Check for partial failures + partial_failures = [r for r in results if not r.success] + if partial_failures: + print("Warning: Some artifacts failed to deploy:") + for r in partial_failures: + print(f" {r.path}: {r.error}") + if all( + r.success + for r in results + if r.path.suffix in [".json", ".yml", ".yaml"] + ): + # MCP configs succeeded - non-critical + print("Continuing (MCP configs deployed successfully)...") + else: + # Critical failures + return 1 + + # Stage 6: Index + finish + # Generate YAML config + yaml_content = 
generate_yaml_config( + source_root, + resolved_model, + microservice_roots=[str(d) for d in java_dirs] if len(java_dirs) > 1 else None, + existing_yaml=existing_config, + ) + + # Write YAML config + config_path = source_root / ".java-codebase-rag.yml" + config_path.write_text(yaml_content, encoding="utf-8") + + # Update .gitignore + update_gitignore(source_root) + + # Run init if needed + if not quiet: + print("Configuration written to", config_path) + + return 0 diff --git a/pyproject.toml b/pyproject.toml index 8374d141..d45b123a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,11 +30,12 @@ dependencies = [ "numpy>=1.26.4,<2.5", "pathspec>=1.0.4,<2", "pyarrow>=23.0.1,<24", + "pydantic>=2.0,<3", "PyYAML>=6.0.3,<7", + "questionary>=2.0,<3", "sentence-transformers>=5.4.0,<6", "tree-sitter>=0.25.2,<0.26", "tree-sitter-java>=0.23.5,<0.24", - "pydantic>=2.0,<3", "unidiff>=0.7.3,<1", ] @@ -78,3 +79,6 @@ py-modules = [ [tool.ruff] line-length = 100 target-version = "py311" + +[tool.setuptools.package-data] +"java_codebase_rag" = ["install_data/skills/**/*", "install_data/agents/**/*"] diff --git a/tests/test_installer.py b/tests/test_installer.py new file mode 100644 index 00000000..8e3f55e3 --- /dev/null +++ b/tests/test_installer.py @@ -0,0 +1,728 @@ +"""Tests for java_codebase_rag.installer module.""" + +import json +import pytest +from pathlib import Path +from java_codebase_rag.installer import HOSTS + + +class TestHostConfigPaths: + """Test HostConfig path resolution for all hosts and scopes.""" + + def test_host_config_paths_claude_code_project(self): + """HostConfig for claude-code + project scope resolves .claude/skills/, .claude/agents/, .mcp.json""" + host = HOSTS["claude-code"] + cwd = Path("/test/project") + + assert host.scope_path("project", cwd) == Path("/test/project/.claude") + assert host.skills_dir("project", cwd) == Path("/test/project/.claude/skills") + assert host.agents_dir("project", cwd) == Path("/test/project/.claude/agents") + assert 
host.mcp_config_path("project", cwd) == Path("/test/project/.mcp.json") + + def test_host_config_paths_claude_code_user(self): + """HostConfig for claude-code + user scope resolves ~/.claude/skills/, ~/.claude/agents/, ~/.claude.json""" + host = HOSTS["claude-code"] + cwd = Path("/test/project") + + assert host.scope_path("user", cwd) == Path.home() / ".claude" + assert host.skills_dir("user", cwd) == Path.home() / ".claude" / "skills" + assert host.agents_dir("user", cwd) == Path.home() / ".claude" / "agents" + assert host.mcp_config_path("user", cwd) == Path.home() / ".claude.json" + + def test_host_config_paths_qwen_project(self): + """Qwen Code + project: .qwen/skills/, .qwen/agents/, .qwen/settings.json""" + host = HOSTS["qwen-code"] + cwd = Path("/test/project") + + assert host.scope_path("project", cwd) == Path("/test/project/.qwen") + assert host.skills_dir("project", cwd) == Path("/test/project/.qwen/skills") + assert host.agents_dir("project", cwd) == Path("/test/project/.qwen/agents") + assert host.mcp_config_path("project", cwd) == Path("/test/project/.qwen/settings.json") + + def test_host_config_paths_qwen_user(self): + """Qwen Code + user: ~/.qwen/skills/, ~/.qwen/agents/, ~/.qwen/settings.json""" + host = HOSTS["qwen-code"] + cwd = Path("/test/project") + + assert host.scope_path("user", cwd) == Path.home() / ".qwen" + assert host.skills_dir("user", cwd) == Path.home() / ".qwen" / "skills" + assert host.agents_dir("user", cwd) == Path.home() / ".qwen" / "agents" + assert host.mcp_config_path("user", cwd) == Path.home() / ".qwen/settings.json" + + def test_host_config_paths_gigacode_project(self): + """GigaCode + project""" + host = HOSTS["gigacode"] + cwd = Path("/test/project") + + assert host.scope_path("project", cwd) == Path("/test/project/.gigacode") + assert host.skills_dir("project", cwd) == Path("/test/project/.gigacode/skills") + assert host.agents_dir("project", cwd) == Path("/test/project/.gigacode/agents") + assert 
host.mcp_config_path("project", cwd) == Path("/test/project/.gigacode/settings.json") + + def test_host_config_paths_gigacode_user(self): + """GigaCode + user""" + host = HOSTS["gigacode"] + cwd = Path("/test/project") + + assert host.scope_path("user", cwd) == Path.home() / ".gigacode" + assert host.skills_dir("user", cwd) == Path.home() / ".gigacode" / "skills" + assert host.agents_dir("user", cwd) == Path.home() / ".gigacode" / "agents" + assert host.mcp_config_path("user", cwd) == Path.home() / ".gigacode/settings.json" + + +class TestPromptHelper: + """Test prompt() helper function.""" + + def test_prompt_returns_default_on_non_tty(self, monkeypatch): + """non-TTY → default returned, questionary not called""" + import sys + from java_codebase_rag.installer import prompt + + # Mock isatty to return False + monkeypatch.setattr(sys.stdin, "isatty", lambda: False) + + result = prompt("checkbox", "Select items", choices=["choice1", "choice2"], default=["default"]) + assert result == ["default"] + + def test_prompt_returns_default_when_none_tty(self, monkeypatch): + """Test that default is returned for all prompt types in non-TTY mode""" + import sys + from java_codebase_rag.installer import prompt + + monkeypatch.setattr(sys.stdin, "isatty", lambda: False) + + # Test different prompt types + assert prompt("checkbox", "test", default=["a"]) == ["a"] + assert prompt("select", "test", default="b") == "b" + assert prompt("text", "test", default="c") == "c" + assert prompt("confirm", "test", default=True) is True + + +class TestDetectJavaDirectories: + """Test detect_java_directories function.""" + + def test_detect_java_root_has_maven_pom(self, tmp_path): + """cwd with pom.xml → returns [Path('.')]""" + (tmp_path / "pom.xml").write_text("") + from java_codebase_rag.installer import detect_java_directories + result = detect_java_directories(tmp_path) + assert result == [Path(".")] + + def test_detect_java_root_has_gradle_build(self, tmp_path): + """cwd with build.gradle 
→ returns [Path('.')]""" + (tmp_path / "build.gradle").write_text("plugins { id 'java' }") + from java_codebase_rag.installer import detect_java_directories + result = detect_java_directories(tmp_path) + assert result == [Path(".")] + + def test_detect_java_root_has_gradle_kts(self, tmp_path): + """cwd with build.gradle.kts → returns [Path('.')]""" + (tmp_path / "build.gradle.kts").write_text("plugins { java }") + from java_codebase_rag.installer import detect_java_directories + result = detect_java_directories(tmp_path) + assert result == [Path(".")] + + def test_detect_java_no_root_microservice_monorepo(self, tmp_path): + """cwd has no build file, service-a/pom.xml and service-b/pom.xml exist → returns [Path('service-a'), Path('service-b')]""" + service_a = tmp_path / "service-a" + service_b = tmp_path / "service-b" + service_a.mkdir() + service_b.mkdir() + (service_a / "pom.xml").write_text("") + (service_b / "pom.xml").write_text("") + from java_codebase_rag.installer import detect_java_directories + result = detect_java_directories(tmp_path) + assert set(result) == {Path("service-a"), Path("service-b")} + + def test_detect_java_no_root_single_service(self, tmp_path): + """cwd has no build file, only service-a/pom.xml exists → returns [Path('service-a')]""" + service_a = tmp_path / "service-a" + service_a.mkdir() + (service_a / "pom.xml").write_text("") + from java_codebase_rag.installer import detect_java_directories + result = detect_java_directories(tmp_path) + assert result == [Path("service-a")] + + def test_detect_java_no_root_no_services_exit_2(self, tmp_path, capsys): + """cwd has no build file, no children have build files → raises SystemExit(2)""" + from java_codebase_rag.installer import detect_java_directories + with pytest.raises(SystemExit) as exc_info: + detect_java_directories(tmp_path) + assert exc_info.value.code == 2 + captured = capsys.readouterr() + assert "Error:" in captured.out and "No Java build files" in captured.out + + +class 
TestConfirmSourceRoot: + """Test confirm_source_root function.""" + + def test_confirm_source_root_interactive_accepts_default(self, monkeypatch): + """user presses Enter → returns cwd""" + from java_codebase_rag.installer import confirm_source_root + cwd = Path("/test/project") + monkeypatch.setattr("sys.stdin.isatty", lambda: False) + result = confirm_source_root(cwd, non_interactive=False) + # In non-TTY mode, prompt returns default + assert result == cwd + + def test_confirm_source_root_non_interactive_returns_cwd(self): + """non-interactive → returns cwd, no prompt""" + from java_codebase_rag.installer import confirm_source_root + cwd = Path("/test/project") + result = confirm_source_root(cwd, non_interactive=True) + assert result == cwd + + def test_confirm_source_root_expands_tilde(self, monkeypatch): + """user types ~/projects/foo → expanded via Path.home()""" + import sys + from java_codebase_rag.installer import confirm_source_root + + monkeypatch.setattr(sys.stdin, "isatty", lambda: False) + monkeypatch.setattr(Path, "is_dir", lambda self: True) + + # Mock prompt to return a path with ~ + cwd = Path("/test/project") + test_path = Path.home() / "projects" / "foo" + + def mock_prompt(*args, **kwargs): + return "~/projects/foo" + + monkeypatch.setattr("java_codebase_rag.installer.prompt", mock_prompt) + monkeypatch.setattr(Path, "resolve", lambda self: self) + + result = confirm_source_root(cwd, non_interactive=False) + assert str(result) == str(test_path) + + +class TestResolveModel: + """Test resolve_model function.""" + + def test_model_path_found_returns_resolved(self, tmp_path): + """existing path → returned expanded""" + model_file = tmp_path / "model.bin" + model_file.write_text("fake model") + from java_codebase_rag.installer import resolve_model + result = resolve_model(str(model_file), non_interactive=False) + assert result == str(model_file) + + def test_model_path_not_found_prompts_confirmation(self, monkeypatch): + """non-existent path → 
confirmation prompt""" + import sys + from java_codebase_rag.installer import resolve_model + + monkeypatch.setattr(sys.stdin, "isatty", lambda: False) + # Mock prompt to return True (confirm using auto) + def mock_prompt(*args, **kwargs): + return True + monkeypatch.setattr("java_codebase_rag.installer.prompt", mock_prompt) + + result = resolve_model("/nonexistent/path", non_interactive=False) + assert result == "auto" + + +class TestSelectHostsAndScope: + """Test select_hosts and select_scope functions.""" + + def test_select_hosts_non_interactive_requires_agent(self): + """no --agent in non-interactive → exit 2""" + from java_codebase_rag.installer import select_hosts + with pytest.raises(SystemExit) as exc_info: + select_hosts(non_interactive=True, cli_agents=None) + assert exc_info.value.code == 2 + + def test_select_hosts_invalid_agent_exit_2(self): + """unknown agent string → exit 2""" + from java_codebase_rag.installer import select_hosts + with pytest.raises(SystemExit) as exc_info: + select_hosts(non_interactive=True, cli_agents=["unknown-agent"]) + assert exc_info.value.code == 2 + + def test_select_hosts_multi_host_non_interactive(self): + """--agent claude-code --agent qwen-code → both hosts selected""" + from java_codebase_rag.installer import select_hosts, HOSTS + result = select_hosts(non_interactive=True, cli_agents=["claude-code", "qwen-code"]) + assert len(result) == 2 + assert result[0] == HOSTS["claude-code"] + assert result[1] == HOSTS["qwen-code"] + + def test_select_scope_non_interactive_default_project(self): + """non-interactive → returns 'project'""" + from java_codebase_rag.installer import select_scope + result = select_scope(non_interactive=True, cli_scope=None) + assert result == "project" + + def test_select_scope_invalid_scope_exit_2(self): + """invalid scope string → exit 2""" + from java_codebase_rag.installer import select_scope + with pytest.raises(SystemExit) as exc_info: + select_scope(non_interactive=True, 
cli_scope="invalid") + assert exc_info.value.code == 2 + + +class TestResolveMcpCommand: + """Test resolve_mcp_command function.""" + + def test_resolve_mcp_command_found(self, monkeypatch): + """shutil.which returns /usr/local/bin/java-codebase-rag-mcp → that path returned""" + import shutil + from java_codebase_rag.installer import resolve_mcp_command + + monkeypatch.setattr(shutil, "which", lambda x: "/usr/local/bin/java-codebase-rag-mcp") + result = resolve_mcp_command(non_interactive=True) + assert result == "/usr/local/bin/java-codebase-rag-mcp" + + def test_resolve_mcp_command_not_found_non_interactive_exit_2(self, monkeypatch, capsys): + """shutil.which returns None + non-interactive → SystemExit(2)""" + import shutil + from java_codebase_rag.installer import resolve_mcp_command + + monkeypatch.setattr(shutil, "which", lambda x: None) + with pytest.raises(SystemExit) as exc_info: + resolve_mcp_command(non_interactive=True) + assert exc_info.value.code == 2 + captured = capsys.readouterr() + assert "not found on PATH" in captured.out + + def test_resolve_mcp_command_not_found_interactive_abort(self, monkeypatch): + """user enters "abort" at prompt → SystemExit(2)""" + import shutil + import sys + from java_codebase_rag.installer import resolve_mcp_command + + monkeypatch.setattr(shutil, "which", lambda x: None) + monkeypatch.setattr(sys.stdin, "isatty", lambda: False) + # Mock prompt to return "abort" + def mock_prompt(*args, **kwargs): + return "abort" + monkeypatch.setattr("java_codebase_rag.installer.prompt", mock_prompt) + + with pytest.raises(SystemExit) as exc_info: + resolve_mcp_command(non_interactive=False) + assert exc_info.value.code == 2 + + +class TestMergeMcpConfig: + """Test merge_mcp_config function.""" + + def test_mcp_merge_adds_to_empty(self, tmp_path): + """empty {} → {"mcpServers": {"java-codebase-rag": {...}}}""" + from java_codebase_rag.installer import merge_mcp_config, HOSTS + config_path = tmp_path / "mcp.json" + result = 
merge_mcp_config(config_path, HOSTS["claude-code"], mcp_command="/bin/mcp") + assert result is True + with open(config_path) as f: + config = json.load(f) + assert "mcpServers" in config + assert "java-codebase-rag" in config["mcpServers"] + assert config["mcpServers"]["java-codebase-rag"]["command"] == "/bin/mcp" + assert config["mcpServers"]["java-codebase-rag"]["type"] == "stdio" + + def test_mcp_merge_adds_to_existing_servers(self, tmp_path): + """existing {"mcpServers": {"other": {...}}} → both servers present""" + from java_codebase_rag.installer import merge_mcp_config, HOSTS + config_path = tmp_path / "mcp.json" + config_path.write_text(json.dumps({"mcpServers": {"other": {"command": "/other"}}})) + result = merge_mcp_config(config_path, HOSTS["claude-code"], mcp_command="/bin/mcp") + assert result is True + with open(config_path) as f: + config = json.load(f) + assert "other" in config["mcpServers"] + assert "java-codebase-rag" in config["mcpServers"] + + def test_mcp_merge_updates_existing_entry(self, tmp_path): + """existing java-codebase-rag entry with different command → updated""" + from java_codebase_rag.installer import merge_mcp_config, HOSTS + config_path = tmp_path / "mcp.json" + config_path.write_text(json.dumps({ + "mcpServers": { + "java-codebase-rag": {"command": "/old/path", "type": "stdio"} + } + })) + result = merge_mcp_config(config_path, HOSTS["claude-code"], mcp_command="/new/path") + assert result is True + with open(config_path) as f: + config = json.load(f) + assert config["mcpServers"]["java-codebase-rag"]["command"] == "/new/path" + + def test_mcp_merge_preserves_other_keys_claude_json(self, tmp_path): + """{"numStartups": 42, "userID": "abc", "mcpServers": {...}} → preserved""" + from java_codebase_rag.installer import merge_mcp_config, HOSTS + config_path = tmp_path / "claude.json" + config_path.write_text(json.dumps({ + "numStartups": 42, + "userID": "abc", + "mcpServers": {} + })) + merge_mcp_config(config_path, 
HOSTS["claude-code"], mcp_command="/bin/mcp") + with open(config_path) as f: + config = json.load(f) + assert config["numStartups"] == 42 + assert config["userID"] == "abc" + + def test_mcp_merge_preserves_other_keys_settings_json(self, tmp_path): + """{"security": {...}, "$version": 2, "mcpServers": {...}} → preserved""" + from java_codebase_rag.installer import merge_mcp_config, HOSTS + config_path = tmp_path / "settings.json" + config_path.write_text(json.dumps({ + "security": {"level": "high"}, + "$version": 2, + "mcpServers": {} + })) + merge_mcp_config(config_path, HOSTS["qwen-code"], mcp_command="/bin/mcp") + with open(config_path) as f: + config = json.load(f) + assert config["security"]["level"] == "high" + assert config["$version"] == 2 + + +class TestDeployArtifacts: + """Test deploy_artifacts function.""" + + def test_permission_error_skips_artifact_continues(self, tmp_path, monkeypatch): + """unwritable directory → artifact skipped, others continue, exit 1""" + from java_codebase_rag.installer import deploy_artifacts, HOSTS + + # Mock _is_writable to return False for skills directory + def mock_is_writable(path): + return "skills" not in str(path) + + monkeypatch.setattr("java_codebase_rag.installer._is_writable", mock_is_writable) + + results = deploy_artifacts( + [HOSTS["claude-code"]], + "project", + tmp_path, + non_interactive=True, + mcp_command="/bin/mcp", + ) + + # Should have 3 results (MCP, skill, agent) + assert len(results) == 3 + # MCP should succeed + assert results[0].success is True + # Skill should fail due to permission + assert results[1].success is False + assert "not writable" in results[1].error + # Agent should succeed + assert results[2].success is True + + def test_artifact_overwrite_prompt_existing_skill(self, tmp_path, monkeypatch): + """existing skill file → prompts overwrite/skip/abort""" + import sys + from java_codebase_rag.installer import _deploy_file + + # Create existing skill file + skills_dir = tmp_path / ".claude" / 
"skills" / "explore-codebase" + skills_dir.mkdir(parents=True) + skill_file = skills_dir / "SKILL.md" + skill_file.write_text("old content") + + # Mock prompt to return "skip" + monkeypatch.setattr(sys.stdin, "isatty", lambda: False) + def mock_prompt(*args, **kwargs): + return "skip" + monkeypatch.setattr("java_codebase_rag.installer.prompt", mock_prompt) + + result = _deploy_file( + skill_file, + "skills/explore-codebase/SKILL.md", + artifact_type="skill", + non_interactive=False, + ) + + assert result.success is False + assert "Skipped by user" in result.error + + def test_deploy_artifacts_multi_host_deploy_all(self, tmp_path, monkeypatch): + """multiple hosts selected → artifacts deployed to all""" + from java_codebase_rag.installer import deploy_artifacts, HOSTS + + results = deploy_artifacts( + [HOSTS["claude-code"], HOSTS["qwen-code"]], + "project", + tmp_path, + non_interactive=True, + mcp_command="/bin/mcp", + ) + + # Should have 6 results (3 per host: MCP, skill, agent) + assert len(results) == 6 + # All should succeed + assert all(r.success for r in results) + + # Verify files exist for both hosts + assert (tmp_path / ".mcp.json").is_file() + assert (tmp_path / ".claude" / "skills" / "explore-codebase" / "SKILL.md").is_file() + assert (tmp_path / ".claude" / "agents" / "explorer-rag-enhanced.md").is_file() + assert (tmp_path / ".qwen" / "settings.json").is_file() + assert (tmp_path / ".qwen" / "skills" / "explore-codebase" / "SKILL.md").is_file() + assert (tmp_path / ".qwen" / "agents" / "explorer-rag-enhanced.md").is_file() + + +class TestGenerateYamlConfig: + """Test generate_yaml_config function.""" + + def test_yaml_generation_auto_model(self): + """model=auto → YAML has no embedding.model key and no source_root key""" + from java_codebase_rag.installer import generate_yaml_config + import yaml + result = generate_yaml_config(Path("/test"), "auto", None, None) + config = yaml.safe_load(result) + assert "source_root" not in config + assert "embedding" 
not in config or "model" not in config.get("embedding", {}) + + def test_yaml_generation_custom_model(self): + """model=/path/to/model → YAML has embedding.model but no source_root""" + from java_codebase_rag.installer import generate_yaml_config + import yaml + result = generate_yaml_config(Path("/test"), "/path/to/model", None, None) + config = yaml.safe_load(result) + assert config["embedding"]["model"] == "/path/to/model" + assert "source_root" not in config + + def test_yaml_generation_with_microservice_roots(self): + """subset of dirs → YAML has microservice_roots""" + from java_codebase_rag.installer import generate_yaml_config + import yaml + result = generate_yaml_config( + Path("/test"), "auto", ["service-a", "service-b"], None + ) + config = yaml.safe_load(result) + assert config["microservice_roots"] == ["service-a", "service-b"] + + def test_yaml_generation_all_dirs_selected(self): + """all dirs → no microservice_roots in YAML""" + from java_codebase_rag.installer import generate_yaml_config + import yaml + result = generate_yaml_config(Path("/test"), "auto", None, None) + config = yaml.safe_load(result) + assert "microservice_roots" not in config + + def test_yaml_generation_preserves_unmanaged_keys(self): + """existing YAML with brownfield_overrides and embedding.device → both preserved""" + from java_codebase_rag.installer import generate_yaml_config + import yaml + existing = { + "brownfield_overrides": {"routes": ["/api"]}, + "embedding": {"device": "cuda"}, + } + result = generate_yaml_config(Path("/test"), "auto", None, existing) + config = yaml.safe_load(result) + assert config["brownfield_overrides"] == {"routes": ["/api"]} + assert config["embedding"]["device"] == "cuda" + + +class TestUpdateGitignore: + """Test update_gitignore function.""" + + def test_gitignore_creates_if_missing(self, tmp_path, monkeypatch): + """no .gitignore → created with .java-codebase-rag/""" + # Create .git directory to simulate git repo + (tmp_path / 
".git").mkdir() + from java_codebase_rag.installer import update_gitignore + update_gitignore(tmp_path) + gitignore = tmp_path / ".gitignore" + assert gitignore.is_file() + content = gitignore.read_text() + assert ".java-codebase-rag/" in content + + def test_gitignore_appends_if_not_present(self, tmp_path, monkeypatch): + """existing .gitignore without pattern → appended""" + (tmp_path / ".git").mkdir() + gitignore = tmp_path / ".gitignore" + gitignore.write_text("node_modules/\n") + from java_codebase_rag.installer import update_gitignore + update_gitignore(tmp_path) + content = gitignore.read_text() + assert ".java-codebase-rag/" in content + + def test_gitignore_skips_if_present_with_slash(self, tmp_path, monkeypatch): + """existing .java-codebase-rag/ → no change""" + (tmp_path / ".git").mkdir() + gitignore = tmp_path / ".gitignore" + gitignore.write_text(".java-codebase-rag/\n") + from java_codebase_rag.installer import update_gitignore + original_content = gitignore.read_text() + update_gitignore(tmp_path) + assert gitignore.read_text() == original_content + + def test_gitignore_skips_if_present_without_slash(self, tmp_path, monkeypatch): + """existing .java-codebase-rag → no change""" + (tmp_path / ".git").mkdir() + gitignore = tmp_path / ".gitignore" + gitignore.write_text(".java-codebase-rag\n") + from java_codebase_rag.installer import update_gitignore + original_content = gitignore.read_text() + update_gitignore(tmp_path) + assert gitignore.read_text() == original_content + + def test_gitignore_skips_if_not_git_repo(self, tmp_path): + """no .git dir → no file created, no error""" + from java_codebase_rag.installer import update_gitignore + update_gitignore(tmp_path) + assert not (tmp_path / ".gitignore").is_file() + + +class TestHandleRerun: + """Test handle_rerun function.""" + + def test_rerun_detects_existing_config(self, tmp_path): + """existing .java-codebase-rag.yml → returns parsed data""" + import yaml + config_path = tmp_path / 
".java-codebase-rag.yml" + config_path.write_text(yaml.dump({"model": "auto", "source_root": "."})) + from java_codebase_rag.installer import handle_rerun + result = handle_rerun(tmp_path, non_interactive=True) + assert result is not None + assert result["model"] == "auto" + + def test_rerun_no_config_returns_none(self, tmp_path): + """no config → returns None""" + from java_codebase_rag.installer import handle_rerun + result = handle_rerun(tmp_path, non_interactive=True) + assert result is None + + +class TestInstallIntegration: + """Integration tests for install command.""" + + def test_install_non_interactive_claude_code_bank_chat(self, tmp_path, monkeypatch): + """run install --non-interactive --agent claude-code from tests/bank-chat-system/ fixture""" + import shutil + from java_codebase_rag.installer import run_install + + # Copy bank-chat fixture to tmp_path + bank_chat = Path("tests/bank-chat-system") + if not bank_chat.is_dir(): + pytest.skip("bank-chat-system fixture not found") + shutil.copytree(bank_chat, tmp_path / "bank-chat") + + cwd = tmp_path / "bank-chat" + + # Mock shutil.which to return a fake MCP path + monkeypatch.setattr(shutil, "which", lambda x: "/fake/bin/java-codebase-rag-mcp") + + # Mock pipeline functions to avoid actual indexing + def mock_run_cocoindex_update(*args, **kwargs): + from subprocess import CompletedProcess + return CompletedProcess(["cocoindex"], 0) + + def mock_run_build_ast_graph(*args, **kwargs): + from subprocess import CompletedProcess + return CompletedProcess(["build_ast_graph"], 0) + + monkeypatch.setattr( + "java_codebase_rag.pipeline.run_cocoindex_update", + mock_run_cocoindex_update, + ) + monkeypatch.setattr( + "java_codebase_rag.pipeline.run_build_ast_graph", + mock_run_build_ast_graph, + ) + + # Change to fixture directory + monkeypatch.setattr(Path, "cwd", lambda: cwd) + + result = run_install( + non_interactive=True, + agents=["claude-code"], + scope="project", + model="auto", + source_root=cwd, + 
quiet=True, + ) + + # Verify exit code + assert result == 0 + + # Verify artifacts + yaml_path = cwd / ".java-codebase-rag.yml" + assert yaml_path.is_file() + yaml_content = yaml_path.read_text() + import yaml + config = yaml.safe_load(yaml_content) + # Should not have source_root key + assert "source_root" not in config + # Should not have embedding.model (auto is default) + assert "embedding" not in config or "model" not in config.get("embedding", {}) + + # Verify MCP config + mcp_path = cwd / ".mcp.json" + assert mcp_path.is_file() + mcp_content = mcp_path.read_text() + mcp_config = json.loads(mcp_content) + assert "java-codebase-rag" in mcp_config.get("mcpServers", {}) + assert mcp_config["mcpServers"]["java-codebase-rag"]["type"] == "stdio" + + # Verify skill and agent + skill_path = cwd / ".claude" / "skills" / "explore-codebase" / "SKILL.md" + assert skill_path.is_file() + + agent_path = cwd / ".claude" / "agents" / "explorer-rag-enhanced.md" + assert agent_path.is_file() + + # Verify .gitignore + gitignore = cwd / ".gitignore" + assert gitignore.is_file() + gitignore_content = gitignore.read_text() + assert ".java-codebase-rag/" in gitignore_content + + def test_install_non_interactive_multi_host_bank_chat(self, tmp_path, monkeypatch): + """run install --non-interactive --agent claude-code --agent qwen-code""" + import shutil + from java_codebase_rag.installer import run_install + + # Copy bank-chat fixture to tmp_path + bank_chat = Path("tests/bank-chat-system") + if not bank_chat.is_dir(): + pytest.skip("bank-chat-system fixture not found") + shutil.copytree(bank_chat, tmp_path / "bank-chat") + + cwd = tmp_path / "bank-chat" + + # Mock shutil.which to return a fake MCP path + monkeypatch.setattr(shutil, "which", lambda x: "/fake/bin/java-codebase-rag-mcp") + + # Mock pipeline functions + def mock_run_cocoindex_update(*args, **kwargs): + from subprocess import CompletedProcess + return CompletedProcess(["cocoindex"], 0) + + def 
mock_run_build_ast_graph(*args, **kwargs): + from subprocess import CompletedProcess + return CompletedProcess(["build_ast_graph"], 0) + + monkeypatch.setattr( + "java_codebase_rag.pipeline.run_cocoindex_update", + mock_run_cocoindex_update, + ) + monkeypatch.setattr( + "java_codebase_rag.pipeline.run_build_ast_graph", + mock_run_build_ast_graph, + ) + + # Change to fixture directory + monkeypatch.setattr(Path, "cwd", lambda: cwd) + + result = run_install( + non_interactive=True, + agents=["claude-code", "qwen-code"], + scope="project", + model="auto", + source_root=cwd, + quiet=True, + ) + + # Verify exit code + assert result == 0 + + # Verify both hosts configured + mcp_claude = cwd / ".mcp.json" + mcp_qwen = cwd / ".qwen" / "settings.json" + assert mcp_claude.is_file() + assert mcp_qwen.is_file() + + skill_claude = cwd / ".claude" / "skills" / "explore-codebase" / "SKILL.md" + skill_qwen = cwd / ".qwen" / "skills" / "explore-codebase" / "SKILL.md" + assert skill_claude.is_file() + assert skill_qwen.is_file() diff --git a/tests/test_installer_integration.py b/tests/test_installer_integration.py new file mode 100644 index 00000000..4202f119 --- /dev/null +++ b/tests/test_installer_integration.py @@ -0,0 +1,124 @@ +"""Integration tests for java_codebase_rag.installer module. + +These tests are gated behind JAVA_CODEBASE_RAG_RUN_HEAVY=1. 
+""" + +import json +import os +import pytest +import shutil +import subprocess +from pathlib import Path + + +@pytest.mark.skipif( + "JAVA_CODEBASE_RAG_RUN_HEAVY" not in os.environ, + reason="Integration tests require JAVA_CODEBASE_RAG_RUN_HEAVY=1", +) +class TestInstallIntegration: + """Integration tests for install command.""" + + def test_install_non_interactive_claude_code_bank_chat(self, tmp_path): + """run install --non-interactive --agent claude-code from tests/bank-chat-system/ fixture""" + # Copy bank-chat fixture to tmp_path + bank_chat = Path("tests/bank-chat-system") + if not bank_chat.is_dir(): + pytest.skip("bank-chat-system fixture not found") + shutil.copytree(bank_chat, tmp_path / "bank-chat") + + cwd = tmp_path / "bank-chat" + + # Run install via subprocess to test the CLI integration + result = subprocess.run( + [ + ".venv/bin/python", + "-m", + "java_codebase_rag.cli", + "install", + "--non-interactive", + "--agent", + "claude-code", + "--quiet", + ], + cwd=cwd, + capture_output=True, + text=True, + ) + + # Verify exit code + assert result.returncode == 0, f"stdout: {result.stdout}\nstderr: {result.stderr}" + + # Verify artifacts + yaml_path = cwd / ".java-codebase-rag.yml" + assert yaml_path.is_file() + yaml_content = yaml_path.read_text() + import yaml + config = yaml.safe_load(yaml_content) + # Should not have source_root key + assert "source_root" not in config + # Should not have embedding.model (auto is default) + assert "embedding" not in config or "model" not in config.get("embedding", {}) + + # Verify MCP config + mcp_path = cwd / ".mcp.json" + assert mcp_path.is_file() + mcp_content = mcp_path.read_text() + mcp_config = json.loads(mcp_content) + assert "java-codebase-rag" in mcp_config.get("mcpServers", {}) + assert mcp_config["mcpServers"]["java-codebase-rag"]["type"] == "stdio" + + # Verify skill and agent + skill_path = cwd / ".claude" / "skills" / "explore-codebase" / "SKILL.md" + assert skill_path.is_file() + + agent_path = cwd / 
".claude" / "agents" / "explorer-rag-enhanced.md" + assert agent_path.is_file() + + # Verify .gitignore + gitignore = cwd / ".gitignore" + assert gitignore.is_file() + gitignore_content = gitignore.read_text() + assert ".java-codebase-rag/" in gitignore_content + + def test_install_non_interactive_multi_host_bank_chat(self, tmp_path): + """run install --non-interactive --agent claude-code --agent qwen-code""" + # Copy bank-chat fixture to tmp_path + bank_chat = Path("tests/bank-chat-system") + if not bank_chat.is_dir(): + pytest.skip("bank-chat-system fixture not found") + shutil.copytree(bank_chat, tmp_path / "bank-chat") + + cwd = tmp_path / "bank-chat" + + # Run install via subprocess to test the CLI integration + result = subprocess.run( + [ + ".venv/bin/python", + "-m", + "java_codebase_rag.cli", + "install", + "--non-interactive", + "--agent", + "claude-code", + "--agent", + "qwen-code", + "--quiet", + ], + cwd=cwd, + capture_output=True, + text=True, + ) + + # Verify exit code + assert result.returncode == 0, f"stdout: {result.stdout}\nstderr: {result.stderr}" + + # Verify both hosts configured + mcp_claude = cwd / ".mcp.json" + mcp_qwen = cwd / ".qwen" / "settings.json" + assert mcp_claude.is_file() + assert mcp_qwen.is_file() + + skill_claude = cwd / ".claude" / "skills" / "explore-codebase" / "SKILL.md" + skill_qwen = cwd / ".qwen" / "skills" / "explore-codebase" / "SKILL.md" + assert skill_claude.is_file() + assert skill_qwen.is_file() From a913785484d9250dc36ea5c374d5269adc2a964d Mon Sep 17 00:00:00 2001 From: Dmitry Teryaev Date: Mon, 8 Jun 2026 00:37:55 +0300 Subject: [PATCH 2/6] fix: address code review issues (C1, I1, I2, I4, M1) - C1: Create .git dir in integration test fixtures so .gitignore assertions pass - I1: Call run_init_if_needed in run_install orchestrator (was missing) - I2: Remove dead code in resolve_model (unreachable non_interactive check) - I4: Fix potential UnboundLocalError in merge_mcp_config exception handler (use tmp_name 
variable instead of tmp context manager) - M1: Remove unused managed_keys set in generate_yaml_config Co-Authored-By: Claude Opus 4.7 --- java_codebase_rag/installer.py | 29 ++++++++++++++++------------- tests/test_installer.py | 6 ++++++ 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/java_codebase_rag/installer.py b/java_codebase_rag/installer.py index 13fcaaaa..3bc35b70 100644 --- a/java_codebase_rag/installer.py +++ b/java_codebase_rag/installer.py @@ -228,9 +228,6 @@ def resolve_model(model_input: str | None, *, non_interactive: bool) -> str: return str(model_path) # Path not found - prompt for confirmation in interactive mode - if non_interactive: - return "auto" - confirmed = prompt( "confirm", f"Model path {model_input} not found. Use 'auto' instead?", @@ -422,6 +419,7 @@ def merge_mcp_config(config_path: Path, host: HostConfig, *, mcp_command: str) - config["mcpServers"]["java-codebase-rag"] = new_entry # Write atomically (write to tmp, then rename) + tmp_name = None try: with tempfile.NamedTemporaryFile( mode="w", @@ -432,15 +430,16 @@ def merge_mcp_config(config_path: Path, host: HostConfig, *, mcp_command: str) - json.dump(config, tmp, indent=2) tmp.flush() os.fsync(tmp.fileno()) + tmp_name = tmp.name # Atomic rename - os.rename(tmp.name, config_path) + os.rename(tmp_name, config_path) return True except (IOError, OSError) as e: print(f"Error: Failed to write {config_path}: {e}") - if tmp: + if tmp_name: try: - os.unlink(tmp.name) + os.unlink(tmp_name) except OSError: pass return False @@ -532,7 +531,7 @@ def _deploy_mcp_config( error=f"Directory not writable: {config_path.parent}", ) - # Merge config + # Merge config (returns True if updated, False if no-op or parse error) merge_mcp_config(config_path, host, mcp_command=mcp_command) return ArtifactResult(path=config_path, success=True, error=None) except Exception as e: @@ -638,13 +637,9 @@ def generate_yaml_config( # Start with existing YAML or empty dict config = existing_yaml.copy() 
if existing_yaml else {} - # Keys managed by installer (will be overwritten) - managed_keys = set() - # Write microservice_roots only if subset selected if microservice_roots: config["microservice_roots"] = microservice_roots - managed_keys.add("microservice_roots") elif "microservice_roots" in config: # Remove if not needed (was set before but user wants all) del config["microservice_roots"] @@ -654,7 +649,6 @@ def generate_yaml_config( if "embedding" not in config: config["embedding"] = {} config["embedding"]["model"] = model - managed_keys.add("embedding") elif "embedding" in config and "model" in config["embedding"]: # Remove model if using auto if config["embedding"] == {"model": model}: @@ -906,8 +900,17 @@ def run_install( # Update .gitignore update_gitignore(source_root) - # Run init if needed if not quiet: print("Configuration written to", config_path) + # Run init if index directory is empty + index_dir = (source_root / ".java-codebase-rag").resolve() + run_init_if_needed( + source_root, + index_dir, + resolved_model, + non_interactive=non_interactive, + quiet=quiet, + ) + return 0 diff --git a/tests/test_installer.py b/tests/test_installer.py index 8e3f55e3..a82664f9 100644 --- a/tests/test_installer.py +++ b/tests/test_installer.py @@ -599,6 +599,9 @@ def test_install_non_interactive_claude_code_bank_chat(self, tmp_path, monkeypat cwd = tmp_path / "bank-chat" + # Create .git so update_gitignore works + (cwd / ".git").mkdir() + # Mock shutil.which to return a fake MCP path monkeypatch.setattr(shutil, "which", lambda x: "/fake/bin/java-codebase-rag-mcp") @@ -680,6 +683,9 @@ def test_install_non_interactive_multi_host_bank_chat(self, tmp_path, monkeypatc cwd = tmp_path / "bank-chat" + # Create .git so update_gitignore works + (cwd / ".git").mkdir() + # Mock shutil.which to return a fake MCP path monkeypatch.setattr(shutil, "which", lambda x: "/fake/bin/java-codebase-rag-mcp") From 262ec3995929f1286a82b38eb12e91e3aeb1104a Mon Sep 17 00:00:00 2001 From: 
Dmitry Teryaev Date: Mon, 8 Jun 2026 00:44:20 +0300 Subject: [PATCH 3/6] fix: separate merge_mcp_config error from no-op (I3) merge_mcp_config now raises ValueError on JSON parse errors instead of returning False, distinguishing real failures from "already up to date" no-ops. _deploy_mcp_config catches ValueError and reports it as a failed artifact. Added test for invalid JSON case. Co-Authored-By: Claude Opus 4.7 --- java_codebase_rag/installer.py | 10 +++++++--- tests/test_installer.py | 8 ++++++++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/java_codebase_rag/installer.py b/java_codebase_rag/installer.py index 3bc35b70..0c4095a9 100644 --- a/java_codebase_rag/installer.py +++ b/java_codebase_rag/installer.py @@ -391,6 +391,9 @@ def merge_mcp_config(config_path: Path, host: HostConfig, *, mcp_command: str) - Returns: True if entry was added/updated, False if no change needed + + Raises: + ValueError: If existing config file cannot be parsed as JSON """ # Read existing config (or start with empty dict) if config_path.is_file(): @@ -398,8 +401,7 @@ def merge_mcp_config(config_path: Path, host: HostConfig, *, mcp_command: str) - with open(config_path, "r") as f: config = json.load(f) except json.JSONDecodeError as e: - print(f"Error: Failed to parse {config_path}: {e}") - return False + raise ValueError(f"Failed to parse {config_path}: {e}") from e else: config = {} @@ -531,9 +533,11 @@ def _deploy_mcp_config( error=f"Directory not writable: {config_path.parent}", ) - # Merge config (returns True if updated, False if no-op or parse error) + # Merge config (returns True if updated, False if already current) merge_mcp_config(config_path, host, mcp_command=mcp_command) return ArtifactResult(path=config_path, success=True, error=None) + except ValueError as e: + return ArtifactResult(path=config_path, success=False, error=str(e)) except Exception as e: return ArtifactResult(path=config_path, success=False, error=str(e)) diff --git 
a/tests/test_installer.py b/tests/test_installer.py index a82664f9..cd1ebf94 100644 --- a/tests/test_installer.py +++ b/tests/test_installer.py @@ -374,6 +374,14 @@ def test_mcp_merge_preserves_other_keys_settings_json(self, tmp_path): assert config["security"]["level"] == "high" assert config["$version"] == 2 + def test_mcp_merge_raises_on_invalid_json(self, tmp_path): + """malformed JSON → raises ValueError""" + from java_codebase_rag.installer import merge_mcp_config, HOSTS + config_path = tmp_path / "mcp.json" + config_path.write_text("{invalid json!!!") + with pytest.raises(ValueError, match="Failed to parse"): + merge_mcp_config(config_path, HOSTS["claude-code"], mcp_command="/bin/mcp") + class TestDeployArtifacts: """Test deploy_artifacts function.""" From 4e28fe4734326abcd5ef24f387f2172500ab349c Mon Sep 17 00:00:00 2001 From: Dmitry Teryaev Date: Mon, 8 Jun 2026 00:48:32 +0300 Subject: [PATCH 4/6] fix: address PR review bugs (model flag, IO error propagation, ontology version) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. --model no longer silently ignored in non-interactive mode — provided paths are resolved; missing paths fall back to "auto" with a warning. 2. merge_mcp_config IO errors now raise RuntimeError instead of returning False, so _deploy_mcp_config correctly reports failure. 3. Ontology version bumped from 16 to 17 in both shipped and repo-root SKILL.md files. 
Co-Authored-By: Claude Opus 4.7 --- .../skills/explore-codebase/SKILL.md | 2 +- java_codebase_rag/installer.py | 54 +++++++++++-------- skills/explore-codebase/SKILL.md | 2 +- tests/test_installer.py | 22 ++++++++ 4 files changed, 56 insertions(+), 24 deletions(-) diff --git a/java_codebase_rag/install_data/skills/explore-codebase/SKILL.md b/java_codebase_rag/install_data/skills/explore-codebase/SKILL.md index d4c3d460..267a14b2 100644 --- a/java_codebase_rag/install_data/skills/explore-codebase/SKILL.md +++ b/java_codebase_rag/install_data/skills/explore-codebase/SKILL.md @@ -66,7 +66,7 @@ Any time you need to search, locate, navigate, or explore the codebase. **Do NOT ## Graph Navigation Reference (java-codebase-rag MCP) -**Ontology: 16** — if results look structurally wrong or empty across tools, the index may be missing or stale; ask the operator to rebuild. +**Ontology: 17** — if results look structurally wrong or empty across tools, the index may be missing or stale; ask the operator to rebuild. Responses may include `hints_structured` (suggested next calls) and `advisories` — advisory only; ignore when `success` is false. 
### Forced reasoning preamble (every MCP call) diff --git a/java_codebase_rag/installer.py b/java_codebase_rag/installer.py index 0c4095a9..f6f9d49b 100644 --- a/java_codebase_rag/installer.py +++ b/java_codebase_rag/installer.py @@ -216,30 +216,41 @@ def resolve_model(model_input: str | None, *, non_interactive: bool) -> str: Returns: Resolved model string ("auto" or a valid path) """ - if non_interactive or not model_input: - return "auto" + if model_input: + # Expand ~ and $HOME + expanded = os.path.expandvars(model_input.strip()) + expanded = os.path.expanduser(expanded) + model_path = Path(expanded) - # Expand ~ and $HOME - expanded = os.path.expandvars(model_input.strip()) - expanded = os.path.expanduser(expanded) - model_path = Path(expanded) + if model_path.exists(): + return str(model_path) - if model_path.exists(): - return str(model_path) + # Path not found + if non_interactive: + print(f"Warning: Model path {model_input} not found, falling back to 'auto'.") + return "auto" - # Path not found - prompt for confirmation in interactive mode - confirmed = prompt( - "confirm", - f"Model path {model_input} not found. Use 'auto' instead?", - ) - if confirmed: - return "auto" - else: - # Re-prompt for model path - new_input = prompt("text", "Enter model path (or 'auto'):", default="auto") - if new_input == "auto" or not new_input: + confirmed = prompt( + "confirm", + f"Model path {model_input} not found. 
Use 'auto' instead?", + ) + if confirmed: return "auto" - return resolve_model(new_input, non_interactive=non_interactive) + else: + # Re-prompt for model path + new_input = prompt("text", "Enter model path (or 'auto'):", default="auto") + if new_input == "auto" or not new_input: + return "auto" + return resolve_model(new_input, non_interactive=non_interactive) + + if non_interactive: + return "auto" + + # Interactive with no CLI input: prompt for model + user_input = prompt("text", "Embedding model path (or 'auto'):", default="auto") + if user_input == "auto" or not user_input: + return "auto" + return resolve_model(user_input, non_interactive=False) def select_hosts(*, non_interactive: bool, cli_agents: list[str] | None) -> list[HostConfig]: @@ -438,13 +449,12 @@ def merge_mcp_config(config_path: Path, host: HostConfig, *, mcp_command: str) - os.rename(tmp_name, config_path) return True except (IOError, OSError) as e: - print(f"Error: Failed to write {config_path}: {e}") if tmp_name: try: os.unlink(tmp_name) except OSError: pass - return False + raise RuntimeError(f"Failed to write {config_path}: {e}") from e def _read_package_artifact(relative_path: str) -> str: diff --git a/skills/explore-codebase/SKILL.md b/skills/explore-codebase/SKILL.md index d4c3d460..267a14b2 100644 --- a/skills/explore-codebase/SKILL.md +++ b/skills/explore-codebase/SKILL.md @@ -66,7 +66,7 @@ Any time you need to search, locate, navigate, or explore the codebase. **Do NOT ## Graph Navigation Reference (java-codebase-rag MCP) -**Ontology: 16** — if results look structurally wrong or empty across tools, the index may be missing or stale; ask the operator to rebuild. +**Ontology: 17** — if results look structurally wrong or empty across tools, the index may be missing or stale; ask the operator to rebuild. Responses may include `hints_structured` (suggested next calls) and `advisories` — advisory only; ignore when `success` is false. 
### Forced reasoning preamble (every MCP call) diff --git a/tests/test_installer.py b/tests/test_installer.py index cd1ebf94..b64a3df0 100644 --- a/tests/test_installer.py +++ b/tests/test_installer.py @@ -219,6 +219,28 @@ def mock_prompt(*args, **kwargs): result = resolve_model("/nonexistent/path", non_interactive=False) assert result == "auto" + def test_model_non_interactive_with_path_uses_path(self, tmp_path): + """--model /path/to/model with --non-interactive → uses the path""" + model_file = tmp_path / "model.gguf" + model_file.write_text("fake model") + from java_codebase_rag.installer import resolve_model + result = resolve_model(str(model_file), non_interactive=True) + assert result == str(model_file) + + def test_model_non_interactive_with_bad_path_falls_back(self, capsys): + """--model /bad/path with --non-interactive → warning + auto""" + from java_codebase_rag.installer import resolve_model + result = resolve_model("/nonexistent/model.gguf", non_interactive=True) + assert result == "auto" + captured = capsys.readouterr() + assert "Warning" in captured.out + + def test_model_non_interactive_no_input_returns_auto(self): + """no --model with --non-interactive → auto""" + from java_codebase_rag.installer import resolve_model + result = resolve_model(None, non_interactive=True) + assert result == "auto" + class TestSelectHostsAndScope: """Test select_hosts and select_scope functions.""" From 9b7284bad85f6f915c73c6f1e21c4467fa41f9c7 Mon Sep 17 00:00:00 2001 From: Dmitry Teryaev Date: Mon, 8 Jun 2026 01:01:21 +0300 Subject: [PATCH 5/6] fix: use sys.executable in integration tests instead of .venv/bin/python Hardcoded .venv/bin/python doesn't exist in CI where Python is at /opt/hostedtoolcache/Python/... sys.executable resolves correctly in all environments. 
Co-Authored-By: Claude Opus 4.7 --- tests/test_installer_integration.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_installer_integration.py b/tests/test_installer_integration.py index 4202f119..29981f6f 100644 --- a/tests/test_installer_integration.py +++ b/tests/test_installer_integration.py @@ -8,6 +8,7 @@ import pytest import shutil import subprocess +import sys from pathlib import Path @@ -31,7 +32,7 @@ def test_install_non_interactive_claude_code_bank_chat(self, tmp_path): # Run install via subprocess to test the CLI integration result = subprocess.run( [ - ".venv/bin/python", + sys.executable, "-m", "java_codebase_rag.cli", "install", @@ -93,7 +94,7 @@ def test_install_non_interactive_multi_host_bank_chat(self, tmp_path): # Run install via subprocess to test the CLI integration result = subprocess.run( [ - ".venv/bin/python", + sys.executable, "-m", "java_codebase_rag.cli", "install", From f2a90c0c17b7c66025f53cf87dd1e06031bc1476 Mon Sep 17 00:00:00 2001 From: Dmitry Teryaev Date: Mon, 8 Jun 2026 01:14:11 +0300 Subject: [PATCH 6/6] fix: create .git directory in integration test fixtures update_gitignore skips non-git directories. The subprocess-based integration tests run in a temp copy that lacks .git/, so the .gitignore assertion fails. Create .git/ before running install. 
Co-Authored-By: Claude Opus 4.7 --- tests/test_installer_integration.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/test_installer_integration.py b/tests/test_installer_integration.py index 29981f6f..e9361e5c 100644 --- a/tests/test_installer_integration.py +++ b/tests/test_installer_integration.py @@ -29,6 +29,9 @@ def test_install_non_interactive_claude_code_bank_chat(self, tmp_path): cwd = tmp_path / "bank-chat" + # Create .git so update_gitignore works + (cwd / ".git").mkdir() + # Run install via subprocess to test the CLI integration result = subprocess.run( [ @@ -91,6 +94,9 @@ def test_install_non_interactive_multi_host_bank_chat(self, tmp_path): cwd = tmp_path / "bank-chat" + # Create .git so update_gitignore works + (cwd / ".git").mkdir() + # Run install via subprocess to test the CLI integration result = subprocess.run( [