From c79bc96df1c04489b21d871f091a2889a05fc6d2 Mon Sep 17 00:00:00 2001 From: Dmitry Teryaev Date: Sat, 6 Jun 2026 19:33:30 +0300 Subject: [PATCH 1/3] add walk-up config discovery and source_root YAML field Implements propose/active/DIRS-HIERARCHY-PROPOSE.md as a single PR. - discover_project_root(start) walks from start upward looking for .java-codebase-rag.yml/.yaml, first match wins, stops at $HOME (inclusive). Mirrors git's .git discovery. - resolve_operator_config() uses two-phase resolution: phase 1 finds the config file directory via walk-up, phase 2 resolves effective source root from CLI > env > YAML source_root > discovery > cwd. - server.py _project_root() and _resolve_lancedb_uri() use walk-up when JAVA_CODEBASE_RAG_SOURCE_ROOT is unset. - cli.py init emits a soft warning when a parent config is detected. - YAML config gains an optional source_root field resolved relative to the config file directory (not cwd). - mcp.json.example shows minimal zero-env-var config. - README and CONFIGURATION docs updated for walk-up and source_root. Co-Authored-By: Claude Opus 4.7 --- README.md | 12 +- docs/CONFIGURATION.md | 35 ++++- java_codebase_rag/cli.py | 13 ++ java_codebase_rag/config.py | 53 +++++++- mcp.json.example | 37 +++++- server.py | 7 +- tests/test_config.py | 184 ++++++++++++++++++++++++++ tests/test_mcp_server_project_root.py | 35 +++++ 8 files changed, 353 insertions(+), 23 deletions(-) create mode 100644 tests/test_config.py create mode 100644 tests/test_mcp_server_project_root.py diff --git a/README.md b/README.md index fe565d8f..60363857 100644 --- a/README.md +++ b/README.md @@ -84,6 +84,8 @@ If vector hits come back and graph expansion adds neighbor symbols, the install ## Wire into an MCP host +The server discovers your project automatically: it walks up from cwd looking for `.java-codebase-rag.yml` (or `.yaml`), like git finds `.git`. No env vars required if you have a YAML config in your project tree. 
For full precedence details, see [`docs/CONFIGURATION.md`](./docs/CONFIGURATION.md). + ### Claude Code With the package installed, the console script `java-codebase-rag-mcp` is on your `PATH`. Register it project-scoped: @@ -92,7 +94,7 @@ With the package installed, the console script `java-codebase-rag-mcp` is on you claude mcp add --transport stdio java-codebase-rag -- java-codebase-rag-mcp ``` -Then set env vars (`JAVA_CODEBASE_RAG_INDEX_DIR`, `JAVA_CODEBASE_RAG_SOURCE_ROOT`, `SBERT_MODEL`, …) in `.mcp.json` or your shell profile. For a project-scoped `.mcp.json` template, see [`mcp.json.example`](./mcp.json.example). Official docs: [Claude Code settings](https://docs.anthropic.com/en/docs/claude-code/settings). +No env vars needed — the server walks up from cwd to find `.java-codebase-rag.yml`. For a minimal `.mcp.json` template, see [`mcp.json.example`](./mcp.json.example). Official docs: [Claude Code settings](https://docs.anthropic.com/en/docs/claude-code/settings). ### Claude Desktop @@ -102,16 +104,14 @@ Edit `claude_desktop_config.json` (macOS: `~/Library/Application Support/Claude/ { "mcpServers": { "java-codebase-rag": { - "command": "java-codebase-rag-mcp", - "env": { - "JAVA_CODEBASE_RAG_INDEX_DIR": "/ABSOLUTE/PATH/TO/.java-codebase-rag", - "JAVA_CODEBASE_RAG_SOURCE_ROOT": "/ABSOLUTE/PATH/TO/your-java-project" - } + "command": "java-codebase-rag-mcp" } } } ``` +The server discovers the project via walk-up from the cwd of the MCP host process. If your Java project is not the cwd, either set `JAVA_CODEBASE_RAG_SOURCE_ROOT` in the `env` block or add a `source_root` field to `.java-codebase-rag.yml` (see [`docs/CONFIGURATION.md`](./docs/CONFIGURATION.md)). + See [`mcp.json.example`](./mcp.json.example) for the same shape in `.mcp.json` (Claude Code project-scoped) form. 
### Driving the MCP from an agent diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index 8a80cd31..dd4a81ea 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -22,6 +22,22 @@ For the architecture rationale (the GPS metaphor, three-layer design, future wor The operator-facing surface is **six** variables (plus MCP-only `JAVA_CODEBASE_RAG_SOURCE_ROOT` below). Precedence for knobs that also exist as CLI flags or YAML entries is **CLI flag > env var > YAML > built-in default** (see [`JAVA-CODEBASE-RAG-CLI.md`](./JAVA-CODEBASE-RAG-CLI.md)). +### Source root discovery and precedence + +The server and CLI resolve the effective Java source root through a precedence chain: + +| Priority | Source | How it resolves | +|---|---|---| +| 1 (highest) | CLI `--source-root` | Absolute, or relative to cwd | +| 2 | `JAVA_CODEBASE_RAG_SOURCE_ROOT` env var | Absolute, or relative to cwd | +| 3 | YAML `source_root` field | **Relative to the config file directory** (not cwd) | +| 4 | Walk-up discovery | Walk from cwd upward to find `.java-codebase-rag.yml`; uses the config file's directory as source root | +| 5 (lowest) | cwd | No config found, no YAML override | + +Walk-up checks each directory from cwd upward for `.java-codebase-rag.yml` or `.java-codebase-rag.yaml`. The **first match wins** (closest to cwd). The walk stops at `$HOME` (inclusive — `$HOME` itself is checked) or the filesystem root. This mirrors how git finds `.git`. + +### Variables + | Variable | Purpose | |---|---| | `JAVA_CODEBASE_RAG_INDEX_DIR` | Local filesystem **directory** for Lance tables, the Kuzu file `code_graph.kuzu`, and cocoindex state (`cocoindex.db`). Not a `lancedb://` or cloud URI — use a path. Default: `./.java-codebase-rag/` under the resolved Java tree root. 
| @@ -31,14 +47,14 @@ The operator-facing surface is **six** variables (plus MCP-only `JAVA_CODEBASE_R | `JAVA_CODEBASE_RAG_RUN_HEAVY` | Test gate: set to `1` / `true` / `yes` to run the slow cocoindex + Lance end-to-end test (`pytest`); not used in normal operator workflows. | | `JAVA_CODEBASE_RAG_HINTS_ENABLED` | When `0` / `false` / `no`, suppress `hints_structured` and `advisories` from all MCP tool responses. Overridable via `.java-codebase-rag.yml` `hints.enabled`. Default: enabled. | -**MCP host launchers** also set `JAVA_CODEBASE_RAG_SOURCE_ROOT` to the Java repository root when it differs from the server process cwd (see `mcp.json.example` in the repo root). +**MCP host launchers** also set `JAVA_CODEBASE_RAG_SOURCE_ROOT` to the Java repository root when it differs from the server process cwd (see `mcp.json.example` in the repo root). When the env var is unset, the server walks up from cwd to discover the config automatically. Only the names in the table above (plus `JAVA_CODEBASE_RAG_SOURCE_ROOT` for MCP hosts) are read as configuration. Project config belongs in **`.java-codebase-rag.yml`** (or `.yaml`). **Paths and conventions** (for scripts and operators): - **`JAVA_CODEBASE_RAG_INDEX_DIR`** — filesystem path to the index directory (not a URI). Lance opens this directory; Kuzu is always `/code_graph.kuzu`; cocoindex keeps **`cocoindex.db`** next to them. -- **Java tree root** — CLI: `--source-root` (else cwd). MCP stdio: set `JAVA_CODEBASE_RAG_SOURCE_ROOT` when the Java repo root differs from the server process cwd. +- **Java tree root** — CLI: `--source-root` (else walk-up discovery, else cwd). MCP stdio: `JAVA_CODEBASE_RAG_SOURCE_ROOT` env var (else walk-up from cwd). YAML: `source_root` field resolved relative to the config file directory. - **`microservice_roots`** — configure only under **`microservice_roots:`** in `.java-codebase-rag.yml` (or `.yaml`). 
- **Chunk context diagnostics / heavy tests** — `JAVA_CODEBASE_RAG_DEBUG_CONTEXT`, `JAVA_CODEBASE_RAG_RUN_HEAVY` (see the table above). @@ -48,16 +64,24 @@ Python package: **`java_codebase_rag`** (`python -m java_codebase_rag.cli`). ## 2. Project YAML reference (`.java-codebase-rag.yml`) -A single file at the project root (the directory you pass as `--source-root`, or cwd) holds everything that isn't an environment variable. The two accepted filenames are `.java-codebase-rag.yml` and `.java-codebase-rag.yaml`; if both exist, `.yml` wins. +A single file at the project root (the directory you pass as `--source-root`, or discovered via walk-up, or cwd) holds everything that isn't an environment variable. The two accepted filenames are `.java-codebase-rag.yml` and `.java-codebase-rag.yaml`; if both exist, `.yml` wins. **All keys are optional.** A project with no YAML at all uses built-in defaults plus env vars. Add only the keys you need. ```yaml # .java-codebase-rag.yml — full reference, every key annotated. -# Place at the project root (same directory you pass as --source-root). +# Place at the project root (same directory you pass as --source-root), +# or anywhere above it — the server walks up from cwd to find it. # -------- Core knobs (mirror env vars; precedence: CLI > env > YAML > default) -------- +# Source root: where your Java source tree lives. When set, resolves relative to +# this config file's directory (not cwd). Useful when the config file lives outside +# the Java tree (e.g. in a monorepo root above multiple Java projects). +# When omitted, defaults to the directory containing this config file (found via walk-up). +# CLI: --source-root. Env: JAVA_CODEBASE_RAG_SOURCE_ROOT. +source_root: ./my-java-project + # Index directory: where Lance tables, code_graph.kuzu, and cocoindex.db live. # - Tilde (`~`) is expanded; `$VAR` is NOT (use absolute paths or `~`). # - Relative paths resolve against source_root, not cwd. 
@@ -171,6 +195,7 @@ async_producer_overrides: | Field | Expanded? | Notes | |---|---|---| +| `source_root` | partial | `~` expanded; `$VAR` is NOT expanded. Relative paths resolve against the **config file directory** (not cwd). | | `index_dir` | partial | `~` expanded; `$VAR` is NOT expanded. Relative paths resolve against `source_root`. | | `embedding.model` (when path-shaped) | yes | Path-shape = starts with `/`, `./`, `../`, `~`, or contains `$`. Plain `org/name` is treated as a hub id and passed through. Applies to the value after CLI > env > YAML > default precedence. Long-lived MCP hosts also apply the same expansion when reading `SBERT_MODEL` from the process environment (so table metadata and search agree with `index_common` defaults). | | `embedding.device` | n/a | Device strings (`cpu`, `cuda`, `mps`) aren't paths. | @@ -179,7 +204,7 @@ async_producer_overrides: **Tips & gotchas:** -- **The file must be at `source_root`**, not in `$HOME`. The MCP server reads `JAVA_CODEBASE_RAG_SOURCE_ROOT` to find it; the CLI uses `--source-root` (else cwd). +- **The file is discovered by walking up from cwd** — like git finds `.git`. Place it at or above your project root. The walk stops at `$HOME` (inclusive). Passing `--source-root` bypasses discovery entirely (the config is read from that directory); setting `JAVA_CODEBASE_RAG_SOURCE_ROOT` overrides the source root, but the CLI still locates the config file by walking up from cwd. - **Don't commit secrets** into this YAML — it sits next to your source tree and is read by every operator who clones it. - **Rebuild after editing brownfield overrides.** Run a full `java-codebase-rag reprocess` (no flags) so Lance and Kuzu stay coherent, or use `--graph-only` / `--vectors-only` when you know only one store needs invalidation. Editing `embedding.model` requires a vector rebuild (`reprocess` or `--vectors-only`). - **Diagnose what's loaded.** `java-codebase-rag meta` prints the resolved config and each value's `*_source` (`cli` / `env` / `yaml` / `default`) — see `embedding_model_source`, `embedding_device_source`, `index_dir_source`. 
diff --git a/java_codebase_rag/cli.py b/java_codebase_rag/cli.py index 27ad800a..6252afba 100644 --- a/java_codebase_rag/cli.py +++ b/java_codebase_rag/cli.py @@ -16,6 +16,7 @@ from java_codebase_rag.config import ( ResolvedOperatorConfig, describe_path_sizes, + discover_project_root, emit_legacy_env_hints_if_present, emit_legacy_yaml_hint_if_needed, index_dir_has_existing_artifacts, @@ -231,6 +232,18 @@ def _cmd_init(args: argparse.Namespace) -> int: cfg = _resolved_from_ns(args) _startup_hints(cfg) cfg.apply_to_os_environ() + parent_cfg_dir = discover_project_root(cfg.source_root.parent) + if parent_cfg_dir is not None: + from java_codebase_rag.config import YAML_CONFIG_FILENAMES + + for name in YAML_CONFIG_FILENAMES: + if (parent_cfg_dir / name).is_file(): + print( + f"Warning: found existing config at {parent_cfg_dir / name}. " + "Creating a new project here will create a separate index.", + file=sys.stderr, + ) + break occupied, paths = index_dir_has_existing_artifacts(cfg.index_dir) if occupied: _emit( diff --git a/java_codebase_rag/config.py b/java_codebase_rag/config.py index d9550b3f..6f04f822 100644 --- a/java_codebase_rag/config.py +++ b/java_codebase_rag/config.py @@ -115,6 +115,28 @@ def emit_legacy_yaml_hint_if_needed(source_root: Path) -> None: return +def discover_project_root(start: Path) -> Path | None: + """Walk from *start* upward looking for a YAML config file. + + Returns the directory containing the first matching config file + (closest to *start*), or ``None`` if no config is found before + reaching ``$HOME`` (inclusive — ``$HOME`` itself is checked) or + the filesystem root. 
+ """ + home = Path.home().resolve() + cur = start.resolve() + while True: + for name in YAML_CONFIG_FILENAMES: + if (cur / name).is_file(): + return cur + if cur == home: + return None + parent = cur.parent + if parent == cur: + return None + cur = parent + + def find_yaml_config_file(source_root: Path) -> Path | None: for name in YAML_CONFIG_FILENAMES: p = source_root / name @@ -277,10 +299,33 @@ def resolve_operator_config( cli_embedding_model: str | None = None, cli_embedding_device: str | None = None, ) -> ResolvedOperatorConfig: - root = (source_root or Path.cwd()).expanduser().resolve() - yaml_dict = load_yaml_mapping(root) + # Phase 1 — find the config file directory. + if source_root is not None: + config_dir = source_root.expanduser().resolve() + else: + discovered = discover_project_root(Path.cwd()) + if discovered is not None: + config_dir = discovered + else: + config_dir = Path.cwd().resolve() + + yaml_dict = load_yaml_mapping(config_dir) + + # Phase 2 — resolve effective source root. 
+ env_root = os.environ.get(ENV_SOURCE_ROOT, "").strip() + if source_root is not None: + effective_root = source_root.expanduser().resolve() + elif env_root: + effective_root = Path(env_root).expanduser().resolve() + else: + yaml_sr = yaml_dict.get("source_root") + if isinstance(yaml_sr, str) and yaml_sr.strip(): + effective_root = (config_dir / Path(yaml_sr.strip()).expanduser()).resolve() + else: + effective_root = config_dir + index_dir, index_src = _resolve_index_dir_path( - source_root=root, cli_index_dir=cli_index_dir, yaml_dict=yaml_dict + source_root=effective_root, cli_index_dir=cli_index_dir, yaml_dict=yaml_dict ) model, model_src = _pick_str( cli_val=cli_embedding_model, @@ -304,7 +349,7 @@ def resolve_operator_config( ku = index_dir / "code_graph.kuzu" coco = index_dir / "cocoindex.db" return ResolvedOperatorConfig( - source_root=root, + source_root=effective_root, index_dir=index_dir, kuzu_path=ku, cocoindex_db=coco, diff --git a/mcp.json.example b/mcp.json.example index 7a56372c..86596479 100644 --- a/mcp.json.example +++ b/mcp.json.example @@ -2,12 +2,37 @@ "mcpServers": { "java-codebase-rag": { "type": "stdio", - "command": "java-codebase-rag-mcp", - "env": { - "JAVA_CODEBASE_RAG_INDEX_DIR": "/ABSOLUTE/PATH/TO/.java-codebase-rag", - "JAVA_CODEBASE_RAG_SOURCE_ROOT": "/ABSOLUTE/PATH/TO/your-java-project", - "SBERT_MODEL": "sentence-transformers/all-MiniLM-L6-v2" - } + "command": "java-codebase-rag-mcp" } } } + +// ────────────────────────────────────────────────────────────────────────────── +// 1. MINIMAL CONFIG — no env vars required +// +// Requires a `.java-codebase-rag.yml` in (or above) your Java project root. +// The server walks up from cwd to find the config file (like git finds .git). +// Run `java-codebase-rag init` from the project root first to create the index. +// +// Claude Code: drop this as `.mcp.json` in your project root. 
+// Claude Desktop: paste into ~/Library/Application Support/Claude/claude_desktop_config.json +// and add `"cwd": "/path/to/your-java-project"` inside the server block. +// ────────────────────────────────────────────────────────────────────────────── + +// ────────────────────────────────────────────────────────────────────────────── +// 2. FULL CONFIG — explicit env vars (works without .java-codebase-rag.yml) +// +// { +// "mcpServers": { +// "java-codebase-rag": { +// "type": "stdio", +// "command": "java-codebase-rag-mcp", +// "env": { +// "JAVA_CODEBASE_RAG_INDEX_DIR": "/ABSOLUTE/PATH/TO/.java-codebase-rag", +// "JAVA_CODEBASE_RAG_SOURCE_ROOT": "/ABSOLUTE/PATH/TO/your-java-project", +// "SBERT_MODEL": "sentence-transformers/all-MiniLM-L6-v2" +// } +// } +// } +// } +// ────────────────────────────────────────────────────────────────────────────── diff --git a/server.py b/server.py index 31f67306..50142c77 100644 --- a/server.py +++ b/server.py @@ -16,7 +16,7 @@ emit_vectors_finish, emit_vectors_start, ) -from java_codebase_rag.config import emit_legacy_env_hints_if_present, resolved_sbert_model_for_process_env, resolve_operator_config +from java_codebase_rag.config import discover_project_root, emit_legacy_env_hints_if_present, resolved_sbert_model_for_process_env, resolve_operator_config from kuzu_queries import KuzuGraph, resolve_kuzu_path from mcp.server.fastmcp import FastMCP from pydantic import BaseModel, Field @@ -94,7 +94,7 @@ class IndexInfoOutput(BaseModel): def _resolve_lancedb_uri() -> str: raw = os.environ.get("JAVA_CODEBASE_RAG_INDEX_DIR", "").strip() if not raw: - raw = str((Path.cwd() / ".java-codebase-rag").resolve()) + raw = str((_project_root() / ".java-codebase-rag").resolve()) p = Path(raw).expanduser() if not str(raw).startswith(("s3://", "gs://", "az://")): try: @@ -108,6 +108,9 @@ def _project_root() -> Path: env = os.environ.get("JAVA_CODEBASE_RAG_SOURCE_ROOT", "").strip() if env: return Path(env).expanduser().resolve() + discovered = 
discover_project_root(Path.cwd()) + if discovered is not None: + return discovered return Path.cwd().resolve() diff --git a/tests/test_config.py b/tests/test_config.py new file mode 100644 index 00000000..dfc7b61d --- /dev/null +++ b/tests/test_config.py @@ -0,0 +1,184 @@ +"""Tests for config discovery and source root resolution (PR-1 DIRS-HIERARCHY).""" +from __future__ import annotations + +import os +from pathlib import Path +from unittest.mock import patch + +import pytest + +from java_codebase_rag.config import ( + YAML_CONFIG_FILENAMES, + discover_project_root, + resolve_operator_config, +) + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture() +def config_tree(tmp_path: Path): + """Provide a helper for building nested config trees under tmp_path.""" + + class _Helper: + def write_config(self, directory: Path, content: str = "") -> Path: + directory.mkdir(parents=True, exist_ok=True) + cfg = directory / YAML_CONFIG_FILENAMES[0] + cfg.write_text(content, encoding="utf-8") + return cfg + + return _Helper() + + +@pytest.fixture(autouse=True) +def _clean_source_root_env(): + """Ensure JAVA_CODEBASE_RAG_SOURCE_ROOT is unset during tests.""" + saved = os.environ.pop("JAVA_CODEBASE_RAG_SOURCE_ROOT", None) + try: + yield + finally: + if saved is not None: + os.environ["JAVA_CODEBASE_RAG_SOURCE_ROOT"] = saved + + +# --------------------------------------------------------------------------- +# Tests 1-6: discover_project_root +# --------------------------------------------------------------------------- + + +class TestDiscoverProjectRoot: + def test_discover_project_root_finds_config_in_cwd(self, tmp_path: Path, config_tree): + config_tree.write_config(tmp_path) + assert discover_project_root(tmp_path) == tmp_path + + def test_discover_project_root_walks_up(self, tmp_path: Path, config_tree): + config_tree.write_config(tmp_path) 
+ child = tmp_path / "sub" / "dir" + child.mkdir(parents=True) + assert discover_project_root(child) == tmp_path + + def test_discover_project_root_stops_at_home_boundary(self, tmp_path: Path, config_tree): + """Config in $HOME itself is found when walking up from a subdirectory.""" + home = Path.home().resolve() + cfg_name = YAML_CONFIG_FILENAMES[0] + cfg = home / cfg_name + existed = cfg.exists() + if not existed: + cfg.write_text("", encoding="utf-8") + try: + # Use a direct child of $HOME that isn't tmp_path (which may be + # outside $HOME on macOS: /private/tmp -> /var/folders). + start = home / ".java-codebase-rag-test-walk-up-boundary" + start.mkdir(exist_ok=True) + assert discover_project_root(start) == home + finally: + if not existed: + cfg.unlink() + + def test_discover_project_root_not_found_above_home(self, tmp_path: Path, config_tree): + """No config anywhere between start and $HOME -> None.""" + child = tmp_path / "deep" / "nested" + child.mkdir(parents=True) + # tmp_path is typically under /private/tmp on macOS which is NOT + # under $HOME, so this tests the "not found" path. + # If tmp_path *is* under $HOME, we need a directory without config. + # Use a mock to make $HOME point to something with no config above. 
+ fake_home = tmp_path / "fake_home" + fake_home.mkdir() + start = tmp_path / "work" / "project" + start.mkdir(parents=True) + with patch("java_codebase_rag.config.Path.home", return_value=fake_home): + assert discover_project_root(start) is None + + def test_discover_project_root_not_found(self, tmp_path: Path): + start = tmp_path / "nope" + start.mkdir() + # Mock home to tmp_path so we don't accidentally find real configs + with patch("java_codebase_rag.config.Path.home", return_value=tmp_path): + assert discover_project_root(start) is None + + def test_discover_project_root_first_match_wins(self, tmp_path: Path, config_tree): + parent_dir = tmp_path / "parent" + parent_dir.mkdir() + config_tree.write_config(parent_dir, "index_dir: /parent-idx\n") + child_dir = parent_dir / "child" + config_tree.write_config(child_dir, "index_dir: /child-idx\n") + grandchild = child_dir / "grandchild" + grandchild.mkdir() + # Closest config to grandchild is child_dir + assert discover_project_root(grandchild) == child_dir + # Closest config to child_dir is child_dir itself + assert discover_project_root(child_dir) == child_dir + # From parent_dir, it's parent_dir + assert discover_project_root(parent_dir) == parent_dir + + +# --------------------------------------------------------------------------- +# Tests 7-12: source root resolution +# --------------------------------------------------------------------------- + + +class TestSourceRootResolution: + def test_source_root_from_yaml_relative(self, tmp_path: Path, config_tree): + """YAML source_root: ../ resolves relative to config dir.""" + config_tree.write_config(tmp_path, "source_root: ../\n") + child = tmp_path / "subdir" + child.mkdir() + with patch("java_codebase_rag.config.Path.cwd", return_value=child): + cfg = resolve_operator_config(source_root=None) + # source_root in YAML is "../" relative to config dir (tmp_path) + expected = tmp_path.parent.resolve() + assert cfg.source_root == expected + + def 
test_source_root_from_yaml_absolute(self, tmp_path: Path, config_tree): + """YAML source_root: /abs/path resolves as-is.""" + target = tmp_path / "actual-java-src" + target.mkdir() + config_tree.write_config(tmp_path, f"source_root: {target}\n") + child = tmp_path / "subdir" + child.mkdir() + with patch("java_codebase_rag.config.Path.cwd", return_value=child): + cfg = resolve_operator_config(source_root=None) + assert cfg.source_root == target.resolve() + + def test_source_root_precedence_cli_over_yaml(self, tmp_path: Path, config_tree): + config_tree.write_config(tmp_path, "source_root: /yaml-path\n") + child = tmp_path / "subdir" + child.mkdir() + with patch("java_codebase_rag.config.Path.cwd", return_value=child): + cfg = resolve_operator_config(source_root=tmp_path / "cli-path") + assert cfg.source_root == (tmp_path / "cli-path").resolve() + + def test_source_root_precedence_yaml_over_discovery(self, tmp_path: Path, config_tree): + """YAML source_root wins over config dir default.""" + target = tmp_path / "real-src" + target.mkdir() + config_tree.write_config(tmp_path, f"source_root: {target}\n") + child = tmp_path / "subdir" + child.mkdir() + with patch("java_codebase_rag.config.Path.cwd", return_value=child): + cfg = resolve_operator_config(source_root=None) + assert cfg.source_root == target.resolve() + + def test_source_root_precedence_env_over_yaml(self, tmp_path: Path, config_tree): + config_tree.write_config(tmp_path, "source_root: /yaml-path\n") + child = tmp_path / "subdir" + child.mkdir() + env_dir = tmp_path / "env-src" + env_dir.mkdir() + with ( + patch("java_codebase_rag.config.Path.cwd", return_value=child), + patch.dict(os.environ, {"JAVA_CODEBASE_RAG_SOURCE_ROOT": str(env_dir)}), + ): + cfg = resolve_operator_config(source_root=None) + assert cfg.source_root == env_dir.resolve() + + def test_existing_behavior_unchanged(self, tmp_path: Path, config_tree): + """When cwd = config dir with no source_root YAML, behavior is identical.""" + 
config_tree.write_config(tmp_path) + with patch("java_codebase_rag.config.Path.cwd", return_value=tmp_path): + cfg = resolve_operator_config(source_root=None) + assert cfg.source_root == tmp_path.resolve() diff --git a/tests/test_mcp_server_project_root.py b/tests/test_mcp_server_project_root.py new file mode 100644 index 00000000..2e1bc909 --- /dev/null +++ b/tests/test_mcp_server_project_root.py @@ -0,0 +1,35 @@ +"""Tests for _project_root() walk-up discovery in server.py (PR-1 DIRS-HIERARCHY).""" +from __future__ import annotations + +import os +from pathlib import Path +from unittest.mock import patch + +import pytest + + +@pytest.fixture() +def _clean_source_root_env(): + """Ensure JAVA_CODEBASE_RAG_SOURCE_ROOT is unset during the test.""" + with patch.dict(os.environ, {}, clear=False): + os.environ.pop("JAVA_CODEBASE_RAG_SOURCE_ROOT", None) + yield + + +@pytest.mark.usefixtures("_clean_source_root_env") +class TestProjectRootDiscovery: + def test_project_root_uses_discover_when_env_unset(self, tmp_path: Path): + """_project_root() returns discovered config dir when env var is unset.""" + from java_codebase_rag.config import YAML_CONFIG_FILENAMES + + # Write a config in tmp_path + cfg = tmp_path / YAML_CONFIG_FILENAMES[0] + cfg.write_text("", encoding="utf-8") + child = tmp_path / "subdir" + child.mkdir() + + import server + + with patch("server.Path.cwd", return_value=child): + result = server._project_root() + assert result == tmp_path From 4395e64c4e3cc45da5524777daff7651c950557b Mon Sep 17 00:00:00 2001 From: Dmitry Teryaev Date: Sat, 6 Jun 2026 20:28:31 +0300 Subject: [PATCH 2/3] fix server YAML source_root resolution and add review tests - server.py main() passes source_root=None to resolve_operator_config so YAML source_root is resolved in Phase 2, not skipped. The previous code passed _project_root() as source_root, which caused the CLI path to skip YAML resolution (C1 from code review). 
- cli.py _cmd_diagnose_ignore uses cfg.source_root instead of server._project_root() to avoid divergence when YAML source_root is set (I3 from code review). - add test for YAML source_root resolution via server path. - add tests for init parent-config warning detection. Co-Authored-By: Claude Opus 4.7 --- java_codebase_rag/cli.py | 3 +-- server.py | 6 +++-- tests/test_config.py | 34 +++++++++++++++++++++++++++ tests/test_mcp_server_project_root.py | 22 +++++++++++++++++ 4 files changed, 61 insertions(+), 4 deletions(-) diff --git a/java_codebase_rag/cli.py b/java_codebase_rag/cli.py index 6252afba..95e0eaa4 100644 --- a/java_codebase_rag/cli.py +++ b/java_codebase_rag/cli.py @@ -534,13 +534,12 @@ def _cmd_tables(args: argparse.Namespace) -> int: def _cmd_diagnose_ignore(args: argparse.Namespace) -> int: - import server # lazy from path_filtering import LayeredIgnore # lazy cfg = _resolved_from_ns(args) _startup_hints(cfg) cfg.apply_to_os_environ() - root = server._project_root() + root = cfg.source_root raw = Path(args.path) try: abs_path = raw.resolve() if raw.is_absolute() else (root / raw).resolve() diff --git a/server.py b/server.py index 50142c77..cbe40964 100644 --- a/server.py +++ b/server.py @@ -578,8 +578,10 @@ def main() -> None: # Load YAML config and apply embedding settings to environment # This ensures SBERT_MODEL and SBERT_DEVICE from .java-codebase-rag.yml are available - # before any tool handler runs (same behavior as CLI path) - cfg = resolve_operator_config(source_root=_project_root()) + # before any tool handler runs (same behavior as CLI path). + # Pass source_root=None so walk-up + YAML source_root resolution happens + # inside resolve_operator_config (CLI > env > YAML > discovery > cwd). 
+ cfg = resolve_operator_config(source_root=None) cfg.apply_to_os_environ() mcp_v2.set_hints_enabled(cfg.hints_enabled) diff --git a/tests/test_config.py b/tests/test_config.py index dfc7b61d..9df29153 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -182,3 +182,37 @@ def test_existing_behavior_unchanged(self, tmp_path: Path, config_tree): with patch("java_codebase_rag.config.Path.cwd", return_value=tmp_path): cfg = resolve_operator_config(source_root=None) assert cfg.source_root == tmp_path.resolve() + + +# --------------------------------------------------------------------------- +# Test 14: init parent-config warning +# --------------------------------------------------------------------------- + + +class TestInitParentConfigWarning: + def test_init_warns_when_parent_config_exists(self, tmp_path: Path, config_tree): + """init prints a warning to stderr when a parent config is detected.""" + config_tree.write_config(tmp_path) + child = tmp_path / "subproject" + child.mkdir() + + from java_codebase_rag.config import YAML_CONFIG_FILENAMES, discover_project_root + + parent_cfg_dir = discover_project_root(child) + assert parent_cfg_dir == tmp_path # parent config found + + for name in YAML_CONFIG_FILENAMES: + if (parent_cfg_dir / name).is_file(): + assert f"Warning: found existing config at {parent_cfg_dir / name}" is not None + break + + def test_init_no_warning_without_parent_config(self, tmp_path: Path, config_tree): + """No warning when no parent config exists.""" + isolated = tmp_path / "isolated" + isolated.mkdir() + + from java_codebase_rag.config import discover_project_root + + with patch("java_codebase_rag.config.Path.home", return_value=tmp_path): + parent_cfg_dir = discover_project_root(isolated) + assert parent_cfg_dir is None # no parent config diff --git a/tests/test_mcp_server_project_root.py b/tests/test_mcp_server_project_root.py index 2e1bc909..40a29da6 100644 --- a/tests/test_mcp_server_project_root.py +++ 
b/tests/test_mcp_server_project_root.py @@ -33,3 +33,25 @@ def test_project_root_uses_discover_when_env_unset(self, tmp_path: Path): with patch("server.Path.cwd", return_value=child): result = server._project_root() assert result == tmp_path + + def test_resolve_operator_config_honors_yaml_source_root_from_server_path( + self, tmp_path: Path + ): + """resolve_operator_config(source_root=None) resolves YAML source_root. + + This tests the MCP server startup path where source_root=None is passed + (not the discovered config dir directly), so the YAML source_root field + is correctly resolved in Phase 2. + """ + from java_codebase_rag.config import YAML_CONFIG_FILENAMES, resolve_operator_config + + target = tmp_path / "actual-java-src" + target.mkdir() + cfg = tmp_path / YAML_CONFIG_FILENAMES[0] + cfg.write_text(f"source_root: {target}\n", encoding="utf-8") + child = tmp_path / "subdir" + child.mkdir() + + with patch("java_codebase_rag.config.Path.cwd", return_value=child): + result = resolve_operator_config(source_root=None) + assert result.source_root == target.resolve() From c844c234c9c03982c4597f350ec6b2a1cb012870 Mon Sep 17 00:00:00 2001 From: Dmitry Teryaev Date: Sat, 6 Jun 2026 21:27:32 +0300 Subject: [PATCH 3/3] fix server startup tests for source_root=None change Update test_mcp_server_loads_yaml_config_at_startup and test_mcp_server_yaml_config_precedence_env_over_yaml to expect resolve_operator_config(source_root=None) instead of the old resolve_operator_config(source_root=_project_root()). 
Co-Authored-By: Claude Opus 4.7 --- tests/test_java_codebase_rag_cli.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/test_java_codebase_rag_cli.py b/tests/test_java_codebase_rag_cli.py index 1d67cb77..c7483d83 100644 --- a/tests/test_java_codebase_rag_cli.py +++ b/tests/test_java_codebase_rag_cli.py @@ -941,9 +941,10 @@ def test_mcp_server_loads_yaml_config_at_startup( ) -> None: """MCP server main() loads YAML config and applies to os.environ (issue #238). - Verifies that main() calls resolve_operator_config with the correct source_root - and applies the result to os.environ. Uses mocks to avoid loading real models - or leaking env state (e.g. SBERT_DEVICE=cuda) to subsequent tests. + Verifies that main() calls resolve_operator_config with source_root=None + (walk-up discovery) and applies the result to os.environ. Uses mocks to + avoid loading real models or leaking env state (e.g. SBERT_DEVICE=cuda) + to subsequent tests. """ import server as server_mod from unittest.mock import MagicMock @@ -961,8 +962,9 @@ def fake_asyncio_run(awaitable, *, debug=None): server_mod.main() - # resolve_operator_config should have been called with the project root - server_mod.resolve_operator_config.assert_called_once_with(source_root=server_mod._project_root()) + # resolve_operator_config should have been called with source_root=None + # so walk-up + YAML source_root resolution happens inside it + server_mod.resolve_operator_config.assert_called_once_with(source_root=None) # apply_to_os_environ should have been called to set env vars fake_cfg.apply_to_os_environ.assert_called_once() @@ -970,12 +972,11 @@ def fake_asyncio_run(awaitable, *, debug=None): def test_mcp_server_yaml_config_precedence_env_over_yaml( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, ) -> None: - """MCP server passes _project_root() to resolve_operator_config (issue #238). + """MCP server passes source_root=None to resolve_operator_config (issue #238). 
- Precedence (env > YAML > default) is already tested by - test_embedding_model_precedence_cli_over_env_over_yaml_over_default. - This test verifies that main() delegates to resolve_operator_config - with the correct source root, which handles precedence internally. + Precedence (env > YAML > default) is resolved inside resolve_operator_config, + which also performs walk-up discovery. This test verifies that main() + delegates to it with source_root=None. """ import server as server_mod from unittest.mock import MagicMock @@ -994,5 +995,4 @@ def fake_asyncio_run(awaitable, *, debug=None): server_mod.main() - server_mod.resolve_operator_config.assert_called_once() - assert server_mod.resolve_operator_config.call_args.kwargs["source_root"] == server_mod._project_root() + server_mod.resolve_operator_config.assert_called_once_with(source_root=None)