diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 12d7bfc..539ef01 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -7,8 +7,20 @@ on: jobs: test: - name: test - runs-on: ubuntu-latest + name: test (${{ matrix.os }}) + strategy: + fail-fast: false + matrix: + # Ubuntu is the required gate (blocks merges). macOS + Windows run on + # every code-change PR to surface platform regressions, but stay + # non-blocking (continue-on-error below) until each has been green + # across several merges — the fast test suite exercises the native + # kuzu/ladybug graph layer, which had never been run on these OSes + # before this matrix. Promote an OS to a hard gate by also excluding it + # in the continue-on-error expression (add `&& matrix.os != '<os>'`) once it is reliably green. + os: [ubuntu-latest, macos-latest, windows-latest] + continue-on-error: ${{ matrix.os != 'ubuntu-latest' }} + runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v4 with: diff --git a/README.md b/README.md index 00cb46c..216d0db 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ The rest of this README is the install, walkthrough, and tool cheat sheet for pu pip install java-codebase-rag ``` -Python **3.11+** required. After install, `java-codebase-rag --help` should print the CLI groups. +Python **3.11+** required, on **Linux, macOS, and Windows** — every native dependency (LanceDB, LadybugDB/kuzu, CocoIndex) ships a wheel for each platform. After install, `java-codebase-rag --help` should print the CLI groups. The package includes the CocoIndex lifecycle dependency used by `init`, `increment`, `reprocess`, and `erase`. ### Interactive setup (recommended) @@ -86,21 +86,23 @@ cd java-codebase-rag # 2. Build the index (Lance vectors + LadybugDB graph). First run downloads the # embedding model (~90 MB) and takes ~30-60s on the fixture. 
-java-codebase-rag init --source-root tests/bank-chat-system --index-dir /tmp/bank-chat-index +java-codebase-rag init --source-root tests/bank-chat-system --index-dir tmp/bank-chat-index # 3. Inspect what landed (resolved config, edge counts, ontology version) -java-codebase-rag meta --source-root tests/bank-chat-system --index-dir /tmp/bank-chat-index +java-codebase-rag meta --source-root tests/bank-chat-system --index-dir tmp/bank-chat-index ``` +> **Windows users:** these smoke-test snippets use POSIX shell syntax (`VAR=value` prefix, `\` line continuations). Run them under **Git Bash** or **WSL**, or skip straight to `java-codebase-rag install`, which wires up MCP registration and configuration without a shell. + Smoke-test the index with two checks (`search_lancedb` ships with the package): ```bash # Vector search — proves the LanceDB side works -JAVA_CODEBASE_RAG_INDEX_DIR=/tmp/bank-chat-index \ +JAVA_CODEBASE_RAG_INDEX_DIR=tmp/bank-chat-index \ python -m search_lancedb "chat ingress controller" --table java --limit 3 # Vector + graph expansion — proves LadybugDB is wired in -JAVA_CODEBASE_RAG_INDEX_DIR=/tmp/bank-chat-index \ +JAVA_CODEBASE_RAG_INDEX_DIR=tmp/bank-chat-index \ python -m search_lancedb "chat ingress controller" --table java --limit 3 \ --graph-expand --expand-depth 2 ``` diff --git a/build_ast_graph.py b/build_ast_graph.py index c4b8bb2..4576f65 100644 --- a/build_ast_graph.py +++ b/build_ast_graph.py @@ -3816,6 +3816,7 @@ def incremental_rebuild( if verbose: _verbose_stderr_line(f"[increment] ontology version {version} < 17; falling back to full rebuild") conn.close() + db.close() del conn, db return _fallback_to_full(source_root, ladybug_path, verbose, t_start) except Exception as e: @@ -3823,6 +3824,7 @@ def incremental_rebuild( _verbose_stderr_line(f"[increment] failed to read ontology version: {e}; falling back to full rebuild") try: conn.close() + db.close() except Exception: pass del conn, db @@ -3841,6 +3843,7 @@ def 
incremental_rebuild( if verbose: _verbose_stderr_line("[increment] no changes detected; no-op") conn.close() + db.close() return IncrementalResult( mode="incremental", files_changed=0, @@ -3859,6 +3862,7 @@ def incremental_rebuild( if verbose: _verbose_stderr_line("[increment] crash marker exists; falling back to full rebuild") conn.close() + db.close() crash_marker_path.unlink(missing_ok=True) return _fallback_to_full(source_root, ladybug_path, verbose, t_start) @@ -3893,6 +3897,7 @@ def incremental_rebuild( if verbose: _verbose_stderr_line(f"[increment] dependent expansion cap ({expansion_cap}) exceeded ({len(scope_files)} files); falling back to full rebuild") conn.close() + db.close() crash_marker_path.unlink(missing_ok=True) return _fallback_to_full(source_root, ladybug_path, verbose, t_start) @@ -3977,6 +3982,7 @@ def incremental_rebuild( crash_marker_path.unlink(missing_ok=True) conn.close() + db.close() elapsed = time.time() - t_start if verbose: @@ -3996,6 +4002,7 @@ def incremental_rebuild( if verbose: _verbose_stderr_line(f"[increment] error during incremental rebuild: {e}; falling back to full rebuild") conn.close() + db.close() crash_marker_path.unlink(missing_ok=True) return _fallback_to_full(source_root, ladybug_path, verbose, t_start) @@ -4200,6 +4207,7 @@ def write_ladybug( _verbose_stderr_line(f"[graph] writing · routes/exposes written in {time.time() - t2:.2f}s") _write_meta(conn, tables, source_root) conn.close() + db.close() _init_hash_tracker(source_root, db_path) diff --git a/java_codebase_rag/_fdlimit.py b/java_codebase_rag/_fdlimit.py index b5ee655..192e7c1 100644 --- a/java_codebase_rag/_fdlimit.py +++ b/java_codebase_rag/_fdlimit.py @@ -22,7 +22,15 @@ from __future__ import annotations -import resource +try: + # Unix-only: the ``resource`` module does not exist on Windows. 
Importing it + # unconditionally at module scope crashes on Windows, which (because + # ``cli.py`` and ``server.py`` both import ``raise_fd_limit``) made the + # entire CLI and MCP server fail to start there. Guard the import so the + # module loads everywhere; the function below no-ops when it's absent. + import resource +except ImportError: # pragma: no cover - Windows lacks the resource module + resource = None # type: ignore[assignment] # Safe ceiling well above LanceDB's appetite, comfortably below macOS libc # quirks. The hard limit caps it further if lower (locked-down servers). @@ -35,7 +43,7 @@ def raise_fd_limit(cap: int = _DEFAULT_CAP) -> None: Best-effort and silent: never raises. No-op where ``RLIMIT_NOFILE`` is unsupported (Windows) or where the soft limit already meets ``min(hard, cap)``. """ - if not hasattr(resource, "RLIMIT_NOFILE"): + if resource is None or not hasattr(resource, "RLIMIT_NOFILE"): return soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE) target = min(hard, cap) diff --git a/java_codebase_rag/cli.py b/java_codebase_rag/cli.py index b9d9d0d..43f3a65 100644 --- a/java_codebase_rag/cli.py +++ b/java_codebase_rag/cli.py @@ -632,7 +632,17 @@ def _cmd_erase(args: argparse.Namespace) -> int: file=sys.stderr, ) return 2 - ans = input("Delete these paths? [y/N]: ").strip().lower() + try: + ans = input("Delete these paths? [y/N]: ").strip().lower() + except EOFError: + # Non-interactive stdin that nonetheless reported isatty() == True + # (the Windows NUL device is a character device, so isatty() lies). + # Treat it as a refusal instead of crashing with an EOF traceback. 
+ print( + "java-codebase-rag erase: non-interactive stdin; pass --yes to confirm.", + file=sys.stderr, + ) + return 2 if ans not in ("y", "yes"): print("Aborted.", file=sys.stderr) return 2 diff --git a/java_codebase_rag/installer.py b/java_codebase_rag/installer.py index 9bef0ba..be682d9 100644 --- a/java_codebase_rag/installer.py +++ b/java_codebase_rag/installer.py @@ -536,7 +536,7 @@ def merge_mcp_config(config_path: Path, host: HostConfig, *, mcp_command: str) - tmp_name = tmp.name # Atomic rename - os.rename(tmp_name, config_path) + os.replace(tmp_name, config_path) return True except (IOError, OSError) as e: if tmp_name: @@ -1258,7 +1258,7 @@ def _refresh_mcp_config( tmp_name = tmp.name # Atomic rename - os.rename(tmp_name, config_path) + os.replace(tmp_name, config_path) print(f"Updated MCP config at {config_path}") return ArtifactResult(path=config_path, success=True, error=None) diff --git a/pyproject.toml b/pyproject.toml index 5ed84f6..0516754 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,6 +21,9 @@ classifiers = [ "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", "Topic :: Software Development :: Libraries", + "Operating System :: POSIX :: Linux", + "Operating System :: MacOS :: MacOS X", + "Operating System :: Microsoft :: Windows", ] dependencies = [ "cocoindex[lancedb]>=1.0.7,<2", diff --git a/tests/test_config.py b/tests/test_config.py index 2784a15..f4b3762 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -151,6 +151,7 @@ def test_discover_project_root_ignores_stray_index_dir_at_home(self, tmp_path, m (stray_idx / "code_graph.lbug").write_bytes(b"\x00" * 16) monkeypatch.setenv("HOME", str(fake_home)) + monkeypatch.setenv("USERPROFILE", str(fake_home)) # Windows: Path.home() uses %USERPROFILE% result = discover_project_root(project_dir) assert result is None, "stray ~/.java-codebase-rag/ must not anchor at $HOME (#357)" @@ -212,9 +213,11 @@ def test_source_root_from_yaml_absolute(self, 
tmp_path, monkeypatch): # Change cwd to tmp_path so walk-up finds this config monkeypatch.chdir(tmp_path) - # source_root=None triggers walk-up discovery + YAML parsing + # source_root=None triggers walk-up discovery + YAML parsing. + # .resolve() on both sides normalises drive-relative anchoring: + # Windows sees "/some/absolute/path" as C:/some/absolute/path. result = resolve_operator_config(source_root=None) - assert result.source_root == Path(absolute_path) + assert Path(result.source_root).resolve() == Path(absolute_path).resolve() class TestIndexDirRelativeToConfigDir: @@ -297,8 +300,9 @@ def test_source_root_precedence_yaml_over_discovery(self, tmp_path, monkeypatch) # source_root=None triggers walk-up discovery result = resolve_operator_config(source_root=None) - # YAML should override the discovered config dir - assert result.source_root == Path("/yaml/root") + # YAML should override the discovered config dir. .resolve() normalises + # drive-relative anchoring on Windows ("/yaml/root" -> C:/yaml/root). 
+ assert Path(result.source_root).resolve() == Path("/yaml/root").resolve() def test_source_root_precedence_env_over_yaml(self, tmp_path, monkeypatch): """env var wins over YAML source_root.""" @@ -449,6 +453,7 @@ def test_tilde_expansion_preserved(self, monkeypatch): from java_codebase_rag.config import maybe_expand_embedding_model_path monkeypatch.setenv("HOME", "/home/user") + monkeypatch.setenv("USERPROFILE", "/home/user") # Windows expanduser uses %USERPROFILE% assert maybe_expand_embedding_model_path("~/models/minilm") == "/home/user/models/minilm" def test_yaml_base_resolves_relative(self, tmp_path): diff --git a/tests/test_cross_service_resolution_flag.py b/tests/test_cross_service_resolution_flag.py index bdc161c..98e8bc2 100644 --- a/tests/test_cross_service_resolution_flag.py +++ b/tests/test_cross_service_resolution_flag.py @@ -218,6 +218,7 @@ def test_meta_resolution_null_for_old_graphs(tmp_path: Path) -> None: }, ) conn.close() + db.close() LadybugGraph._instance = None LadybugGraph._instance_path = None assert LadybugGraph(str(db_path)).meta()["cross_service_resolution"] is None diff --git a/tests/test_fd_limit.py b/tests/test_fd_limit.py index b325959..be64db7 100644 --- a/tests/test_fd_limit.py +++ b/tests/test_fd_limit.py @@ -11,8 +11,20 @@ from __future__ import annotations +import sys + +import pytest + from java_codebase_rag import _fdlimit +# These tests exercise the Unix-only ``resource.RLIMIT_NOFILE`` raising path. +# ``raise_fd_limit`` no-ops on Windows (where the ``resource`` module is absent), +# so there is nothing to assert there. 
+pytestmark = pytest.mark.skipif( + sys.platform.startswith("win"), + reason="resource.RLIMIT_NOFILE is Unix-only; raise_fd_limit no-ops on Windows", +) + def test_raises_soft_limit_up_to_cap(monkeypatch): """When soft < min(hard, cap), raise soft to the target and keep hard.""" diff --git a/tests/test_feign_not_exposer.py b/tests/test_feign_not_exposer.py index 9a7bcf9..183c39f 100644 --- a/tests/test_feign_not_exposer.py +++ b/tests/test_feign_not_exposer.py @@ -144,6 +144,7 @@ def test_meta_returns_none_for_old_graphs(tmp_path: Path) -> None: }, ) conn.close() + db.close() LadybugGraph._instance = None LadybugGraph._instance_path = None assert LadybugGraph(str(db_path)).meta()["pass4_exposes_suppressed_feign"] is None diff --git a/tests/test_incremental_graph.py b/tests/test_incremental_graph.py index 9c6be83..c6e9560 100644 --- a/tests/test_incremental_graph.py +++ b/tests/test_incremental_graph.py @@ -383,6 +383,7 @@ def test_incremental_phantom_nodes_preserved(self, tmp_path: Path) -> None: phantom_count_before = phantom_result.get_next()[0] conn.close() + db.close() # Initialize hash tracker tracker = FileHashTracker(index_dir) @@ -598,6 +599,7 @@ def test_incremental_no_changes_is_noop(self, tmp_path: Path) -> None: if count_before_result.has_next(): count_before = count_before_result.get_next()[0] conn.close() + db.close() # Initialize hash tracker tracker = FileHashTracker(index_dir) @@ -940,6 +942,7 @@ def test_incremental_preserves_incoming_edges_to_dependent(self, tmp_path: Path) cb_count = cb_result.get_next()[0] assert cb_count > 0, "seeded graph must contain a C->B CALLS edge" conn.close() + db.close() # Initialize hash tracker for all files. 
tracker = FileHashTracker(index_dir) diff --git a/tests/test_java_codebase_rag_cli.py b/tests/test_java_codebase_rag_cli.py index 8c9e1a0..6064d28 100644 --- a/tests/test_java_codebase_rag_cli.py +++ b/tests/test_java_codebase_rag_cli.py @@ -205,12 +205,13 @@ def test_embedding_model_yaml_expands_tilde( ) -> None: monkeypatch.delenv("SBERT_MODEL", raising=False) monkeypatch.setenv("HOME", str(tmp_path / "home")) + monkeypatch.setenv("USERPROFILE", str(tmp_path / "home")) # Windows expanduser uses %USERPROFILE% (tmp_path / ".java-codebase-rag.yml").write_text( "embedding:\n model: ~/models/minilm\n", encoding="utf-8", ) cfg = resolve_operator_config(source_root=tmp_path) - assert cfg.embedding_model == str(tmp_path / "home" / "models" / "minilm") + assert Path(cfg.embedding_model) == tmp_path / "home" / "models" / "minilm" assert cfg.embedding_model_source == "yaml" @@ -247,11 +248,12 @@ def test_embedding_model_cli_quoted_tilde_expanded( """UC10b: quoted CLI argument bypasses shell expansion; helper canonicalises.""" monkeypatch.delenv("SBERT_MODEL", raising=False) monkeypatch.setenv("HOME", str(tmp_path / "home")) + monkeypatch.setenv("USERPROFILE", str(tmp_path / "home")) # Windows expanduser uses %USERPROFILE% cfg = resolve_operator_config( source_root=tmp_path, cli_embedding_model="~/cli/x", # quoted in shell → arrives literal ) - assert cfg.embedding_model == str(tmp_path / "home" / "cli" / "x") + assert Path(cfg.embedding_model) == tmp_path / "home" / "cli" / "x" assert cfg.embedding_model_source == "cli"