Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,20 @@ on:

jobs:
test:
name: test
runs-on: ubuntu-latest
name: test (${{ matrix.os }})
strategy:
fail-fast: false
matrix:
# Ubuntu is the required gate (blocks merges). macOS + Windows run on
# every code-change PR to surface platform regressions, but stay
# non-blocking (continue-on-error below) until each has been green
# across several merges — the fast test suite exercises the native
# kuzu/ladybug graph layer, which had never been run on these OSes
# before this matrix. Once an OS is reliably green, promote it to a hard gate
# by narrowing the continue-on-error expression so it is false for that OS.
os: [ubuntu-latest, macos-latest, windows-latest]
continue-on-error: ${{ matrix.os != 'ubuntu-latest' }}
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
with:
Expand Down
12 changes: 7 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ The rest of this README is the install, walkthrough, and tool cheat sheet for pu
pip install java-codebase-rag
```

Python **3.11+** required. After install, `java-codebase-rag --help` should print the CLI groups.
Python **3.11+** required, on **Linux, macOS, and Windows** — every native dependency (LanceDB, LadybugDB/kuzu, CocoIndex) ships a wheel for each platform. After install, `java-codebase-rag --help` should print the CLI groups.
The package includes the CocoIndex lifecycle dependency used by `init`, `increment`, `reprocess`, and `erase`.

### Interactive setup (recommended)
Expand Down Expand Up @@ -86,21 +86,23 @@ cd java-codebase-rag

# 2. Build the index (Lance vectors + LadybugDB graph). First run downloads the
# embedding model (~90 MB) and takes ~30-60s on the fixture.
java-codebase-rag init --source-root tests/bank-chat-system --index-dir /tmp/bank-chat-index
java-codebase-rag init --source-root tests/bank-chat-system --index-dir tmp/bank-chat-index

# 3. Inspect what landed (resolved config, edge counts, ontology version)
java-codebase-rag meta --source-root tests/bank-chat-system --index-dir /tmp/bank-chat-index
java-codebase-rag meta --source-root tests/bank-chat-system --index-dir tmp/bank-chat-index
```

> **Windows users:** these smoke-test snippets use POSIX shell syntax (`VAR=value` prefix, `\` line continuations). Run them under **Git Bash** or **WSL**, or skip straight to `java-codebase-rag install`, which wires up MCP registration and configuration without a shell.

Smoke-test the index with two checks (`search_lancedb` ships with the package):

```bash
# Vector search — proves the LanceDB side works
JAVA_CODEBASE_RAG_INDEX_DIR=/tmp/bank-chat-index \
JAVA_CODEBASE_RAG_INDEX_DIR=tmp/bank-chat-index \
python -m search_lancedb "chat ingress controller" --table java --limit 3

# Vector + graph expansion — proves LadybugDB is wired in
JAVA_CODEBASE_RAG_INDEX_DIR=/tmp/bank-chat-index \
JAVA_CODEBASE_RAG_INDEX_DIR=tmp/bank-chat-index \
python -m search_lancedb "chat ingress controller" --table java --limit 3 \
--graph-expand --expand-depth 2
```
Expand Down
8 changes: 8 additions & 0 deletions build_ast_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -3816,13 +3816,15 @@ def incremental_rebuild(
if verbose:
_verbose_stderr_line(f"[increment] ontology version {version} < 17; falling back to full rebuild")
conn.close()
db.close()
del conn, db
return _fallback_to_full(source_root, ladybug_path, verbose, t_start)
except Exception as e:
if verbose:
_verbose_stderr_line(f"[increment] failed to read ontology version: {e}; falling back to full rebuild")
try:
conn.close()
db.close()
except Exception:
pass
del conn, db
Expand All @@ -3841,6 +3843,7 @@ def incremental_rebuild(
if verbose:
_verbose_stderr_line("[increment] no changes detected; no-op")
conn.close()
db.close()
return IncrementalResult(
mode="incremental",
files_changed=0,
Expand All @@ -3859,6 +3862,7 @@ def incremental_rebuild(
if verbose:
_verbose_stderr_line("[increment] crash marker exists; falling back to full rebuild")
conn.close()
db.close()
crash_marker_path.unlink(missing_ok=True)
return _fallback_to_full(source_root, ladybug_path, verbose, t_start)

Expand Down Expand Up @@ -3893,6 +3897,7 @@ def incremental_rebuild(
if verbose:
_verbose_stderr_line(f"[increment] dependent expansion cap ({expansion_cap}) exceeded ({len(scope_files)} files); falling back to full rebuild")
conn.close()
db.close()
crash_marker_path.unlink(missing_ok=True)
return _fallback_to_full(source_root, ladybug_path, verbose, t_start)

Expand Down Expand Up @@ -3977,6 +3982,7 @@ def incremental_rebuild(
crash_marker_path.unlink(missing_ok=True)

conn.close()
db.close()

elapsed = time.time() - t_start
if verbose:
Expand All @@ -3996,6 +4002,7 @@ def incremental_rebuild(
if verbose:
_verbose_stderr_line(f"[increment] error during incremental rebuild: {e}; falling back to full rebuild")
conn.close()
db.close()
crash_marker_path.unlink(missing_ok=True)
return _fallback_to_full(source_root, ladybug_path, verbose, t_start)

Expand Down Expand Up @@ -4200,6 +4207,7 @@ def write_ladybug(
_verbose_stderr_line(f"[graph] writing · routes/exposes written in {time.time() - t2:.2f}s")
_write_meta(conn, tables, source_root)
conn.close()
db.close()
_init_hash_tracker(source_root, db_path)


Expand Down
12 changes: 10 additions & 2 deletions java_codebase_rag/_fdlimit.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,15 @@

from __future__ import annotations

import resource
try:
# Unix-only: the ``resource`` module does not exist on Windows. Importing it
# unconditionally at module scope crashes on Windows, which (because
# ``cli.py`` and ``server.py`` both import ``raise_fd_limit``) made the
# entire CLI and MCP server fail to start there. Guard the import so the
# module loads everywhere; the function below no-ops when it's absent.
import resource
except ImportError: # pragma: no cover - Windows lacks the resource module
resource = None # type: ignore[assignment]

# Safe ceiling well above LanceDB's appetite, comfortably below macOS libc
# quirks. The hard limit caps it further if lower (locked-down servers).
Expand All @@ -35,7 +43,7 @@ def raise_fd_limit(cap: int = _DEFAULT_CAP) -> None:
Best-effort and silent: never raises. No-op where the ``resource`` module or
``RLIMIT_NOFILE`` is unavailable (Windows) or where the soft limit already meets ``min(hard, cap)``.
"""
if not hasattr(resource, "RLIMIT_NOFILE"):
if resource is None or not hasattr(resource, "RLIMIT_NOFILE"):
return
soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
target = min(hard, cap)
Expand Down
12 changes: 11 additions & 1 deletion java_codebase_rag/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -632,7 +632,17 @@ def _cmd_erase(args: argparse.Namespace) -> int:
file=sys.stderr,
)
return 2
ans = input("Delete these paths? [y/N]: ").strip().lower()
try:
ans = input("Delete these paths? [y/N]: ").strip().lower()
except EOFError:
# Non-interactive stdin that nonetheless reported isatty() == True
# (the Windows NUL device is a character device, so isatty() lies).
# Treat it as a refusal instead of crashing with an EOF traceback.
print(
"java-codebase-rag erase: non-interactive stdin; pass --yes to confirm.",
file=sys.stderr,
)
return 2
if ans not in ("y", "yes"):
print("Aborted.", file=sys.stderr)
return 2
Expand Down
4 changes: 2 additions & 2 deletions java_codebase_rag/installer.py
Original file line number Diff line number Diff line change
Expand Up @@ -536,7 +536,7 @@ def merge_mcp_config(config_path: Path, host: HostConfig, *, mcp_command: str) -
tmp_name = tmp.name

# Atomic replace (os.replace also overwrites an existing file on Windows, where os.rename would fail)
os.rename(tmp_name, config_path)
os.replace(tmp_name, config_path)
return True
except (IOError, OSError) as e:
if tmp_name:
Expand Down Expand Up @@ -1258,7 +1258,7 @@ def _refresh_mcp_config(
tmp_name = tmp.name

# Atomic replace (os.replace also overwrites an existing file on Windows, where os.rename would fail)
os.rename(tmp_name, config_path)
os.replace(tmp_name, config_path)
print(f"Updated MCP config at {config_path}")
return ArtifactResult(path=config_path, success=True, error=None)

Expand Down
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ classifiers = [
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Topic :: Software Development :: Libraries",
"Operating System :: POSIX :: Linux",
"Operating System :: MacOS :: MacOS X",
"Operating System :: Microsoft :: Windows",
]
dependencies = [
"cocoindex[lancedb]>=1.0.7,<2",
Expand Down
13 changes: 9 additions & 4 deletions tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ def test_discover_project_root_ignores_stray_index_dir_at_home(self, tmp_path, m
(stray_idx / "code_graph.lbug").write_bytes(b"\x00" * 16)

monkeypatch.setenv("HOME", str(fake_home))
monkeypatch.setenv("USERPROFILE", str(fake_home)) # Windows: Path.home() uses %USERPROFILE%

result = discover_project_root(project_dir)
assert result is None, "stray ~/.java-codebase-rag/ must not anchor at $HOME (#357)"
Expand Down Expand Up @@ -212,9 +213,11 @@ def test_source_root_from_yaml_absolute(self, tmp_path, monkeypatch):
# Change cwd to tmp_path so walk-up finds this config
monkeypatch.chdir(tmp_path)

# source_root=None triggers walk-up discovery + YAML parsing
# source_root=None triggers walk-up discovery + YAML parsing.
# .resolve() on both sides normalises drive-relative anchoring:
# Windows sees "/some/absolute/path" as C:/some/absolute/path.
result = resolve_operator_config(source_root=None)
assert result.source_root == Path(absolute_path)
assert Path(result.source_root).resolve() == Path(absolute_path).resolve()


class TestIndexDirRelativeToConfigDir:
Expand Down Expand Up @@ -297,8 +300,9 @@ def test_source_root_precedence_yaml_over_discovery(self, tmp_path, monkeypatch)

# source_root=None triggers walk-up discovery
result = resolve_operator_config(source_root=None)
# YAML should override the discovered config dir
assert result.source_root == Path("/yaml/root")
# YAML should override the discovered config dir. .resolve() normalises
# drive-relative anchoring on Windows ("/yaml/root" -> C:/yaml/root).
assert Path(result.source_root).resolve() == Path("/yaml/root").resolve()

def test_source_root_precedence_env_over_yaml(self, tmp_path, monkeypatch):
"""env var wins over YAML source_root."""
Expand Down Expand Up @@ -449,6 +453,7 @@ def test_tilde_expansion_preserved(self, monkeypatch):
from java_codebase_rag.config import maybe_expand_embedding_model_path

monkeypatch.setenv("HOME", "/home/user")
monkeypatch.setenv("USERPROFILE", "/home/user") # Windows expanduser uses %USERPROFILE%
assert maybe_expand_embedding_model_path("~/models/minilm") == "/home/user/models/minilm"

def test_yaml_base_resolves_relative(self, tmp_path):
Expand Down
1 change: 1 addition & 0 deletions tests/test_cross_service_resolution_flag.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ def test_meta_resolution_null_for_old_graphs(tmp_path: Path) -> None:
},
)
conn.close()
db.close()
LadybugGraph._instance = None
LadybugGraph._instance_path = None
assert LadybugGraph(str(db_path)).meta()["cross_service_resolution"] is None
Expand Down
12 changes: 12 additions & 0 deletions tests/test_fd_limit.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,20 @@

from __future__ import annotations

import sys

import pytest

from java_codebase_rag import _fdlimit

# These tests exercise the Unix-only ``resource.RLIMIT_NOFILE`` raising path.
# ``raise_fd_limit`` no-ops on Windows (where the ``resource`` module is absent),
# so there is nothing to assert there.
pytestmark = pytest.mark.skipif(
sys.platform.startswith("win"),
reason="resource.RLIMIT_NOFILE is Unix-only; raise_fd_limit no-ops on Windows",
)


def test_raises_soft_limit_up_to_cap(monkeypatch):
"""When soft < min(hard, cap), raise soft to the target and keep hard."""
Expand Down
1 change: 1 addition & 0 deletions tests/test_feign_not_exposer.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ def test_meta_returns_none_for_old_graphs(tmp_path: Path) -> None:
},
)
conn.close()
db.close()
LadybugGraph._instance = None
LadybugGraph._instance_path = None
assert LadybugGraph(str(db_path)).meta()["pass4_exposes_suppressed_feign"] is None
Expand Down
3 changes: 3 additions & 0 deletions tests/test_incremental_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,7 @@ def test_incremental_phantom_nodes_preserved(self, tmp_path: Path) -> None:
phantom_count_before = phantom_result.get_next()[0]

conn.close()
db.close()

# Initialize hash tracker
tracker = FileHashTracker(index_dir)
Expand Down Expand Up @@ -598,6 +599,7 @@ def test_incremental_no_changes_is_noop(self, tmp_path: Path) -> None:
if count_before_result.has_next():
count_before = count_before_result.get_next()[0]
conn.close()
db.close()

# Initialize hash tracker
tracker = FileHashTracker(index_dir)
Expand Down Expand Up @@ -940,6 +942,7 @@ def test_incremental_preserves_incoming_edges_to_dependent(self, tmp_path: Path)
cb_count = cb_result.get_next()[0]
assert cb_count > 0, "seeded graph must contain a C->B CALLS edge"
conn.close()
db.close()

# Initialize hash tracker for all files.
tracker = FileHashTracker(index_dir)
Expand Down
6 changes: 4 additions & 2 deletions tests/test_java_codebase_rag_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,12 +205,13 @@ def test_embedding_model_yaml_expands_tilde(
) -> None:
monkeypatch.delenv("SBERT_MODEL", raising=False)
monkeypatch.setenv("HOME", str(tmp_path / "home"))
monkeypatch.setenv("USERPROFILE", str(tmp_path / "home")) # Windows expanduser uses %USERPROFILE%
(tmp_path / ".java-codebase-rag.yml").write_text(
"embedding:\n model: ~/models/minilm\n",
encoding="utf-8",
)
cfg = resolve_operator_config(source_root=tmp_path)
assert cfg.embedding_model == str(tmp_path / "home" / "models" / "minilm")
assert Path(cfg.embedding_model) == tmp_path / "home" / "models" / "minilm"
assert cfg.embedding_model_source == "yaml"


Expand Down Expand Up @@ -247,11 +248,12 @@ def test_embedding_model_cli_quoted_tilde_expanded(
"""UC10b: quoted CLI argument bypasses shell expansion; helper canonicalises."""
monkeypatch.delenv("SBERT_MODEL", raising=False)
monkeypatch.setenv("HOME", str(tmp_path / "home"))
monkeypatch.setenv("USERPROFILE", str(tmp_path / "home")) # Windows expanduser uses %USERPROFILE%
cfg = resolve_operator_config(
source_root=tmp_path,
cli_embedding_model="~/cli/x", # quoted in shell → arrives literal
)
assert cfg.embedding_model == str(tmp_path / "home" / "cli" / "x")
assert Path(cfg.embedding_model) == tmp_path / "home" / "cli" / "x"
assert cfg.embedding_model_source == "cli"


Expand Down
Loading