diff --git a/java_codebase_rag/pipeline.py b/java_codebase_rag/pipeline.py index 83262c69..bc2d172e 100644 --- a/java_codebase_rag/pipeline.py +++ b/java_codebase_rag/pipeline.py @@ -128,6 +128,10 @@ def run_cocoindex_update( stdout="", stderr=f"java_index_flow_lancedb.py not found under {bd}", ) + # Set CocoIndex concurrency limits to prevent "too many open files" error + # See: https://github.com/HumanBean17/java-codebase-rag/issues/293 + env = env.copy() + env.setdefault("COCOINDEX_SOURCE_MAX_INFLIGHT_ROWS", "256") cmd: list[str] = [str(exe), "update", COCOINDEX_TARGET] if full_reprocess: cmd.extend(["--full-reprocess", "-f"]) diff --git a/server.py b/server.py index 65f737c8..9dc44adb 100644 --- a/server.py +++ b/server.py @@ -162,6 +162,9 @@ def _cocoindex_subprocess_env(project_root: Path) -> dict[str, str]: idx = os.environ.get("JAVA_CODEBASE_RAG_INDEX_DIR", "").strip() if idx: sub_env["JAVA_CODEBASE_RAG_INDEX_DIR"] = str(Path(idx).expanduser().resolve()) + # Set CocoIndex concurrency limits to prevent "too many open files" error + # See: https://github.com/HumanBean17/java-codebase-rag/issues/293 + sub_env.setdefault("COCOINDEX_SOURCE_MAX_INFLIGHT_ROWS", "256") return sub_env