From 54a33d4011e4cba26d28405996c10959266505a4 Mon Sep 17 00:00:00 2001 From: Dmitry Teryaev Date: Mon, 8 Jun 2026 22:31:25 +0300 Subject: [PATCH] fix: set COCOINDEX_SOURCE_MAX_INFLIGHT_ROWS=256 to prevent "too many open files" error Fixes #293. Users experienced "Too many open files (os error 24)" during indexing because CocoIndex's default concurrency (1024 inflight rows) opens more file handles than OS limits allow (typically 256-1024). Default COCOINDEX_SOURCE_MAX_INFLIGHT_ROWS to 256 in both code paths that invoke cocoindex (pipeline.run_cocoindex_update and server._cocoindex_subprocess_env). The value is applied with setdefault, so a value already present in the environment is kept and users can still override the limit. Co-Authored-By: Claude Opus 4.7 --- java_codebase_rag/pipeline.py | 4 ++++ server.py | 3 +++ 2 files changed, 7 insertions(+) diff --git a/java_codebase_rag/pipeline.py b/java_codebase_rag/pipeline.py index 83262c69..bc2d172e 100644 --- a/java_codebase_rag/pipeline.py +++ b/java_codebase_rag/pipeline.py @@ -128,6 +128,10 @@ def run_cocoindex_update( stdout="", stderr=f"java_index_flow_lancedb.py not found under {bd}", ) + # Set CocoIndex concurrency limits to prevent "too many open files" error + # See: https://github.com/HumanBean17/java-codebase-rag/issues/293 + env = env.copy() + env.setdefault("COCOINDEX_SOURCE_MAX_INFLIGHT_ROWS", "256") cmd: list[str] = [str(exe), "update", COCOINDEX_TARGET] if full_reprocess: cmd.extend(["--full-reprocess", "-f"]) diff --git a/server.py b/server.py index 65f737c8..9dc44adb 100644 --- a/server.py +++ b/server.py @@ -162,6 +162,9 @@ def _cocoindex_subprocess_env(project_root: Path) -> dict[str, str]: idx = os.environ.get("JAVA_CODEBASE_RAG_INDEX_DIR", "").strip() if idx: sub_env["JAVA_CODEBASE_RAG_INDEX_DIR"] = str(Path(idx).expanduser().resolve()) + # Set CocoIndex concurrency limits to prevent "too many open files" error + # See: https://github.com/HumanBean17/java-codebase-rag/issues/293 + sub_env.setdefault("COCOINDEX_SOURCE_MAX_INFLIGHT_ROWS", "256") return sub_env