Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,19 @@ All notable changes to VecGrep are documented here.

---

## [Unreleased]

### Added

- **`min_score` parameter on `search_code`** — filters out results below a
cosine similarity threshold before returning them. Default is `0.35`, which
is intended to cut low-relevance noise while keeping relevant results. Set to
`0.0` to disable filtering. Values are clamped to `[0.0, 1.0]`.
- **`Min score` field in `get_index_status`** — surfaces the default threshold
so users know filtering is active and how to override it.

---

## [1.7.0] — 2026-03-04

### Added
Expand Down
11 changes: 9 additions & 2 deletions src/vecgrep/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -726,7 +726,7 @@ def index_codebase(


@mcp.tool()
def search_code(query: str, path: str, top_k: int = 8) -> str:
def search_code(query: str, path: str, top_k: int = 8, min_score: float = 0.35) -> str:
"""
Semantically search an indexed codebase for code relevant to a query.

Expand All @@ -741,6 +741,9 @@ def search_code(query: str, path: str, top_k: int = 8) -> str:
E.g. "how does authentication work", "database connection setup"
path: Absolute path to the codebase root directory.
top_k: Number of results to return (default 8, max 20).
min_score: Minimum cosine similarity score to include a result (default 0.35).
Results below this threshold are filtered out as noise. Set to 0.0
to disable filtering.

Returns:
Formatted list of matching code chunks with file:line references and
Expand All @@ -753,6 +756,7 @@ def search_code(query: str, path: str, top_k: int = 8) -> str:
return "Error: query must not be empty"

top_k = max(1, min(top_k, 20))
min_score = max(0.0, min(min_score, 1.0))
root = Path(path).resolve()

# Check if index has data
Expand Down Expand Up @@ -782,6 +786,8 @@ def search_code(query: str, path: str, top_k: int = 8) -> str:
query_vec = emb_provider.embed([query])[0]
results = store.search(query_vec, top_k=top_k)

results = [r for r in results if r["score"] >= min_score]

if not results:
return "No results found. Try re-indexing with index_codebase()."

Expand Down Expand Up @@ -832,7 +838,8 @@ def get_index_status(path: str) -> str:
f" Provider: {s['provider']}\n"
f" Model: {s['model']}\n"
f" Dimensions: {s['dims']}\n"
f" Compute device: {device_label}"
f" Compute device: {device_label}\n"
f" Min score: 0.35 (default, override via search_code min_score param)"
)
except Exception as e:
return f"Error: {e}"
Expand Down
42 changes: 42 additions & 0 deletions tests/test_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,47 @@ def test_auto_indexes_on_first_search(self, tmp_path):
result = search_code("compute function", str(tmp_path), top_k=5)
assert "Error" not in result

def test_min_score_filters_low_scoring_results(self, tmp_path):
    """At ``min_score=0.35`` the 0.80 hit is reported and the 0.20 hit is not."""
    _write_py(tmp_path, "a.py", "def authenticate_user(username, password):\n pass\n")
    _do_index(str(tmp_path))

    # Two stubbed hits on the same chunk that differ only in their score:
    # one above the threshold and one below it.
    shared_fields = {
        "file_path": str(tmp_path / "a.py"),
        "start_line": 1,
        "end_line": 2,
        "content": "def authenticate_user(username, password):\n pass",
    }
    above = {**shared_fields, "score": 0.80}
    below = {**shared_fields, "score": 0.20}

    # Bypass the real vector search so the scores are fully controlled.
    with patch("vecgrep.server.VectorStore.search", return_value=[above, below]):
        output = search_code("auth", str(tmp_path), top_k=5, min_score=0.35)

    assert "0.80" in output
    assert "0.20" not in output

def test_min_score_zero_disables_filtering(self, tmp_path):
    """With ``min_score=0.0`` a hit scoring 0.10 still appears in the output."""
    _write_py(tmp_path, "a.py", "def foo(): pass\n")
    _do_index(str(tmp_path))

    # Single stubbed hit with a score well under the default threshold.
    weak_hit = {
        "file_path": str(tmp_path / "a.py"),
        "start_line": 1,
        "end_line": 1,
        "content": "def foo(): pass",
        "score": 0.10,
    }

    with patch("vecgrep.server.VectorStore.search", return_value=[weak_hit]):
        output = search_code("foo", str(tmp_path), top_k=5, min_score=0.0)

    assert "0.10" in output

def test_min_score_clamped_above_one(self, tmp_path):
    """Passing ``min_score=2.0`` still lets a hit scoring exactly 1.0 through."""
    _write_py(tmp_path, "a.py", "def foo(): pass\n")
    _do_index(str(tmp_path))

    # Stubbed hit with the maximum possible similarity score.
    exact_match = {
        "file_path": str(tmp_path / "a.py"),
        "start_line": 1,
        "end_line": 1,
        "content": "def foo(): pass",
        "score": 1.0,
    }

    with patch("vecgrep.server.VectorStore.search", return_value=[exact_match]):
        output = search_code("foo", str(tmp_path), top_k=5, min_score=2.0)

    # The out-of-range threshold is clamped to 1.0, so only a perfect
    # score can pass the filter.
    assert "1.00" in output

def test_min_score_all_filtered_returns_no_results(self, tmp_path):
    """When the only hit scores below ``min_score`` the output says no results."""
    _write_py(tmp_path, "a.py", "def foo(): pass\n")
    _do_index(str(tmp_path))

    # The only stubbed hit scores 0.10, under the 0.35 threshold used below.
    weak_hit = {
        "file_path": str(tmp_path / "a.py"),
        "start_line": 1,
        "end_line": 1,
        "content": "def foo(): pass",
        "score": 0.10,
    }

    with patch("vecgrep.server.VectorStore.search", return_value=[weak_hit]):
        output = search_code("foo", str(tmp_path), top_k=5, min_score=0.35)

    assert "No results found" in output


# ---------------------------------------------------------------------------
# Concurrency
Expand Down Expand Up @@ -179,6 +220,7 @@ def test_returns_expected_fields(self, tmp_path):
assert "Total chunks" in result
assert "Last indexed" in result
assert "Index size" in result
assert "Min score" in result

def test_nonexistent_path_shows_zero_files(self, tmp_path):
# No indexing — status should still return without raising
Expand Down
Loading