Skip to content

Commit d6a5f89

Browse files
committed
Add incremental reindex + ADR hints in index/schema/adr responses
Incremental pipeline (pipeline_incremental.c):
- Disk-based delta re-indexing: classifies files by mtime+size vs stored hashes, deletes changed nodes (edges cascade), re-parses only changed files, merges into existing DB via cbm_gbuf_merge_into_store
- pipeline.c auto-routes: existing DB with hashes -> incremental, else full
- Full pipeline persists file hashes after dump for next incremental run
- No-op when nothing changed (watcher polls every 5-60s, 99% skip)
- 4 tests: noop, changed file, deleted file, new file added

ADR hints (ported from Go):
- index_repository: adr_present + adr_hint when no ADR exists
- get_graph_schema: adr_present + adr_hint per project
- manage_adr GET: adr_hint with creation instructions when no ADR
1 parent 54930c6 commit d6a5f89

10 files changed

Lines changed: 794 additions & 162 deletions

File tree

Makefile.cbm

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,7 @@ PIPELINE_SRCS = \
159159
src/pipeline/fqn.c \
160160
src/pipeline/registry.c \
161161
src/pipeline/pipeline.c \
162+
src/pipeline/pipeline_incremental.c \
162163
src/pipeline/worker_pool.c \
163164
src/pipeline/pass_parallel.c \
164165
src/pipeline/pass_definitions.c \

src/graph_buffer/graph_buffer.c

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1082,3 +1082,62 @@ int cbm_gbuf_flush_to_store(cbm_gbuf_t *gb, cbm_store_t *store) {
10821082
free(temp_to_real);
10831083
return 0;
10841084
}
1085+
1086+
/*
 * Merge the contents of a graph buffer into an existing store without
 * deleting the data already stored for the project.
 *
 * Every buffered node is upserted with cbm_store_upsert_node() and every
 * buffered edge is inserted with cbm_store_insert_edge(), bracketed by
 * cbm_store_begin() / cbm_store_commit(). Buffer-local ("temp") node ids are
 * translated to the ids returned by the store; an edge is written only when
 * both of its endpoints were translated.
 *
 * Returns 0 on success, or -1 when gb or store is NULL or the id translation
 * table cannot be allocated (in which case the store is not touched).
 */
int cbm_gbuf_merge_into_store(cbm_gbuf_t *gb, cbm_store_t *store) {
    if (!gb || !store) {
        return -1;
    }

    /* Build temp_id -> real_id map. Allocate it before opening the
     * transaction so an allocation failure leaves the store untouched.
     * At least one slot is requested so that a NULL return always means
     * "out of memory" rather than an implementation-defined calloc(0). */
    int64_t max_temp_id = gb->next_id;
    size_t map_len = (max_temp_id > 0) ? (size_t)max_temp_id : 1;
    int64_t *temp_to_real = calloc(map_len, sizeof(*temp_to_real));
    if (!temp_to_real) {
        return -1;
    }

    /* Begin bulk mode — no project wipe */
    cbm_store_begin(store);

    for (int i = 0; i < gb->nodes.count; i++) {
        cbm_gbuf_node_t *n = gb->nodes.items[i];

        /* Skip nodes whose qualified name has no entry in node_by_qn
         * (presumably nodes removed from the buffer — confirm against
         * the buffer's delete path). */
        if (!cbm_ht_get(gb->node_by_qn, n->qualified_name)) {
            continue;
        }

        cbm_node_t sn = {
            .project = gb->project,
            .label = n->label,
            .name = n->name,
            .qualified_name = n->qualified_name,
            .file_path = n->file_path,
            .start_line = n->start_line,
            .end_line = n->end_line,
            .properties_json = n->properties_json,
        };
        int64_t real_id = cbm_store_upsert_node(store, &sn);

        /* Record the mapping only for a positive store id and a temp id
         * inside [0, max_temp_id); slots left at 0 mean "unmapped". */
        if (real_id > 0 && n->id >= 0 && n->id < max_temp_id) {
            temp_to_real[n->id] = real_id;
        }
    }

    for (int i = 0; i < gb->edges.count; i++) {
        cbm_gbuf_edge_t *e = gb->edges.items[i];

        /* Out-of-range endpoints (including negative ids) resolve to 0. */
        int64_t real_src = (e->source_id >= 0 && e->source_id < max_temp_id)
                               ? temp_to_real[e->source_id]
                               : 0;
        int64_t real_tgt = (e->target_id >= 0 && e->target_id < max_temp_id)
                               ? temp_to_real[e->target_id]
                               : 0;
        if (real_src == 0 || real_tgt == 0) {
            /* At least one endpoint was skipped or failed to upsert. */
            continue;
        }

        cbm_edge_t se = {
            .project = gb->project,
            .source_id = real_src,
            .target_id = real_tgt,
            .type = e->type,
            .properties_json = e->properties_json,
        };
        cbm_store_insert_edge(store, &se);
    }

    cbm_store_commit(store);

    free(temp_to_real);
    return 0;
}

src/graph_buffer/graph_buffer.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,12 @@ int cbm_gbuf_delete_edges_by_type(cbm_gbuf_t *gb, const char *type);
140140
int cbm_gbuf_dump_to_sqlite(cbm_gbuf_t *gb, const char *path);
141141

142142
/* Flush the buffer to an existing store via the store API.
143-
* Used for incremental indexing. Returns 0 on success. */
143+
* Deletes existing project data first. Returns 0 on success. */
144144
int cbm_gbuf_flush_to_store(cbm_gbuf_t *gb, cbm_store_t *store);
145145

146+
/* Merge the buffer into an existing store WITHOUT deleting existing data.
147+
* Upserts nodes, inserts edges. Used for incremental indexing.
148+
* Returns 0 on success. */
149+
int cbm_gbuf_merge_into_store(cbm_gbuf_t *gb, cbm_store_t *store);
150+
146151
#endif /* CBM_GRAPH_BUFFER_H */

src/mcp/mcp.c

Lines changed: 70 additions & 154 deletions
Original file line numberDiff line numberDiff line change
@@ -258,11 +258,13 @@ static const tool_def_t TOOLS[] = {
258258
"name\"]}"},
259259

260260
{"get_code_snippet",
261-
"Get source code for a specific function, class, or symbol by qualified name. Use INSTEAD OF "
262-
"reading entire files when you need one function's implementation.",
263-
"{\"type\":\"object\",\"properties\":{\"qualified_name\":{\"type\":\"string\"},\"project\":{"
264-
"\"type\":\"string\"},\"auto_resolve\":{\"type\":\"boolean\",\"default\":false},\"include_"
265-
"neighbors\":{\"type\":\"boolean\",\"default\":false}},\"required\":[\"qualified_name\"]}"},
261+
"Read source code for a function/class/symbol. IMPORTANT: First call search_graph to find the "
262+
"exact qualified_name, then pass it here. This is a read tool, not a search tool. Accepts "
263+
"full qualified_name (exact match) or short function name (returns suggestions if ambiguous).",
264+
"{\"type\":\"object\",\"properties\":{\"qualified_name\":{\"type\":\"string\",\"description\":"
265+
"\"Full qualified_name from search_graph, or short function name\"},\"project\":{"
266+
"\"type\":\"string\"},\"include_neighbors\":{"
267+
"\"type\":\"boolean\",\"default\":false}},\"required\":[\"qualified_name\"]}"},
266268

267269
{"get_graph_schema", "Get the schema of the knowledge graph (node labels, edge types)",
268270
"{\"type\":\"object\",\"properties\":{\"project\":{\"type\":\"string\"}}}"},
@@ -740,6 +742,24 @@ static char *handle_get_graph_schema(cbm_mcp_server_t *srv, const char *args) {
740742
}
741743
yyjson_mut_obj_add_val(doc, root, "edge_types", types);
742744

745+
/* Check ADR presence */
746+
cbm_project_t proj_info = {0};
747+
if (cbm_store_get_project(store, project, &proj_info) == 0 && proj_info.root_path) {
748+
char adr_path[4096];
749+
snprintf(adr_path, sizeof(adr_path), "%s/.codebase-memory/adr.md", proj_info.root_path);
750+
struct stat adr_st;
751+
// NOLINTNEXTLINE(readability-implicit-bool-conversion)
752+
bool adr_exists = (stat(adr_path, &adr_st) == 0);
753+
yyjson_mut_obj_add_bool(doc, root, "adr_present", adr_exists);
754+
if (!adr_exists) {
755+
yyjson_mut_obj_add_str(
756+
doc, root, "adr_hint",
757+
"No ADR found. Use manage_adr(mode='update') to persist architectural "
758+
"decisions across sessions. Run get_architecture(aspects=['all']) first.");
759+
}
760+
cbm_project_free_fields(&proj_info);
761+
}
762+
743763
char *json = yy_doc_to_str(doc);
744764
yyjson_mut_doc_free(doc);
745765
cbm_store_schema_free(&schema);
@@ -1255,6 +1275,21 @@ static char *handle_index_repository(cbm_mcp_server_t *srv, const char *args) {
12551275
int edges = cbm_store_count_edges(store, project_name);
12561276
yyjson_mut_obj_add_int(doc, root, "nodes", nodes);
12571277
yyjson_mut_obj_add_int(doc, root, "edges", edges);
1278+
1279+
/* Check ADR presence and suggest creation if missing */
1280+
char adr_path[4096];
1281+
snprintf(adr_path, sizeof(adr_path), "%s/.codebase-memory/adr.md", repo_path);
1282+
struct stat adr_st;
1283+
// NOLINTNEXTLINE(readability-implicit-bool-conversion)
1284+
bool adr_exists = (stat(adr_path, &adr_st) == 0);
1285+
yyjson_mut_obj_add_bool(doc, root, "adr_present", adr_exists);
1286+
if (!adr_exists) {
1287+
yyjson_mut_obj_add_str(
1288+
doc, root, "adr_hint",
1289+
"Project indexed. Consider creating an Architecture Decision Record: "
1290+
"explore the codebase with get_architecture(aspects=['all']), then use "
1291+
"manage_adr(mode='store') to persist architectural insights across sessions.");
1292+
}
12581293
}
12591294
}
12601295

@@ -1293,8 +1328,8 @@ static char *snippet_suggestions(const char *input, cbm_node_t *nodes, int count
12931328

12941329
char msg[512];
12951330
snprintf(msg, sizeof(msg),
1296-
"%d matches found for \"%s\" — use a qualified_name "
1297-
"from the suggestions to disambiguate",
1331+
"%d matches for \"%s\". Pick a qualified_name from suggestions below, "
1332+
"or use search_graph(name_pattern=\"...\") to narrow results.",
12981333
count, input);
12991334
yyjson_mut_obj_add_str(doc, root, "message", msg);
13001335

@@ -1472,36 +1507,40 @@ static char *build_snippet_response(cbm_mcp_server_t *srv, cbm_node_t *node,
14721507
static char *handle_get_code_snippet(cbm_mcp_server_t *srv, const char *args) {
14731508
char *qn = cbm_mcp_get_string_arg(args, "qualified_name");
14741509
char *project = cbm_mcp_get_string_arg(args, "project");
1475-
cbm_store_t *store = resolve_store(srv, project);
1476-
bool auto_resolve = cbm_mcp_get_bool_arg(args, "auto_resolve");
14771510
bool include_neighbors = cbm_mcp_get_bool_arg(args, "include_neighbors");
14781511

14791512
if (!qn) {
14801513
free(project);
14811514
return cbm_mcp_text_result("qualified_name is required", true);
14821515
}
1516+
1517+
cbm_store_t *store = resolve_store(srv, project);
14831518
if (!store) {
14841519
free(qn);
14851520
free(project);
1486-
return cbm_mcp_text_result("{\"error\":\"no project loaded\"}", true);
1521+
return cbm_mcp_text_result("no project loaded — run index_repository first", true);
14871522
}
14881523

1524+
/* Default to current project (same as all other tools) */
1525+
const char *effective_project = project ? project : srv->current_project;
1526+
14891527
/* Tier 1: Exact QN match */
14901528
cbm_node_t node = {0};
1491-
int rc = cbm_store_find_node_by_qn(store, project, qn, &node);
1529+
int rc = cbm_store_find_node_by_qn(store, effective_project, qn, &node);
14921530
if (rc == CBM_STORE_OK) {
1493-
char *result =
1494-
build_snippet_response(srv, &node, NULL /*exact*/, include_neighbors, NULL, 0);
1531+
char *result = build_snippet_response(srv, &node, NULL, include_neighbors, NULL, 0);
14951532
free_node_contents(&node);
14961533
free(qn);
14971534
free(project);
14981535
return result;
14991536
}
15001537

1501-
/* Tier 2: QN suffix match */
1538+
/* Tier 2: Suffix match — handles partial QNs ("main.HandleRequest")
1539+
* and short names ("ProcessOrder") via LIKE '%.X'. */
15021540
cbm_node_t *suffix_nodes = NULL;
15031541
int suffix_count = 0;
1504-
cbm_store_find_nodes_by_qn_suffix(store, project, qn, &suffix_nodes, &suffix_count);
1542+
cbm_store_find_nodes_by_qn_suffix(store, effective_project, qn, &suffix_nodes, &suffix_count);
1543+
15051544
if (suffix_count == 1) {
15061545
copy_node(&suffix_nodes[0], &node);
15071546
cbm_store_free_nodes(suffix_nodes, suffix_count);
@@ -1512,154 +1551,23 @@ static char *handle_get_code_snippet(cbm_mcp_server_t *srv, const char *args) {
15121551
return result;
15131552
}
15141553

1515-
/* Tier 3: Short name match */
1516-
cbm_node_t *name_nodes = NULL;
1517-
int name_count = 0;
1518-
cbm_store_find_nodes_by_name(store, project, qn, &name_nodes, &name_count);
1519-
if (name_count == 1) {
1520-
copy_node(&name_nodes[0], &node);
1521-
cbm_store_free_nodes(name_nodes, name_count);
1554+
if (suffix_count > 1) {
1555+
char *result = snippet_suggestions(qn, suffix_nodes, suffix_count);
15221556
cbm_store_free_nodes(suffix_nodes, suffix_count);
1523-
char *result = build_snippet_response(srv, &node, "name", include_neighbors, NULL, 0);
1524-
free_node_contents(&node);
1525-
free(qn);
1526-
free(project);
1527-
return result;
1528-
}
1529-
1530-
/* Ambiguous: collect candidates from suffix + name tiers (dedup by id) */
1531-
int total_cand = suffix_count + name_count;
1532-
if (total_cand > 0) {
1533-
/* Dedup by node ID */
1534-
cbm_node_t *candidates = calloc((size_t)total_cand, sizeof(cbm_node_t));
1535-
int cand_count = 0;
1536-
1537-
for (int i = 0; i < suffix_count; i++) {
1538-
copy_node(&suffix_nodes[i], &candidates[cand_count++]);
1539-
}
1540-
for (int i = 0; i < name_count; i++) {
1541-
bool dup = false;
1542-
for (int j = 0; j < cand_count; j++) {
1543-
if (candidates[j].id == name_nodes[i].id) {
1544-
dup = true;
1545-
break;
1546-
}
1547-
}
1548-
if (!dup) {
1549-
copy_node(&name_nodes[i], &candidates[cand_count++]);
1550-
}
1551-
}
1552-
1553-
cbm_store_free_nodes(suffix_nodes, suffix_count);
1554-
cbm_store_free_nodes(name_nodes, name_count);
1555-
1556-
/* Auto-resolve: pick best candidate by degree */
1557-
if (auto_resolve && cand_count >= 2 && cand_count <= 2) {
1558-
/* Find best: highest total degree, prefer non-test files */
1559-
int best_idx = 0;
1560-
int best_deg = -1;
1561-
bool best_is_test = false;
1562-
for (int i = 0; i < cand_count; i++) {
1563-
int in_d = 0;
1564-
int out_d = 0;
1565-
cbm_store_node_degree(store, candidates[i].id, &in_d, &out_d);
1566-
int deg = in_d + out_d;
1567-
bool is_test =
1568-
// NOLINTNEXTLINE(readability-implicit-bool-conversion)
1569-
candidates[i].file_path && strstr(candidates[i].file_path, "_test") != NULL;
1570-
if (i == 0 || (best_is_test && !is_test) ||
1571-
(!best_is_test == !is_test && deg > best_deg) ||
1572-
(!best_is_test == !is_test && deg == best_deg && candidates[i].qualified_name &&
1573-
best_idx >= 0 && candidates[best_idx].qualified_name &&
1574-
strcmp(candidates[i].qualified_name, candidates[best_idx].qualified_name) <
1575-
0)) {
1576-
best_idx = i;
1577-
best_deg = deg;
1578-
best_is_test = is_test;
1579-
}
1580-
}
1581-
1582-
copy_node(&candidates[best_idx], &node);
1583-
1584-
/* Build alternatives list (skip the picked one) */
1585-
cbm_node_t *alts = calloc((size_t)(cand_count - 1), sizeof(cbm_node_t));
1586-
int alt_count = 0;
1587-
for (int i = 0; i < cand_count; i++) {
1588-
if (i != best_idx) {
1589-
copy_node(&candidates[i], &alts[alt_count++]);
1590-
}
1591-
}
1592-
1593-
for (int i = 0; i < cand_count; i++) {
1594-
free_node_contents(&candidates[i]);
1595-
}
1596-
free(candidates);
1597-
1598-
char *result =
1599-
build_snippet_response(srv, &node, "auto_best", include_neighbors, alts, alt_count);
1600-
free_node_contents(&node);
1601-
for (int i = 0; i < alt_count; i++) {
1602-
free_node_contents(&alts[i]);
1603-
}
1604-
free(alts);
1605-
free(qn);
1606-
free(project);
1607-
return result;
1608-
}
1609-
1610-
/* Return suggestions */
1611-
char *result = snippet_suggestions(qn, candidates, cand_count);
1612-
for (int i = 0; i < cand_count; i++) {
1613-
free_node_contents(&candidates[i]);
1614-
}
1615-
free(candidates);
16161557
free(qn);
16171558
free(project);
16181559
return result;
16191560
}
16201561

16211562
cbm_store_free_nodes(suffix_nodes, suffix_count);
1622-
cbm_store_free_nodes(name_nodes, name_count);
1623-
1624-
/* Tier 4: Fuzzy — try last segment for name-based search */
1625-
const char *dot = strrchr(qn, '.');
1626-
const char *search_name = dot ? dot + 1 : qn;
1627-
1628-
/* Use search with name pattern for fuzzy matching */
1629-
cbm_search_params_t params = {0};
1630-
params.project = project;
1631-
params.name_pattern = search_name;
1632-
params.limit = 5;
1633-
params.min_degree = -1;
1634-
params.max_degree = -1;
1635-
const char *excl[] = {"Community", NULL};
1636-
params.exclude_labels = excl;
1637-
1638-
cbm_search_output_t search_out = {0};
1639-
if (cbm_store_search(store, &params, &search_out) == CBM_STORE_OK && search_out.count > 0) {
1640-
/* Build suggestions from search results */
1641-
cbm_node_t *fuzzy = calloc((size_t)search_out.count, sizeof(cbm_node_t));
1642-
for (int i = 0; i < search_out.count; i++) {
1643-
copy_node(&search_out.results[i].node, &fuzzy[i]);
1644-
}
1645-
int fuzzy_count = search_out.count;
1646-
cbm_store_search_free(&search_out);
1647-
1648-
char *result = snippet_suggestions(qn, fuzzy, fuzzy_count);
1649-
for (int i = 0; i < fuzzy_count; i++) {
1650-
free_node_contents(&fuzzy[i]);
1651-
}
1652-
free(fuzzy);
1653-
free(qn);
1654-
free(project);
1655-
return result;
1656-
}
1657-
cbm_store_search_free(&search_out);
1658-
1659-
/* Nothing found */
16601563
free(qn);
16611564
free(project);
1662-
return cbm_mcp_text_result("symbol not found", true);
1565+
1566+
/* Nothing found — guide the caller toward search_graph */
1567+
return cbm_mcp_text_result(
1568+
"symbol not found. Use search_graph(name_pattern=\"...\") first to discover "
1569+
"the exact qualified_name, then pass it to get_code_snippet.",
1570+
true);
16631571
}
16641572

16651573
/* ── search_code ──────────────────────────────────────────────── */
@@ -1960,6 +1868,14 @@ static char *handle_manage_adr(cbm_mcp_server_t *srv, const char *args) {
19601868
} else {
19611869
yyjson_mut_obj_add_str(doc, root_obj, "content", "");
19621870
yyjson_mut_obj_add_str(doc, root_obj, "status", "no_adr");
1871+
yyjson_mut_obj_add_str(
1872+
doc, root_obj, "adr_hint",
1873+
"No ADR yet. Create one with manage_adr(mode='update', "
1874+
"content='## PURPOSE\\n...\\n\\n## STACK\\n...\\n\\n## ARCHITECTURE\\n..."
1875+
"\\n\\n## PATTERNS\\n...\\n\\n## TRADEOFFS\\n...\\n\\n## PHILOSOPHY\\n...'). "
1876+
"For guided creation: explore the codebase with get_architecture, "
1877+
"then draft and store. Sections: PURPOSE, STACK, ARCHITECTURE, "
1878+
"PATTERNS, TRADEOFFS, PHILOSOPHY.");
19631879
}
19641880
}
19651881

0 commit comments

Comments
 (0)