From d9c6323387a4f03db4e0bd1da520c107314b644a Mon Sep 17 00:00:00 2001 From: Dmitry Teryaev Date: Fri, 3 Jul 2026 23:48:48 +0300 Subject: [PATCH 1/2] fix(graph): incremental global pass5/6 runs pass4; preserve stub callee role (#352) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Incremental rebuild diverged from a full rebuild in two ways: 1. The global pass5/6 step rebuilt its tables via pass1 only — no pass4 — so tables_for_global.exposes_rows/routes_rows stayed empty. pass5 links Feign HTTP_CALLS to routes via exposes_rows and pass6 matches against routes_rows, so the HTTP_CALLS match outcome drifted from the full rebuild. Run pass4 on tables_for_global (mirrors main()). 2. _load_existing_types loaded out-of-scope types as annotation-less stubs but not their persisted role, so _write_nodes_impl recomputed role via resolve_role_and_capabilities on the stub and collapsed CONTROLLER/SERVICE/... to OTHER — corrupting callee_declaring_role on CALLS edges into the type. Load s.role and seed type_role_by_node_id; for loaded_from_db stubs trust the persisted role instead of overwriting it. Strengthens the incremental==full equivalence test to also assert HTTP_CALLS / ASYNC_CALLS match histograms and the CALLS callee_declaring_role multiset, on a corpus with a route, a Feign client, and an out-of-scope @RestController callee. Co-Authored-By: Claude --- build_ast_graph.py | 27 ++++++-- tests/test_incremental_graph.py | 117 ++++++++++++++++++++++++++++---- 2 files changed, 126 insertions(+), 18 deletions(-) diff --git a/build_ast_graph.py b/build_ast_graph.py index 3e75a32c..6a356844 100644 --- a/build_ast_graph.py +++ b/build_ast_graph.py @@ -576,7 +576,7 @@ def _load_existing_types(conn: ladybug.Connection, tables: GraphTables, exclude_ query = f""" MATCH (s:Symbol) {where} - RETURN s.kind, s.fqn, s.name, s.filename, s.module, s.microservice, s.id + RETURN s.kind, s.fqn, s.name, s.filename, s.module, s.microservice, s.id, s.role """ result = conn.execute(query, params) while result.has_next(): @@ -585,6 +585,7 @@ def _load_existing_types(conn: ladybug.Connection, tables: GraphTables, exclude_ module = row[4] if len(row) > 4 else "" microservice = row[5] if len(row) > 5 else "" node_id = row[6] if len(row) > 6 else "" + role = row[7] if len(row) > 7 else "" decl = TypeDecl(name, kind, fqn) package = fqn[: -(len(name) + 1)] if fqn.endswith("." + name) else "" @@ -602,6 +603,10 @@ def _load_existing_types(conn: ladybug.Connection, tables: GraphTables, exclude_ tables.types[fqn] = entry tables.by_simple_name.setdefault(name, []).append(entry) tables.by_package.setdefault(package, []).append(entry) + # Seed the persisted role so the annotation-less stub is not recomputed to + # the default during node staging (issue #352 divergence #2). + if role: + tables.type_role_by_node_id[node_id] = role def _load_existing_members(conn: ladybug.Connection, tables: GraphTables, exclude_files: set[str] | None = None) -> None: @@ -3207,15 +3212,24 @@ def _write_nodes_impl( )) # types for entry in tables.types.values(): - if entry.loaded_from_db: - stub_ids.add(entry.node_id) d = entry.decl role, capabilities = resolve_role_and_capabilities( d, overrides=overrides, meta_chain=mch, ) - tables.type_role_by_node_id[entry.node_id] = role + if entry.loaded_from_db: + stub_ids.add(entry.node_id) + # Out-of-scope stub: its annotation-less decl collapses role to the + # default. The real node's role was persisted at index time and seeded + # into type_role_by_node_id by _load_existing_types; trust it so CALLS + # edges into this type keep the correct callee_declaring_role (#352). + # The staged row is filtered out of the write via stub_ids, so its + # capabilities placeholder never reaches the graph. + role = tables.type_role_by_node_id.get(entry.node_id, role) + capabilities = [] + else: + tables.type_role_by_node_id[entry.node_id] = role rows.append(_node_row( id=entry.node_id, kind=d.kind, name=d.name, fqn=d.fqn, package=entry.package, @@ -3896,6 +3910,11 @@ def incremental_rebuild( # Rebuild full tables for global pass 5-6 (pass1 populates members from scratch) tables_for_global = GraphTables() global_asts = pass1_parse(source_root, tables_for_global, verbose=verbose) + # pass4 (routes/EXPOSES) must run on the global pass5/6 tables too (issue + # #352): pass5 links Feign HTTP_CALLS to routes via exposes_rows, and pass6 + # matches against routes_rows. Without pass4 both stay empty and the + # HTTP_CALLS match outcome drifts from a full rebuild. Mirrors main(). + pass4_routes(tables_for_global, global_asts, source_root=source_root, verbose=verbose) pass5_imperative_edges(tables_for_global, global_asts, source_root=source_root, verbose=verbose) diff --git a/tests/test_incremental_graph.py b/tests/test_incremental_graph.py index 675b8232..e5adfbdf 100644 --- a/tests/test_incremental_graph.py +++ b/tests/test_incremental_graph.py @@ -998,22 +998,67 @@ def test_incremental_bulk_write_equivalent_to_full_rebuild(self, tmp_path: Path) index_dir.mkdir() ladybug_path = index_dir / "code_graph.lbug" - # Create initial files - (source_root / "A.java").write_text("package pkg; class A { void foo() {} }", encoding="utf-8") - (source_root / "B.java").write_text("package pkg; class B { void bar() {} }", encoding="utf-8") + # Corpus exercising both #352 divergences: + # - Cal: @RestController (CONTROLLER role) + a route; left OUT OF SCOPE so + # it is loaded as an annotation-less stub. Its method is a CALLS callee + # of Caller, so callee_declaring_role must stay CONTROLLER (#352 #2). + # - Feign: a Feign client whose HTTP_CALLS match outcome depends on + # routes/EXPOSES (pass4) being run on the global pass5/6 tables (#352 #1). + # - Caller: the in-scope file we touch. + (source_root / "Cal.java").write_text( + "package pkg;\n" + "import org.springframework.web.bind.annotation.RestController;\n" + "import org.springframework.web.bind.annotation.GetMapping;\n" + "@RestController\n" + "public class Cal {\n" + " @GetMapping(\"/things\")\n" + " public String thing() { return \"\"; }\n" + "}\n", + encoding="utf-8", + ) + (source_root / "Feign.java").write_text( + "package pkg;\n" + "import org.springframework.cloud.openfeign.FeignClient;\n" + "import org.springframework.web.bind.annotation.GetMapping;\n" + "@FeignClient(name = \"cal\")\n" + "public interface Feign {\n" + " @GetMapping(\"/things\")\n" + " String thing();\n" + "}\n", + encoding="utf-8", + ) + (source_root / "Caller.java").write_text( + "package pkg;\n" + "public class Caller {\n" + " public void doCall() {\n" + " Cal c = new Cal();\n" + " c.thing();\n" + " }\n" + "}\n", + encoding="utf-8", + ) # Initial full build (pass1–6). write_ladybug initializes the hash # tracker, so incremental_rebuild can detect the change below. build_ladybug_full_into(source_root, ladybug_path) - # Modify A.java, then run the incremental path. - (source_root / "A.java").write_text( - "package pkg; class A { void foo() {} void baz() {} }", encoding="utf-8" + # Modify Caller.java, then run the incremental path. Cal + Feign stay + # unchanged (out of scope). + (source_root / "Caller.java").write_text( + "package pkg;\n" + "public class Caller {\n" + " public void doCall() {\n" + " Cal c = new Cal();\n" + " c.thing();\n" + " }\n" + " public void extra() {}\n" + "}\n", + encoding="utf-8", ) result = incremental_rebuild(source_root, ladybug_path, verbose=False) assert result.mode == "incremental" - def _graph_state(c: ladybug.Connection) -> tuple[int, dict[str, int], dict[str, str]]: + def _graph_state(c: ladybug.Connection) -> dict: nc = c.execute("MATCH (n) RETURN count(n)") node_count = nc.get_next()[0] if nc.has_next() else 0 edge_counts: dict[str, int] = {} @@ -1032,7 +1077,38 @@ def _graph_state(c: ladybug.Connection) -> tuple[int, dict[str, int], dict[str, while rr.has_next(): fqn, role = rr.get_next() roles[fqn] = role - return node_count, edge_counts, roles + + # HTTP_CALLS / ASYNC_CALLS match histograms (issue #352 divergence #1): + # incremental must reproduce the full-rebuild match breakdown, which + # requires pass4 (routes/EXPOSES) on the global pass5/6 tables. + def _match_hist(rel: str) -> dict[str, int]: + hist: dict[str, int] = {} + res = c.execute(f"MATCH ()-[r:{rel}]->() RETURN r.match AS m, count(r) AS n") + while res.has_next(): + m, n = res.get_next() + hist[str(m) if m else ""] = n + return hist + + # CALLS callee_declaring_role multiset (issue #352 divergence #2): an + # out-of-scope callee's declaring-type role must be preserved, not + # recomputed from an annotation-less stub. + callee_roles: dict[str, int] = {} + cr = c.execute( + "MATCH ()-[r:CALLS]->() RETURN r.callee_declaring_role AS role, count(r) AS n" + ) + while cr.has_next(): + role, n = cr.get_next() + key = str(role) if role else "" + callee_roles[key] = callee_roles.get(key, 0) + n + + return { + "nodes": node_count, + "edges": edge_counts, + "roles": roles, + "http_calls_match": _match_hist("HTTP_CALLS"), + "async_calls_match": _match_hist("ASYNC_CALLS"), + "calls_callee_declaring_role": callee_roles, + } db = ladybug.Database(str(ladybug_path)) conn = ladybug.Connection(db) @@ -1056,14 +1132,27 @@ def _graph_state(c: ladybug.Connection) -> tuple[int, dict[str, int], dict[str, # the same graph as a full rebuild of that state. The previous form # asserted only `count > 0` and `set(edge_type_keys) == set(edge_type_keys)` # — a no-op, since every rel type yields a count row even at 0. - assert incremental_state[0] == full_state[0], ( - f"node count diverged: incremental={incremental_state[0]} full={full_state[0]}" + assert incremental_state["nodes"] == full_state["nodes"], ( + f"node count diverged: incremental={incremental_state['nodes']} full={full_state['nodes']}" + ) + assert incremental_state["edges"] == full_state["edges"], ( + f"edge counts diverged:\nincremental={incremental_state['edges']}\nfull={full_state['edges']}" + ) + assert incremental_state["roles"] == full_state["roles"], ( + f"type roles diverged:\nincremental={incremental_state['roles']}\nfull={full_state['roles']}" + ) + assert incremental_state["http_calls_match"] == full_state["http_calls_match"], ( + "HTTP_CALLS match histogram diverged (missing pass4 on global pass5/6 tables, #352):\n" + f"incremental={incremental_state['http_calls_match']}\nfull={full_state['http_calls_match']}" ) - assert incremental_state[1] == full_state[1], ( - f"edge counts diverged:\nincremental={incremental_state[1]}\nfull={full_state[1]}" + assert incremental_state["async_calls_match"] == full_state["async_calls_match"], ( + "ASYNC_CALLS match histogram diverged (#352):\n" + f"incremental={incremental_state['async_calls_match']}\nfull={full_state['async_calls_match']}" ) - assert incremental_state[2] == full_state[2], ( - f"type roles diverged:\nincremental={incremental_state[2]}\nfull={full_state[2]}" + assert incremental_state["calls_callee_declaring_role"] == full_state["calls_callee_declaring_role"], ( + "CALLS callee_declaring_role multiset diverged (out-of-scope stub role collapsed, #352):\n" + f"incremental={incremental_state['calls_callee_declaring_role']}\n" + f"full={full_state['calls_callee_declaring_role']}" ) def test_incremental_refreshes_dependent_role_on_meta_chain_change( From 4150444aef01a9a67ed91a08167b04302fe589d2 Mon Sep 17 00:00:00 2001 From: Dmitry Teryaev Date: Sat, 4 Jul 2026 09:03:41 +0300 Subject: [PATCH 2/2] test(graph): structural guard for pass4 on global tables (#352 #1) The equivalence test's http_calls_match histogram claimed to guard "missing pass4 on global pass5/6 tables" but could not trip on it: the synthetic corpus has no microservice, so the Feign route is filtered out in pass6 and resolves to 'phantom' in both the incremental and full paths. The assertion was a non-load-bearing consistency check. - Add test_incremental_global_pass_runs_pass4_on_global_tables: a spy on pass4_routes that asserts it runs on a DISTINCT global tables instance (the full-source rebuild) alongside the changed-files tables. Removing the global pass4 call (incremental_rebuild step 6) drops the call count to 1 -> failure. Verified red->green. - Correct the equivalence test's http_calls_match message to state plainly that it is a consistency guard, and point to the structural test for divergence #1. Co-Authored-By: Claude --- tests/test_incremental_graph.py | 67 ++++++++++++++++++++++++++++++++- 1 file changed, 65 insertions(+), 2 deletions(-) diff --git a/tests/test_incremental_graph.py b/tests/test_incremental_graph.py index e5adfbdf..9c6be832 100644 --- a/tests/test_incremental_graph.py +++ b/tests/test_incremental_graph.py @@ -1142,8 +1142,12 @@ def _match_hist(rel: str) -> dict[str, int]: f"type roles diverged:\nincremental={incremental_state['roles']}\nfull={full_state['roles']}" ) assert incremental_state["http_calls_match"] == full_state["http_calls_match"], ( - "HTTP_CALLS match histogram diverged (missing pass4 on global pass5/6 tables, #352):\n" - f"incremental={incremental_state['http_calls_match']}\nfull={full_state['http_calls_match']}" + "HTTP_CALLS match histogram diverged between incremental and full rebuild (#352):\n" + f"incremental={incremental_state['http_calls_match']}\nfull={full_state['http_calls_match']}\n" + "Consistency guard: with this microservice-less corpus the Feign edge resolves to " + "'phantom' in both paths, so divergence #1 (missing pass4 on the global tables) cannot " + "trip here -- it is guarded structurally in " + "test_incremental_global_pass_runs_pass4_on_global_tables." ) assert incremental_state["async_calls_match"] == full_state["async_calls_match"], ( "ASYNC_CALLS match histogram diverged (#352):\n" @@ -1155,6 +1159,65 @@ def _match_hist(rel: str) -> dict[str, int]: f"full={full_state['calls_callee_declaring_role']}" ) + def test_incremental_global_pass_runs_pass4_on_global_tables(self, tmp_path: Path) -> None: + """Structural guard for issue #352 divergence #1: incremental_rebuild must + run pass4_routes on the GLOBAL pass5/6 tables (the full-source rebuild at + step 6), not only on the changed-files tables. Without it, Feign + HTTP_CALLS route linkage drifts from a full rebuild. + + The equivalence test's http_calls_match histogram cannot trip on this with + a microservice-less corpus (the route resolves to 'phantom' in both the + incremental and full paths), so this spy asserts the invariant directly: + pass4_routes receives a DISTINCT global tables instance alongside the + changed-files one. Removing the global pass4 call (incremental_rebuild + step 6) leaves only one tables instance -> failure. + """ + import build_ast_graph as bag + from build_ast_graph import incremental_rebuild + from _builders import build_ladybug_full_into + + source_root = tmp_path / "src" + source_root.mkdir() + index_dir = tmp_path / "index" + index_dir.mkdir() + ladybug_path = index_dir / "code_graph.lbug" + + (source_root / "Cal.java").write_text("package pkg;\npublic class Cal {}\n", encoding="utf-8") + (source_root / "Caller.java").write_text("package pkg;\npublic class Caller {}\n", encoding="utf-8") + build_ladybug_full_into(source_root, ladybug_path) + # Touch the in-scope file so incremental_rebuild takes the incremental path + # (not the no-graph fallback) and reaches its global pass5/6 step. + (source_root / "Caller.java").write_text( + "package pkg;\npublic class Caller { int _touched; }\n", encoding="utf-8" + ) + + seen_tables: list = [] + real_pass4 = bag.pass4_routes + + def spy(tables, *args, **kwargs): + seen_tables.append(tables) + return real_pass4(tables, *args, **kwargs) + + bag.pass4_routes = spy + try: + result = incremental_rebuild(source_root, ladybug_path, verbose=False) + finally: + bag.pass4_routes = real_pass4 + + assert result.mode == "incremental", f"expected incremental mode, got {result.mode}" + # pass4 must run on at least two DISTINCT tables instances: the changed-files + # tables and the global (full-source) tables. Removing the global call leaves + # only the changed-files call -> this fails. + assert len(seen_tables) >= 2, ( + f"pass4_routes ran {len(seen_tables)} time(s); expected >=2 (changed-files + global) " + "-- the global pass5/6 step must run pass4 on the full-source tables (#352 #1)" + ) + distinct = {id(t) for t in seen_tables} + assert len(distinct) >= 2, ( + "pass4_routes must run on a DISTINCT global tables instance, not reuse the " + "changed-files tables -- otherwise out-of-scope routes/EXPOSES are missing (#352 #1)" + ) + def test_incremental_refreshes_dependent_role_on_meta_chain_change( self, tmp_path: Path ) -> None: