Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 23 additions & 4 deletions build_ast_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -576,7 +576,7 @@ def _load_existing_types(conn: ladybug.Connection, tables: GraphTables, exclude_
query = f"""
MATCH (s:Symbol)
{where}
RETURN s.kind, s.fqn, s.name, s.filename, s.module, s.microservice, s.id
RETURN s.kind, s.fqn, s.name, s.filename, s.module, s.microservice, s.id, s.role
"""
result = conn.execute(query, params)
while result.has_next():
Expand All @@ -585,6 +585,7 @@ def _load_existing_types(conn: ladybug.Connection, tables: GraphTables, exclude_
module = row[4] if len(row) > 4 else ""
microservice = row[5] if len(row) > 5 else ""
node_id = row[6] if len(row) > 6 else ""
role = row[7] if len(row) > 7 else ""

decl = TypeDecl(name, kind, fqn)
package = fqn[: -(len(name) + 1)] if fqn.endswith("." + name) else ""
Expand All @@ -602,6 +603,10 @@ def _load_existing_types(conn: ladybug.Connection, tables: GraphTables, exclude_
tables.types[fqn] = entry
tables.by_simple_name.setdefault(name, []).append(entry)
tables.by_package.setdefault(package, []).append(entry)
# Seed the persisted role so the annotation-less stub is not recomputed to
# the default during node staging (issue #352 divergence #2).
if role:
tables.type_role_by_node_id[node_id] = role


def _load_existing_members(conn: ladybug.Connection, tables: GraphTables, exclude_files: set[str] | None = None) -> None:
Expand Down Expand Up @@ -3207,15 +3212,24 @@ def _write_nodes_impl(
))
# types
for entry in tables.types.values():
if entry.loaded_from_db:
stub_ids.add(entry.node_id)
d = entry.decl
role, capabilities = resolve_role_and_capabilities(
d,
overrides=overrides,
meta_chain=mch,
)
tables.type_role_by_node_id[entry.node_id] = role
if entry.loaded_from_db:
stub_ids.add(entry.node_id)
# Out-of-scope stub: its annotation-less decl collapses role to the
# default. The real node's role was persisted at index time and seeded
# into type_role_by_node_id by _load_existing_types; trust it so CALLS
# edges into this type keep the correct callee_declaring_role (#352).
# The staged row is filtered out of the write via stub_ids, so its
# capabilities placeholder never reaches the graph.
role = tables.type_role_by_node_id.get(entry.node_id, role)
capabilities = []
else:
tables.type_role_by_node_id[entry.node_id] = role
rows.append(_node_row(
id=entry.node_id, kind=d.kind, name=d.name, fqn=d.fqn,
package=entry.package,
Expand Down Expand Up @@ -3896,6 +3910,11 @@ def incremental_rebuild(
# Rebuild full tables for global pass 5-6 (pass1 populates members from scratch)
tables_for_global = GraphTables()
global_asts = pass1_parse(source_root, tables_for_global, verbose=verbose)
# pass4 (routes/EXPOSES) must run on the global pass5/6 tables too (issue
# #352): pass5 links Feign HTTP_CALLS to routes via exposes_rows, and pass6
# matches against routes_rows. Without pass4 both stay empty and the
# HTTP_CALLS match outcome drifts from a full rebuild. Mirrors main().
pass4_routes(tables_for_global, global_asts, source_root=source_root, verbose=verbose)

pass5_imperative_edges(tables_for_global, global_asts, source_root=source_root, verbose=verbose)

Expand Down
180 changes: 166 additions & 14 deletions tests/test_incremental_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -998,22 +998,67 @@ def test_incremental_bulk_write_equivalent_to_full_rebuild(self, tmp_path: Path)
index_dir.mkdir()
ladybug_path = index_dir / "code_graph.lbug"

# Create initial files
(source_root / "A.java").write_text("package pkg; class A { void foo() {} }", encoding="utf-8")
(source_root / "B.java").write_text("package pkg; class B { void bar() {} }", encoding="utf-8")
# Corpus exercising both #352 divergences:
# - Cal: @RestController (CONTROLLER role) + a route; left OUT OF SCOPE so
# it is loaded as an annotation-less stub. Its method is a CALLS callee
# of Caller, so callee_declaring_role must stay CONTROLLER (#352 #2).
# - Feign: a Feign client whose HTTP_CALLS match outcome depends on
# routes/EXPOSES (pass4) being run on the global pass5/6 tables (#352 #1).
# - Caller: the in-scope file we touch.
(source_root / "Cal.java").write_text(
"package pkg;\n"
"import org.springframework.web.bind.annotation.RestController;\n"
"import org.springframework.web.bind.annotation.GetMapping;\n"
"@RestController\n"
"public class Cal {\n"
" @GetMapping(\"/things\")\n"
" public String thing() { return \"\"; }\n"
"}\n",
encoding="utf-8",
)
(source_root / "Feign.java").write_text(
"package pkg;\n"
"import org.springframework.cloud.openfeign.FeignClient;\n"
"import org.springframework.web.bind.annotation.GetMapping;\n"
"@FeignClient(name = \"cal\")\n"
"public interface Feign {\n"
" @GetMapping(\"/things\")\n"
" String thing();\n"
"}\n",
encoding="utf-8",
)
(source_root / "Caller.java").write_text(
"package pkg;\n"
"public class Caller {\n"
" public void doCall() {\n"
" Cal c = new Cal();\n"
" c.thing();\n"
" }\n"
"}\n",
encoding="utf-8",
)

# Initial full build (pass1–6). write_ladybug initializes the hash
# tracker, so incremental_rebuild can detect the change below.
build_ladybug_full_into(source_root, ladybug_path)

# Modify A.java, then run the incremental path.
(source_root / "A.java").write_text(
"package pkg; class A { void foo() {} void baz() {} }", encoding="utf-8"
# Modify Caller.java, then run the incremental path. Cal + Feign stay
# unchanged (out of scope).
(source_root / "Caller.java").write_text(
"package pkg;\n"
"public class Caller {\n"
" public void doCall() {\n"
" Cal c = new Cal();\n"
" c.thing();\n"
" }\n"
" public void extra() {}\n"
"}\n",
encoding="utf-8",
)
result = incremental_rebuild(source_root, ladybug_path, verbose=False)
assert result.mode == "incremental"

def _graph_state(c: ladybug.Connection) -> tuple[int, dict[str, int], dict[str, str]]:
def _graph_state(c: ladybug.Connection) -> dict:
nc = c.execute("MATCH (n) RETURN count(n)")
node_count = nc.get_next()[0] if nc.has_next() else 0
edge_counts: dict[str, int] = {}
Expand All @@ -1032,7 +1077,38 @@ def _graph_state(c: ladybug.Connection) -> tuple[int, dict[str, int], dict[str,
while rr.has_next():
fqn, role = rr.get_next()
roles[fqn] = role
return node_count, edge_counts, roles

# HTTP_CALLS / ASYNC_CALLS match histograms (issue #352 divergence #1):
# incremental must reproduce the full-rebuild match breakdown, which
# requires pass4 (routes/EXPOSES) on the global pass5/6 tables.
def _match_hist(rel: str) -> dict[str, int]:
hist: dict[str, int] = {}
res = c.execute(f"MATCH ()-[r:{rel}]->() RETURN r.match AS m, count(r) AS n")
while res.has_next():
m, n = res.get_next()
hist[str(m) if m else "<none>"] = n
return hist

# CALLS callee_declaring_role multiset (issue #352 divergence #2): an
# out-of-scope callee's declaring-type role must be preserved, not
# recomputed from an annotation-less stub.
callee_roles: dict[str, int] = {}
cr = c.execute(
"MATCH ()-[r:CALLS]->() RETURN r.callee_declaring_role AS role, count(r) AS n"
)
while cr.has_next():
role, n = cr.get_next()
key = str(role) if role else "<none>"
callee_roles[key] = callee_roles.get(key, 0) + n

return {
"nodes": node_count,
"edges": edge_counts,
"roles": roles,
"http_calls_match": _match_hist("HTTP_CALLS"),
"async_calls_match": _match_hist("ASYNC_CALLS"),
"calls_callee_declaring_role": callee_roles,
}

db = ladybug.Database(str(ladybug_path))
conn = ladybug.Connection(db)
Expand All @@ -1056,14 +1132,90 @@ def _graph_state(c: ladybug.Connection) -> tuple[int, dict[str, int], dict[str,
# the same graph as a full rebuild of that state. The previous form
# asserted only `count > 0` and `set(edge_type_keys) == set(edge_type_keys)`
# — a no-op, since every rel type yields a count row even at 0.
assert incremental_state[0] == full_state[0], (
f"node count diverged: incremental={incremental_state[0]} full={full_state[0]}"
assert incremental_state["nodes"] == full_state["nodes"], (
f"node count diverged: incremental={incremental_state['nodes']} full={full_state['nodes']}"
)
assert incremental_state["edges"] == full_state["edges"], (
f"edge counts diverged:\nincremental={incremental_state['edges']}\nfull={full_state['edges']}"
)
assert incremental_state["roles"] == full_state["roles"], (
f"type roles diverged:\nincremental={incremental_state['roles']}\nfull={full_state['roles']}"
)
assert incremental_state["http_calls_match"] == full_state["http_calls_match"], (
"HTTP_CALLS match histogram diverged between incremental and full rebuild (#352):\n"
f"incremental={incremental_state['http_calls_match']}\nfull={full_state['http_calls_match']}\n"
"Consistency guard: with this microservice-less corpus the Feign edge resolves to "
"'phantom' in both paths, so divergence #1 (missing pass4 on the global tables) cannot "
"trip here -- it is guarded structurally in "
"test_incremental_global_pass_runs_pass4_on_global_tables."
)
assert incremental_state[1] == full_state[1], (
f"edge counts diverged:\nincremental={incremental_state[1]}\nfull={full_state[1]}"
assert incremental_state["async_calls_match"] == full_state["async_calls_match"], (
"ASYNC_CALLS match histogram diverged (#352):\n"
f"incremental={incremental_state['async_calls_match']}\nfull={full_state['async_calls_match']}"
)
assert incremental_state["calls_callee_declaring_role"] == full_state["calls_callee_declaring_role"], (
"CALLS callee_declaring_role multiset diverged (out-of-scope stub role collapsed, #352):\n"
f"incremental={incremental_state['calls_callee_declaring_role']}\n"
f"full={full_state['calls_callee_declaring_role']}"
)

def test_incremental_global_pass_runs_pass4_on_global_tables(self, tmp_path: Path) -> None:
"""Structural guard for issue #352 divergence #1: incremental_rebuild must
run pass4_routes on the GLOBAL pass5/6 tables (the full-source rebuild at
step 6), not only on the changed-files tables. Without it, Feign
HTTP_CALLS route linkage drifts from a full rebuild.

The equivalence test's http_calls_match histogram cannot trip on this with
a microservice-less corpus (the route resolves to 'phantom' in both the
incremental and full paths), so this spy asserts the invariant directly:
pass4_routes receives a DISTINCT global tables instance alongside the
changed-files one. Removing the global pass4 call (incremental_rebuild
step 6) leaves only one tables instance -> failure.
"""
import build_ast_graph as bag
from build_ast_graph import incremental_rebuild
from _builders import build_ladybug_full_into

source_root = tmp_path / "src"
source_root.mkdir()
index_dir = tmp_path / "index"
index_dir.mkdir()
ladybug_path = index_dir / "code_graph.lbug"

(source_root / "Cal.java").write_text("package pkg;\npublic class Cal {}\n", encoding="utf-8")
(source_root / "Caller.java").write_text("package pkg;\npublic class Caller {}\n", encoding="utf-8")
build_ladybug_full_into(source_root, ladybug_path)
# Touch the in-scope file so incremental_rebuild takes the incremental path
# (not the no-graph fallback) and reaches its global pass5/6 step.
(source_root / "Caller.java").write_text(
"package pkg;\npublic class Caller { int _touched; }\n", encoding="utf-8"
)

seen_tables: list = []
real_pass4 = bag.pass4_routes

def spy(tables, *args, **kwargs):
seen_tables.append(tables)
return real_pass4(tables, *args, **kwargs)

bag.pass4_routes = spy
try:
result = incremental_rebuild(source_root, ladybug_path, verbose=False)
finally:
bag.pass4_routes = real_pass4

assert result.mode == "incremental", f"expected incremental mode, got {result.mode}"
# pass4 must run on at least two DISTINCT tables instances: the changed-files
# tables and the global (full-source) tables. Removing the global call leaves
# only the changed-files call -> this fails.
assert len(seen_tables) >= 2, (
f"pass4_routes ran {len(seen_tables)} time(s); expected >=2 (changed-files + global) "
"-- the global pass5/6 step must run pass4 on the full-source tables (#352 #1)"
)
assert incremental_state[2] == full_state[2], (
f"type roles diverged:\nincremental={incremental_state[2]}\nfull={full_state[2]}"
distinct = {id(t) for t in seen_tables}
assert len(distinct) >= 2, (
"pass4_routes must run on a DISTINCT global tables instance, not reuse the "
"changed-files tables -- otherwise out-of-scope routes/EXPOSES are missing (#352 #1)"
)

def test_incremental_refreshes_dependent_role_on_meta_chain_change(
Expand Down
Loading