Skip to content

Commit 7f50234

Browse files
aksOps and claude committed
fix(detector): anchor nodes for bicep/dockerfile/shell/proto imports
Five remaining detectors emitted file-path → external-name edges without anchor CodeNodes, so Snapshot's phantom filter dropped every edge. Apply base.EnsureFileAnchor + base.EnsureExternalAnchor (the pattern from Phase 4 TS/Python/Rust/C++). Detectors updated: iac/bicep.go — 1 anchor pair (module depends_on); iac/dockerfile.go — 1 anchor pair (FROM depends_on); script/shell/bash.go — 2 anchor pairs (source imports + tool calls); script/shell/powershell.go — 1 anchor pair (Import-Module + dot-source); proto/structure.go — 1 anchor pair (proto import). Each gets a survives-snapshot test asserting that the MODULE + EXTERNAL anchor nodes and the relevant edge kind are present in the detector result. Smoke: fixture-multi-lang phantom drop count = 4 (unchanged; the fixture contains no bicep/dockerfile/shell/proto files, so these detectors don't fire on it — the drop count reflects pre-existing Java/Python/TS phantom edges that are out of scope for this fix). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent ffb3a16 commit 7f50234

10 files changed

Lines changed: 220 additions & 13 deletions

File tree

go/internal/detector/iac/bicep.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ func (d BicepDetector) Detect(ctx *detector.Context) *detector.Result {
3636
var edges []*model.CodeEdge
3737
fp := ctx.FilePath
3838
lines := strings.Split(text, "\n")
39+
seen := map[string]bool{}
3940

4041
for i, line := range lines {
4142
if m := bicepResourceRE.FindStringSubmatch(line); len(m) >= 3 {
@@ -84,7 +85,12 @@ func (d BicepDetector) Detect(ctx *detector.Context) *detector.Result {
8485
n.Properties["module_path"] = modPath
8586
nodes = append(nodes, n)
8687

87-
e := model.NewCodeEdge(fp+":depends_on:"+modPath, model.EdgeDependsOn, fp, modPath)
88+
// Emit anchor nodes so the depends_on edge survives GraphBuilder's
89+
// phantom-drop filter. Without anchors, fp and modPath are free-form
90+
// strings that don't match any CodeNode.
91+
srcID := base.EnsureFileAnchor(ctx, "bicep", "BicepDetector", model.ConfidenceLexical, &nodes, seen)
92+
tgtID := base.EnsureExternalAnchor(modPath, "bicep:external", "BicepDetector", model.ConfidenceLexical, &nodes, seen)
93+
e := model.NewCodeEdge(srcID+":depends_on:"+tgtID, model.EdgeDependsOn, srcID, tgtID)
8894
e.Source = "BicepDetector"
8995
e.Properties["module_name"] = name
9096
edges = append(edges, e)

go/internal/detector/iac/bicep_test.go

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,40 @@ func TestBicepNegative(t *testing.T) {
7171
}
7272
}
7373

74+
// TestBicepImports_EdgeSurvivesSnapshot verifies that the anchor nodes emitted
75+
// alongside module depends_on edges are present in the detector result, so
76+
// GraphBuilder.Snapshot's phantom-drop filter does not discard them.
77+
func TestBicepImports_EdgeSurvivesSnapshot(t *testing.T) {
78+
d := NewBicepDetector()
79+
r := d.Detect(&detector.Context{FilePath: "main.bicep", Language: "bicep", Content: bicepSource})
80+
81+
var moduleNodes, externalNodes int
82+
for _, n := range r.Nodes {
83+
switch n.Kind {
84+
case model.NodeModule:
85+
moduleNodes++
86+
case model.NodeExternal:
87+
externalNodes++
88+
}
89+
}
90+
if moduleNodes == 0 {
91+
t.Fatal("expected at least one MODULE anchor node for the file endpoint")
92+
}
93+
if externalNodes == 0 {
94+
t.Fatal("expected at least one EXTERNAL anchor node for the module target")
95+
}
96+
97+
dependsEdges := 0
98+
for _, e := range r.Edges {
99+
if e.Kind == model.EdgeDependsOn {
100+
dependsEdges++
101+
}
102+
}
103+
if dependsEdges == 0 {
104+
t.Fatal("expected at least one surviving depends_on edge, got 0")
105+
}
106+
}
107+
74108
func TestBicepDeterminism(t *testing.T) {
75109
d := NewBicepDetector()
76110
ctx := &detector.Context{FilePath: "main.bicep", Language: "bicep", Content: bicepSource}

go/internal/detector/iac/dockerfile.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ func (d DockerfileDetector) Detect(ctx *detector.Context) *detector.Result {
4343
var nodes []*model.CodeNode
4444
var edges []*model.CodeEdge
4545
fp := ctx.FilePath
46+
seen := map[string]bool{}
4647

4748
// Stage tracking — alias → node id, plus offsets so we can resolve which
4849
// FROM is the *current* stage at any byte offset later in the file.
@@ -86,7 +87,12 @@ func (d DockerfileDetector) Detect(ctx *detector.Context) *detector.Result {
8687
fromOffsets = append(fromOffsets, fromOffset{offset: m[0], nodeIndex: len(nodes)})
8788
nodes = append(nodes, n)
8889

89-
e := model.NewCodeEdge(fp+":depends_on:"+image, model.EdgeDependsOn, fp, image)
90+
// Emit anchor nodes so the depends_on edge survives GraphBuilder's
91+
// phantom-drop filter. Without anchors, fp and image are free-form
92+
// strings that don't match any CodeNode.
93+
srcID := base.EnsureFileAnchor(ctx, "dockerfile", "DockerfileDetector", model.ConfidenceLexical, &nodes, seen)
94+
tgtID := base.EnsureExternalAnchor(image, "docker:image", "DockerfileDetector", model.ConfidenceLexical, &nodes, seen)
95+
e := model.NewCodeEdge(srcID+":depends_on:"+tgtID, model.EdgeDependsOn, srcID, tgtID)
9096
e.Source = "DockerfileDetector"
9197
edges = append(edges, e)
9298
}

go/internal/detector/iac/dockerfile_test.go

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,40 @@ func TestDockerfileNegative(t *testing.T) {
7878
}
7979
}
8080

81+
// TestDockerfileImports_EdgeSurvivesSnapshot verifies that the anchor nodes
82+
// emitted alongside FROM depends_on edges are present in the detector result,
83+
// so GraphBuilder.Snapshot's phantom-drop filter does not discard them.
84+
func TestDockerfileImports_EdgeSurvivesSnapshot(t *testing.T) {
85+
d := NewDockerfileDetector()
86+
r := d.Detect(&detector.Context{FilePath: "Dockerfile", Language: "dockerfile", Content: dockerfileSource})
87+
88+
var moduleNodes, externalNodes int
89+
for _, n := range r.Nodes {
90+
switch n.Kind {
91+
case model.NodeModule:
92+
moduleNodes++
93+
case model.NodeExternal:
94+
externalNodes++
95+
}
96+
}
97+
if moduleNodes == 0 {
98+
t.Fatal("expected at least one MODULE anchor node for the file endpoint")
99+
}
100+
if externalNodes == 0 {
101+
t.Fatal("expected at least one EXTERNAL anchor node for the image target")
102+
}
103+
104+
dependsEdges := 0
105+
for _, e := range r.Edges {
106+
if e.Kind == model.EdgeDependsOn {
107+
dependsEdges++
108+
}
109+
}
110+
if dependsEdges == 0 {
111+
t.Fatal("expected at least one surviving depends_on edge, got 0")
112+
}
113+
}
114+
81115
func TestDockerfileDeterminism(t *testing.T) {
82116
d := NewDockerfileDetector()
83117
ctx := &detector.Context{FilePath: "Dockerfile", Language: "dockerfile", Content: dockerfileSource}

go/internal/detector/proto/structure.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ func (d StructureDetector) Detect(ctx *detector.Context) *detector.Result {
3939
var edges []*model.CodeEdge
4040
fp := ctx.FilePath
4141
lines := strings.Split(text, "\n")
42+
seen := map[string]bool{}
4243

4344
// Package (first match only)
4445
for i, line := range lines {
@@ -55,11 +56,15 @@ func (d StructureDetector) Detect(ctx *detector.Context) *detector.Result {
5556
}
5657
}
5758

58-
// Imports
59+
// Imports — emit anchor nodes so the imports edge survives GraphBuilder's
60+
// phantom-drop filter. Without anchors, fp and imp are free-form strings
61+
// that don't match any CodeNode.
5962
for _, line := range lines {
6063
if m := protoImportRE.FindStringSubmatch(line); len(m) >= 2 {
6164
imp := m[1]
62-
e := model.NewCodeEdge(fp+":imports:"+imp, model.EdgeImports, fp, imp)
65+
srcID := base.EnsureFileAnchor(ctx, "proto", "ProtoStructureDetector", model.ConfidenceLexical, &nodes, seen)
66+
tgtID := base.EnsureExternalAnchor(imp, "proto:external", "ProtoStructureDetector", model.ConfidenceLexical, &nodes, seen)
67+
e := model.NewCodeEdge(srcID+":imports:"+tgtID, model.EdgeImports, srcID, tgtID)
6368
e.Source = "ProtoStructureDetector"
6469
edges = append(edges, e)
6570
}

go/internal/detector/proto/structure_test.go

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,40 @@ func TestProtoNegative(t *testing.T) {
8080
}
8181
}
8282

83+
// TestProtoImports_EdgeSurvivesSnapshot verifies that the anchor nodes emitted
84+
// alongside proto import edges are present in the detector result, so
85+
// GraphBuilder.Snapshot's phantom-drop filter does not discard them.
86+
func TestProtoImports_EdgeSurvivesSnapshot(t *testing.T) {
87+
d := NewStructureDetector()
88+
r := d.Detect(&detector.Context{FilePath: "api.proto", Language: "proto", Content: protoSource})
89+
90+
var moduleNodes, externalNodes int
91+
for _, n := range r.Nodes {
92+
switch n.Kind {
93+
case model.NodeModule:
94+
moduleNodes++
95+
case model.NodeExternal:
96+
externalNodes++
97+
}
98+
}
99+
if moduleNodes == 0 {
100+
t.Fatal("expected at least one MODULE anchor node for the file endpoint")
101+
}
102+
if externalNodes == 0 {
103+
t.Fatal("expected at least one EXTERNAL anchor node for the import target")
104+
}
105+
106+
importEdges := 0
107+
for _, e := range r.Edges {
108+
if e.Kind == model.EdgeImports {
109+
importEdges++
110+
}
111+
}
112+
if importEdges == 0 {
113+
t.Fatal("expected at least one surviving imports edge, got 0")
114+
}
115+
}
116+
83117
func TestProtoDeterminism(t *testing.T) {
84118
d := NewStructureDetector()
85119
ctx := &detector.Context{FilePath: "api.proto", Language: "proto", Content: protoSource}

go/internal/detector/script/shell/bash.go

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ func (d BashDetector) Detect(ctx *detector.Context) *detector.Result {
3939
var edges []*model.CodeEdge
4040
fp := ctx.FilePath
4141
lines := strings.Split(text, "\n")
42+
seen := map[string]bool{}
4243

4344
// Shebang → MODULE node for the script
4445
if len(lines) > 0 {
@@ -69,9 +70,12 @@ func (d BashDetector) Detect(ctx *detector.Context) *detector.Result {
6970
}
7071

7172
// source ./lib.sh / . helpers.sh
73+
// Emit anchor nodes so the imports edge survives GraphBuilder's phantom-drop.
7274
if m := bashSourceRE.FindStringSubmatch(line); len(m) >= 2 {
7375
src := m[1]
74-
e := model.NewCodeEdge(fp+":sources:"+src, model.EdgeImports, fp, src)
76+
srcID := base.EnsureFileAnchor(ctx, "bash", "BashDetector", model.ConfidenceLexical, &nodes, seen)
77+
tgtID := base.EnsureExternalAnchor(src, "bash:external", "BashDetector", model.ConfidenceLexical, &nodes, seen)
78+
e := model.NewCodeEdge(srcID+":sources:"+tgtID, model.EdgeImports, srcID, tgtID)
7579
e.Source = "BashDetector"
7680
edges = append(edges, e)
7781
}
@@ -89,6 +93,7 @@ func (d BashDetector) Detect(ctx *detector.Context) *detector.Result {
8993
}
9094

9195
// Tool calls — dedup across the whole file, skip comments
96+
// Emit anchor nodes so the calls edges survive GraphBuilder's phantom-drop.
9297
toolsSeen := map[string]bool{}
9398
for _, line := range lines {
9499
stripped := strings.TrimLeft(line, " \t")
@@ -101,7 +106,9 @@ func (d BashDetector) Detect(ctx *detector.Context) *detector.Result {
101106
continue
102107
}
103108
toolsSeen[tool] = true
104-
e := model.NewCodeEdge(fp+":calls:"+tool, model.EdgeCalls, fp, tool)
109+
srcID := base.EnsureFileAnchor(ctx, "bash", "BashDetector", model.ConfidenceLexical, &nodes, seen)
110+
tgtID := base.EnsureExternalAnchor(tool, "bash:tool", "BashDetector", model.ConfidenceLexical, &nodes, seen)
111+
e := model.NewCodeEdge(srcID+":calls:"+tgtID, model.EdgeCalls, srcID, tgtID)
105112
e.Source = "BashDetector"
106113
e.Properties["tool"] = tool
107114
edges = append(edges, e)

go/internal/detector/script/shell/bash_test.go

Lines changed: 43 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,9 @@ func TestBashPositive(t *testing.T) {
3434
for _, n := range r.Nodes {
3535
kinds[n.Kind]++
3636
}
37-
// 1 shebang module
38-
if kinds[model.NodeModule] != 1 {
39-
t.Errorf("expected 1 MODULE (shebang), got %d", kinds[model.NodeModule])
37+
// 1 shebang module + 1 file-anchor module (emitted by import/calls anchor helpers)
38+
if kinds[model.NodeModule] != 2 {
39+
t.Errorf("expected 2 MODULE (shebang + file anchor), got %d", kinds[model.NodeModule])
4040
}
4141
// 2 functions (deploy, cleanup)
4242
if kinds[model.NodeMethod] != 2 {
@@ -75,6 +75,46 @@ func TestBashNegative(t *testing.T) {
7575
}
7676
}
7777

78+
// TestBashImports_EdgeSurvivesSnapshot verifies that the anchor nodes emitted
79+
// alongside source/calls edges are present in the detector result, so
80+
// GraphBuilder.Snapshot's phantom-drop filter does not discard them.
81+
func TestBashImports_EdgeSurvivesSnapshot(t *testing.T) {
82+
d := NewBashDetector()
83+
r := d.Detect(&detector.Context{FilePath: "deploy.sh", Language: "bash", Content: bashSource})
84+
85+
var moduleNodes, externalNodes int
86+
for _, n := range r.Nodes {
87+
switch n.Kind {
88+
case model.NodeModule:
89+
moduleNodes++
90+
case model.NodeExternal:
91+
externalNodes++
92+
}
93+
}
94+
if moduleNodes == 0 {
95+
t.Fatal("expected at least one MODULE anchor node for the file endpoint")
96+
}
97+
if externalNodes == 0 {
98+
t.Fatal("expected at least one EXTERNAL anchor node for imported/called targets")
99+
}
100+
101+
var importEdges, callEdges int
102+
for _, e := range r.Edges {
103+
switch e.Kind {
104+
case model.EdgeImports:
105+
importEdges++
106+
case model.EdgeCalls:
107+
callEdges++
108+
}
109+
}
110+
if importEdges == 0 {
111+
t.Fatal("expected at least one surviving imports edge, got 0")
112+
}
113+
if callEdges == 0 {
114+
t.Fatal("expected at least one surviving calls edge, got 0")
115+
}
116+
}
117+
78118
func TestBashDeterminism(t *testing.T) {
79119
d := NewBashDetector()
80120
ctx := &detector.Context{FilePath: "deploy.sh", Language: "bash", Content: bashSource}

go/internal/detector/script/shell/powershell.go

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ func (d PowerShellDetector) Detect(ctx *detector.Context) *detector.Result {
3939
var edges []*model.CodeEdge
4040
fp := ctx.FilePath
4141
lines := strings.Split(text, "\n")
42+
seen := map[string]bool{}
4243

4344
for i, line := range lines {
4445
// Functions
@@ -66,18 +67,23 @@ func (d PowerShellDetector) Detect(ctx *detector.Context) *detector.Result {
6667
nodes = append(nodes, n)
6768
}
6869

69-
// Import-Module
70+
// Import-Module — emit anchor nodes so the imports edge survives
71+
// GraphBuilder's phantom-drop filter.
7072
if m := psImportRE.FindStringSubmatch(line); len(m) >= 2 {
7173
imp := m[1]
72-
e := model.NewCodeEdge(fp+":imports:"+imp, model.EdgeImports, fp, imp)
74+
srcID := base.EnsureFileAnchor(ctx, "powershell", "PowerShellDetector", model.ConfidenceLexical, &nodes, seen)
75+
tgtID := base.EnsureExternalAnchor(imp, "powershell:external", "PowerShellDetector", model.ConfidenceLexical, &nodes, seen)
76+
e := model.NewCodeEdge(srcID+":imports:"+tgtID, model.EdgeImports, srcID, tgtID)
7377
e.Source = "PowerShellDetector"
7478
edges = append(edges, e)
7579
}
7680

77-
// . path\to\file.ps1
81+
// . path\to\file.ps1 — emit anchor nodes so the imports edge survives.
7882
if m := psDotSourceRE.FindStringSubmatch(line); len(m) >= 2 {
7983
src := m[1]
80-
e := model.NewCodeEdge(fp+":dotsource:"+src, model.EdgeImports, fp, src)
84+
srcID := base.EnsureFileAnchor(ctx, "powershell", "PowerShellDetector", model.ConfidenceLexical, &nodes, seen)
85+
tgtID := base.EnsureExternalAnchor(src, "powershell:external", "PowerShellDetector", model.ConfidenceLexical, &nodes, seen)
86+
e := model.NewCodeEdge(srcID+":dotsource:"+tgtID, model.EdgeImports, srcID, tgtID)
8187
e.Source = "PowerShellDetector"
8288
edges = append(edges, e)
8389
}

go/internal/detector/script/shell/powershell_test.go

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,41 @@ func TestPowerShellNegative(t *testing.T) {
7676
}
7777
}
7878

79+
// TestPowerShellImports_EdgeSurvivesSnapshot verifies that the anchor nodes
80+
// emitted alongside Import-Module/dot-source imports edges are present in the
81+
// detector result, so GraphBuilder.Snapshot's phantom-drop filter does not
82+
// discard them.
83+
func TestPowerShellImports_EdgeSurvivesSnapshot(t *testing.T) {
84+
d := NewPowerShellDetector()
85+
r := d.Detect(&detector.Context{FilePath: "Deploy.ps1", Language: "powershell", Content: psSource})
86+
87+
var moduleNodes, externalNodes int
88+
for _, n := range r.Nodes {
89+
switch n.Kind {
90+
case model.NodeModule:
91+
moduleNodes++
92+
case model.NodeExternal:
93+
externalNodes++
94+
}
95+
}
96+
if moduleNodes == 0 {
97+
t.Fatal("expected at least one MODULE anchor node for the file endpoint")
98+
}
99+
if externalNodes == 0 {
100+
t.Fatal("expected at least one EXTERNAL anchor node for import targets")
101+
}
102+
103+
importEdges := 0
104+
for _, e := range r.Edges {
105+
if e.Kind == model.EdgeImports {
106+
importEdges++
107+
}
108+
}
109+
if importEdges == 0 {
110+
t.Fatal("expected at least one surviving imports edge, got 0")
111+
}
112+
}
113+
79114
func TestPowerShellDeterminism(t *testing.T) {
80115
d := NewPowerShellDetector()
81116
ctx := &detector.Context{FilePath: "Deploy.ps1", Language: "powershell", Content: psSource}

0 commit comments

Comments
 (0)