Skip to content

Commit 6e80b8b

Browse files
gbaraldiclaude
andcommitted
Fix byref attribute loss: copy param attrs after clone_into!
CloneFunctionInto rebuilds the AttributeList from scratch using VMap. For byref params, VMap maps old args to addrspacecast instructions (not Arguments), so dyn_cast<Argument> fails and byref(T) is silently dropped. The subsequent setAttributes() overwrites any attrs we set before clone_into!. Without byref, the backend emits global_buffer(8) metadata instead of by_value(sizeof(T)), causing HIP to copy only 8 bytes of struct data as a "pointer" — leading to illegal address errors at runtime. Also remove InferAddressSpaces (rely on AMDGPULowerKernelArguments in codegen to trace addrspacecast chains for s_load). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 9cc22e3 commit 6e80b8b

1 file changed

Lines changed: 19 additions & 17 deletions

File tree

src/gcn.jl

Lines changed: 19 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -113,8 +113,8 @@ function add_kernarg_address_spaces!(@nospecialize(job::CompilerJob), mod::LLVM.
113113
end
114114

115115
# insert addrspacecasts from kernarg (4) back to flat (0) so that the cloned IR
116-
# (which expects flat pointers) continues to work. InferAddressSpaces will then
117-
# propagate addrspace(4) through GEPs and loads, eliminating the casts.
116+
# (which expects flat pointers) continues to work. The AMDGPU backend's
117+
# AMDGPULowerKernelArguments traces these casts and produces s_load.
118118
new_args = LLVM.Value[]
119119
@dispose builder=IRBuilder() begin
120120
entry_bb = BasicBlock(new_f, "conversion")
@@ -127,9 +127,6 @@ function add_kernarg_address_spaces!(@nospecialize(job::CompilerJob), mod::LLVM.
127127
else
128128
push!(new_args, parameters(new_f)[i])
129129
end
130-
for attr in collect(parameter_attributes(f, i))
131-
push!(parameter_attributes(new_f, i), attr)
132-
end
133130
end
134131

135132
# clone the original function body
@@ -144,6 +141,16 @@ function add_kernarg_address_spaces!(@nospecialize(job::CompilerJob), mod::LLVM.
144141
br!(builder, blocks(new_f)[2])
145142
end
146143

144+
# copy parameter attributes AFTER clone_into!, because CloneFunctionInto
145+
# overwrites all attributes via setAttributes. For byref params, the VMap
146+
# maps old args to addrspacecast instructions (not Arguments), so LLVM's
147+
# attribute remapping silently drops them. We must re-add them here.
148+
for i in 1:length(parameters(ft))
149+
for attr in collect(parameter_attributes(f, i))
150+
push!(parameter_attributes(new_f, i), attr)
151+
end
152+
end
153+
147154
# replace the old function
148155
fn = LLVM.name(f)
149156
prune_constexpr_uses!(f)
@@ -152,23 +159,18 @@ function add_kernarg_address_spaces!(@nospecialize(job::CompilerJob), mod::LLVM.
152159
erase!(f)
153160
LLVM.name!(new_f, fn)
154161

155-
# propagate addrspace(4) through GEPs and loads, then clean up.
156-
# InferAddressSpaces needs TargetTransformInfo (via TargetMachine) to know
157-
# that flat address space is 0 on AMDGPU.
158-
tm = llvm_machine(job.config.target)
162+
# clean up the extra conversion block.
163+
# NOTE: we do NOT run InferAddressSpaces here — the AMDGPU backend's
164+
# AMDGPULowerKernelArguments pass traces addrspacecast chains during codegen
165+
# and correctly produces s_load for addrspace(4) provenance. Running
166+
# InferAddressSpaces with a TargetMachine can over-propagate addrspace(4)
167+
# into pointer values loaded from the struct (which should remain flat/global).
159168
@dispose pb=NewPMPassBuilder() begin
160-
add!(pb, NewPMFunctionPassManager()) do fpm
161-
add!(fpm, InferAddressSpacesPass())
162-
end
163169
add!(pb, NewPMFunctionPassManager()) do fpm
164170
add!(fpm, SimplifyCFGPass())
165-
add!(fpm, SROAPass())
166-
add!(fpm, EarlyCSEPass())
167-
add!(fpm, InstCombinePass())
168171
end
169-
run!(pb, mod, tm)
172+
run!(pb, mod)
170173
end
171-
dispose(tm)
172174

173175
return functions(mod)[fn]
174176
end

0 commit comments

Comments
 (0)