@@ -54,15 +54,17 @@ function finish_module!(@nospecialize(job::CompilerJob{GCNCompilerTarget}),
5454 return entry
5555end
5656
57- function finish_ir! (@nospecialize (job:: CompilerJob{GCNCompilerTarget} ), mod:: LLVM.Module ,
58- entry:: LLVM.Function )
57+ function finish_ir! (
58+ @nospecialize (job:: CompilerJob{GCNCompilerTarget} ), mod:: LLVM.Module ,
59+ entry:: LLVM.Function
60+ )
5961 if job. config. kernel
6062 entry = add_kernarg_address_spaces! (job, mod, entry)
6163
6264 # optimize after address space rewriting: propagate addrspace(4) through
6365 # the addrspacecast chains, then clean up newly-exposed opportunities
6466 tm = llvm_machine (job. config. target)
65- @dispose pb= NewPMPassBuilder () tm begin
67+ @dispose pb = NewPMPassBuilder () tm begin
6668 add! (pb, NewPMFunctionPassManager ()) do fpm
6769 add! (fpm, InferAddressSpacesPass ())
6870 add! (fpm, SROAPass ())
@@ -76,23 +78,26 @@ function finish_ir!(@nospecialize(job::CompilerJob{GCNCompilerTarget}), mod::LLV
7678 return entry
7779end
7880
79- # Rewrite byref kernel parameters from flat (addrspace 0) to kernarg (addrspace 4).
81+ # Rewrite byref kernel parameters from flat (addrspace 0) to constant (addrspace 4).
8082#
81- # On AMDGPU, the kernarg segment is in address space 4 and is scalar-loadable via s_load.
82- # Clang emits byref parameters as `ptr addrspace(4)` from the frontend, but Julia's
83- # RemoveJuliaAddrspacesPass strips all address spaces to flat. This pass restores the
84- # correct address space so that struct field loads from byref arguments become s_load
85- # instead of flat_load.
83+ # On AMDGPU, kernel arguments reside in the constant address space (addrspace 4),
84+ # which is scalar-loadable via s_load. Julia initially emits byref parameters as
85+ # pointers in addrspace(11) (derived), but RemoveJuliaAddrspacesPass strips
86+ # all non-integral address spaces to flat (addrspace 0) during optimization. This pass
87+ # restores addrspace(4) on byref parameters so that the backend can emit s_load
88+ # instead of flat_load for struct field accesses.
8689#
8790# NOTE: must run after optimization, where RemoveJuliaAddrspacesPass has already
8891# converted Julia's addrspace(11) to flat (addrspace 0) on these parameters.
89- function add_kernarg_address_spaces! (@nospecialize (job:: CompilerJob ), mod:: LLVM.Module ,
90- f:: LLVM.Function )
92+ function add_kernarg_address_spaces! (
93+ @nospecialize (job:: CompilerJob ), mod:: LLVM.Module ,
94+ f:: LLVM.Function
95+ )
9196 ft = function_type (f)
9297
9398 # find the byref parameters
9499 byref_mask = BitVector (undef, length (parameters (ft)))
95- args = classify_arguments (job, ft; post_optimization= job. config. optimize)
100+ args = classify_arguments (job, ft; post_optimization = job. config. optimize)
96101 filter! (args) do arg
97102 arg. cc != GHOST
98103 end
@@ -114,7 +119,7 @@ function add_kernarg_address_spaces!(@nospecialize(job::CompilerJob), mod::LLVM.
114119 new_types = LLVMType[]
115120 for (i, param) in enumerate (parameters (ft))
116121 if byref_mask[i] && param isa LLVM. PointerType && addrspace (param) == 0
117- push! (new_types, LLVM. PointerType (#= kernarg =# 4 ))
122+ push! (new_types, LLVM. PointerType (#= constant =# 4 ))
118123 else
119124 push! (new_types, param)
120125 end
@@ -130,7 +135,7 @@ function add_kernarg_address_spaces!(@nospecialize(job::CompilerJob), mod::LLVM.
130135 # (which expects flat pointers) continues to work. The AMDGPU backend's
131136 # AMDGPULowerKernelArguments traces these casts and produces s_load.
132137 new_args = LLVM. Value[]
133- @dispose builder= IRBuilder () begin
138+ @dispose builder = IRBuilder () begin
134139 entry_bb = BasicBlock (new_f, " conversion" )
135140 position! (builder, entry_bb)
136141
@@ -148,8 +153,10 @@ function add_kernarg_address_spaces!(@nospecialize(job::CompilerJob), mod::LLVM.
148153 param => new_args[i] for (i, param) in enumerate (parameters (f))
149154 )
150155 value_map[f] = new_f
151- clone_into! (new_f, f; value_map,
152- changes= LLVM. API. LLVMCloneFunctionChangeTypeGlobalChanges)
156+ clone_into! (
157+ new_f, f; value_map,
158+ changes = LLVM. API. LLVMCloneFunctionChangeTypeGlobalChanges
159+ )
153160
154161 # fall through from conversion block to cloned entry
155162 br! (builder, blocks (new_f)[2 ])
@@ -174,7 +181,7 @@ function add_kernarg_address_spaces!(@nospecialize(job::CompilerJob), mod::LLVM.
174181 LLVM. name! (new_f, fn)
175182
176183 # clean up the extra conversion block
177- @dispose pb= NewPMPassBuilder () begin
184+ @dispose pb = NewPMPassBuilder () begin
178185 add! (pb, NewPMFunctionPassManager ()) do fpm
179186 add! (fpm, SimplifyCFGPass ())
180187 end
0 commit comments