|
37 | 37 | end |
38 | 38 | end |
39 | 39 |
|
# Checks how a struct argument shows up in the LLVM signature of `kernel`,
# depending on whether the function is compiled as a kernel or as a plain
# function.
@testset "kernarg address space for byref parameters" begin
    # Fresh anonymous module so the definitions cannot clash with other testsets.
    sandbox = @eval module $(gensym())
        struct MyStruct
            x::Float64
            y::Float64
        end

        function kernel(s::MyStruct)
            s.x + s.y
            return
        end
    end

    # Both compilations below use the same single-argument signature.
    sig = Tuple{sandbox.MyStruct}

    # Compiled as a kernel: the struct parameter is expected to appear as
    # `ptr addrspace(4)` in the `amdgpu_kernel` definition.
    @test @filecheck begin
        check"CHECK: define amdgpu_kernel void @_Z6kernel8MyStruct(ptr addrspace(4)"
        GCN.code_llvm(sandbox.kernel, sig; dump_module=true, kernel=true)
    end

    # Compiled as an ordinary function: `addrspace(4)` must not occur anywhere
    # in the dumped module.
    @test @filecheck begin
        check"CHECK-NOT: addrspace(4)"
        GCN.code_llvm(sandbox.kernel, sig; dump_module=true, kernel=false)
    end
end
| 65 | + |
40 | 66 | @testset "https://github.com/JuliaGPU/AMDGPU.jl/issues/846" begin |
41 | 67 | ir, rt = GCN.code_typed((Tuple{Tuple{Val{4}}, Tuple{Float32}},); always_inline=true) do t |
42 | 68 | t[1] |
|
49 | 75 | ############################################################################################ |
50 | 76 | @testset "assembly" begin |
51 | 77 |
|
# Checks the native code generated for a kernel that reads both fields of a
# struct argument: the assembly must contain an `s_load_dwordx*` instruction
# and must not contain any `flat_load*` instruction.
@testset "s_load for kernarg struct access" begin
    # Fresh anonymous module so the definitions cannot clash with other testsets.
    mod = @eval module $(gensym())
        struct MyStruct
            x::Float64
            y::Float64
        end

        # The sum is stored through `out` so the field loads stay observable
        # in the generated code.
        function kernel(s::MyStruct, out::Ptr{Float64})
            unsafe_store!(out, s.x + s.y)
            return
        end
    end

    @test @filecheck begin
        # FileCheck's CHECK-NOT only guards the region between neighbouring
        # positive matches (or the start/end of input). Guard both sides of
        # the `s_load_dwordx` match so a `flat_load` emitted *before* the first
        # scalar load is rejected as well.
        check"CHECK-NOT: flat_load"
        check"CHECK: s_load_dwordx"
        check"CHECK-NOT: flat_load"
        GCN.code_native(mod.kernel, Tuple{mod.MyStruct, Ptr{Float64}}; kernel=true)
    end
end
| 97 | + |
52 | 98 | @testset "skip scalar trap" begin |
53 | 99 | mod = @eval module $(gensym()) |
54 | 100 | workitem_idx_x() = ccall("llvm.amdgcn.workitem.id.x", llvmcall, Int32, ()) |
|
0 commit comments