From 57e467b969363d3dd0d711710a4efedd3cf19d1e Mon Sep 17 00:00:00 2001 From: Harsh Chauhan Date: Mon, 8 Jun 2026 23:00:29 +0530 Subject: [PATCH] add GTests for Conv GPU bias, 1D and 3D paths --- .../TestCustomModelsFromONNXForAlpakaCuda.cxx | 124 ++++++++++++++++++ core/test/input_models/Conv1d.onnx | Bin 0 -> 308 bytes core/test/input_models/Conv3d.onnx | Bin 0 -> 422 bytes core/test/input_models/ConvWithBias.onnx | Bin 0 -> 314 bytes .../input_models/references/Conv1d.ref.hxx | 3 + .../input_models/references/Conv3d.ref.hxx | 3 + .../references/ConvWithBias.ref.hxx | 3 + 7 files changed, 133 insertions(+) create mode 100644 core/test/input_models/Conv1d.onnx create mode 100644 core/test/input_models/Conv3d.onnx create mode 100644 core/test/input_models/ConvWithBias.onnx create mode 100644 core/test/input_models/references/Conv1d.ref.hxx create mode 100644 core/test/input_models/references/Conv3d.ref.hxx create mode 100644 core/test/input_models/references/ConvWithBias.ref.hxx diff --git a/core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx b/core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx index 5ad9383..2846496 100644 --- a/core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx +++ b/core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx @@ -164,6 +164,15 @@ #include "ConvWithAsymmetricPadding_FromONNX_GPU_ALPAKA.hxx" #include "input_models/references/ConvWithAsymmetricPadding.ref.hxx" +#include "ConvWithBias_FromONNX_GPU_ALPAKA.hxx" +#include "input_models/references/ConvWithBias.ref.hxx" + +#include "Conv1d_FromONNX_GPU_ALPAKA.hxx" +#include "input_models/references/Conv1d.ref.hxx" + +#include "Conv3d_FromONNX_GPU_ALPAKA.hxx" +#include "input_models/references/Conv3d.ref.hxx" + #include "BatchNorm_FromONNX_GPU_ALPAKA.hxx" #include "BatchNormRelu_FromONNX_GPU_ALPAKA.hxx" @@ -2442,6 +2451,121 @@ TEST_F(SofieAlpakaTest, ConvWithAsymmetricPadding) } } +// Exercises the Conv GPU bias path (BiasBroadcastKernel + GEMM beta=1). The existing +// ConvWith* models have no bias input, so the bias kernel was never run. Two output +// channels with distinct biases (10, 100) make a wrong-channel broadcast visible. +TEST_F(SofieAlpakaTest, ConvWithBias) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + // x[1,1,5,5] = iota 0..24 + std::vector input(25); + std::iota(input.begin(), input.end(), 0.0f); + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < input.size(); ++i) input_ptr[i] = input[i]; + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{input.size()})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{sizeof(ConvWithBias_ExpectedOutput::all_ones) / sizeof(float)})); + + { + SOFIE_ConvWithBias::Session session("ConvWithBias_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + float *correct = ConvWithBias_ExpectedOutput::all_ones; + constexpr size_t nOut_bias = sizeof(ConvWithBias_ExpectedOutput::all_ones) / sizeof(float); + + for (size_t i = 0; i < nOut_bias; ++i) { + EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE) << "i=" << i; + } +} + +// Exercises the Conv GPU 1D path (fDim==1 branches in weight-vec/im2col). All other +// alpaka Conv models are 2D. x[1,2,7], W[3,2,3] iota weights, bias [1,2,3], pads [1,1]. +TEST_F(SofieAlpakaTest, Conv1d) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + // x[1,2,7] = iota 0..13 + std::vector input(14); + std::iota(input.begin(), input.end(), 0.0f); + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < input.size(); ++i) input_ptr[i] = input[i]; + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{input.size()})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{sizeof(Conv1d_ExpectedOutput::all_ones) / sizeof(float)})); + + { + SOFIE_Conv1d::Session session("Conv1d_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + float *correct = Conv1d_ExpectedOutput::all_ones; + constexpr size_t nOut_conv1d = sizeof(Conv1d_ExpectedOutput::all_ones) / sizeof(float); + + for (size_t i = 0; i < nOut_conv1d; ++i) { + EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE) << "i=" << i; + } +} + +// Exercises the Conv GPU 3D path (fDim>2 depth branches + depth handling in im2col). +// x[1,1,3,4,4], W[2,1,2,3,3] iota weights, bias [5,50], kernel (2,3,3), pads h/w by 1. +TEST_F(SofieAlpakaTest, Conv3d) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + // x[1,1,3,4,4] = iota 0..47 + std::vector input(48); + std::iota(input.begin(), input.end(), 0.0f); + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < input.size(); ++i) input_ptr[i] = input[i]; + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{input.size()})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{sizeof(Conv3d_ExpectedOutput::all_ones) / sizeof(float)})); + + { + SOFIE_Conv3d::Session session("Conv3d_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + float *correct = Conv3d_ExpectedOutput::all_ones; + constexpr size_t nOut_conv3d = sizeof(Conv3d_ExpectedOutput::all_ones) / sizeof(float); + + for (size_t i = 0; i < nOut_conv3d; ++i) { + EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE) << "i=" << i; + } +} + TEST_F(SofieAlpakaTest, BatchNormalization) { constexpr float TOLERANCE = DEFAULT_TOLERANCE; diff --git a/core/test/input_models/Conv1d.onnx b/core/test/input_models/Conv1d.onnx new file mode 100644 index 0000000000000000000000000000000000000000..a95292a6cd43b5275761e2b58fba9a30ea7f7ca6 GIT binary patch literal 308 zcmYk%&uYRj90&0HnqtC_>Pp7gb$IN_U3XdRK*ZB7qu|B4wV)_9v}LTf9({(Ld=wwW zNAa6Yh9Mt9e*Z|cGJV%Jt6jOMKdZHCo_+$JFnu#+OsS!E7{2VvujH1aO;MG_c2TdE zuCVezD(Q3RmU(T-lD`gE)lId|3q)`?J>U75CvRHnK+Bj?%4SpXySir_78^P^z!6R$ z#zMsK03)#Q3h&^cgZ((U`x}|gB82|O&F}GgV>s4iA`Ub`0Lmw_tAxBH8>Xo@sBL)C$QHc9Wp zZQZ?1@-(>{6r*95SkFo;8|s8x+Ruh@VTC2EcIULwoubUgaZ}D1 zl?=#H)SHrMzxo8h;)fafGfzLhDVsfGc@oDZ}1K?_<&FNf;oJ{ z5B!2~LinJd>%tA?@!4EY{U77p_4n{DW>@bhGMgGVSdfT5)QuZ)nIni`jT`X-0XI^$(4w@ZEn;^brN{ADChEci$S^#Z z_n8^&$X?OZNuC|5Q<)a^qUE$CxDyxxEuoovn>@RzF?BC-nZ?&bl|)4xicqxBQA6^I z=v;;HDLax~r#ESx=rpR7L5_d|MF1MiZp+wX^^aCVvEQDd84T6OcIc2Umpj?x~np#K6o?lI~B literal 0 HcmV?d00001 diff --git a/core/test/input_models/references/Conv1d.ref.hxx b/core/test/input_models/references/Conv1d.ref.hxx new file mode 100644 index 0000000..18e3ed0 --- /dev/null +++ b/core/test/input_models/references/Conv1d.ref.hxx @@ -0,0 +1,3 @@ +namespace Conv1d_ExpectedOutput { +float all_ones[] = {71.0, 104.0, 119.0, 134.0, 149.0, 164.0, 95.0, 168.0, 267.0, 318.0, 369.0, 420.0, 471.0, 312.0, 265.0, 430.0, 517.0, 604.0, 691.0, 778.0, 529.0}; +} // namespace Conv1d_ExpectedOutput diff --git a/core/test/input_models/references/Conv3d.ref.hxx b/core/test/input_models/references/Conv3d.ref.hxx new file mode 100644 index 0000000..95559ed --- /dev/null +++ b/core/test/input_models/references/Conv3d.ref.hxx @@ -0,0 +1,3 @@ +namespace Conv3d_ExpectedOutput { +float all_ones[] = {1201.0, 1801.0, 1921.0, 1269.0, 1886.0, 2798.0, 2951.0, 1928.0, 2318.0, 3410.0, 3563.0, 2312.0, 1429.0, 2077.0, 2161.0, 1385.0, 2545.0, 3721.0, 3841.0, 2485.0, 3614.0, 5246.0, 5399.0, 3464.0, 4046.0, 5858.0, 6011.0, 3848.0, 2389.0, 3421.0, 3505.0, 2217.0, 2758.0, 4222.0, 4558.0, 3114.0, 4631.0, 7055.0, 7532.0, 5105.0, 5927.0, 8963.0, 9440.0, 6353.0, 4138.0, 6226.0, 6526.0, 4382.0, 6406.0, 9598.0, 9934.0, 6634.0, 9815.0, 14687.0, 15164.0, 10097.0, 11111.0, 16595.0, 17072.0, 11345.0, 7402.0, 11026.0, 11326.0, 7518.0}; +} // namespace Conv3d_ExpectedOutput diff --git a/core/test/input_models/references/ConvWithBias.ref.hxx b/core/test/input_models/references/ConvWithBias.ref.hxx new file mode 100644 index 0000000..581cac4 --- /dev/null +++ b/core/test/input_models/references/ConvWithBias.ref.hxx @@ -0,0 +1,3 @@ +namespace ConvWithBias_ExpectedOutput { +float all_ones[] = {22.0, 31.0, 37.0, 43.0, 34.0, 43.0, 64.0, 73.0, 82.0, 61.0, 73.0, 109.0, 118.0, 127.0, 91.0, 103.0, 154.0, 163.0, 172.0, 121.0, 82.0, 121.0, 127.0, 133.0, 94.0, 112.0, 121.0, 127.0, 133.0, 124.0, 133.0, 154.0, 163.0, 172.0, 151.0, 163.0, 199.0, 208.0, 217.0, 181.0, 193.0, 244.0, 253.0, 262.0, 211.0, 172.0, 211.0, 217.0, 223.0, 184.0}; +} // namespace ConvWithBias_ExpectedOutput