From 57e467b969363d3dd0d711710a4efedd3cf19d1e Mon Sep 17 00:00:00 2001
From: Harsh Chauhan <harsh2005.hc@gmail.com>
Date: Mon, 8 Jun 2026 23:00:29 +0530
Subject: [PATCH] add GTests for Conv GPU bias, 1D and 3D paths

---
 .../TestCustomModelsFromONNXForAlpakaCuda.cxx | 124 ++++++++++++++++++
 core/test/input_models/Conv1d.onnx            | Bin 0 -> 308 bytes
 core/test/input_models/Conv3d.onnx            | Bin 0 -> 422 bytes
 core/test/input_models/ConvWithBias.onnx      | Bin 0 -> 314 bytes
 .../input_models/references/Conv1d.ref.hxx    |   3 +
 .../input_models/references/Conv3d.ref.hxx    |   3 +
 .../references/ConvWithBias.ref.hxx           |   3 +
 7 files changed, 133 insertions(+)
 create mode 100644 core/test/input_models/Conv1d.onnx
 create mode 100644 core/test/input_models/Conv3d.onnx
 create mode 100644 core/test/input_models/ConvWithBias.onnx
 create mode 100644 core/test/input_models/references/Conv1d.ref.hxx
 create mode 100644 core/test/input_models/references/Conv3d.ref.hxx
 create mode 100644 core/test/input_models/references/ConvWithBias.ref.hxx
diff --git a/core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx b/core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx
index 5ad9383..2846496 100644
--- a/core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx
+++ b/core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx
@@ -164,6 +164,15 @@
 #include "ConvWithAsymmetricPadding_FromONNX_GPU_ALPAKA.hxx"
 #include "input_models/references/ConvWithAsymmetricPadding.ref.hxx"
 
+#include "ConvWithBias_FromONNX_GPU_ALPAKA.hxx"
+#include "input_models/references/ConvWithBias.ref.hxx"
+
+#include "Conv1d_FromONNX_GPU_ALPAKA.hxx"
+#include "input_models/references/Conv1d.ref.hxx"
+
+#include "Conv3d_FromONNX_GPU_ALPAKA.hxx"
+#include "input_models/references/Conv3d.ref.hxx"
+
 #include "BatchNorm_FromONNX_GPU_ALPAKA.hxx"
 #include "BatchNormRelu_FromONNX_GPU_ALPAKA.hxx"
 
@@ -2442,6 +2451,121 @@ TEST_F(SofieAlpakaTest, ConvWithAsymmetricPadding)
    }
 }
 
+// Exercises the Conv GPU bias path (BiasBroadcastKernel + GEMM beta=1). The existing
+// ConvWith* models have no bias input, so the bias kernel was never run. Two output
+// channels with distinct biases (10, 100) make a wrong-channel broadcast visible.
+TEST_F(SofieAlpakaTest, ConvWithBias)
+{
+   constexpr float TOLERANCE = DEFAULT_TOLERANCE;
+
+   // x[1,1,5,5] = iota 0..24
+   std::vector<float> input(25);
+   std::iota(input.begin(), input.end(), 0.0f);
+
+   auto input_h = alpaka::allocBuf<float, Idx>(host, Ext1D::all(Idx{input.size()}));
+   float* input_ptr = reinterpret_cast<float*>(alpaka::getPtrNative(input_h));
+   for (Idx i = 0; i < input.size(); ++i) input_ptr[i] = input[i];
+
+   auto input_d = alpaka::allocBuf<float, Idx>(device, Ext1D::all(Idx{input.size()}));
+   alpaka::memcpy(queue, input_d, input_h);
+   alpaka::wait(queue);
+
+   auto result_h = alpaka::allocBuf<float, Idx>(host, Ext1D::all(Idx{sizeof(ConvWithBias_ExpectedOutput::all_ones) / sizeof(float)}));
+
+   {
+        SOFIE_ConvWithBias::Session<alpaka::TagGpuCudaRt> session("ConvWithBias_FromONNX_GPU_ALPAKA.dat");
+        auto result = session.infer(input_d);
+        alpaka::wait(queue);
+        cudaDeviceSynchronize();
+        alpaka::memcpy(queue, result_h, result);
+        alpaka::wait(queue);
+   }
+
+   float* res_ptr = reinterpret_cast<float*>(alpaka::getPtrNative(result_h));
+   float *correct = ConvWithBias_ExpectedOutput::all_ones;
+   constexpr size_t nOut_bias = sizeof(ConvWithBias_ExpectedOutput::all_ones) / sizeof(float);
+
+   for (size_t i = 0; i < nOut_bias; ++i) {
+      EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE) << "i=" << i;
+   }
+}
+
+// Exercises the Conv GPU 1D path (fDim==1 branches in weight-vec/im2col). All other
+// alpaka Conv models are 2D. x[1,2,7], W[3,2,3] iota weights, bias [1,2,3], pads [1,1].
+TEST_F(SofieAlpakaTest, Conv1d)
+{
+   constexpr float TOLERANCE = DEFAULT_TOLERANCE;
+
+   // x[1,2,7] = iota 0..13
+   std::vector<float> input(14);
+   std::iota(input.begin(), input.end(), 0.0f);
+
+   auto input_h = alpaka::allocBuf<float, Idx>(host, Ext1D::all(Idx{input.size()}));
+   float* input_ptr = reinterpret_cast<float*>(alpaka::getPtrNative(input_h));
+   for (Idx i = 0; i < input.size(); ++i) input_ptr[i] = input[i];
+
+   auto input_d = alpaka::allocBuf<float, Idx>(device, Ext1D::all(Idx{input.size()}));
+   alpaka::memcpy(queue, input_d, input_h);
+   alpaka::wait(queue);
+
+   auto result_h = alpaka::allocBuf<float, Idx>(host, Ext1D::all(Idx{sizeof(Conv1d_ExpectedOutput::all_ones) / sizeof(float)}));
+
+   {
+        SOFIE_Conv1d::Session<alpaka::TagGpuCudaRt> session("Conv1d_FromONNX_GPU_ALPAKA.dat");
+        auto result = session.infer(input_d);
+        alpaka::wait(queue);
+        cudaDeviceSynchronize();
+        alpaka::memcpy(queue, result_h, result);
+        alpaka::wait(queue);
+   }
+
+   float* res_ptr = reinterpret_cast<float*>(alpaka::getPtrNative(result_h));
+   float *correct = Conv1d_ExpectedOutput::all_ones;
+   constexpr size_t nOut_conv1d = sizeof(Conv1d_ExpectedOutput::all_ones) / sizeof(float);
+
+   for (size_t i = 0; i < nOut_conv1d; ++i) {
+      EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE) << "i=" << i;
+   }
+}
+
+// Exercises the Conv GPU 3D path (fDim>2 depth branches + depth handling in im2col).
+// x[1,1,3,4,4], W[2,1,2,3,3] iota weights, bias [5,50], kernel (2,3,3), pads h/w by 1.
+TEST_F(SofieAlpakaTest, Conv3d)
+{
+   constexpr float TOLERANCE = DEFAULT_TOLERANCE;
+
+   // x[1,1,3,4,4] = iota 0..47
+   std::vector<float> input(48);
+   std::iota(input.begin(), input.end(), 0.0f);
+
+   auto input_h = alpaka::allocBuf<float, Idx>(host, Ext1D::all(Idx{input.size()}));
+   float* input_ptr = reinterpret_cast<float*>(alpaka::getPtrNative(input_h));
+   for (Idx i = 0; i < input.size(); ++i) input_ptr[i] = input[i];
+
+   auto input_d = alpaka::allocBuf<float, Idx>(device, Ext1D::all(Idx{input.size()}));
+   alpaka::memcpy(queue, input_d, input_h);
+   alpaka::wait(queue);
+
+   auto result_h = alpaka::allocBuf<float, Idx>(host, Ext1D::all(Idx{sizeof(Conv3d_ExpectedOutput::all_ones) / sizeof(float)}));
+
+   {
+        SOFIE_Conv3d::Session<alpaka::TagGpuCudaRt> session("Conv3d_FromONNX_GPU_ALPAKA.dat");
+        auto result = session.infer(input_d);
+        alpaka::wait(queue);
+        cudaDeviceSynchronize();
+        alpaka::memcpy(queue, result_h, result);
+        alpaka::wait(queue);
+   }
+
+   float* res_ptr = reinterpret_cast<float*>(alpaka::getPtrNative(result_h));
+   float *correct = Conv3d_ExpectedOutput::all_ones;
+   constexpr size_t nOut_conv3d = sizeof(Conv3d_ExpectedOutput::all_ones) / sizeof(float);
+
+   for (size_t i = 0; i < nOut_conv3d; ++i) {
+      EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE) << "i=" << i;
+   }
+}
+
 TEST_F(SofieAlpakaTest, BatchNormalization)
 {
     constexpr float TOLERANCE = DEFAULT_TOLERANCE;
diff --git a/core/test/input_models/Conv1d.onnx b/core/test/input_models/Conv1d.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..a95292a6cd43b5275761e2b58fba9a30ea7f7ca6
GIT binary patch
literal 308
zcmYk%&uYRj90&0HnqtC_>Pp7gb$IN_U3XdRK*ZB7qu|B4wV)_9v}LTf9({(Ld=wwW
zNAa6Yh9Mt9e*Z|cGJV%Jt6jOMKdZHCo_+$JFnu#+OsS!E7{2VvujH1aO;MG_c2TdE
zuCVezD(Q3RmU(T-lD`gE)lId|3q)`?J>U75CvRHnK+Bj?%4SpXySir_78^P^z!6R$
z#zMsK03)#Q3h&^cgZ((U`x}|gB82|O&F}GgV>s4iA`Ub`0Lmw_tAxBH<ow3}lv$%A
UL{NU9vc0PGBYd?=Ll*Ji47uSii~s-t

literal 0
HcmV?d00001

diff --git a/core/test/input_models/Conv3d.onnx b/core/test/input_models/Conv3d.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..f70848374377a262a31faf2902142d26e0da0e85
GIT binary patch
literal 422
zcmYk&Axy(S6b9gXy@p*b1Ih&g!9b8zBsdJmVaODUCNpEuCR-p~(>8><N(5pO*`Sa}
z<RlV_oJ1m#{I5)`*Y95P^4`ncsg7CAs&X_*2gOZ3&dTE_sV_A>Xo@sBL)C$QHc9Wp
zZQZ?1@-(>{6r*95SkFo;8|s8x+Ruh@VTC2EcIULwoubUgaZ<E-Le8x4q@y@|A>}D1
zl?=#H)SHrMzxo8h;)fa<n86&xsSp9|!y#C>fGfzLhDVsfGc@oDZ}1K?_<&FNf;oJ{
z5B!2~LinJd>%tA?@!4EY{U77p_4n{DW>@bhGMgGVSdfT5<AP|Zz*`h><RaN$=Qq__
YVFGt}*XXF0I(Q^kvv#EOwFuV!0FvfHdjJ3c

literal 0
HcmV?d00001

diff --git a/core/test/input_models/ConvWithBias.onnx b/core/test/input_models/ConvWithBias.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..181f722722b9c5f0304016200e4ba679cdaba161
GIT binary patch
literal 314
zcmb_XOA5j;5X~el4uhW>)QuZ)nIni`jT`X-0XI^$(4w@ZEn;^brN{ADChEci$S^#Z
z_n8^&$X?OZNuC|5Q<)a^qUE$CxDyxxEuoovn>@RzF?BC-nZ?&bl|)4xicqxBQA6^I
z=v;;HDLax~r#ESx=rpR7L5_d|MF1MiZp+wX^^aCV<StV0kg?gCv8Uco?cH)hBLo?c
qUHfg{qVGuaL-H>vEQ<ZW{Yr$^>Dd84T6OcIc2Umpj?x~np#K6o?lI~B

literal 0
HcmV?d00001

diff --git a/core/test/input_models/references/Conv1d.ref.hxx b/core/test/input_models/references/Conv1d.ref.hxx
new file mode 100644
index 0000000..18e3ed0
--- /dev/null
+++ b/core/test/input_models/references/Conv1d.ref.hxx
@@ -0,0 +1,3 @@
+namespace Conv1d_ExpectedOutput {
+float all_ones[] = {71.0, 104.0, 119.0, 134.0, 149.0, 164.0, 95.0, 168.0, 267.0, 318.0, 369.0, 420.0, 471.0, 312.0, 265.0, 430.0, 517.0, 604.0, 691.0, 778.0, 529.0};
+} // namespace Conv1d_ExpectedOutput
diff --git a/core/test/input_models/references/Conv3d.ref.hxx b/core/test/input_models/references/Conv3d.ref.hxx
new file mode 100644
index 0000000..95559ed
--- /dev/null
+++ b/core/test/input_models/references/Conv3d.ref.hxx
@@ -0,0 +1,3 @@
+namespace Conv3d_ExpectedOutput {
+float all_ones[] = {1201.0, 1801.0, 1921.0, 1269.0, 1886.0, 2798.0, 2951.0, 1928.0, 2318.0, 3410.0, 3563.0, 2312.0, 1429.0, 2077.0, 2161.0, 1385.0, 2545.0, 3721.0, 3841.0, 2485.0, 3614.0, 5246.0, 5399.0, 3464.0, 4046.0, 5858.0, 6011.0, 3848.0, 2389.0, 3421.0, 3505.0, 2217.0, 2758.0, 4222.0, 4558.0, 3114.0, 4631.0, 7055.0, 7532.0, 5105.0, 5927.0, 8963.0, 9440.0, 6353.0, 4138.0, 6226.0, 6526.0, 4382.0, 6406.0, 9598.0, 9934.0, 6634.0, 9815.0, 14687.0, 15164.0, 10097.0, 11111.0, 16595.0, 17072.0, 11345.0, 7402.0, 11026.0, 11326.0, 7518.0};
+} // namespace Conv3d_ExpectedOutput
diff --git a/core/test/input_models/references/ConvWithBias.ref.hxx b/core/test/input_models/references/ConvWithBias.ref.hxx
new file mode 100644
index 0000000..581cac4
--- /dev/null
+++ b/core/test/input_models/references/ConvWithBias.ref.hxx
@@ -0,0 +1,3 @@
+namespace ConvWithBias_ExpectedOutput {
+float all_ones[] = {22.0, 31.0, 37.0, 43.0, 34.0, 43.0, 64.0, 73.0, 82.0, 61.0, 73.0, 109.0, 118.0, 127.0, 91.0, 103.0, 154.0, 163.0, 172.0, 121.0, 82.0, 121.0, 127.0, 133.0, 94.0, 112.0, 121.0, 127.0, 133.0, 124.0, 133.0, 154.0, 163.0, 172.0, 151.0, 163.0, 199.0, 208.0, 217.0, 181.0, 193.0, 244.0, 253.0, 262.0, 211.0, 172.0, 211.0, 217.0, 223.0, 184.0};
+} // namespace ConvWithBias_ExpectedOutput