From 4fdc50615923c01828b2bb9c1c3a0770a388c2cc Mon Sep 17 00:00:00 2001 From: Harsh Chauhan Date: Wed, 13 May 2026 01:43:06 +0530 Subject: [PATCH 1/2] added GTests for Conv GPU batch>1 inference --- .../test/ConvBatchModelGenerator.py | 63 ++++++++++ .../TestCustomModelsFromONNXForAlpakaCuda.cxx | 111 ++++++++++++++++++ .../test/input_models/ConvBatch2.onnx | Bin 0 -> 193 bytes .../test/input_models/ConvBatch4.onnx | Bin 0 -> 193 bytes .../test/input_models/ConvBatch8.onnx | Bin 0 -> 193 bytes .../references/ConvBatch2.ref.hxx | 3 + .../references/ConvBatch4.ref.hxx | 3 + .../references/ConvBatch8.ref.hxx | 3 + 8 files changed, 183 insertions(+) create mode 100755 src/SOFIE_core/test/ConvBatchModelGenerator.py mode change 100644 => 100755 src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx create mode 100755 src/SOFIE_core/test/input_models/ConvBatch2.onnx create mode 100755 src/SOFIE_core/test/input_models/ConvBatch4.onnx create mode 100755 src/SOFIE_core/test/input_models/ConvBatch8.onnx create mode 100755 src/SOFIE_core/test/input_models/references/ConvBatch2.ref.hxx create mode 100755 src/SOFIE_core/test/input_models/references/ConvBatch4.ref.hxx create mode 100755 src/SOFIE_core/test/input_models/references/ConvBatch8.ref.hxx diff --git a/src/SOFIE_core/test/ConvBatchModelGenerator.py b/src/SOFIE_core/test/ConvBatchModelGenerator.py new file mode 100755 index 0000000..bdea620 --- /dev/null +++ b/src/SOFIE_core/test/ConvBatchModelGenerator.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 +""" +Generate batched Conv ONNX models for GPU batch > 1 testing. + +Same architecture as ConvWithPadding: 1in -> 1out, 3x3 all-ones kernel, +pad=1, no bias, 5x5 spatial. Batch dimension varies. + +Usage: python3 ConvBatchModelGenerator.py +""" + +import os +import numpy as np +import torch +import torch.nn.functional as F +import onnx +from onnx import numpy_helper, TensorProto, helper + +SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) + + +def make_model(batch_size): + name = f"ConvBatch{batch_size}" + + W = np.ones((1, 1, 3, 3), dtype=np.float32) + W_init = numpy_helper.from_array(W, name="W") + + X = helper.make_tensor_value_info("x", TensorProto.FLOAT, [batch_size, 1, 5, 5]) + Y = helper.make_tensor_value_info("y", TensorProto.FLOAT, [batch_size, 1, 5, 5]) + + node = helper.make_node("Conv", inputs=["x", "W"], outputs=["y"], + kernel_shape=[3, 3], pads=[1, 1, 1, 1]) + + graph = helper.make_graph([node], name, [X], [Y], [W_init]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 12)]) + model.ir_version = 7 + onnx.checker.check_model(model) + + onnx_path = os.path.join(SCRIPT_DIR, "input_models", f"{name}.onnx") + onnx.save(model, onnx_path) + print(f"saved {name}.onnx input=[{batch_size}, 1, 5, 5]") + + x = torch.arange(batch_size * 25, dtype=torch.float32).reshape(batch_size, 1, 5, 5) + W_t = torch.ones(1, 1, 3, 3) + with torch.no_grad(): + y = F.conv2d(x, W_t, padding=1) + + y_flat = y.numpy().flatten() + vals = ", ".join(f"{v:.6f}f" for v in y_flat) + ref = (f"namespace {name}_ExpectedOutput {{\n" + f"float correct[] = {{{vals}}};\n" + f"}} // namespace {name}_ExpectedOutput\n") + + ref_path = os.path.join(SCRIPT_DIR, "input_models", "references", f"{name}.ref.hxx") + with open(ref_path, "w") as f: + f.write(ref) + print(f"saved {name}.ref.hxx ({len(y_flat)} values)") + print(f"input: {x.numpy().flatten()}") + print(f"output: {y_flat}\n") + + +if __name__ == "__main__": + for b in [2, 4, 8]: + make_model(b) diff --git a/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx b/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx old mode 100644 new mode 100755 index 9e696c6..2876508 --- a/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx +++ b/src/SOFIE_core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx @@ -118,6 +118,15 @@ #include "BatchNorm_FromONNX_GPU_ALPAKA.hxx" #include "BatchNormRelu_FromONNX_GPU_ALPAKA.hxx" +#include "ConvBatch2_FromONNX_GPU_ALPAKA.hxx" +#include "input_models/references/ConvBatch2.ref.hxx" + +#include "ConvBatch4_FromONNX_GPU_ALPAKA.hxx" +#include "input_models/references/ConvBatch4.ref.hxx" + +#include "ConvBatch8_FromONNX_GPU_ALPAKA.hxx" +#include "input_models/references/ConvBatch8.ref.hxx" + #include #include #include @@ -2264,3 +2273,105 @@ TEST_F(SofieAlpakaTest, BatchNormalizationRelu) EXPECT_LE(std::abs(res_ptr[i] - expected), TOLERANCE) << "i=" << i; } } + +TEST_F(SofieAlpakaTest, ConvBatch2) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + std::vector input(50); + std::iota(input.begin(), input.end(), 0.0f); + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < input.size(); ++i) input_ptr[i] = input[i]; + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{input.size()})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{sizeof(ConvBatch2_ExpectedOutput::correct) / sizeof(float)})); + + { + SOFIE_ConvBatch2::Session session("ConvBatch2_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + float* correct = ConvBatch2_ExpectedOutput::correct; + constexpr size_t nOut_batch2 = sizeof(ConvBatch2_ExpectedOutput::correct) / sizeof(float); + + for (size_t i = 0; i < nOut_batch2; ++i) + EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE) << "i=" << i; +} + +TEST_F(SofieAlpakaTest, ConvBatch4) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + std::vector input(100); + std::iota(input.begin(), input.end(), 0.0f); + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < input.size(); ++i) input_ptr[i] = input[i]; + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{input.size()})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{sizeof(ConvBatch4_ExpectedOutput::correct) / sizeof(float)})); + + { + SOFIE_ConvBatch4::Session session("ConvBatch4_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + float* correct = ConvBatch4_ExpectedOutput::correct; + constexpr size_t nOut_batch4 = sizeof(ConvBatch4_ExpectedOutput::correct) / sizeof(float); + + for (size_t i = 0; i < nOut_batch4; ++i) + EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE) << "i=" << i; +} + +TEST_F(SofieAlpakaTest, ConvBatch8) +{ + constexpr float TOLERANCE = DEFAULT_TOLERANCE; + + std::vector input(200); + std::iota(input.begin(), input.end(), 0.0f); + + auto input_h = alpaka::allocBuf(host, Ext1D::all(Idx{input.size()})); + float* input_ptr = reinterpret_cast(alpaka::getPtrNative(input_h)); + for (Idx i = 0; i < input.size(); ++i) input_ptr[i] = input[i]; + + auto input_d = alpaka::allocBuf(device, Ext1D::all(Idx{input.size()})); + alpaka::memcpy(queue, input_d, input_h); + alpaka::wait(queue); + + auto result_h = alpaka::allocBuf(host, Ext1D::all(Idx{sizeof(ConvBatch8_ExpectedOutput::correct) / sizeof(float)})); + + { + SOFIE_ConvBatch8::Session session("ConvBatch8_FromONNX_GPU_ALPAKA.dat"); + auto result = session.infer(input_d); + alpaka::wait(queue); + cudaDeviceSynchronize(); + alpaka::memcpy(queue, result_h, result); + alpaka::wait(queue); + } + + float* res_ptr = reinterpret_cast(alpaka::getPtrNative(result_h)); + float* correct = ConvBatch8_ExpectedOutput::correct; + constexpr size_t nOut_batch8 = sizeof(ConvBatch8_ExpectedOutput::correct) / sizeof(float); + + for (size_t i = 0; i < nOut_batch8; ++i) + EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE) << "i=" << i; +} diff --git a/src/SOFIE_core/test/input_models/ConvBatch2.onnx b/src/SOFIE_core/test/input_models/ConvBatch2.onnx new file mode 100755 index 0000000000000000000000000000000000000000..47a75c66b22924bcabc05ac9e7eb9f04c0c5ab14 GIT binary patch literal 193 zcmd;Jx7xcH&4ynvBiOOT7D zATg!bfe`|MLPA_1^-hT;$r(mk#vF`5$jrek!05yn?xn)O&|pspM@a+i782tU;b0UJ b;9}xn0%ArWW(DCSpkyU3NhcOA1_2%bUT+?r literal 0 HcmV?d00001 diff --git a/src/SOFIE_core/test/input_models/ConvBatch4.onnx b/src/SOFIE_core/test/input_models/ConvBatch4.onnx new file mode 100755 index 0000000000000000000000000000000000000000..4148f2ec786f943d7bb52c380ff4ac9c415fc7cb GIT binary patch literal 193 zcmd;Jx7xcH&4ynvBiOOT7D zATg!bfe`|MLPA_1^-hT;$r&bE#vF`5$jrek!05yn?xn)O&|pspM@a+i782tU;b0UJ b;9}xn0b)iVW(DCSpkyU3NhcOA1_2%bU*R64 literal 0 HcmV?d00001 diff --git a/src/SOFIE_core/test/input_models/ConvBatch8.onnx b/src/SOFIE_core/test/input_models/ConvBatch8.onnx new file mode 100755 index 0000000000000000000000000000000000000000..57cc9fe712a106bbfb1b006f9c70be648d60c14f GIT binary patch literal 193 zcmd;Jx7xcH&4ynvBiOOT7D zATg!bfe`|MLPA_1^-hT;$r%<}#vF`5$jrek!05yn?xn)O&|pspM@a+i782tU;b0UJ b;9}z70AfZUW(DCSpkyU3NhcOA1_2%bV#OY? literal 0 HcmV?d00001 diff --git a/src/SOFIE_core/test/input_models/references/ConvBatch2.ref.hxx b/src/SOFIE_core/test/input_models/references/ConvBatch2.ref.hxx new file mode 100755 index 0000000..99f3c54 --- /dev/null +++ b/src/SOFIE_core/test/input_models/references/ConvBatch2.ref.hxx @@ -0,0 +1,3 @@ +namespace ConvBatch2_ExpectedOutput { +float correct[] = {12.000000f, 21.000000f, 27.000000f, 33.000000f, 24.000000f, 33.000000f, 54.000000f, 63.000000f, 72.000000f, 51.000000f, 63.000000f, 99.000000f, 108.000000f, 117.000000f, 81.000000f, 93.000000f, 144.000000f, 153.000000f, 162.000000f, 111.000000f, 72.000000f, 111.000000f, 117.000000f, 123.000000f, 84.000000f, 112.000000f, 171.000000f, 177.000000f, 183.000000f, 124.000000f, 183.000000f, 279.000000f, 288.000000f, 297.000000f, 201.000000f, 213.000000f, 324.000000f, 333.000000f, 342.000000f, 231.000000f, 243.000000f, 369.000000f, 378.000000f, 387.000000f, 261.000000f, 172.000000f, 261.000000f, 267.000000f, 273.000000f, 184.000000f}; +} // namespace ConvBatch2_ExpectedOutput diff --git a/src/SOFIE_core/test/input_models/references/ConvBatch4.ref.hxx b/src/SOFIE_core/test/input_models/references/ConvBatch4.ref.hxx new file mode 100755 index 0000000..2f87aff --- /dev/null +++ b/src/SOFIE_core/test/input_models/references/ConvBatch4.ref.hxx @@ -0,0 +1,3 @@ +namespace ConvBatch4_ExpectedOutput { +float correct[] = {12.000000f, 21.000000f, 27.000000f, 33.000000f, 24.000000f, 33.000000f, 54.000000f, 63.000000f, 72.000000f, 51.000000f, 63.000000f, 99.000000f, 108.000000f, 117.000000f, 81.000000f, 93.000000f, 144.000000f, 153.000000f, 162.000000f, 111.000000f, 72.000000f, 111.000000f, 117.000000f, 123.000000f, 84.000000f, 112.000000f, 171.000000f, 177.000000f, 183.000000f, 124.000000f, 183.000000f, 279.000000f, 288.000000f, 297.000000f, 201.000000f, 213.000000f, 324.000000f, 333.000000f, 342.000000f, 231.000000f, 243.000000f, 369.000000f, 378.000000f, 387.000000f, 261.000000f, 172.000000f, 261.000000f, 267.000000f, 273.000000f, 184.000000f, 212.000000f, 321.000000f, 327.000000f, 333.000000f, 224.000000f, 333.000000f, 504.000000f, 513.000000f, 522.000000f, 351.000000f, 363.000000f, 549.000000f, 558.000000f, 567.000000f, 381.000000f, 393.000000f, 594.000000f, 603.000000f, 612.000000f, 411.000000f, 272.000000f, 411.000000f, 417.000000f, 423.000000f, 284.000000f, 312.000000f, 471.000000f, 477.000000f, 483.000000f, 324.000000f, 483.000000f, 729.000000f, 738.000000f, 747.000000f, 501.000000f, 513.000000f, 774.000000f, 783.000000f, 792.000000f, 531.000000f, 543.000000f, 819.000000f, 828.000000f, 837.000000f, 561.000000f, 372.000000f, 561.000000f, 567.000000f, 573.000000f, 384.000000f}; +} // namespace ConvBatch4_ExpectedOutput diff --git a/src/SOFIE_core/test/input_models/references/ConvBatch8.ref.hxx b/src/SOFIE_core/test/input_models/references/ConvBatch8.ref.hxx new file mode 100755 index 0000000..9238292 --- /dev/null +++ b/src/SOFIE_core/test/input_models/references/ConvBatch8.ref.hxx @@ -0,0 +1,3 @@ +namespace ConvBatch8_ExpectedOutput { +float correct[] = {12.000000f, 21.000000f, 27.000000f, 33.000000f, 24.000000f, 33.000000f, 54.000000f, 63.000000f, 72.000000f, 51.000000f, 63.000000f, 99.000000f, 108.000000f, 117.000000f, 81.000000f, 93.000000f, 144.000000f, 153.000000f, 162.000000f, 111.000000f, 72.000000f, 111.000000f, 117.000000f, 123.000000f, 84.000000f, 112.000000f, 171.000000f, 177.000000f, 183.000000f, 124.000000f, 183.000000f, 279.000000f, 288.000000f, 297.000000f, 201.000000f, 213.000000f, 324.000000f, 333.000000f, 342.000000f, 231.000000f, 243.000000f, 369.000000f, 378.000000f, 387.000000f, 261.000000f, 172.000000f, 261.000000f, 267.000000f, 273.000000f, 184.000000f, 212.000000f, 321.000000f, 327.000000f, 333.000000f, 224.000000f, 333.000000f, 504.000000f, 513.000000f, 522.000000f, 351.000000f, 363.000000f, 549.000000f, 558.000000f, 567.000000f, 381.000000f, 393.000000f, 594.000000f, 603.000000f, 612.000000f, 411.000000f, 272.000000f, 411.000000f, 417.000000f, 423.000000f, 284.000000f, 312.000000f, 471.000000f, 477.000000f, 483.000000f, 324.000000f, 483.000000f, 729.000000f, 738.000000f, 747.000000f, 501.000000f, 513.000000f, 774.000000f, 783.000000f, 792.000000f, 531.000000f, 543.000000f, 819.000000f, 828.000000f, 837.000000f, 561.000000f, 372.000000f, 561.000000f, 567.000000f, 573.000000f, 384.000000f, 412.000000f, 621.000000f, 627.000000f, 633.000000f, 424.000000f, 633.000000f, 954.000000f, 963.000000f, 972.000000f, 651.000000f, 663.000000f, 999.000000f, 1008.000000f, 1017.000000f, 681.000000f, 693.000000f, 1044.000000f, 1053.000000f, 1062.000000f, 711.000000f, 472.000000f, 711.000000f, 717.000000f, 723.000000f, 484.000000f, 512.000000f, 771.000000f, 777.000000f, 783.000000f, 524.000000f, 783.000000f, 1179.000000f, 1188.000000f, 1197.000000f, 801.000000f, 813.000000f, 1224.000000f, 1233.000000f, 1242.000000f, 831.000000f, 843.000000f, 1269.000000f, 1278.000000f, 1287.000000f, 861.000000f, 572.000000f, 861.000000f, 867.000000f, 873.000000f, 584.000000f, 612.000000f, 921.000000f, 927.000000f, 933.000000f, 624.000000f, 933.000000f, 1404.000000f, 1413.000000f, 1422.000000f, 951.000000f, 963.000000f, 1449.000000f, 1458.000000f, 1467.000000f, 981.000000f, 993.000000f, 1494.000000f, 1503.000000f, 1512.000000f, 1011.000000f, 672.000000f, 1011.000000f, 1017.000000f, 1023.000000f, 684.000000f, 712.000000f, 1071.000000f, 1077.000000f, 1083.000000f, 724.000000f, 1083.000000f, 1629.000000f, 1638.000000f, 1647.000000f, 1101.000000f, 1113.000000f, 1674.000000f, 1683.000000f, 1692.000000f, 1131.000000f, 1143.000000f, 1719.000000f, 1728.000000f, 1737.000000f, 1161.000000f, 772.000000f, 1161.000000f, 1167.000000f, 1173.000000f, 784.000000f}; +} // namespace ConvBatch8_ExpectedOutput From fc4b9624f7024c70cf21454bc2b141cd7554935b Mon Sep 17 00:00:00 2001 From: Harsh Chauhan Date: Wed, 13 May 2026 02:18:48 +0530 Subject: [PATCH 2/2] removed generator script --- .../test/ConvBatchModelGenerator.py | 63 ------------------- 1 file changed, 63 deletions(-) delete mode 100755 src/SOFIE_core/test/ConvBatchModelGenerator.py diff --git a/src/SOFIE_core/test/ConvBatchModelGenerator.py b/src/SOFIE_core/test/ConvBatchModelGenerator.py deleted file mode 100755 index bdea620..0000000 --- a/src/SOFIE_core/test/ConvBatchModelGenerator.py +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env python3 -""" -Generate batched Conv ONNX models for GPU batch > 1 testing. - -Same architecture as ConvWithPadding: 1in -> 1out, 3x3 all-ones kernel, -pad=1, no bias, 5x5 spatial. Batch dimension varies. - -Usage: python3 ConvBatchModelGenerator.py -""" - -import os -import numpy as np -import torch -import torch.nn.functional as F -import onnx -from onnx import numpy_helper, TensorProto, helper - -SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) - - -def make_model(batch_size): - name = f"ConvBatch{batch_size}" - - W = np.ones((1, 1, 3, 3), dtype=np.float32) - W_init = numpy_helper.from_array(W, name="W") - - X = helper.make_tensor_value_info("x", TensorProto.FLOAT, [batch_size, 1, 5, 5]) - Y = helper.make_tensor_value_info("y", TensorProto.FLOAT, [batch_size, 1, 5, 5]) - - node = helper.make_node("Conv", inputs=["x", "W"], outputs=["y"], - kernel_shape=[3, 3], pads=[1, 1, 1, 1]) - - graph = helper.make_graph([node], name, [X], [Y], [W_init]) - model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 12)]) - model.ir_version = 7 - onnx.checker.check_model(model) - - onnx_path = os.path.join(SCRIPT_DIR, "input_models", f"{name}.onnx") - onnx.save(model, onnx_path) - print(f"saved {name}.onnx input=[{batch_size}, 1, 5, 5]") - - x = torch.arange(batch_size * 25, dtype=torch.float32).reshape(batch_size, 1, 5, 5) - W_t = torch.ones(1, 1, 3, 3) - with torch.no_grad(): - y = F.conv2d(x, W_t, padding=1) - - y_flat = y.numpy().flatten() - vals = ", ".join(f"{v:.6f}f" for v in y_flat) - ref = (f"namespace {name}_ExpectedOutput {{\n" - f"float correct[] = {{{vals}}};\n" - f"}} // namespace {name}_ExpectedOutput\n") - - ref_path = os.path.join(SCRIPT_DIR, "input_models", "references", f"{name}.ref.hxx") - with open(ref_path, "w") as f: - f.write(ref) - print(f"saved {name}.ref.hxx ({len(y_flat)} values)") - print(f"input: {x.numpy().flatten()}") - print(f"output: {y_flat}\n") - - -if __name__ == "__main__": - for b in [2, 4, 8]: - make_model(b)