Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion core/inc/SOFIE/ROperator.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ enum class OperatorKind {
UNARY_COS=22,
UNARY_ABS=23,
CLIP=24,
NOT=25
NOT=25,
SELU=26
};

inline const char* toString(OperatorKind kind) {
Expand Down
40 changes: 40 additions & 0 deletions core/inc/SOFIE/ROperator_Selu.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ public:
fNX(UTILITY::Clean_name(nameX)), fNY(UTILITY::Clean_name(nameY)){
fInputTensorNames = { fNX };
fOutputTensorNames = { fNY };
fKind = OperatorKind::SELU;
}

std::vector<ETensorType> TypeInference(std::vector<ETensorType> input) override {
Expand Down Expand Up @@ -59,6 +60,45 @@ public:
}

std::vector<std::string> GetStdLibs() override { return { std::string("cmath") };}

std::string Generate_GPU_Kernel_ALPAKA(std::string /*opName*/) override {
std::string op;
op = "\n//---- SELU_KERNEL_ALPAKA//\n";
op += "struct SeluKernel {\n";
op += SP + "template<typename TAcc, typename T>\n";
op += SP + "ALPAKA_FN_ACC void operator()(TAcc const& acc, T const* __restrict__ data, T* __restrict__ out, std::size_t numElements) const {\n";
op += SP + SP + "const auto idx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0];\n";
op += SP + SP + "if (idx < numElements) {\n";
op += SP + SP + SP + "T x = data[idx];\n";
op += SP + SP + SP + "T inner = T(1.6732632423543772848170429916717) * (exp(x) - T(1));\n";
op += SP + SP + SP + "out[idx] = T(1.0507009873554804934193349852946) * ((x > T(0) ? x : T(0)) + (inner < T(0) ? inner : T(0)));\n";
op += SP + SP + "}\n";
op += SP + "}\n";
op += "};\n";
return op;
}

std::string Generate_GPU_Kernel_Definitions_ALPAKA(std::string /*opName*/) override {
return SP + "SeluKernel seluKernel;\n";
}

std::string Generate_GPU_ALPAKA(std::string OpName) override {
OpName = "op_" + OpName;
if (fShape.empty()) {
throw std::runtime_error("SOFIE Selu called to Generate_GPU_ALPAKA without being initialized");
}
std::stringstream out;
std::string length = ConvertDimShapeToLength(fShape);
out << "\n//------ SELU_GPU_ALPAKA\n";
out << SP << "auto const elementsPerThread_" << fNX << " = Vec::all(static_cast<Idx>(1));\n";
out << SP << "auto const elementsPerGrid_" << fNX << " = Vec::all(Idx{" << length << "});\n";
out << SP << "auto const workDiv_" << fNX << " = sofie_workdiv(elementsPerGrid_" << fNX << ");\n";
out << SP << "auto task_" << OpName << " = alpaka::createTaskKernel<Acc>(workDiv_" << fNX
<< ", seluKernel, alpaka::getPtrNative(deviceBuf_" << fNX
<< "), alpaka::getPtrNative(deviceBuf_" << fNY << "), static_cast<Idx>(" << length << "));\n";
out << SP << "alpaka::enqueue(queue, task_" << OpName << ");\n";
return out.str();
}
};

}//SOFIE
Expand Down
3 changes: 2 additions & 1 deletion core/src/RModel_ALPAKA.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -545,7 +545,8 @@ void RModel::GenerateSessionCode_GPU_ALPAKA() {
SOFIE::OperatorKind::UNARY_SIN,
SOFIE::OperatorKind::UNARY_COS,
SOFIE::OperatorKind::UNARY_ABS,
SOFIE::OperatorKind::NOT
SOFIE::OperatorKind::NOT,
SOFIE::OperatorKind::SELU
};

bool OpNeedsBlas = false;
Expand Down
37 changes: 37 additions & 0 deletions core/test/TestCustomModelsFromONNXForAlpakaCuda.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,9 @@
#include "Clip_FromONNX_GPU_ALPAKA.hxx"
#include "Not_FromONNX_GPU_ALPAKA.hxx"

#include "Selu_FromONNX_GPU_ALPAKA.hxx"
#include "input_models/references/Selu.ref.hxx"

#include "GNN_model_FromONNX_GPU_ALPAKA.hxx"

#include <alpaka/alpaka.hpp>
Expand Down Expand Up @@ -3161,3 +3164,37 @@ TEST_F(SofieAlpakaTest, Logic_BitwiseNot)
for (std::size_t i = 0; i < N; ++i)
EXPECT_EQ(res[i], ref[i]) << " index=" << i;
}

TEST_F(SofieAlpakaTest, Selu)
{
constexpr float TOLERANCE = DEFAULT_TOLERANCE;

// input spans negative + positive so the SELU negative branch is exercised
std::vector<float> input({1.0f, -2.0f, 3.0f, 0.5f, -1.0f, 2.0f});

auto input_h = alpaka::allocBuf<float, Idx>(host, Ext1D::all(Idx{input.size()}));
float* input_ptr = reinterpret_cast<float*>(alpaka::getPtrNative(input_h));
for (Idx i = 0; i < input.size(); ++i) input_ptr[i] = input[i];

auto input_d = alpaka::allocBuf<float, Idx>(device, Ext1D::all(Idx{input.size()}));
alpaka::memcpy(queue, input_d, input_h);
alpaka::wait(queue);

constexpr size_t nOut = sizeof(Selu_ExpectedOutput::outputs) / sizeof(float);
auto result_h = alpaka::allocBuf<float, Idx>(host, Ext1D::all(Idx{nOut}));

{
SOFIE_Selu::Session<alpaka::TagGpuCudaRt> session;
auto result = session.infer(input_d);
alpaka::wait(queue);
cudaDeviceSynchronize();
alpaka::memcpy(queue, result_h, result);
alpaka::wait(queue);
}

float* res_ptr = reinterpret_cast<float*>(alpaka::getPtrNative(result_h));
float* correct = Selu_ExpectedOutput::outputs;
for (size_t i = 0; i < nOut; ++i) {
EXPECT_LE(std::abs(res_ptr[i] - correct[i]), TOLERANCE) << "i=" << i;
}
}
Binary file added core/test/input_models/Selu.onnx
Binary file not shown.
5 changes: 5 additions & 0 deletions core/test/input_models/references/Selu.ref.hxx
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Auto-generated SELU reference - DO NOT EDIT
#pragma once
namespace Selu_ExpectedOutput {
float outputs[] = {1.05070102f, -1.52016652f, 3.15210295f, 0.52535051f, -1.11133075f, 2.10140204f};
} // namespace Selu_ExpectedOutput