Skip to content
Open
93 changes: 91 additions & 2 deletions onnxruntime/core/graph/graph.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3839,7 +3839,7 @@ Status Graph::ConvertInitializersIntoOrtValues() {
FindAllSubgraphs(all_subgraphs);

const auto& model_path = GetModel().ModelPath();
std::unordered_set<PathString> validated_external_data_paths;
InlinedHashSet<PathString> validated_external_data_paths;

auto put_weights_maybe_in_memory_func = [&](Graph& graph) -> Status {
// if we have any initializers that are not in memory, put them there.
Expand Down Expand Up @@ -3903,7 +3903,96 @@ Status Graph::ConvertInitializersIntoOrtValues() {
return Status::OK();
};

return ForThisAndAllSubgraphs(all_subgraphs, put_weights_maybe_in_memory_func);
ORT_RETURN_IF_ERROR(ForThisAndAllSubgraphs(all_subgraphs, put_weights_maybe_in_memory_func));

// Validate and inline external data in node tensor attributes.
// In-memory references are rejected (no legitimate source creates them for attributes).
// File-based external data paths are validated, read from disk, and inlined as raw_data
// so all EPs (including plugins) can access attribute data uniformly.
auto inline_external_attr_tensors_func = [&](Graph& graph) -> Status {
for (auto& node : graph.Nodes()) {
for (auto& [attr_name, attr_proto] : node.GetMutableAttributes()) {
if (utils::HasTensor(attr_proto)) {
auto* tensor_proto = attr_proto.mutable_t();
if (utils::HasExternalData(*tensor_proto)) {
ORT_RETURN_IF(utils::HasExternalDataInMemory(*tensor_proto),
"Node '", node.Name(), "' attribute '", attr_name,
"' contains an in-memory external data reference, which is not permitted ",
"for node attributes.");

std::unique_ptr<onnxruntime::ExternalDataInfo> external_data_info;
ORT_RETURN_IF_ERROR(
onnxruntime::ExternalDataInfo::Create(tensor_proto->external_data(), external_data_info));
const auto& location = external_data_info->GetRelPath();

if (validated_external_data_paths.count(location) == 0) {
auto path_status = utils::ValidateExternalDataPath(model_path, location);
if (!path_status.IsOK()) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"Node '", node.Name(), "' attribute '", attr_name,
"': ", path_status.ErrorMessage());
}
validated_external_data_paths.insert(location);
}

// Read external data and inline it into the TensorProto.
std::vector<uint8_t> buffer;
{
auto unpack_status = utils::UnpackInitializerData(*tensor_proto, model_path, buffer);
if (!unpack_status.IsOK()) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
"Node '", node.Name(), "' attribute '", attr_name,
"': ", unpack_status.ErrorMessage());
}
}
tensor_proto->clear_external_data();
tensor_proto->set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation_DEFAULT);
utils::SetRawDataInTensorProto(*tensor_proto, buffer.data(), buffer.size());
}
} else if (utils::HasTensors(attr_proto)) {
for (auto& tensor_proto : *attr_proto.mutable_tensors()) {
if (utils::HasExternalData(tensor_proto)) {
ORT_RETURN_IF(utils::HasExternalDataInMemory(tensor_proto),
"Node '", node.Name(), "' attribute '", attr_name,
"' contains an in-memory external data reference, which is not permitted ",
"for node attributes.");

std::unique_ptr<onnxruntime::ExternalDataInfo> external_data_info;
ORT_RETURN_IF_ERROR(
onnxruntime::ExternalDataInfo::Create(tensor_proto.external_data(), external_data_info));
const auto& location = external_data_info->GetRelPath();

if (validated_external_data_paths.count(location) == 0) {
auto path_status = utils::ValidateExternalDataPath(model_path, location);
if (!path_status.IsOK()) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"Node '", node.Name(), "' attribute '", attr_name,
"': ", path_status.ErrorMessage());
}
validated_external_data_paths.insert(location);
}

std::vector<uint8_t> buffer;
{
auto unpack_status = utils::UnpackInitializerData(tensor_proto, model_path, buffer);
if (!unpack_status.IsOK()) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
"Node '", node.Name(), "' attribute '", attr_name,
"': ", unpack_status.ErrorMessage());
}
}
tensor_proto.clear_external_data();
tensor_proto.set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation_DEFAULT);
utils::SetRawDataInTensorProto(tensor_proto, buffer.data(), buffer.size());
}
}
}
}
}
return Status::OK();
};

return ForThisAndAllSubgraphs(all_subgraphs, inline_external_attr_tensors_func);
}

void Graph::SetName(const std::string& name) {
Expand Down
173 changes: 173 additions & 0 deletions onnxruntime/test/providers/cpu/ml/label_encoder_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
#include "gtest/gtest.h"
#include "core/framework/tensorprotoutils.h"
#include "test/providers/provider_test_utils.h"
#include "test/util/include/file_util.h"
#include "core/platform/path_lib.h"

namespace onnxruntime {
namespace test {
Expand Down Expand Up @@ -756,5 +758,176 @@ TEST(LabelEncoder, EmptyInputOpset4) {
test.Run();
}

// External data in tensor attributes: file-based external data is validated and inlined
// during session initialization. In-memory references are rejected by the ONNX checker
// during Graph::Resolve() (it validates that external data locations are regular files).
// In no-exceptions builds, the ONNX checker's fail_check calls abort() so these tests cannot run.
#if !defined(ORT_NO_EXCEPTIONS)

// RAII helper that creates a unique dummy binary file and removes it on destruction.
static std::pair<std::string, ScopedFileDeleter> CreateExternalDataFile(const void* data, size_t num_bytes) {
PathString filename(ORT_TSTR("ext_data_XXXXXX"));
FILE* fp = nullptr;
CreateTestFile(fp, filename);
size_t written = fwrite(data, 1, num_bytes, fp);
ORT_ENFORCE(written == num_bytes, "Failed to write external data file");
ORT_ENFORCE(fclose(fp) == 0, "Failed to close external data file");
return {ToUTF8String(filename), ScopedFileDeleter(filename)};
}

// Helper: create a TensorProto that references external data in the given file.
static ONNX_NAMESPACE::TensorProto MakeExternalInt64TensorProto(const std::string& name,
const std::string& filename,
int64_t num_elements) {
ONNX_NAMESPACE::TensorProto proto;
proto.set_name(name);
proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64);
proto.add_dims(num_elements);
proto.set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation_EXTERNAL);
auto* loc = proto.add_external_data();
loc->set_key("location");
loc->set_value(filename);
auto* offset = proto.add_external_data();
offset->set_key("offset");
offset->set_value("0");
auto* length = proto.add_external_data();
length->set_key("length");
length->set_value(std::to_string(num_elements * static_cast<int64_t>(sizeof(int64_t))));
return proto;
}

// Helper: create a TensorProto with in-memory external data reference (should be rejected).
static ONNX_NAMESPACE::TensorProto MakeInMemoryExternalTensorProto(const std::string& name,
int64_t num_elements) {
ONNX_NAMESPACE::TensorProto proto;
proto.set_name(name);
proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64);
proto.add_dims(num_elements);
proto.set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation_EXTERNAL);
auto* loc = proto.add_external_data();
loc->set_key("location");
loc->set_value(ToUTF8String(onnxruntime::utils::kTensorProtoNativeEndianMemoryAddressTag));
auto* offset = proto.add_external_data();
offset->set_key("offset");
offset->set_value("12345678");
auto* length = proto.add_external_data();
length->set_key("length");
length->set_value(std::to_string(num_elements * static_cast<int64_t>(sizeof(int64_t))));
return proto;
}

// Valid external data in tensor attributes should be loaded and inlined during session initialization.
TEST(LabelEncoder, ExternalDataInKeysTensorOpset4) {
std::vector<int64_t> key_data{1, 2};
auto [ext_path, ext_deleter] = CreateExternalDataFile(key_data.data(), key_data.size() * sizeof(int64_t));

OpTester test("LabelEncoder", 4, onnxruntime::kMLDomain);
test.AddAttribute("keys_tensor", MakeExternalInt64TensorProto("keys_tensor", ext_path, 2));

ONNX_NAMESPACE::TensorProto values_proto;
values_proto.set_name("values_tensor");
values_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64);
values_proto.add_dims(2);
values_proto.add_int64_data(10);
values_proto.add_int64_data(20);
test.AddAttribute("values_tensor", values_proto);

ONNX_NAMESPACE::TensorProto default_proto;
default_proto.set_name("default_tensor");
default_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64);
default_proto.add_dims(1);
default_proto.add_int64_data(42);
test.AddAttribute("default_tensor", default_proto);

test.AddInput<int64_t>("X", {1, 3}, {1, 2, 99});
test.AddOutput<int64_t>("Y", {1, 3}, {10, 20, 42});

test.Run();
}

// In-memory external data references in node attributes are rejected during initialization.
TEST(LabelEncoder, RejectsInMemoryExternalDataInKeysTensorOpset4) {
OpTester test("LabelEncoder", 4, onnxruntime::kMLDomain);
test.AddAttribute("keys_tensor", MakeInMemoryExternalTensorProto("keys_tensor", 2));

ONNX_NAMESPACE::TensorProto values_proto;
values_proto.set_name("values_tensor");
values_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64);
values_proto.add_dims(2);
values_proto.add_int64_data(10);
values_proto.add_int64_data(20);
test.AddAttribute("values_tensor", values_proto);

ONNX_NAMESPACE::TensorProto default_proto;
default_proto.set_name("default_tensor");
default_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64);
default_proto.add_dims(1);
default_proto.add_int64_data(0);
test.AddAttribute("default_tensor", default_proto);

test.AddInput<int64_t>("X", {1, 2}, {1, 2});
test.AddOutput<int64_t>("Y", {1, 2}, {10, 20});

// Error originates from the ONNX checker (checker::check_node) during Graph::Resolve().
// There is no way to disable this check.
test.Run(OpTester::ExpectResult::kExpectFailure, "is not regular file");
}

#endif // !defined(ORT_NO_EXCEPTIONS)
Comment thread
yuslepukhin marked this conversation as resolved.

// Duplicate keys: emplace() keeps the first occurrence. Verify this behavior.
TEST(LabelEncoder, DuplicateKeysFirstWinsOpset4) {
std::vector<std::int64_t> dims{1, 3};

std::vector<int64_t> input{1, 2, 3};
// key 1 maps to 10 (first), not 99 (second duplicate)
std::vector<int64_t> output{10, 20, 42};
std::vector<int64_t> key_data{1, 2, 1}; // duplicate key 1
std::vector<int64_t> value_data{10, 20, 99};

OpTester test("LabelEncoder", 4, onnxruntime::kMLDomain);

test.AddAttribute("keys_int64s", key_data);
test.AddAttribute("values_int64s", value_data);

ONNX_NAMESPACE::TensorProto default_proto;
default_proto.set_name("default_tensor");
default_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64);
default_proto.add_dims(1);
default_proto.add_int64_data(42);
test.AddAttribute("default_tensor", default_proto);

test.AddInput<int64_t>("X", dims, input);
test.AddOutput<int64_t>("Y", dims, output);

test.Run();
}

// Singleton 1D default_tensor (dims=[1]) — the ONNX spec requires this shape
TEST(LabelEncoder, SingletonDefaultTensorOpset4) {
std::vector<std::int64_t> dims{1, 3};

std::vector<int64_t> input{1, 2, 99};
std::vector<int64_t> output{10, 20, -7};

OpTester test("LabelEncoder", 4, onnxruntime::kMLDomain);

test.AddAttribute("keys_int64s", std::vector<int64_t>{1, 2});
test.AddAttribute("values_int64s", std::vector<int64_t>{10, 20});

// 1D singleton default_tensor with dims=[1]
ONNX_NAMESPACE::TensorProto default_proto;
default_proto.set_name("default_tensor");
default_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64);
default_proto.add_dims(1);
default_proto.add_int64_data(-7);
test.AddAttribute("default_tensor", default_proto);

test.AddInput<int64_t>("X", dims, input);
test.AddOutput<int64_t>("Y", dims, output);

test.Run();
}

} // namespace test
} // namespace onnxruntime
Loading
Loading