diff --git a/onnxruntime/core/graph/graph.cc b/onnxruntime/core/graph/graph.cc index fe2df6a87d124..4af88ca51fc1f 100644 --- a/onnxruntime/core/graph/graph.cc +++ b/onnxruntime/core/graph/graph.cc @@ -3839,7 +3839,7 @@ Status Graph::ConvertInitializersIntoOrtValues() { FindAllSubgraphs(all_subgraphs); const auto& model_path = GetModel().ModelPath(); - std::unordered_set validated_external_data_paths; + InlinedHashSet validated_external_data_paths; auto put_weights_maybe_in_memory_func = [&](Graph& graph) -> Status { // if we have any initializers that are not in memory, put them there. @@ -3903,7 +3903,96 @@ Status Graph::ConvertInitializersIntoOrtValues() { return Status::OK(); }; - return ForThisAndAllSubgraphs(all_subgraphs, put_weights_maybe_in_memory_func); + ORT_RETURN_IF_ERROR(ForThisAndAllSubgraphs(all_subgraphs, put_weights_maybe_in_memory_func)); + + // Validate and inline external data in node tensor attributes. + // In-memory references are rejected (no legitimate source creates them for attributes). + // File-based external data paths are validated, read from disk, and inlined as raw_data + // so all EPs (including plugins) can access attribute data uniformly. + auto inline_external_attr_tensors_func = [&](Graph& graph) -> Status { + for (auto& node : graph.Nodes()) { + for (auto& [attr_name, attr_proto] : node.GetMutableAttributes()) { + if (utils::HasTensor(attr_proto)) { + auto* tensor_proto = attr_proto.mutable_t(); + if (utils::HasExternalData(*tensor_proto)) { + ORT_RETURN_IF(utils::HasExternalDataInMemory(*tensor_proto), + "Node '", node.Name(), "' attribute '", attr_name, + "' contains an in-memory external data reference, which is not permitted ", + "for node attributes."); + + std::unique_ptr external_data_info; + ORT_RETURN_IF_ERROR( + onnxruntime::ExternalDataInfo::Create(tensor_proto->external_data(), external_data_info)); + const auto& location = external_data_info->GetRelPath(); + + if (validated_external_data_paths.count(location) == 0) { + auto path_status = utils::ValidateExternalDataPath(model_path, location); + if (!path_status.IsOK()) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "Node '", node.Name(), "' attribute '", attr_name, + "': ", path_status.ErrorMessage()); + } + validated_external_data_paths.insert(location); + } + + // Read external data and inline it into the TensorProto. + std::vector buffer; + { + auto unpack_status = utils::UnpackInitializerData(*tensor_proto, model_path, buffer); + if (!unpack_status.IsOK()) { + return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, + "Node '", node.Name(), "' attribute '", attr_name, + "': ", unpack_status.ErrorMessage()); + } + } + tensor_proto->clear_external_data(); + tensor_proto->set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation_DEFAULT); + utils::SetRawDataInTensorProto(*tensor_proto, buffer.data(), buffer.size()); + } + } else if (utils::HasTensors(attr_proto)) { + for (auto& tensor_proto : *attr_proto.mutable_tensors()) { + if (utils::HasExternalData(tensor_proto)) { + ORT_RETURN_IF(utils::HasExternalDataInMemory(tensor_proto), + "Node '", node.Name(), "' attribute '", attr_name, + "' contains an in-memory external data reference, which is not permitted ", + "for node attributes."); + + std::unique_ptr external_data_info; + ORT_RETURN_IF_ERROR( + onnxruntime::ExternalDataInfo::Create(tensor_proto.external_data(), external_data_info)); + const auto& location = external_data_info->GetRelPath(); + + if (validated_external_data_paths.count(location) == 0) { + auto path_status = utils::ValidateExternalDataPath(model_path, location); + if (!path_status.IsOK()) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "Node '", node.Name(), "' attribute '", attr_name, + "': ", path_status.ErrorMessage()); + } + validated_external_data_paths.insert(location); + } + + std::vector buffer; + { + auto unpack_status = utils::UnpackInitializerData(tensor_proto, model_path, buffer); + if (!unpack_status.IsOK()) { + return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, + "Node '", node.Name(), "' attribute '", attr_name, + "': ", unpack_status.ErrorMessage()); + } + } + tensor_proto.clear_external_data(); + tensor_proto.set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation_DEFAULT); + utils::SetRawDataInTensorProto(tensor_proto, buffer.data(), buffer.size()); + } + } + } + } + } + return Status::OK(); + }; + + return ForThisAndAllSubgraphs(all_subgraphs, inline_external_attr_tensors_func); } void Graph::SetName(const std::string& name) { diff --git a/onnxruntime/test/providers/cpu/ml/label_encoder_test.cc b/onnxruntime/test/providers/cpu/ml/label_encoder_test.cc index 034d206fec2f4..8ed2dc744a29c 100644 --- a/onnxruntime/test/providers/cpu/ml/label_encoder_test.cc +++ b/onnxruntime/test/providers/cpu/ml/label_encoder_test.cc @@ -4,6 +4,8 @@ #include "gtest/gtest.h" #include "core/framework/tensorprotoutils.h" #include "test/providers/provider_test_utils.h" +#include "test/util/include/file_util.h" +#include "core/platform/path_lib.h" namespace onnxruntime { namespace test { @@ -756,5 +758,176 @@ TEST(LabelEncoder, EmptyInputOpset4) { test.Run(); } +// External data in tensor attributes: file-based external data is validated and inlined +// during session initialization. In-memory references are rejected by the ONNX checker +// during Graph::Resolve() (it validates that external data locations are regular files). +// In no-exceptions builds, the ONNX checker's fail_check calls abort() so these tests cannot run. +#if !defined(ORT_NO_EXCEPTIONS) + +// RAII helper that creates a unique dummy binary file and removes it on destruction. +static std::pair CreateExternalDataFile(const void* data, size_t num_bytes) { + PathString filename(ORT_TSTR("ext_data_XXXXXX")); + FILE* fp = nullptr; + CreateTestFile(fp, filename); + size_t written = fwrite(data, 1, num_bytes, fp); + ORT_ENFORCE(written == num_bytes, "Failed to write external data file"); + ORT_ENFORCE(fclose(fp) == 0, "Failed to close external data file"); + return {ToUTF8String(filename), ScopedFileDeleter(filename)}; +} + +// Helper: create a TensorProto that references external data in the given file. +static ONNX_NAMESPACE::TensorProto MakeExternalInt64TensorProto(const std::string& name, + const std::string& filename, + int64_t num_elements) { + ONNX_NAMESPACE::TensorProto proto; + proto.set_name(name); + proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64); + proto.add_dims(num_elements); + proto.set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation_EXTERNAL); + auto* loc = proto.add_external_data(); + loc->set_key("location"); + loc->set_value(filename); + auto* offset = proto.add_external_data(); + offset->set_key("offset"); + offset->set_value("0"); + auto* length = proto.add_external_data(); + length->set_key("length"); + length->set_value(std::to_string(num_elements * static_cast(sizeof(int64_t)))); + return proto; +} + +// Helper: create a TensorProto with in-memory external data reference (should be rejected). +static ONNX_NAMESPACE::TensorProto MakeInMemoryExternalTensorProto(const std::string& name, + int64_t num_elements) { + ONNX_NAMESPACE::TensorProto proto; + proto.set_name(name); + proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64); + proto.add_dims(num_elements); + proto.set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation_EXTERNAL); + auto* loc = proto.add_external_data(); + loc->set_key("location"); + loc->set_value(ToUTF8String(onnxruntime::utils::kTensorProtoNativeEndianMemoryAddressTag)); + auto* offset = proto.add_external_data(); + offset->set_key("offset"); + offset->set_value("12345678"); + auto* length = proto.add_external_data(); + length->set_key("length"); + length->set_value(std::to_string(num_elements * static_cast(sizeof(int64_t)))); + return proto; +} + +// Valid external data in tensor attributes should be loaded and inlined during session initialization. +TEST(LabelEncoder, ExternalDataInKeysTensorOpset4) { + std::vector key_data{1, 2}; + auto [ext_path, ext_deleter] = CreateExternalDataFile(key_data.data(), key_data.size() * sizeof(int64_t)); + + OpTester test("LabelEncoder", 4, onnxruntime::kMLDomain); + test.AddAttribute("keys_tensor", MakeExternalInt64TensorProto("keys_tensor", ext_path, 2)); + + ONNX_NAMESPACE::TensorProto values_proto; + values_proto.set_name("values_tensor"); + values_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64); + values_proto.add_dims(2); + values_proto.add_int64_data(10); + values_proto.add_int64_data(20); + test.AddAttribute("values_tensor", values_proto); + + ONNX_NAMESPACE::TensorProto default_proto; + default_proto.set_name("default_tensor"); + default_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64); + default_proto.add_dims(1); + default_proto.add_int64_data(42); + test.AddAttribute("default_tensor", default_proto); + + test.AddInput("X", {1, 3}, {1, 2, 99}); + test.AddOutput("Y", {1, 3}, {10, 20, 42}); + + test.Run(); +} + +// In-memory external data references in node attributes are rejected during initialization. +TEST(LabelEncoder, RejectsInMemoryExternalDataInKeysTensorOpset4) { + OpTester test("LabelEncoder", 4, onnxruntime::kMLDomain); + test.AddAttribute("keys_tensor", MakeInMemoryExternalTensorProto("keys_tensor", 2)); + + ONNX_NAMESPACE::TensorProto values_proto; + values_proto.set_name("values_tensor"); + values_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64); + values_proto.add_dims(2); + values_proto.add_int64_data(10); + values_proto.add_int64_data(20); + test.AddAttribute("values_tensor", values_proto); + + ONNX_NAMESPACE::TensorProto default_proto; + default_proto.set_name("default_tensor"); + default_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64); + default_proto.add_dims(1); + default_proto.add_int64_data(0); + test.AddAttribute("default_tensor", default_proto); + + test.AddInput("X", {1, 2}, {1, 2}); + test.AddOutput("Y", {1, 2}, {10, 20}); + + // Error originates from the ONNX checker (checker::check_node) during Graph::Resolve(). + // There is no way to disable this check. + test.Run(OpTester::ExpectResult::kExpectFailure, "is not regular file"); +} + +#endif // !defined(ORT_NO_EXCEPTIONS) + +// Duplicate keys: emplace() keeps the first occurrence. Verify this behavior. +TEST(LabelEncoder, DuplicateKeysFirstWinsOpset4) { + std::vector dims{1, 3}; + + std::vector input{1, 2, 3}; + // key 1 maps to 10 (first), not 99 (second duplicate) + std::vector output{10, 20, 42}; + std::vector key_data{1, 2, 1}; // duplicate key 1 + std::vector value_data{10, 20, 99}; + + OpTester test("LabelEncoder", 4, onnxruntime::kMLDomain); + + test.AddAttribute("keys_int64s", key_data); + test.AddAttribute("values_int64s", value_data); + + ONNX_NAMESPACE::TensorProto default_proto; + default_proto.set_name("default_tensor"); + default_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64); + default_proto.add_dims(1); + default_proto.add_int64_data(42); + test.AddAttribute("default_tensor", default_proto); + + test.AddInput("X", dims, input); + test.AddOutput("Y", dims, output); + + test.Run(); +} + +// Singleton 1D default_tensor (dims=[1]) — the ONNX spec requires this shape +TEST(LabelEncoder, SingletonDefaultTensorOpset4) { + std::vector dims{1, 3}; + + std::vector input{1, 2, 99}; + std::vector output{10, 20, -7}; + + OpTester test("LabelEncoder", 4, onnxruntime::kMLDomain); + + test.AddAttribute("keys_int64s", std::vector{1, 2}); + test.AddAttribute("values_int64s", std::vector{10, 20}); + + // 1D singleton default_tensor with dims=[1] + ONNX_NAMESPACE::TensorProto default_proto; + default_proto.set_name("default_tensor"); + default_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64); + default_proto.add_dims(1); + default_proto.add_int64_data(-7); + test.AddAttribute("default_tensor", default_proto); + + test.AddInput("X", dims, input); + test.AddOutput("Y", dims, output); + + test.Run(); +} + } // namespace test } // namespace onnxruntime diff --git a/onnxruntime/test/providers/cpu/ml/tree_ensembler_test.cc b/onnxruntime/test/providers/cpu/ml/tree_ensembler_test.cc index 1510f3fe3e012..76b4b5f478ddf 100644 --- a/onnxruntime/test/providers/cpu/ml/tree_ensembler_test.cc +++ b/onnxruntime/test/providers/cpu/ml/tree_ensembler_test.cc @@ -3,6 +3,8 @@ #include "gtest/gtest.h" #include "test/providers/provider_test_utils.h" +#include "core/framework/tensorprotoutils.h" +#include "core/platform/path_lib.h" namespace onnxruntime { namespace test { @@ -44,7 +46,7 @@ static ONNX_NAMESPACE::TensorProto make_tensor(std::vector array, std:: } template -void _multiply_update_array(std::vector& data, int n, T inc = 0) { +void MultiplyUpdateArray(std::vector& data, int n, T inc = 0) { std::vector copy = data; data.resize(copy.size() * n); T cst = 0; @@ -57,7 +59,7 @@ void _multiply_update_array(std::vector& data, int n, T inc = 0) { } template -void _multiply_update_childnode(std::vector& childnodes, std::vector& childleafs, std::vector& otherchildleafs, int n) { +void MultiplyUpdateChildnode(std::vector& childnodes, std::vector& childleafs, std::vector& otherchildleafs, int n) { int64_t leafs_cnt = 0; int64_t nodes_cnt = childnodes.size(); for (auto& childleaf : childleafs) { @@ -87,7 +89,7 @@ void _multiply_update_childnode(std::vector& childnodes, std::vector& chil } template -void _multiply_arrays_values(std::vector& data, int64_t val) { +void MultiplyArraysValues(std::vector& data, int64_t val) { for (auto& curr : data) { curr *= val; } @@ -113,16 +115,16 @@ void GenTreeAndRunTest(const std::vector& X, const std::vector& Y, const i if (n_trees > 1) { // Multiplies the number of trees to test the parallelization by trees. - _multiply_update_array(tree_roots, n_trees, (int64_t)nodes_truenodeids.size()); - _multiply_update_array(nodes_featureids, n_trees); - _multiply_update_childnode(nodes_truenodeids, nodes_trueleafs, nodes_falseleafs, n_trees); - _multiply_update_childnode(nodes_falsenodeids, nodes_falseleafs, nodes_trueleafs, n_trees); - _multiply_update_array(nodes_trueleafs, n_trees); - _multiply_update_array(nodes_falseleafs, n_trees); - _multiply_update_array(leaf_targetids, n_trees); - _multiply_update_array(nodes_modes, n_trees); - _multiply_update_array(nodes_splits, n_trees); - _multiply_update_array(leaf_weights, n_trees); + MultiplyUpdateArray(tree_roots, n_trees, (int64_t)nodes_truenodeids.size()); + MultiplyUpdateArray(nodes_featureids, n_trees); + MultiplyUpdateChildnode(nodes_truenodeids, nodes_trueleafs, nodes_falseleafs, n_trees); + MultiplyUpdateChildnode(nodes_falsenodeids, nodes_falseleafs, nodes_trueleafs, n_trees); + MultiplyUpdateArray(nodes_trueleafs, n_trees); + MultiplyUpdateArray(nodes_falseleafs, n_trees); + MultiplyUpdateArray(leaf_targetids, n_trees); + MultiplyUpdateArray(nodes_modes, n_trees); + MultiplyUpdateArray(nodes_splits, n_trees); + MultiplyUpdateArray(leaf_weights, n_trees); } auto nodes_modes_as_tensor = make_tensor(nodes_modes, "nodes_modes"); @@ -171,17 +173,17 @@ void GenTreeAndRunTestWithSetMembership(const std::vector& X, const std::vect if (n_trees > 1) { // Multiplies the number of trees to test the parallelization by trees. - _multiply_update_array(tree_roots, n_trees, (int64_t)nodes_truenodeids.size()); - _multiply_update_array(nodes_featureids, n_trees); - _multiply_update_childnode(nodes_truenodeids, nodes_trueleafs, nodes_falseleafs, n_trees); - _multiply_update_childnode(nodes_falsenodeids, nodes_falseleafs, nodes_trueleafs, n_trees); - _multiply_update_array(nodes_trueleafs, n_trees); - _multiply_update_array(nodes_falseleafs, n_trees); - _multiply_update_array(leaf_targetids, n_trees); - _multiply_update_array(nodes_modes, n_trees); - _multiply_update_array(nodes_splits, n_trees); - _multiply_update_array(membership_values, n_trees); - _multiply_update_array(leaf_weights, n_trees); + MultiplyUpdateArray(tree_roots, n_trees, (int64_t)nodes_truenodeids.size()); + MultiplyUpdateArray(nodes_featureids, n_trees); + MultiplyUpdateChildnode(nodes_truenodeids, nodes_trueleafs, nodes_falseleafs, n_trees); + MultiplyUpdateChildnode(nodes_falsenodeids, nodes_falseleafs, nodes_trueleafs, n_trees); + MultiplyUpdateArray(nodes_trueleafs, n_trees); + MultiplyUpdateArray(nodes_falseleafs, n_trees); + MultiplyUpdateArray(leaf_targetids, n_trees); + MultiplyUpdateArray(nodes_modes, n_trees); + MultiplyUpdateArray(nodes_splits, n_trees); + MultiplyUpdateArray(membership_values, n_trees); + MultiplyUpdateArray(leaf_weights, n_trees); } auto nodes_modes_as_tensor = make_tensor(nodes_modes, "nodes_modes"); @@ -225,7 +227,7 @@ TEST(MLOpTest, TreeEnsembleDouble) { std::vector Y = {5.23f, 0.f, 5.23f, 0.f, 0.f, 12.12f}; GenTreeAndRunTest(X, Y, 1, 1); - _multiply_arrays_values(Y, 3); + MultiplyArraysValues(Y, 3); GenTreeAndRunTest(X, Y, 1, 3); } @@ -240,7 +242,7 @@ TEST(MLOpTest, TreeEnsembleSetMembership) { 0.f, 10.f, 0.f, 0.f}; GenTreeAndRunTestWithSetMembership(X, Y, 1, 1); - _multiply_arrays_values(Y, 5); + MultiplyArraysValues(Y, 5); GenTreeAndRunTestWithSetMembership(X, Y, 1, 5); } @@ -452,5 +454,69 @@ TEST(MLOpTest, TreeEnsembleIssue25400) { test.Run(); } +// In-memory external data references in node attributes are rejected by the ONNX checker +// during Graph::Resolve() (it validates that external data locations are regular files). +// In no-exceptions builds, the ONNX checker's fail_check calls abort() so these tests cannot run. +#if !defined(ORT_NO_EXCEPTIONS) + +// In-memory external data references in node attributes are rejected during Graph::Resolve(). +TEST(MLOpTest, TreeEnsembleRejectsInMemoryExternalDataInTensorAttribute) { + OpTester test("TreeEnsemble", 5, onnxruntime::kMLDomain); + + // nodes_splits with in-memory external data reference (should be rejected) + ONNX_NAMESPACE::TensorProto splits_proto; + splits_proto.set_name("nodes_splits"); + splits_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT); + splits_proto.add_dims(1); + splits_proto.set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation_EXTERNAL); + auto* loc = splits_proto.add_external_data(); + loc->set_key("location"); + loc->set_value(ToUTF8String(onnxruntime::utils::kTensorProtoNativeEndianMemoryAddressTag)); + auto* offset = splits_proto.add_external_data(); + offset->set_key("offset"); + offset->set_value("12345678"); + auto* length = splits_proto.add_external_data(); + length->set_key("length"); + length->set_value("4"); + test.AddAttribute("nodes_splits", splits_proto); + + // Minimal valid structure for remaining attributes + ONNX_NAMESPACE::TensorProto leaf_weights_proto; + leaf_weights_proto.set_name("leaf_weights"); + leaf_weights_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT); + leaf_weights_proto.add_dims(2); + leaf_weights_proto.add_float_data(1.0f); + leaf_weights_proto.add_float_data(2.0f); + test.AddAttribute("leaf_weights", leaf_weights_proto); + + ONNX_NAMESPACE::TensorProto modes_proto; + modes_proto.set_name("nodes_modes"); + modes_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_UINT8); + modes_proto.add_dims(1); + modes_proto.add_int32_data(0); + test.AddAttribute("nodes_modes", modes_proto); + + test.AddAttribute("aggregate_function", static_cast(1)); + test.AddAttribute("leaf_targetids", std::vector{0, 0}); + test.AddAttribute("n_targets", static_cast(1)); + test.AddAttribute("nodes_falseleafs", std::vector{1}); + test.AddAttribute("nodes_falsenodeids", std::vector{1}); + test.AddAttribute("nodes_featureids", std::vector{0}); + test.AddAttribute("nodes_trueleafs", std::vector{1}); + test.AddAttribute("nodes_truenodeids", std::vector{0}); + test.AddAttribute("post_transform", static_cast(0)); + test.AddAttribute("tree_roots", std::vector{0}); + + std::vector X = {1.f}; + test.AddInput("X", {1, 1}, X); + test.AddOutput("Y", {1, 1}, {0.f}); + + // Error originates from the ONNX checker (checker::check_node) during Graph::Resolve(). + // There is no way to disable this check. + test.Run(OpTester::ExpectResult::kExpectFailure, "is not regular file"); +} + +#endif // !defined(ORT_NO_EXCEPTIONS) + } // namespace test } // namespace onnxruntime diff --git a/onnxruntime/test/providers/cpu/ml/treeregressor_test.cc b/onnxruntime/test/providers/cpu/ml/treeregressor_test.cc index 7dbb40556a929..988da6e68c69b 100644 --- a/onnxruntime/test/providers/cpu/ml/treeregressor_test.cc +++ b/onnxruntime/test/providers/cpu/ml/treeregressor_test.cc @@ -3,12 +3,14 @@ #include "gtest/gtest.h" #include "test/providers/provider_test_utils.h" +#include "core/framework/tensorprotoutils.h" +#include "core/platform/path_lib.h" namespace onnxruntime { namespace test { template -void _multiply_update_array(std::vector& data, int n, T inc = 0) { +void MultiplyUpdateArray(std::vector& data, int n, T inc = 0) { std::vector copy = data; data.resize(copy.size() * n); T cst = 0; @@ -20,7 +22,7 @@ void _multiply_update_array(std::vector& data, int n, T inc = 0) { } } -void _multiply_update_array_string(std::vector& data, int n) { +void MultiplyUpdateArrayString(std::vector& data, int n) { std::vector copy = data; data.resize(copy.size() * n); for (int i = 0; i < n; ++i) { @@ -52,17 +54,17 @@ void GenTreeAndRunTest(int opsetml, const std::vector& X, const std::vector 1) { // Multiplies the number of trees to test the parallelization by trees. - _multiply_update_array(lefts, n_trees); - _multiply_update_array(rights, n_trees); - _multiply_update_array(treeids, n_trees, (int64_t)3); - _multiply_update_array(nodeids, n_trees); - _multiply_update_array(featureids, n_trees); - _multiply_update_array(thresholds, n_trees); - _multiply_update_array_string(modes, n_trees); - _multiply_update_array(target_treeids, n_trees, (int64_t)3); - _multiply_update_array(target_nodeids, n_trees); - _multiply_update_array(target_classids, n_trees); - _multiply_update_array(target_weights, n_trees); + MultiplyUpdateArray(lefts, n_trees); + MultiplyUpdateArray(rights, n_trees); + MultiplyUpdateArray(treeids, n_trees, (int64_t)3); + MultiplyUpdateArray(nodeids, n_trees); + MultiplyUpdateArray(featureids, n_trees); + MultiplyUpdateArray(thresholds, n_trees); + MultiplyUpdateArrayString(modes, n_trees); + MultiplyUpdateArray(target_treeids, n_trees, (int64_t)3); + MultiplyUpdateArray(target_nodeids, n_trees); + MultiplyUpdateArray(target_classids, n_trees); + MultiplyUpdateArray(target_weights, n_trees); } // add attributes @@ -146,17 +148,17 @@ void GenTreeAndRunTest_as_tensor(int opsetml, const std::vector& X, const std if (n_trees > 1) { // Multiplies the number of trees to test the parallelization by trees. - _multiply_update_array(lefts, n_trees); - _multiply_update_array(rights, n_trees); - _multiply_update_array(treeids, n_trees, (int64_t)3); - _multiply_update_array(nodeids, n_trees); - _multiply_update_array(featureids, n_trees); - _multiply_update_array(thresholds, n_trees); - _multiply_update_array_string(modes, n_trees); - _multiply_update_array(target_treeids, n_trees, (int64_t)3); - _multiply_update_array(target_nodeids, n_trees); - _multiply_update_array(target_classids, n_trees); - _multiply_update_array(target_weights, n_trees); + MultiplyUpdateArray(lefts, n_trees); + MultiplyUpdateArray(rights, n_trees); + MultiplyUpdateArray(treeids, n_trees, (int64_t)3); + MultiplyUpdateArray(nodeids, n_trees); + MultiplyUpdateArray(featureids, n_trees); + MultiplyUpdateArray(thresholds, n_trees); + MultiplyUpdateArrayString(modes, n_trees); + MultiplyUpdateArray(target_treeids, n_trees, (int64_t)3); + MultiplyUpdateArray(target_nodeids, n_trees); + MultiplyUpdateArray(target_classids, n_trees); + MultiplyUpdateArray(target_weights, n_trees); } // add attributes @@ -356,17 +358,17 @@ void GenTreeAndRunTest1(int opsetml, const std::string& aggFunction, bool one_ob if (n_trees > 1) { // Multiplies the number of trees to test the parallelization by trees. - _multiply_update_array(lefts, n_trees); - _multiply_update_array(rights, n_trees); - _multiply_update_array(treeids, n_trees, (int64_t)3); - _multiply_update_array(nodeids, n_trees); - _multiply_update_array(featureids, n_trees); - _multiply_update_array(thresholds, n_trees); - _multiply_update_array_string(modes, n_trees); - _multiply_update_array(target_treeids, n_trees, (int64_t)3); - _multiply_update_array(target_nodeids, n_trees); - _multiply_update_array(target_classids, n_trees); - _multiply_update_array(target_weights, n_trees); + MultiplyUpdateArray(lefts, n_trees); + MultiplyUpdateArray(rights, n_trees); + MultiplyUpdateArray(treeids, n_trees, (int64_t)3); + MultiplyUpdateArray(nodeids, n_trees); + MultiplyUpdateArray(featureids, n_trees); + MultiplyUpdateArray(thresholds, n_trees); + MultiplyUpdateArrayString(modes, n_trees); + MultiplyUpdateArray(target_treeids, n_trees, (int64_t)3); + MultiplyUpdateArray(target_nodeids, n_trees); + MultiplyUpdateArray(target_classids, n_trees); + MultiplyUpdateArray(target_weights, n_trees); } std::vector results; @@ -469,17 +471,17 @@ void GenTreeAndRunTest1_as_tensor(int opsetml, const std::string& aggFunction, b if (n_trees > 1) { // Multiplies the number of trees to test the parallelization by trees. - _multiply_update_array(lefts, n_trees); - _multiply_update_array(rights, n_trees); - _multiply_update_array(treeids, n_trees, (int64_t)3); - _multiply_update_array(nodeids, n_trees); - _multiply_update_array(featureids, n_trees); - _multiply_update_array(thresholds, n_trees); - _multiply_update_array_string(modes, n_trees); - _multiply_update_array(target_treeids, n_trees, (int64_t)3); - _multiply_update_array(target_nodeids, n_trees); - _multiply_update_array(target_classids, n_trees); - _multiply_update_array(target_weights, n_trees); + MultiplyUpdateArray(lefts, n_trees); + MultiplyUpdateArray(rights, n_trees); + MultiplyUpdateArray(treeids, n_trees, (int64_t)3); + MultiplyUpdateArray(nodeids, n_trees); + MultiplyUpdateArray(featureids, n_trees); + MultiplyUpdateArray(thresholds, n_trees); + MultiplyUpdateArrayString(modes, n_trees); + MultiplyUpdateArray(target_treeids, n_trees, (int64_t)3); + MultiplyUpdateArray(target_nodeids, n_trees); + MultiplyUpdateArray(target_classids, n_trees); + MultiplyUpdateArray(target_weights, n_trees); } std::vector results; @@ -1081,5 +1083,61 @@ TEST(MLOpTest, TreeEnsembleRegressorBaseValuesWrongSize) { test.Run(OpTester::ExpectResult::kExpectFailure, "base_values should have 0 or 2 values."); } +// In-memory external data references in node attributes are rejected by the ONNX checker +// during Graph::Resolve() (it validates that external data locations are regular files). +// In no-exceptions builds, the ONNX checker's fail_check calls abort() so these tests cannot run. +#if !defined(ORT_NO_EXCEPTIONS) + +TEST(MLOpTest, TreeEnsembleRegressorRejectsInMemoryExternalDataInTensorAttribute) { + OpTester test("TreeEnsembleRegressor", 3, onnxruntime::kMLDomain); + + // Minimal valid tree structure + std::vector lefts = {1, 0, 0}; + std::vector rights = {2, 0, 0}; + std::vector treeids = {0, 0, 0}; + std::vector nodeids = {0, 1, 2}; + std::vector featureids = {0, 0, 0}; + std::vector modes = {"BRANCH_LEQ", "LEAF", "LEAF"}; + + test.AddAttribute("nodes_truenodeids", lefts); + test.AddAttribute("nodes_falsenodeids", rights); + test.AddAttribute("nodes_treeids", treeids); + test.AddAttribute("nodes_nodeids", nodeids); + test.AddAttribute("nodes_featureids", featureids); + test.AddAttribute("nodes_modes", modes); + test.AddAttribute("target_treeids", std::vector{0, 0}); + test.AddAttribute("target_nodeids", std::vector{1, 2}); + test.AddAttribute("target_ids", std::vector{0, 0}); + test.AddAttribute("target_weights", std::vector{1.f, 2.f}); + test.AddAttribute("n_targets", static_cast(1)); + + // nodes_values_as_tensor with in-memory external data reference (should be rejected) + ONNX_NAMESPACE::TensorProto values_proto; + values_proto.set_name("nodes_values_as_tensor"); + values_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT); + values_proto.add_dims(3); + values_proto.set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation_EXTERNAL); + auto* loc = values_proto.add_external_data(); + loc->set_key("location"); + loc->set_value(ToUTF8String(utils::kTensorProtoNativeEndianMemoryAddressTag)); + auto* offset = values_proto.add_external_data(); + offset->set_key("offset"); + offset->set_value("12345678"); + auto* length = values_proto.add_external_data(); + length->set_key("length"); + length->set_value("12"); + test.AddAttribute("nodes_values_as_tensor", values_proto); + + std::vector X = {1.f}; + test.AddInput("X", {1, 1}, X); + test.AddOutput("Y", {1, 1}, {0.f}); + + // Error originates from the ONNX checker (checker::check_node) during Graph::Resolve(). + // There is no way to disable this check. + test.Run(OpTester::ExpectResult::kExpectFailure, "is not regular file"); +} + +#endif // !defined(ORT_NO_EXCEPTIONS) + } // namespace test } // namespace onnxruntime