From fa6b42b08f135a5a2c06bcc25c4b9afe3bed510c Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Wed, 24 Jun 2026 15:40:37 -0700 Subject: [PATCH 1/9] Reject external data in tensor attributes for LabelEncoder and TreeEnsemble Tensor attributes (keys_tensor, values_tensor, default_tensor, and *_as_tensor) in LabelEncoder and TreeEnsemble kernels did not validate against external data location. Add HasExternalData() checks before calling UnpackTensor to reject such attributes during kernel construction. Add unit tests covering external data rejection for: - LabelEncoder opset 4 (keys, values, default tensors) - TreeEnsembleRegressor opset 3 - TreeEnsemble opset 5 Also add tests for duplicate key behavior (first-wins) and scalar default_tensor. --- .../core/providers/cpu/ml/label_encoder.h | 4 + .../providers/cpu/ml/tree_ensemble_helper.cc | 3 + .../providers/cpu/ml/label_encoder_test.cc | 162 ++++++++++++++++++ .../providers/cpu/ml/tree_ensembler_test.cc | 54 ++++++ .../providers/cpu/ml/treeregressor_test.cc | 48 ++++++ 5 files changed, 271 insertions(+) diff --git a/onnxruntime/core/providers/cpu/ml/label_encoder.h b/onnxruntime/core/providers/cpu/ml/label_encoder.h index 6cdde84fdb082..7c6bd35ceae18 100644 --- a/onnxruntime/core/providers/cpu/ml/label_encoder.h +++ b/onnxruntime/core/providers/cpu/ml/label_encoder.h @@ -119,6 +119,8 @@ std::vector GetAttribute(const OpKernelInfo& info, const std::string& name, c } else { ORT_ENFORCE(result.IsOK(), "LabelEncoder is missing attribute ", tensor_name, " or ", name); } + ORT_ENFORCE(!utils::HasExternalData(attr_tensor_proto), + "Tensor attribute ", tensor_name, " with external data is not supported."); SafeInt element_count(1); for (auto dim : attr_tensor_proto.dims()) { element_count *= dim; @@ -135,6 +137,8 @@ T GetDefault(const OpKernelInfo& info, const std::string& attr_name, const T& ba ONNX_NAMESPACE::TensorProto attr_tensor_proto; auto result = info.GetAttr("default_tensor", &attr_tensor_proto); if (result.IsOK() && utils::HasDataType(attr_tensor_proto)) { + ORT_ENFORCE(!utils::HasExternalData(attr_tensor_proto), + "Tensor attribute default_tensor with external data is not supported."); T default_value; result = utils::UnpackTensor(attr_tensor_proto, std::filesystem::path(), &default_value, 1); ORT_ENFORCE(result.IsOK(), "LabelEncoder could not unpack default tensor ", attr_name); diff --git a/onnxruntime/core/providers/cpu/ml/tree_ensemble_helper.cc b/onnxruntime/core/providers/cpu/ml/tree_ensemble_helper.cc index 399dfd56b93c6..b37854c3d0198 100644 --- a/onnxruntime/core/providers/cpu/ml/tree_ensemble_helper.cc +++ b/onnxruntime/core/providers/cpu/ml/tree_ensemble_helper.cc @@ -28,6 +28,9 @@ Status GetAnyVectorAttrsOrDefault(const OpKernelInfo& info, const std::string& n return Status::OK(); } + ORT_RETURN_IF(utils::HasExternalData(proto), + "Tensor attribute ", name, " with external data is not supported."); + const SafeInt tensor_size(n_elements); data.clear(); data.resize(tensor_size); diff --git a/onnxruntime/test/providers/cpu/ml/label_encoder_test.cc b/onnxruntime/test/providers/cpu/ml/label_encoder_test.cc index 034d206fec2f4..108813c43bd94 100644 --- a/onnxruntime/test/providers/cpu/ml/label_encoder_test.cc +++ b/onnxruntime/test/providers/cpu/ml/label_encoder_test.cc @@ -756,5 +756,167 @@ TEST(LabelEncoder, EmptyInputOpset4) { test.Run(); } +// External data in tensor attributes is not supported. The kernel must reject such attributes +// during construction. These tests verify the rejection. +// In no-exceptions builds, ORT_ENFORCE calls abort() so these tests cannot run. +#if !defined(ORT_NO_EXCEPTIONS) + +TEST(LabelEncoder, RejectsExternalDataInKeysTensorOpset4) { + OpTester test("LabelEncoder", 4, onnxruntime::kMLDomain); + + // Create keys_tensor with external data location + ONNX_NAMESPACE::TensorProto keys_proto; + keys_proto.set_name("keys_tensor"); + keys_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64); + keys_proto.add_dims(2); + keys_proto.set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation_EXTERNAL); + auto* entry = keys_proto.add_external_data(); + entry->set_key("location"); + entry->set_value("some_file.bin"); + test.AddAttribute("keys_tensor", keys_proto); + + // Normal values_tensor + ONNX_NAMESPACE::TensorProto values_proto; + values_proto.set_name("values_tensor"); + values_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64); + values_proto.add_dims(2); + values_proto.add_int64_data(10); + values_proto.add_int64_data(20); + test.AddAttribute("values_tensor", values_proto); + + ONNX_NAMESPACE::TensorProto default_proto; + default_proto.set_name("default_tensor"); + default_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64); + default_proto.add_dims(1); + default_proto.add_int64_data(0); + test.AddAttribute("default_tensor", default_proto); + + test.AddInput("X", {1, 2}, {1, 2}); + test.AddOutput("Y", {1, 2}, {10, 20}); + + // CUDA EP uses a different code path that doesn't hit this issue, exclude it. + test.Run(OpTester::ExpectResult::kExpectFailure, "external data is not supported", + {kCudaExecutionProvider}); +} + +TEST(LabelEncoder, RejectsExternalDataInDefaultTensorOpset4) { + OpTester test("LabelEncoder", 4, onnxruntime::kMLDomain); + + test.AddAttribute("keys_int64s", std::vector{1, 2}); + test.AddAttribute("values_int64s", std::vector{10, 20}); + + // default_tensor with external data location + ONNX_NAMESPACE::TensorProto default_proto; + default_proto.set_name("default_tensor"); + default_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64); + default_proto.add_dims(1); + default_proto.set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation_EXTERNAL); + auto* entry = default_proto.add_external_data(); + entry->set_key("location"); + entry->set_value("some_file.bin"); + test.AddAttribute("default_tensor", default_proto); + + test.AddInput("X", {1, 2}, {1, 3}); + test.AddOutput("Y", {1, 2}, {10, 0}); + + // CUDA EP uses a different code path that doesn't hit this issue, exclude it. + test.Run(OpTester::ExpectResult::kExpectFailure, "external data is not supported", + {kCudaExecutionProvider}); +} + +TEST(LabelEncoder, RejectsExternalDataInValuesTensorOpset4) { + OpTester test("LabelEncoder", 4, onnxruntime::kMLDomain); + + // Normal keys_tensor + ONNX_NAMESPACE::TensorProto keys_proto; + keys_proto.set_name("keys_tensor"); + keys_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64); + keys_proto.add_dims(2); + keys_proto.add_int64_data(1); + keys_proto.add_int64_data(2); + test.AddAttribute("keys_tensor", keys_proto); + + // values_tensor with external data location + ONNX_NAMESPACE::TensorProto values_proto; + values_proto.set_name("values_tensor"); + values_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64); + values_proto.add_dims(2); + values_proto.set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation_EXTERNAL); + auto* entry = values_proto.add_external_data(); + entry->set_key("location"); + entry->set_value("some_file.bin"); + test.AddAttribute("values_tensor", values_proto); + + ONNX_NAMESPACE::TensorProto default_proto; + default_proto.set_name("default_tensor"); + default_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64); + default_proto.add_dims(1); + default_proto.add_int64_data(0); + test.AddAttribute("default_tensor", default_proto); + + test.AddInput("X", {1, 2}, {1, 2}); + test.AddOutput("Y", {1, 2}, {10, 20}); + + // CUDA EP uses a different code path that doesn't hit this issue, exclude it. + test.Run(OpTester::ExpectResult::kExpectFailure, "external data is not supported", + {kCudaExecutionProvider}); +} + +#endif // !defined(ORT_NO_EXCEPTIONS) + +// Duplicate keys: emplace() keeps the first occurrence. Verify this behavior. +TEST(LabelEncoder, DuplicateKeysFirstWinsOpset4) { + std::vector dims{1, 3}; + + std::vector input{1, 2, 3}; + // key 1 maps to 10 (first), not 99 (second duplicate) + std::vector output{10, 20, 42}; + std::vector key_data{1, 2, 1}; // duplicate key 1 + std::vector value_data{10, 20, 99}; + + OpTester test("LabelEncoder", 4, onnxruntime::kMLDomain); + + test.AddAttribute("keys_int64s", key_data); + test.AddAttribute("values_int64s", value_data); + + ONNX_NAMESPACE::TensorProto default_proto; + default_proto.set_name("default_tensor"); + default_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64); + default_proto.add_dims(1); + default_proto.add_int64_data(42); + test.AddAttribute("default_tensor", default_proto); + + test.AddInput("X", dims, input); + test.AddOutput("Y", dims, output); + + test.Run(); +} + +// Scalar (zero-rank) default_tensor — single element with no dims +TEST(LabelEncoder, ScalarDefaultTensorOpset4) { + std::vector dims{1, 3}; + + std::vector input{1, 2, 99}; + std::vector output{10, 20, -7}; + + OpTester test("LabelEncoder", 4, onnxruntime::kMLDomain); + + test.AddAttribute("keys_int64s", std::vector{1, 2}); + test.AddAttribute("values_int64s", std::vector{10, 20}); + + // Scalar default_tensor: no dims, single element + ONNX_NAMESPACE::TensorProto default_proto; + default_proto.set_name("default_tensor"); + default_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64); + // No add_dims() — zero-rank tensor (scalar) + default_proto.add_int64_data(-7); + test.AddAttribute("default_tensor", default_proto); + + test.AddInput("X", dims, input); + test.AddOutput("Y", dims, output); + + test.Run(); +} + } // namespace test } // namespace onnxruntime diff --git a/onnxruntime/test/providers/cpu/ml/tree_ensembler_test.cc b/onnxruntime/test/providers/cpu/ml/tree_ensembler_test.cc index 1510f3fe3e012..32ca4b8a831a8 100644 --- a/onnxruntime/test/providers/cpu/ml/tree_ensembler_test.cc +++ b/onnxruntime/test/providers/cpu/ml/tree_ensembler_test.cc @@ -452,5 +452,59 @@ TEST(MLOpTest, TreeEnsembleIssue25400) { test.Run(); } +// External data in tensor attributes is not supported. In no-exceptions builds, the enforcement +// calls abort() so these tests cannot run. +#if !defined(ORT_NO_EXCEPTIONS) + +TEST(MLOpTest, TreeEnsembleRejectsExternalDataInTensorAttribute) { + OpTester test("TreeEnsemble", 5, onnxruntime::kMLDomain); + + // nodes_splits with external data location + ONNX_NAMESPACE::TensorProto splits_proto; + splits_proto.set_name("nodes_splits"); + splits_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT); + splits_proto.add_dims(3); + splits_proto.set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation_EXTERNAL); + auto* entry = splits_proto.add_external_data(); + entry->set_key("location"); + entry->set_value("some_file.bin"); + test.AddAttribute("nodes_splits", splits_proto); + + // Minimal valid structure for remaining attributes + ONNX_NAMESPACE::TensorProto leaf_weights_proto; + leaf_weights_proto.set_name("leaf_weights"); + leaf_weights_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT); + leaf_weights_proto.add_dims(2); + leaf_weights_proto.add_float_data(1.0f); + leaf_weights_proto.add_float_data(2.0f); + test.AddAttribute("leaf_weights", leaf_weights_proto); + + ONNX_NAMESPACE::TensorProto modes_proto; + modes_proto.set_name("nodes_modes"); + modes_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_UINT8); + modes_proto.add_dims(1); + modes_proto.add_int32_data(0); + test.AddAttribute("nodes_modes", modes_proto); + + test.AddAttribute("aggregate_function", static_cast(1)); + test.AddAttribute("leaf_targetids", std::vector{0, 0}); + test.AddAttribute("n_targets", static_cast(1)); + test.AddAttribute("nodes_falseleafs", std::vector{1}); + test.AddAttribute("nodes_falsenodeids", std::vector{1}); + test.AddAttribute("nodes_featureids", std::vector{0}); + test.AddAttribute("nodes_trueleafs", std::vector{1}); + test.AddAttribute("nodes_truenodeids", std::vector{0}); + test.AddAttribute("post_transform", static_cast(0)); + test.AddAttribute("tree_roots", std::vector{0}); + + std::vector X = {1.f}; + test.AddInput("X", {1, 1}, X); + test.AddOutput("Y", {1, 1}, {0.f}); + + test.Run(OpTester::ExpectResult::kExpectFailure, "external data is not supported"); +} + +#endif // !defined(ORT_NO_EXCEPTIONS) + } // namespace test } // namespace onnxruntime diff --git a/onnxruntime/test/providers/cpu/ml/treeregressor_test.cc b/onnxruntime/test/providers/cpu/ml/treeregressor_test.cc index 7dbb40556a929..d8ec8c6556835 100644 --- a/onnxruntime/test/providers/cpu/ml/treeregressor_test.cc +++ b/onnxruntime/test/providers/cpu/ml/treeregressor_test.cc @@ -1081,5 +1081,53 @@ TEST(MLOpTest, TreeEnsembleRegressorBaseValuesWrongSize) { test.Run(OpTester::ExpectResult::kExpectFailure, "base_values should have 0 or 2 values."); } +// External data in tensor attributes is not supported. The kernel must reject such attributes +// during construction. In no-exceptions builds, ORT_ENFORCE/ORT_THROW_IF_ERROR calls abort(). +#if !defined(ORT_NO_EXCEPTIONS) + +TEST(MLOpTest, TreeEnsembleRegressorRejectsExternalDataInTensorAttribute) { + OpTester test("TreeEnsembleRegressor", 3, onnxruntime::kMLDomain); + + // Minimal valid tree structure + std::vector lefts = {1, 0, 0}; + std::vector rights = {2, 0, 0}; + std::vector treeids = {0, 0, 0}; + std::vector nodeids = {0, 1, 2}; + std::vector featureids = {0, 0, 0}; + std::vector modes = {"BRANCH_LEQ", "LEAF", "LEAF"}; + + test.AddAttribute("nodes_truenodeids", lefts); + test.AddAttribute("nodes_falsenodeids", rights); + test.AddAttribute("nodes_treeids", treeids); + test.AddAttribute("nodes_nodeids", nodeids); + test.AddAttribute("nodes_featureids", featureids); + test.AddAttribute("nodes_values", std::vector{0.5f, 0.f, 0.f}); + test.AddAttribute("nodes_modes", modes); + test.AddAttribute("target_treeids", std::vector{0, 0}); + test.AddAttribute("target_nodeids", std::vector{1, 2}); + test.AddAttribute("target_ids", std::vector{0, 0}); + test.AddAttribute("target_weights", std::vector{1.f, 2.f}); + test.AddAttribute("n_targets", static_cast(1)); + + // nodes_values_as_tensor with external data location + ONNX_NAMESPACE::TensorProto values_proto; + values_proto.set_name("nodes_values_as_tensor"); + values_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT); + values_proto.add_dims(3); + values_proto.set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation_EXTERNAL); + auto* entry = values_proto.add_external_data(); + entry->set_key("location"); + entry->set_value("some_file.bin"); + test.AddAttribute("nodes_values_as_tensor", values_proto); + + std::vector X = {1.f}; + test.AddInput("X", {1, 1}, X); + test.AddOutput("Y", {1, 1}, {0.f}); + + test.Run(OpTester::ExpectResult::kExpectFailure, "external data is not supported"); +} + +#endif // !defined(ORT_NO_EXCEPTIONS) + } // namespace test } // namespace onnxruntime From b28e28320b074de9d1582d1c1775db232bf39b2b Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Wed, 24 Jun 2026 17:07:21 -0700 Subject: [PATCH 2/9] Address PR review comments - Move HasExternalData check before dims().empty() early-return in GetAnyVectorAttrsOrDefault so scalar external-data tensors are also rejected. - Remove conflicting nodes_values attribute from the TreeEnsembleRegressor test so the external-data rejection is the sole failure mode. --- onnxruntime/core/providers/cpu/ml/tree_ensemble_helper.cc | 6 +++--- onnxruntime/test/providers/cpu/ml/treeregressor_test.cc | 3 +-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/onnxruntime/core/providers/cpu/ml/tree_ensemble_helper.cc b/onnxruntime/core/providers/cpu/ml/tree_ensemble_helper.cc index b37854c3d0198..5655be6c6bbbe 100644 --- a/onnxruntime/core/providers/cpu/ml/tree_ensemble_helper.cc +++ b/onnxruntime/core/providers/cpu/ml/tree_ensemble_helper.cc @@ -19,6 +19,9 @@ Status GetAnyVectorAttrsOrDefault(const OpKernelInfo& info, const std::string& n ONNX_NAMESPACE::TensorProto proto; auto result = info.GetAttr(name, &proto); + ORT_RETURN_IF(utils::HasExternalData(proto), + "Tensor attribute ", name, " with external data is not supported."); + SafeInt n_elements(1); for (auto dim : proto.dims()) { n_elements *= dim; @@ -28,9 +31,6 @@ Status GetAnyVectorAttrsOrDefault(const OpKernelInfo& info, const std::string& n return Status::OK(); } - ORT_RETURN_IF(utils::HasExternalData(proto), - "Tensor attribute ", name, " with external data is not supported."); - const SafeInt tensor_size(n_elements); data.clear(); data.resize(tensor_size); diff --git a/onnxruntime/test/providers/cpu/ml/treeregressor_test.cc b/onnxruntime/test/providers/cpu/ml/treeregressor_test.cc index d8ec8c6556835..eb11a8225d55d 100644 --- a/onnxruntime/test/providers/cpu/ml/treeregressor_test.cc +++ b/onnxruntime/test/providers/cpu/ml/treeregressor_test.cc @@ -1101,7 +1101,6 @@ TEST(MLOpTest, TreeEnsembleRegressorRejectsExternalDataInTensorAttribute) { test.AddAttribute("nodes_treeids", treeids); test.AddAttribute("nodes_nodeids", nodeids); test.AddAttribute("nodes_featureids", featureids); - test.AddAttribute("nodes_values", std::vector{0.5f, 0.f, 0.f}); test.AddAttribute("nodes_modes", modes); test.AddAttribute("target_treeids", std::vector{0, 0}); test.AddAttribute("target_nodeids", std::vector{1, 2}); @@ -1109,7 +1108,7 @@ TEST(MLOpTest, TreeEnsembleRegressorRejectsExternalDataInTensorAttribute) { test.AddAttribute("target_weights", std::vector{1.f, 2.f}); test.AddAttribute("n_targets", static_cast(1)); - // nodes_values_as_tensor with external data location + // Use nodes_values_as_tensor (without setting nodes_values) with external data location ONNX_NAMESPACE::TensorProto values_proto; values_proto.set_name("nodes_values_as_tensor"); values_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT); From 152e2a3d4d3cb0a0939d81032f7b9c736774bccb Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Wed, 24 Jun 2026 17:43:04 -0700 Subject: [PATCH 3/9] Fix CI failures in external data rejection tests - Create real temp files (RAII-guarded) so ONNX checker passes file-existence validation before reaching kernel construction. - Fix ScalarDefaultTensorOpset4: ONNX spec requires default_tensor to be a singleton 1D tensor (dims=[1]), not 0D. - Rename _multiply_update_array -> MultiplyUpdateArray and _multiply_update_array_string -> MultiplyUpdateArrayString to follow naming conventions. --- .../providers/cpu/ml/label_encoder_test.cc | 82 +++++++------ .../providers/cpu/ml/tree_ensembler_test.cc | 60 +++++---- .../providers/cpu/ml/treeregressor_test.cc | 116 ++++++++++-------- 3 files changed, 153 insertions(+), 105 deletions(-) diff --git a/onnxruntime/test/providers/cpu/ml/label_encoder_test.cc b/onnxruntime/test/providers/cpu/ml/label_encoder_test.cc index 108813c43bd94..2a8d2452a08e1 100644 --- a/onnxruntime/test/providers/cpu/ml/label_encoder_test.cc +++ b/onnxruntime/test/providers/cpu/ml/label_encoder_test.cc @@ -4,6 +4,7 @@ #include "gtest/gtest.h" #include "core/framework/tensorprotoutils.h" #include "test/providers/provider_test_utils.h" +#include namespace onnxruntime { namespace test { @@ -761,19 +762,44 @@ TEST(LabelEncoder, EmptyInputOpset4) { // In no-exceptions builds, ORT_ENFORCE calls abort() so these tests cannot run. #if !defined(ORT_NO_EXCEPTIONS) +// RAII helper that creates a dummy binary file on construction and removes it on destruction. +struct ScopedExternalDataFile { + std::string path; + ScopedExternalDataFile(const std::string& filename, size_t num_bytes) : path(filename) { + std::ofstream ofs(path, std::ios::binary); + std::vector data(num_bytes, 0); + ofs.write(data.data(), static_cast(num_bytes)); + } + ~ScopedExternalDataFile() { std::remove(path.c_str()); } + ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(ScopedExternalDataFile); +}; + +// Helper: create a TensorProto that references external data in the given file. +static ONNX_NAMESPACE::TensorProto MakeExternalInt64TensorProto(const std::string& name, + const std::string& filename, + int64_t num_elements) { + ONNX_NAMESPACE::TensorProto proto; + proto.set_name(name); + proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64); + proto.add_dims(num_elements); + proto.set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation_EXTERNAL); + auto* loc = proto.add_external_data(); + loc->set_key("location"); + loc->set_value(filename); + auto* offset = proto.add_external_data(); + offset->set_key("offset"); + offset->set_value("0"); + auto* length = proto.add_external_data(); + length->set_key("length"); + length->set_value(std::to_string(num_elements * static_cast(sizeof(int64_t)))); + return proto; +} + TEST(LabelEncoder, RejectsExternalDataInKeysTensorOpset4) { - OpTester test("LabelEncoder", 4, onnxruntime::kMLDomain); + ScopedExternalDataFile ext_file("label_encoder_test_ext_keys.bin", 16); // 2 x int64 - // Create keys_tensor with external data location - ONNX_NAMESPACE::TensorProto keys_proto; - keys_proto.set_name("keys_tensor"); - keys_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64); - keys_proto.add_dims(2); - keys_proto.set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation_EXTERNAL); - auto* entry = keys_proto.add_external_data(); - entry->set_key("location"); - entry->set_value("some_file.bin"); - test.AddAttribute("keys_tensor", keys_proto); + OpTester test("LabelEncoder", 4, onnxruntime::kMLDomain); + test.AddAttribute("keys_tensor", MakeExternalInt64TensorProto("keys_tensor", ext_file.path, 2)); // Normal values_tensor ONNX_NAMESPACE::TensorProto values_proto; @@ -800,21 +826,14 @@ TEST(LabelEncoder, RejectsExternalDataInKeysTensorOpset4) { } TEST(LabelEncoder, RejectsExternalDataInDefaultTensorOpset4) { + ScopedExternalDataFile ext_file("label_encoder_test_ext_default.bin", 8); // 1 x int64 + OpTester test("LabelEncoder", 4, onnxruntime::kMLDomain); test.AddAttribute("keys_int64s", std::vector{1, 2}); test.AddAttribute("values_int64s", std::vector{10, 20}); - // default_tensor with external data location - ONNX_NAMESPACE::TensorProto default_proto; - default_proto.set_name("default_tensor"); - default_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64); - default_proto.add_dims(1); - default_proto.set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation_EXTERNAL); - auto* entry = default_proto.add_external_data(); - entry->set_key("location"); - entry->set_value("some_file.bin"); - test.AddAttribute("default_tensor", default_proto); + test.AddAttribute("default_tensor", MakeExternalInt64TensorProto("default_tensor", ext_file.path, 1)); test.AddInput("X", {1, 2}, {1, 3}); test.AddOutput("Y", {1, 2}, {10, 0}); @@ -825,6 +844,8 @@ TEST(LabelEncoder, RejectsExternalDataInDefaultTensorOpset4) { } TEST(LabelEncoder, RejectsExternalDataInValuesTensorOpset4) { + ScopedExternalDataFile ext_file("label_encoder_test_ext_values.bin", 16); // 2 x int64 + OpTester test("LabelEncoder", 4, onnxruntime::kMLDomain); // Normal keys_tensor @@ -836,16 +857,7 @@ TEST(LabelEncoder, RejectsExternalDataInValuesTensorOpset4) { keys_proto.add_int64_data(2); test.AddAttribute("keys_tensor", keys_proto); - // values_tensor with external data location - ONNX_NAMESPACE::TensorProto values_proto; - values_proto.set_name("values_tensor"); - values_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64); - values_proto.add_dims(2); - values_proto.set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation_EXTERNAL); - auto* entry = values_proto.add_external_data(); - entry->set_key("location"); - entry->set_value("some_file.bin"); - test.AddAttribute("values_tensor", values_proto); + test.AddAttribute("values_tensor", MakeExternalInt64TensorProto("values_tensor", ext_file.path, 2)); ONNX_NAMESPACE::TensorProto default_proto; default_proto.set_name("default_tensor"); @@ -892,8 +904,8 @@ TEST(LabelEncoder, DuplicateKeysFirstWinsOpset4) { test.Run(); } -// Scalar (zero-rank) default_tensor — single element with no dims -TEST(LabelEncoder, ScalarDefaultTensorOpset4) { +// Singleton 1D default_tensor (dims=[1]) — the ONNX spec requires this shape +TEST(LabelEncoder, SingletonDefaultTensorOpset4) { std::vector dims{1, 3}; std::vector input{1, 2, 99}; @@ -904,11 +916,11 @@ TEST(LabelEncoder, ScalarDefaultTensorOpset4) { test.AddAttribute("keys_int64s", std::vector{1, 2}); test.AddAttribute("values_int64s", std::vector{10, 20}); - // Scalar default_tensor: no dims, single element + // 1D singleton default_tensor with dims=[1] ONNX_NAMESPACE::TensorProto default_proto; default_proto.set_name("default_tensor"); default_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64); - // No add_dims() — zero-rank tensor (scalar) + default_proto.add_dims(1); default_proto.add_int64_data(-7); test.AddAttribute("default_tensor", default_proto); diff --git a/onnxruntime/test/providers/cpu/ml/tree_ensembler_test.cc b/onnxruntime/test/providers/cpu/ml/tree_ensembler_test.cc index 32ca4b8a831a8..ab32ff5245f8a 100644 --- a/onnxruntime/test/providers/cpu/ml/tree_ensembler_test.cc +++ b/onnxruntime/test/providers/cpu/ml/tree_ensembler_test.cc @@ -3,6 +3,7 @@ #include "gtest/gtest.h" #include "test/providers/provider_test_utils.h" +#include namespace onnxruntime { namespace test { @@ -44,7 +45,7 @@ static ONNX_NAMESPACE::TensorProto make_tensor(std::vector array, std:: } template -void _multiply_update_array(std::vector& data, int n, T inc = 0) { +void MultiplyUpdateArray(std::vector& data, int n, T inc = 0) { std::vector copy = data; data.resize(copy.size() * n); T cst = 0; @@ -113,16 +114,16 @@ void GenTreeAndRunTest(const std::vector& X, const std::vector& Y, const i if (n_trees > 1) { // Multiplies the number of trees to test the parallelization by trees. - _multiply_update_array(tree_roots, n_trees, (int64_t)nodes_truenodeids.size()); - _multiply_update_array(nodes_featureids, n_trees); + MultiplyUpdateArray(tree_roots, n_trees, (int64_t)nodes_truenodeids.size()); + MultiplyUpdateArray(nodes_featureids, n_trees); _multiply_update_childnode(nodes_truenodeids, nodes_trueleafs, nodes_falseleafs, n_trees); _multiply_update_childnode(nodes_falsenodeids, nodes_falseleafs, nodes_trueleafs, n_trees); - _multiply_update_array(nodes_trueleafs, n_trees); - _multiply_update_array(nodes_falseleafs, n_trees); - _multiply_update_array(leaf_targetids, n_trees); - _multiply_update_array(nodes_modes, n_trees); - _multiply_update_array(nodes_splits, n_trees); - _multiply_update_array(leaf_weights, n_trees); + MultiplyUpdateArray(nodes_trueleafs, n_trees); + MultiplyUpdateArray(nodes_falseleafs, n_trees); + MultiplyUpdateArray(leaf_targetids, n_trees); + MultiplyUpdateArray(nodes_modes, n_trees); + MultiplyUpdateArray(nodes_splits, n_trees); + MultiplyUpdateArray(leaf_weights, n_trees); } auto nodes_modes_as_tensor = make_tensor(nodes_modes, "nodes_modes"); @@ -171,17 +172,17 @@ void GenTreeAndRunTestWithSetMembership(const std::vector& X, const std::vect if (n_trees > 1) { // Multiplies the number of trees to test the parallelization by trees. - _multiply_update_array(tree_roots, n_trees, (int64_t)nodes_truenodeids.size()); - _multiply_update_array(nodes_featureids, n_trees); + MultiplyUpdateArray(tree_roots, n_trees, (int64_t)nodes_truenodeids.size()); + MultiplyUpdateArray(nodes_featureids, n_trees); _multiply_update_childnode(nodes_truenodeids, nodes_trueleafs, nodes_falseleafs, n_trees); _multiply_update_childnode(nodes_falsenodeids, nodes_falseleafs, nodes_trueleafs, n_trees); - _multiply_update_array(nodes_trueleafs, n_trees); - _multiply_update_array(nodes_falseleafs, n_trees); - _multiply_update_array(leaf_targetids, n_trees); - _multiply_update_array(nodes_modes, n_trees); - _multiply_update_array(nodes_splits, n_trees); - _multiply_update_array(membership_values, n_trees); - _multiply_update_array(leaf_weights, n_trees); + MultiplyUpdateArray(nodes_trueleafs, n_trees); + MultiplyUpdateArray(nodes_falseleafs, n_trees); + MultiplyUpdateArray(leaf_targetids, n_trees); + MultiplyUpdateArray(nodes_modes, n_trees); + MultiplyUpdateArray(nodes_splits, n_trees); + MultiplyUpdateArray(membership_values, n_trees); + MultiplyUpdateArray(leaf_weights, n_trees); } auto nodes_modes_as_tensor = make_tensor(nodes_modes, "nodes_modes"); @@ -457,6 +458,17 @@ TEST(MLOpTest, TreeEnsembleIssue25400) { #if !defined(ORT_NO_EXCEPTIONS) TEST(MLOpTest, TreeEnsembleRejectsExternalDataInTensorAttribute) { + // RAII helper: creates a dummy binary file on construction, removes it on destruction. + struct ScopedFile { + std::string path; + ScopedFile(const std::string& p, size_t n) : path(p) { + std::ofstream ofs(path, std::ios::binary); + std::vector data(n, 0); + ofs.write(data.data(), static_cast(n)); + } + ~ScopedFile() { std::remove(path.c_str()); } + } ext_file("tree_ensemble_test_ext_splits.bin", 12); // 3 x float + OpTester test("TreeEnsemble", 5, onnxruntime::kMLDomain); // nodes_splits with external data location @@ -465,9 +477,15 @@ TEST(MLOpTest, TreeEnsembleRejectsExternalDataInTensorAttribute) { splits_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT); splits_proto.add_dims(3); splits_proto.set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation_EXTERNAL); - auto* entry = splits_proto.add_external_data(); - entry->set_key("location"); - entry->set_value("some_file.bin"); + auto* loc = splits_proto.add_external_data(); + loc->set_key("location"); + loc->set_value(ext_file.path); + auto* offset = splits_proto.add_external_data(); + offset->set_key("offset"); + offset->set_value("0"); + auto* length = splits_proto.add_external_data(); + length->set_key("length"); + length->set_value("12"); test.AddAttribute("nodes_splits", splits_proto); // Minimal valid structure for remaining attributes diff --git a/onnxruntime/test/providers/cpu/ml/treeregressor_test.cc b/onnxruntime/test/providers/cpu/ml/treeregressor_test.cc index eb11a8225d55d..fc76eba475917 100644 --- a/onnxruntime/test/providers/cpu/ml/treeregressor_test.cc +++ b/onnxruntime/test/providers/cpu/ml/treeregressor_test.cc @@ -3,12 +3,13 @@ #include "gtest/gtest.h" #include "test/providers/provider_test_utils.h" +#include namespace onnxruntime { namespace test { template -void _multiply_update_array(std::vector& data, int n, T inc = 0) { +void MultiplyUpdateArray(std::vector& data, int n, T inc = 0) { std::vector copy = data; data.resize(copy.size() * n); T cst = 0; @@ -20,7 +21,7 @@ void _multiply_update_array(std::vector& data, int n, T inc = 0) { } } -void _multiply_update_array_string(std::vector& data, int n) { +void MultiplyUpdateArrayString(std::vector& data, int n) { std::vector copy = data; data.resize(copy.size() * n); for (int i = 0; i < n; ++i) { @@ -52,17 +53,17 @@ void GenTreeAndRunTest(int opsetml, const std::vector& X, const std::vector 1) { // Multiplies the number of trees to test the parallelization by trees. - _multiply_update_array(lefts, n_trees); - _multiply_update_array(rights, n_trees); - _multiply_update_array(treeids, n_trees, (int64_t)3); - _multiply_update_array(nodeids, n_trees); - _multiply_update_array(featureids, n_trees); - _multiply_update_array(thresholds, n_trees); - _multiply_update_array_string(modes, n_trees); - _multiply_update_array(target_treeids, n_trees, (int64_t)3); - _multiply_update_array(target_nodeids, n_trees); - _multiply_update_array(target_classids, n_trees); - _multiply_update_array(target_weights, n_trees); + MultiplyUpdateArray(lefts, n_trees); + MultiplyUpdateArray(rights, n_trees); + MultiplyUpdateArray(treeids, n_trees, (int64_t)3); + MultiplyUpdateArray(nodeids, n_trees); + MultiplyUpdateArray(featureids, n_trees); + MultiplyUpdateArray(thresholds, n_trees); + MultiplyUpdateArrayString(modes, n_trees); + MultiplyUpdateArray(target_treeids, n_trees, (int64_t)3); + MultiplyUpdateArray(target_nodeids, n_trees); + MultiplyUpdateArray(target_classids, n_trees); + MultiplyUpdateArray(target_weights, n_trees); } // add attributes @@ -146,17 +147,17 @@ void GenTreeAndRunTest_as_tensor(int opsetml, const std::vector& X, const std if (n_trees > 1) { // Multiplies the number of trees to test the parallelization by trees. - _multiply_update_array(lefts, n_trees); - _multiply_update_array(rights, n_trees); - _multiply_update_array(treeids, n_trees, (int64_t)3); - _multiply_update_array(nodeids, n_trees); - _multiply_update_array(featureids, n_trees); - _multiply_update_array(thresholds, n_trees); - _multiply_update_array_string(modes, n_trees); - _multiply_update_array(target_treeids, n_trees, (int64_t)3); - _multiply_update_array(target_nodeids, n_trees); - _multiply_update_array(target_classids, n_trees); - _multiply_update_array(target_weights, n_trees); + MultiplyUpdateArray(lefts, n_trees); + MultiplyUpdateArray(rights, n_trees); + MultiplyUpdateArray(treeids, n_trees, (int64_t)3); + MultiplyUpdateArray(nodeids, n_trees); + MultiplyUpdateArray(featureids, n_trees); + MultiplyUpdateArray(thresholds, n_trees); + MultiplyUpdateArrayString(modes, n_trees); + MultiplyUpdateArray(target_treeids, n_trees, (int64_t)3); + MultiplyUpdateArray(target_nodeids, n_trees); + MultiplyUpdateArray(target_classids, n_trees); + MultiplyUpdateArray(target_weights, n_trees); } // add attributes @@ -356,17 +357,17 @@ void GenTreeAndRunTest1(int opsetml, const std::string& aggFunction, bool one_ob if (n_trees > 1) { // Multiplies the number of trees to test the parallelization by trees. - _multiply_update_array(lefts, n_trees); - _multiply_update_array(rights, n_trees); - _multiply_update_array(treeids, n_trees, (int64_t)3); - _multiply_update_array(nodeids, n_trees); - _multiply_update_array(featureids, n_trees); - _multiply_update_array(thresholds, n_trees); - _multiply_update_array_string(modes, n_trees); - _multiply_update_array(target_treeids, n_trees, (int64_t)3); - _multiply_update_array(target_nodeids, n_trees); - _multiply_update_array(target_classids, n_trees); - _multiply_update_array(target_weights, n_trees); + MultiplyUpdateArray(lefts, n_trees); + MultiplyUpdateArray(rights, n_trees); + MultiplyUpdateArray(treeids, n_trees, (int64_t)3); + MultiplyUpdateArray(nodeids, n_trees); + MultiplyUpdateArray(featureids, n_trees); + MultiplyUpdateArray(thresholds, n_trees); + MultiplyUpdateArrayString(modes, n_trees); + MultiplyUpdateArray(target_treeids, n_trees, (int64_t)3); + MultiplyUpdateArray(target_nodeids, n_trees); + MultiplyUpdateArray(target_classids, n_trees); + MultiplyUpdateArray(target_weights, n_trees); } std::vector results; @@ -469,17 +470,17 @@ void GenTreeAndRunTest1_as_tensor(int opsetml, const std::string& aggFunction, b if (n_trees > 1) { // Multiplies the number of trees to test the parallelization by trees. - _multiply_update_array(lefts, n_trees); - _multiply_update_array(rights, n_trees); - _multiply_update_array(treeids, n_trees, (int64_t)3); - _multiply_update_array(nodeids, n_trees); - _multiply_update_array(featureids, n_trees); - _multiply_update_array(thresholds, n_trees); - _multiply_update_array_string(modes, n_trees); - _multiply_update_array(target_treeids, n_trees, (int64_t)3); - _multiply_update_array(target_nodeids, n_trees); - _multiply_update_array(target_classids, n_trees); - _multiply_update_array(target_weights, n_trees); + MultiplyUpdateArray(lefts, n_trees); + MultiplyUpdateArray(rights, n_trees); + MultiplyUpdateArray(treeids, n_trees, (int64_t)3); + MultiplyUpdateArray(nodeids, n_trees); + MultiplyUpdateArray(featureids, n_trees); + MultiplyUpdateArray(thresholds, n_trees); + MultiplyUpdateArrayString(modes, n_trees); + MultiplyUpdateArray(target_treeids, n_trees, (int64_t)3); + MultiplyUpdateArray(target_nodeids, n_trees); + MultiplyUpdateArray(target_classids, n_trees); + MultiplyUpdateArray(target_weights, n_trees); } std::vector results; @@ -1086,6 +1087,17 @@ TEST(MLOpTest, TreeEnsembleRegressorBaseValuesWrongSize) { #if !defined(ORT_NO_EXCEPTIONS) TEST(MLOpTest, TreeEnsembleRegressorRejectsExternalDataInTensorAttribute) { + // RAII helper: creates a dummy binary file on construction, removes it on destruction. + struct ScopedFile { + std::string path; + ScopedFile(const std::string& p, size_t n) : path(p) { + std::ofstream ofs(path, std::ios::binary); + std::vector data(n, 0); + ofs.write(data.data(), static_cast(n)); + } + ~ScopedFile() { std::remove(path.c_str()); } + } ext_file("tree_regressor_test_ext_values.bin", 12); // 3 x float + OpTester test("TreeEnsembleRegressor", 3, onnxruntime::kMLDomain); // Minimal valid tree structure @@ -1114,9 +1126,15 @@ TEST(MLOpTest, TreeEnsembleRegressorRejectsExternalDataInTensorAttribute) { values_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT); values_proto.add_dims(3); values_proto.set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation_EXTERNAL); - auto* entry = values_proto.add_external_data(); - entry->set_key("location"); - entry->set_value("some_file.bin"); + auto* loc = values_proto.add_external_data(); + loc->set_key("location"); + loc->set_value(ext_file.path); + auto* offset = values_proto.add_external_data(); + offset->set_key("offset"); + offset->set_value("0"); + auto* length = values_proto.add_external_data(); + length->set_key("length"); + length->set_value("12"); test.AddAttribute("nodes_values_as_tensor", values_proto); std::vector X = {1.f}; From 7c57550c89917dbfb6664cf934e2d8022e50b360 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Wed, 24 Jun 2026 17:44:14 -0700 Subject: [PATCH 4/9] Rename underscore-prefixed helpers in tree_ensembler_test.cc - _multiply_update_childnode -> MultiplyUpdateChildnode - _multiply_arrays_values -> MultiplyArraysValues --- .../test/providers/cpu/ml/tree_ensembler_test.cc | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/onnxruntime/test/providers/cpu/ml/tree_ensembler_test.cc b/onnxruntime/test/providers/cpu/ml/tree_ensembler_test.cc index ab32ff5245f8a..c9cadec31a384 100644 --- a/onnxruntime/test/providers/cpu/ml/tree_ensembler_test.cc +++ b/onnxruntime/test/providers/cpu/ml/tree_ensembler_test.cc @@ -58,7 +58,7 @@ void MultiplyUpdateArray(std::vector& data, int n, T inc = 0) { } template -void _multiply_update_childnode(std::vector& childnodes, std::vector& childleafs, std::vector& otherchildleafs, int n) { +void MultiplyUpdateChildnode(std::vector& childnodes, std::vector& childleafs, std::vector& otherchildleafs, int n) { int64_t leafs_cnt = 0; int64_t nodes_cnt = childnodes.size(); for (auto& childleaf : childleafs) { @@ -88,7 +88,7 @@ void _multiply_update_childnode(std::vector& childnodes, std::vector& chil } template -void _multiply_arrays_values(std::vector& data, int64_t val) { +void MultiplyArraysValues(std::vector& data, int64_t val) { for (auto& curr : data) { curr *= val; } @@ -116,8 +116,8 @@ void GenTreeAndRunTest(const std::vector& X, const std::vector& Y, const i // Multiplies the number of trees to test the parallelization by trees. MultiplyUpdateArray(tree_roots, n_trees, (int64_t)nodes_truenodeids.size()); MultiplyUpdateArray(nodes_featureids, n_trees); - _multiply_update_childnode(nodes_truenodeids, nodes_trueleafs, nodes_falseleafs, n_trees); - _multiply_update_childnode(nodes_falsenodeids, nodes_falseleafs, nodes_trueleafs, n_trees); + MultiplyUpdateChildnode(nodes_truenodeids, nodes_trueleafs, nodes_falseleafs, n_trees); + MultiplyUpdateChildnode(nodes_falsenodeids, nodes_falseleafs, nodes_trueleafs, n_trees); MultiplyUpdateArray(nodes_trueleafs, n_trees); MultiplyUpdateArray(nodes_falseleafs, n_trees); MultiplyUpdateArray(leaf_targetids, n_trees); @@ -174,8 +174,8 @@ void GenTreeAndRunTestWithSetMembership(const std::vector& X, const std::vect // Multiplies the number of trees to test the parallelization by trees. MultiplyUpdateArray(tree_roots, n_trees, (int64_t)nodes_truenodeids.size()); MultiplyUpdateArray(nodes_featureids, n_trees); - _multiply_update_childnode(nodes_truenodeids, nodes_trueleafs, nodes_falseleafs, n_trees); - _multiply_update_childnode(nodes_falsenodeids, nodes_falseleafs, nodes_trueleafs, n_trees); + MultiplyUpdateChildnode(nodes_truenodeids, nodes_trueleafs, nodes_falseleafs, n_trees); + MultiplyUpdateChildnode(nodes_falsenodeids, nodes_falseleafs, nodes_trueleafs, n_trees); MultiplyUpdateArray(nodes_trueleafs, n_trees); MultiplyUpdateArray(nodes_falseleafs, n_trees); MultiplyUpdateArray(leaf_targetids, n_trees); @@ -226,7 +226,7 @@ TEST(MLOpTest, TreeEnsembleDouble) { std::vector Y = {5.23f, 0.f, 5.23f, 0.f, 0.f, 12.12f}; GenTreeAndRunTest(X, Y, 1, 1); - _multiply_arrays_values(Y, 3); + MultiplyArraysValues(Y, 3); GenTreeAndRunTest(X, Y, 1, 3); } @@ -241,7 +241,7 @@ TEST(MLOpTest, TreeEnsembleSetMembership) { 0.f, 10.f, 0.f, 0.f}; GenTreeAndRunTestWithSetMembership(X, Y, 1, 1); - _multiply_arrays_values(Y, 5); + MultiplyArraysValues(Y, 5); GenTreeAndRunTestWithSetMembership(X, Y, 1, 5); } From 741807274858f194b12b155fb7d58b49816fd37d Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 26 Jun 2026 12:12:27 -0700 Subject: [PATCH 5/9] Use unique temp files for external data tests - Replace fixed filenames with CreateTestFile + ScopedFileDeleter to avoid collisions when tests run in parallel via CTest. - Add explicit #include for std::remove. - Remove unused include. --- .../providers/cpu/ml/label_encoder_test.cc | 35 +++++++++---------- .../providers/cpu/ml/tree_ensembler_test.cc | 23 ++++++------ .../providers/cpu/ml/treeregressor_test.cc | 23 ++++++------ 3 files changed, 39 insertions(+), 42 deletions(-) diff --git a/onnxruntime/test/providers/cpu/ml/label_encoder_test.cc b/onnxruntime/test/providers/cpu/ml/label_encoder_test.cc index 2a8d2452a08e1..80e7fc1630c2a 100644 --- a/onnxruntime/test/providers/cpu/ml/label_encoder_test.cc +++ b/onnxruntime/test/providers/cpu/ml/label_encoder_test.cc @@ -4,7 +4,7 @@ #include "gtest/gtest.h" #include "core/framework/tensorprotoutils.h" #include "test/providers/provider_test_utils.h" -#include +#include "test/util/include/file_util.h" namespace onnxruntime { namespace test { @@ -762,17 +762,16 @@ TEST(LabelEncoder, EmptyInputOpset4) { // In no-exceptions builds, ORT_ENFORCE calls abort() so these tests cannot run. #if !defined(ORT_NO_EXCEPTIONS) -// RAII helper that creates a dummy binary file on construction and removes it on destruction. -struct ScopedExternalDataFile { - std::string path; - ScopedExternalDataFile(const std::string& filename, size_t num_bytes) : path(filename) { - std::ofstream ofs(path, std::ios::binary); - std::vector data(num_bytes, 0); - ofs.write(data.data(), static_cast(num_bytes)); - } - ~ScopedExternalDataFile() { std::remove(path.c_str()); } - ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(ScopedExternalDataFile); -}; +// RAII helper that creates a unique dummy binary file and removes it on destruction. +static std::pair CreateExternalDataFile(size_t num_bytes) { + PathString filename(ORT_TSTR("ext_data_XXXXXX")); + FILE* fp = nullptr; + CreateTestFile(fp, filename); + std::vector data(num_bytes, 0); + fwrite(data.data(), 1, num_bytes, fp); + fclose(fp); + return {ToUTF8String(filename), ScopedFileDeleter(filename)}; +} // Helper: create a TensorProto that references external data in the given file. static ONNX_NAMESPACE::TensorProto MakeExternalInt64TensorProto(const std::string& name, @@ -796,10 +795,10 @@ static ONNX_NAMESPACE::TensorProto MakeExternalInt64TensorProto(const std::strin } TEST(LabelEncoder, RejectsExternalDataInKeysTensorOpset4) { - ScopedExternalDataFile ext_file("label_encoder_test_ext_keys.bin", 16); // 2 x int64 + auto [ext_path, ext_deleter] = CreateExternalDataFile(16); // 2 x int64 OpTester test("LabelEncoder", 4, onnxruntime::kMLDomain); - test.AddAttribute("keys_tensor", MakeExternalInt64TensorProto("keys_tensor", ext_file.path, 2)); + test.AddAttribute("keys_tensor", MakeExternalInt64TensorProto("keys_tensor", ext_path, 2)); // Normal values_tensor ONNX_NAMESPACE::TensorProto values_proto; @@ -826,14 +825,14 @@ TEST(LabelEncoder, RejectsExternalDataInKeysTensorOpset4) { } TEST(LabelEncoder, RejectsExternalDataInDefaultTensorOpset4) { - ScopedExternalDataFile ext_file("label_encoder_test_ext_default.bin", 8); // 1 x int64 + auto [ext_path, ext_deleter] = CreateExternalDataFile(8); // 1 x int64 OpTester test("LabelEncoder", 4, onnxruntime::kMLDomain); test.AddAttribute("keys_int64s", std::vector{1, 2}); test.AddAttribute("values_int64s", std::vector{10, 20}); - test.AddAttribute("default_tensor", MakeExternalInt64TensorProto("default_tensor", ext_file.path, 1)); + test.AddAttribute("default_tensor", MakeExternalInt64TensorProto("default_tensor", ext_path, 1)); test.AddInput("X", {1, 2}, {1, 3}); test.AddOutput("Y", {1, 2}, {10, 0}); @@ -844,7 +843,7 @@ TEST(LabelEncoder, RejectsExternalDataInDefaultTensorOpset4) { } TEST(LabelEncoder, RejectsExternalDataInValuesTensorOpset4) { - ScopedExternalDataFile ext_file("label_encoder_test_ext_values.bin", 16); // 2 x int64 + auto [ext_path, ext_deleter] = CreateExternalDataFile(16); // 2 x int64 OpTester test("LabelEncoder", 4, onnxruntime::kMLDomain); @@ -857,7 +856,7 @@ TEST(LabelEncoder, RejectsExternalDataInValuesTensorOpset4) { keys_proto.add_int64_data(2); test.AddAttribute("keys_tensor", keys_proto); - test.AddAttribute("values_tensor", MakeExternalInt64TensorProto("values_tensor", ext_file.path, 2)); + test.AddAttribute("values_tensor", MakeExternalInt64TensorProto("values_tensor", ext_path, 2)); ONNX_NAMESPACE::TensorProto default_proto; default_proto.set_name("default_tensor"); diff --git a/onnxruntime/test/providers/cpu/ml/tree_ensembler_test.cc b/onnxruntime/test/providers/cpu/ml/tree_ensembler_test.cc index c9cadec31a384..213c77be555e2 100644 --- a/onnxruntime/test/providers/cpu/ml/tree_ensembler_test.cc +++ b/onnxruntime/test/providers/cpu/ml/tree_ensembler_test.cc @@ -3,7 +3,7 @@ #include "gtest/gtest.h" #include "test/providers/provider_test_utils.h" -#include +#include "test/util/include/file_util.h" namespace onnxruntime { namespace test { @@ -458,16 +458,15 @@ TEST(MLOpTest, TreeEnsembleIssue25400) { #if !defined(ORT_NO_EXCEPTIONS) TEST(MLOpTest, TreeEnsembleRejectsExternalDataInTensorAttribute) { - // RAII helper: creates a dummy binary file on construction, removes it on destruction. - struct ScopedFile { - std::string path; - ScopedFile(const std::string& p, size_t n) : path(p) { - std::ofstream ofs(path, std::ios::binary); - std::vector data(n, 0); - ofs.write(data.data(), static_cast(n)); - } - ~ScopedFile() { std::remove(path.c_str()); } - } ext_file("tree_ensemble_test_ext_splits.bin", 12); // 3 x float + // Create a unique temp file so the ONNX checker passes file-existence validation. + PathString filename(ORT_TSTR("ext_data_XXXXXX")); + FILE* fp = nullptr; + CreateTestFile(fp, filename); + std::vector data(12, 0); // 3 x float + fwrite(data.data(), 1, data.size(), fp); + fclose(fp); + ScopedFileDeleter ext_deleter(filename); + std::string ext_path = ToUTF8String(filename); OpTester test("TreeEnsemble", 5, onnxruntime::kMLDomain); @@ -479,7 +478,7 @@ TEST(MLOpTest, TreeEnsembleRejectsExternalDataInTensorAttribute) { splits_proto.set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation_EXTERNAL); auto* loc = splits_proto.add_external_data(); loc->set_key("location"); - loc->set_value(ext_file.path); + loc->set_value(ext_path); auto* offset = splits_proto.add_external_data(); offset->set_key("offset"); offset->set_value("0"); diff --git a/onnxruntime/test/providers/cpu/ml/treeregressor_test.cc b/onnxruntime/test/providers/cpu/ml/treeregressor_test.cc index fc76eba475917..aeb440b05f283 100644 --- a/onnxruntime/test/providers/cpu/ml/treeregressor_test.cc +++ b/onnxruntime/test/providers/cpu/ml/treeregressor_test.cc @@ -3,7 +3,7 @@ #include "gtest/gtest.h" #include "test/providers/provider_test_utils.h" -#include +#include "test/util/include/file_util.h" namespace onnxruntime { namespace test { @@ -1087,16 +1087,15 @@ TEST(MLOpTest, TreeEnsembleRegressorBaseValuesWrongSize) { #if !defined(ORT_NO_EXCEPTIONS) TEST(MLOpTest, TreeEnsembleRegressorRejectsExternalDataInTensorAttribute) { - // RAII helper: creates a dummy binary file on construction, removes it on destruction. - struct ScopedFile { - std::string path; - ScopedFile(const std::string& p, size_t n) : path(p) { - std::ofstream ofs(path, std::ios::binary); - std::vector data(n, 0); - ofs.write(data.data(), static_cast(n)); - } - ~ScopedFile() { std::remove(path.c_str()); } - } ext_file("tree_regressor_test_ext_values.bin", 12); // 3 x float + // Create a unique temp file so the ONNX checker passes file-existence validation. + PathString filename(ORT_TSTR("ext_data_XXXXXX")); + FILE* fp = nullptr; + CreateTestFile(fp, filename); + std::vector data(12, 0); // 3 x float + fwrite(data.data(), 1, data.size(), fp); + fclose(fp); + ScopedFileDeleter ext_deleter(filename); + std::string ext_path = ToUTF8String(filename); OpTester test("TreeEnsembleRegressor", 3, onnxruntime::kMLDomain); @@ -1128,7 +1127,7 @@ TEST(MLOpTest, TreeEnsembleRegressorRejectsExternalDataInTensorAttribute) { values_proto.set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation_EXTERNAL); auto* loc = values_proto.add_external_data(); loc->set_key("location"); - loc->set_value(ext_file.path); + loc->set_value(ext_path); auto* offset = values_proto.add_external_data(); offset->set_key("offset"); offset->set_value("0"); From 042e42eb71e1884579496ca169b4517fa8d6c8d5 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 26 Jun 2026 12:30:08 -0700 Subject: [PATCH 6/9] Fix CI: add CUDA EP external data guard, fix TreeEnsemble test dims - Add HasExternalData check to CUDA EP non-plugin LabelEncoder path (GetAttrOrTensor and TryGetScalarTensorAttribute) so error messages are consistent across all EPs. - Remove CUDA EP exclusion from LabelEncoder tests since all EPs now reject external data uniformly. - Fix TreeEnsemble opset 5 test: nodes_splits dims must match the number of elements in nodes_featureids and other node attributes (changed from 3 to 1). --- onnxruntime/core/providers/cuda/ml/label_encoder.cc | 4 ++++ .../test/providers/cpu/ml/label_encoder_test.cc | 12 +++--------- .../test/providers/cpu/ml/tree_ensembler_test.cc | 8 ++++---- 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/onnxruntime/core/providers/cuda/ml/label_encoder.cc b/onnxruntime/core/providers/cuda/ml/label_encoder.cc index e159d6e7c90cc..af687ae48fcc1 100644 --- a/onnxruntime/core/providers/cuda/ml/label_encoder.cc +++ b/onnxruntime/core/providers/cuda/ml/label_encoder.cc @@ -71,6 +71,8 @@ static bool TryGetScalarTensorAttribute(const OpKernelInfo& info, const std::str auto* attr_tensor_proto = GetTensorProto(attr_tensor_holder); auto result = info.GetAttr(tensor_name, attr_tensor_proto); if (result.IsOK() && utils::HasDataType(*attr_tensor_proto)) { + ORT_ENFORCE(!utils::HasExternalData(*attr_tensor_proto), + "Tensor attribute ", tensor_name, " with external data is not supported."); const auto [raw_data, raw_data_len] = GetRawData(*attr_tensor_proto); result = utils::UnpackTensor(*attr_tensor_proto, raw_data, raw_data_len, &value, 1); ORT_ENFORCE(result.IsOK(), "LabelEncoder could not unpack tensor attribute ", attr_name); @@ -117,6 +119,8 @@ static std::vector GetAttrOrTensor(const OpKernelInfo& info, const std::strin } else { ORT_ENFORCE(result.IsOK(), "LabelEncoder is missing attribute ", tensor_name, " or ", name); } + ORT_ENFORCE(!utils::HasExternalData(*attr_tensor_proto), + "Tensor attribute ", tensor_name, " with external data is not supported."); SafeInt element_count(1); for (auto dim : attr_tensor_proto->dims()) { element_count *= dim; diff --git a/onnxruntime/test/providers/cpu/ml/label_encoder_test.cc b/onnxruntime/test/providers/cpu/ml/label_encoder_test.cc index 80e7fc1630c2a..a52b75c5d1cc1 100644 --- a/onnxruntime/test/providers/cpu/ml/label_encoder_test.cc +++ b/onnxruntime/test/providers/cpu/ml/label_encoder_test.cc @@ -819,9 +819,7 @@ TEST(LabelEncoder, RejectsExternalDataInKeysTensorOpset4) { test.AddInput("X", {1, 2}, {1, 2}); test.AddOutput("Y", {1, 2}, {10, 20}); - // CUDA EP uses a different code path that doesn't hit this issue, exclude it. - test.Run(OpTester::ExpectResult::kExpectFailure, "external data is not supported", - {kCudaExecutionProvider}); + test.Run(OpTester::ExpectResult::kExpectFailure, "external data is not supported"); } TEST(LabelEncoder, RejectsExternalDataInDefaultTensorOpset4) { @@ -837,9 +835,7 @@ TEST(LabelEncoder, RejectsExternalDataInDefaultTensorOpset4) { test.AddInput("X", {1, 2}, {1, 3}); test.AddOutput("Y", {1, 2}, {10, 0}); - // CUDA EP uses a different code path that doesn't hit this issue, exclude it. - test.Run(OpTester::ExpectResult::kExpectFailure, "external data is not supported", - {kCudaExecutionProvider}); + test.Run(OpTester::ExpectResult::kExpectFailure, "external data is not supported"); } TEST(LabelEncoder, RejectsExternalDataInValuesTensorOpset4) { @@ -868,9 +864,7 @@ TEST(LabelEncoder, RejectsExternalDataInValuesTensorOpset4) { test.AddInput("X", {1, 2}, {1, 2}); test.AddOutput("Y", {1, 2}, {10, 20}); - // CUDA EP uses a different code path that doesn't hit this issue, exclude it. - test.Run(OpTester::ExpectResult::kExpectFailure, "external data is not supported", - {kCudaExecutionProvider}); + test.Run(OpTester::ExpectResult::kExpectFailure, "external data is not supported"); } #endif // !defined(ORT_NO_EXCEPTIONS) diff --git a/onnxruntime/test/providers/cpu/ml/tree_ensembler_test.cc b/onnxruntime/test/providers/cpu/ml/tree_ensembler_test.cc index 213c77be555e2..dbf1ee6f23cc9 100644 --- a/onnxruntime/test/providers/cpu/ml/tree_ensembler_test.cc +++ b/onnxruntime/test/providers/cpu/ml/tree_ensembler_test.cc @@ -462,7 +462,7 @@ TEST(MLOpTest, TreeEnsembleRejectsExternalDataInTensorAttribute) { PathString filename(ORT_TSTR("ext_data_XXXXXX")); FILE* fp = nullptr; CreateTestFile(fp, filename); - std::vector data(12, 0); // 3 x float + std::vector data(4, 0); // 1 x float fwrite(data.data(), 1, data.size(), fp); fclose(fp); ScopedFileDeleter ext_deleter(filename); @@ -470,11 +470,11 @@ TEST(MLOpTest, TreeEnsembleRejectsExternalDataInTensorAttribute) { OpTester test("TreeEnsemble", 5, onnxruntime::kMLDomain); - // nodes_splits with external data location + // nodes_splits with external data location (1 node) ONNX_NAMESPACE::TensorProto splits_proto; splits_proto.set_name("nodes_splits"); splits_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT); - splits_proto.add_dims(3); + splits_proto.add_dims(1); splits_proto.set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation_EXTERNAL); auto* loc = splits_proto.add_external_data(); loc->set_key("location"); @@ -484,7 +484,7 @@ TEST(MLOpTest, TreeEnsembleRejectsExternalDataInTensorAttribute) { offset->set_value("0"); auto* length = splits_proto.add_external_data(); length->set_key("length"); - length->set_value("12"); + length->set_value("4"); test.AddAttribute("nodes_splits", splits_proto); // Minimal valid structure for remaining attributes From 2e0397628ba1b80a94629b579dfb74742aa21328 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 26 Jun 2026 14:33:04 -0700 Subject: [PATCH 7/9] Validate and inline external data in node tensor attributes Move external data validation from per-kernel checks to a centralized graph-level pass in ConvertInitializersIntoOrtValues(). This: - Validates file paths using ValidateExternalDataPath (same as initializers) - Rejects in-memory external data references for node attributes - Reads validated external data from disk and inlines it as raw_data - Removes per-kernel HasExternalData guards (CPU, CUDA) since data is now inlined before kernels construct - Uses InlinedHashSet for validated paths - Uses utils::HasTensor/HasTensors utilities Tests updated to verify in-memory reference rejection and valid external data inlining. Uses proper kTensorProtoNativeEndianMemoryAddressTag constant. --- onnxruntime/core/graph/graph.cc | 69 +++++++++++++++- .../core/providers/cpu/ml/label_encoder.h | 4 - .../providers/cpu/ml/tree_ensemble_helper.cc | 3 - .../core/providers/cuda/ml/label_encoder.cc | 4 - .../providers/cpu/ml/label_encoder_test.cc | 82 ++++++++++--------- .../providers/cpu/ml/tree_ensembler_test.cc | 24 ++---- .../providers/cpu/ml/treeregressor_test.cc | 27 ++---- 7 files changed, 126 insertions(+), 87 deletions(-) diff --git a/onnxruntime/core/graph/graph.cc b/onnxruntime/core/graph/graph.cc index fe2df6a87d124..8d91603fc0429 100644 --- a/onnxruntime/core/graph/graph.cc +++ b/onnxruntime/core/graph/graph.cc @@ -3839,7 +3839,7 @@ Status Graph::ConvertInitializersIntoOrtValues() { FindAllSubgraphs(all_subgraphs); const auto& model_path = GetModel().ModelPath(); - std::unordered_set validated_external_data_paths; + InlinedHashSet validated_external_data_paths; auto put_weights_maybe_in_memory_func = [&](Graph& graph) -> Status { // if we have any initializers that are not in memory, put them there. @@ -3903,7 +3903,72 @@ Status Graph::ConvertInitializersIntoOrtValues() { return Status::OK(); }; - return ForThisAndAllSubgraphs(all_subgraphs, put_weights_maybe_in_memory_func); + ORT_RETURN_IF_ERROR(ForThisAndAllSubgraphs(all_subgraphs, put_weights_maybe_in_memory_func)); + + // Validate and inline external data in node tensor attributes. + // In-memory references are rejected (no legitimate source creates them for attributes). + // File-based external data paths are validated, read from disk, and inlined as raw_data + // so all EPs (including plugins) can access attribute data uniformly. + auto inline_external_attr_tensors_func = [&](Graph& graph) -> Status { + for (auto& node : graph.Nodes()) { + for (auto& [attr_name, attr_proto] : node.GetMutableAttributes()) { + if (utils::HasTensor(attr_proto)) { + auto* tensor_proto = attr_proto.mutable_t(); + if (utils::HasExternalData(*tensor_proto)) { + ORT_RETURN_IF(utils::HasExternalDataInMemory(*tensor_proto), + "Node '", node.Name(), "' attribute '", attr_name, + "' contains an in-memory external data reference, which is not permitted ", + "for node attributes."); + + std::unique_ptr external_data_info; + ORT_RETURN_IF_ERROR( + onnxruntime::ExternalDataInfo::Create(tensor_proto->external_data(), external_data_info)); + const auto& location = external_data_info->GetRelPath(); + + if (validated_external_data_paths.count(location) == 0) { + ORT_RETURN_IF_ERROR(utils::ValidateExternalDataPath(model_path, location)); + validated_external_data_paths.insert(location); + } + + // Read external data and inline it into the TensorProto. + std::vector buffer; + ORT_RETURN_IF_ERROR(utils::UnpackInitializerData(*tensor_proto, model_path, buffer)); + tensor_proto->clear_external_data(); + tensor_proto->set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation_DEFAULT); + utils::SetRawDataInTensorProto(*tensor_proto, buffer.data(), buffer.size()); + } + } else if (utils::HasTensors(attr_proto)) { + for (auto& tensor_proto : *attr_proto.mutable_tensors()) { + if (utils::HasExternalData(tensor_proto)) { + ORT_RETURN_IF(utils::HasExternalDataInMemory(tensor_proto), + "Node '", node.Name(), "' attribute '", attr_name, + "' contains an in-memory external data reference, which is not permitted ", + "for node attributes."); + + std::unique_ptr external_data_info; + ORT_RETURN_IF_ERROR( + onnxruntime::ExternalDataInfo::Create(tensor_proto.external_data(), external_data_info)); + const auto& location = external_data_info->GetRelPath(); + + if (validated_external_data_paths.count(location) == 0) { + ORT_RETURN_IF_ERROR(utils::ValidateExternalDataPath(model_path, location)); + validated_external_data_paths.insert(location); + } + + std::vector buffer; + ORT_RETURN_IF_ERROR(utils::UnpackInitializerData(tensor_proto, model_path, buffer)); + tensor_proto.clear_external_data(); + tensor_proto.set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation_DEFAULT); + utils::SetRawDataInTensorProto(tensor_proto, buffer.data(), buffer.size()); + } + } + } + } + } + return Status::OK(); + }; + + return ForThisAndAllSubgraphs(all_subgraphs, inline_external_attr_tensors_func); } void Graph::SetName(const std::string& name) { diff --git a/onnxruntime/core/providers/cpu/ml/label_encoder.h b/onnxruntime/core/providers/cpu/ml/label_encoder.h index 7c6bd35ceae18..6cdde84fdb082 100644 --- a/onnxruntime/core/providers/cpu/ml/label_encoder.h +++ b/onnxruntime/core/providers/cpu/ml/label_encoder.h @@ -119,8 +119,6 @@ std::vector GetAttribute(const OpKernelInfo& info, const std::string& name, c } else { ORT_ENFORCE(result.IsOK(), "LabelEncoder is missing attribute ", tensor_name, " or ", name); } - ORT_ENFORCE(!utils::HasExternalData(attr_tensor_proto), - "Tensor attribute ", tensor_name, " with external data is not supported."); SafeInt element_count(1); for (auto dim : attr_tensor_proto.dims()) { element_count *= dim; @@ -137,8 +135,6 @@ T GetDefault(const OpKernelInfo& info, const std::string& attr_name, const T& ba ONNX_NAMESPACE::TensorProto attr_tensor_proto; auto result = info.GetAttr("default_tensor", &attr_tensor_proto); if (result.IsOK() && utils::HasDataType(attr_tensor_proto)) { - ORT_ENFORCE(!utils::HasExternalData(attr_tensor_proto), - "Tensor attribute default_tensor with external data is not supported."); T default_value; result = utils::UnpackTensor(attr_tensor_proto, std::filesystem::path(), &default_value, 1); ORT_ENFORCE(result.IsOK(), "LabelEncoder could not unpack default tensor ", attr_name); diff --git a/onnxruntime/core/providers/cpu/ml/tree_ensemble_helper.cc b/onnxruntime/core/providers/cpu/ml/tree_ensemble_helper.cc index 5655be6c6bbbe..399dfd56b93c6 100644 --- a/onnxruntime/core/providers/cpu/ml/tree_ensemble_helper.cc +++ b/onnxruntime/core/providers/cpu/ml/tree_ensemble_helper.cc @@ -19,9 +19,6 @@ Status GetAnyVectorAttrsOrDefault(const OpKernelInfo& info, const std::string& n ONNX_NAMESPACE::TensorProto proto; auto result = info.GetAttr(name, &proto); - ORT_RETURN_IF(utils::HasExternalData(proto), - "Tensor attribute ", name, " with external data is not supported."); - SafeInt n_elements(1); for (auto dim : proto.dims()) { n_elements *= dim; diff --git a/onnxruntime/core/providers/cuda/ml/label_encoder.cc b/onnxruntime/core/providers/cuda/ml/label_encoder.cc index af687ae48fcc1..e159d6e7c90cc 100644 --- a/onnxruntime/core/providers/cuda/ml/label_encoder.cc +++ b/onnxruntime/core/providers/cuda/ml/label_encoder.cc @@ -71,8 +71,6 @@ static bool TryGetScalarTensorAttribute(const OpKernelInfo& info, const std::str auto* attr_tensor_proto = GetTensorProto(attr_tensor_holder); auto result = info.GetAttr(tensor_name, attr_tensor_proto); if (result.IsOK() && utils::HasDataType(*attr_tensor_proto)) { - ORT_ENFORCE(!utils::HasExternalData(*attr_tensor_proto), - "Tensor attribute ", tensor_name, " with external data is not supported."); const auto [raw_data, raw_data_len] = GetRawData(*attr_tensor_proto); result = utils::UnpackTensor(*attr_tensor_proto, raw_data, raw_data_len, &value, 1); ORT_ENFORCE(result.IsOK(), "LabelEncoder could not unpack tensor attribute ", attr_name); @@ -119,8 +117,6 @@ static std::vector GetAttrOrTensor(const OpKernelInfo& info, const std::strin } else { ORT_ENFORCE(result.IsOK(), "LabelEncoder is missing attribute ", tensor_name, " or ", name); } - ORT_ENFORCE(!utils::HasExternalData(*attr_tensor_proto), - "Tensor attribute ", tensor_name, " with external data is not supported."); SafeInt element_count(1); for (auto dim : attr_tensor_proto->dims()) { element_count *= dim; diff --git a/onnxruntime/test/providers/cpu/ml/label_encoder_test.cc b/onnxruntime/test/providers/cpu/ml/label_encoder_test.cc index a52b75c5d1cc1..f6cd4582f6e49 100644 --- a/onnxruntime/test/providers/cpu/ml/label_encoder_test.cc +++ b/onnxruntime/test/providers/cpu/ml/label_encoder_test.cc @@ -5,6 +5,7 @@ #include "core/framework/tensorprotoutils.h" #include "test/providers/provider_test_utils.h" #include "test/util/include/file_util.h" +#include "core/platform/path_lib.h" namespace onnxruntime { namespace test { @@ -763,12 +764,11 @@ TEST(LabelEncoder, EmptyInputOpset4) { #if !defined(ORT_NO_EXCEPTIONS) // RAII helper that creates a unique dummy binary file and removes it on destruction. -static std::pair CreateExternalDataFile(size_t num_bytes) { +static std::pair CreateExternalDataFile(const void* data, size_t num_bytes) { PathString filename(ORT_TSTR("ext_data_XXXXXX")); FILE* fp = nullptr; CreateTestFile(fp, filename); - std::vector data(num_bytes, 0); - fwrite(data.data(), 1, num_bytes, fp); + fwrite(data, 1, num_bytes, fp); fclose(fp); return {ToUTF8String(filename), ScopedFileDeleter(filename)}; } @@ -794,13 +794,34 @@ static ONNX_NAMESPACE::TensorProto MakeExternalInt64TensorProto(const std::strin return proto; } -TEST(LabelEncoder, RejectsExternalDataInKeysTensorOpset4) { - auto [ext_path, ext_deleter] = CreateExternalDataFile(16); // 2 x int64 +// Helper: create a TensorProto with in-memory external data reference (should be rejected). +static ONNX_NAMESPACE::TensorProto MakeInMemoryExternalTensorProto(const std::string& name, + int64_t num_elements) { + ONNX_NAMESPACE::TensorProto proto; + proto.set_name(name); + proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64); + proto.add_dims(num_elements); + proto.set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation_EXTERNAL); + auto* loc = proto.add_external_data(); + loc->set_key("location"); + loc->set_value(ToUTF8String(onnxruntime::utils::kTensorProtoNativeEndianMemoryAddressTag)); + auto* offset = proto.add_external_data(); + offset->set_key("offset"); + offset->set_value("12345678"); + auto* length = proto.add_external_data(); + length->set_key("length"); + length->set_value(std::to_string(num_elements * static_cast(sizeof(int64_t)))); + return proto; +} + +// Valid external data in tensor attributes should be loaded and inlined during session initialization. +TEST(LabelEncoder, ExternalDataInKeysTensorOpset4) { + std::vector key_data{1, 2}; + auto [ext_path, ext_deleter] = CreateExternalDataFile(key_data.data(), key_data.size() * sizeof(int64_t)); OpTester test("LabelEncoder", 4, onnxruntime::kMLDomain); test.AddAttribute("keys_tensor", MakeExternalInt64TensorProto("keys_tensor", ext_path, 2)); - // Normal values_tensor ONNX_NAMESPACE::TensorProto values_proto; values_proto.set_name("values_tensor"); values_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64); @@ -813,46 +834,27 @@ TEST(LabelEncoder, RejectsExternalDataInKeysTensorOpset4) { default_proto.set_name("default_tensor"); default_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64); default_proto.add_dims(1); - default_proto.add_int64_data(0); + default_proto.add_int64_data(42); test.AddAttribute("default_tensor", default_proto); - test.AddInput("X", {1, 2}, {1, 2}); - test.AddOutput("Y", {1, 2}, {10, 20}); - - test.Run(OpTester::ExpectResult::kExpectFailure, "external data is not supported"); -} - -TEST(LabelEncoder, RejectsExternalDataInDefaultTensorOpset4) { - auto [ext_path, ext_deleter] = CreateExternalDataFile(8); // 1 x int64 - - OpTester test("LabelEncoder", 4, onnxruntime::kMLDomain); - - test.AddAttribute("keys_int64s", std::vector{1, 2}); - test.AddAttribute("values_int64s", std::vector{10, 20}); + test.AddInput("X", {1, 3}, {1, 2, 99}); + test.AddOutput("Y", {1, 3}, {10, 20, 42}); - test.AddAttribute("default_tensor", MakeExternalInt64TensorProto("default_tensor", ext_path, 1)); - - test.AddInput("X", {1, 2}, {1, 3}); - test.AddOutput("Y", {1, 2}, {10, 0}); - - test.Run(OpTester::ExpectResult::kExpectFailure, "external data is not supported"); + test.Run(); } -TEST(LabelEncoder, RejectsExternalDataInValuesTensorOpset4) { - auto [ext_path, ext_deleter] = CreateExternalDataFile(16); // 2 x int64 - +// In-memory external data references in node attributes are rejected during initialization. +TEST(LabelEncoder, RejectsInMemoryExternalDataInKeysTensorOpset4) { OpTester test("LabelEncoder", 4, onnxruntime::kMLDomain); + test.AddAttribute("keys_tensor", MakeInMemoryExternalTensorProto("keys_tensor", 2)); - // Normal keys_tensor - ONNX_NAMESPACE::TensorProto keys_proto; - keys_proto.set_name("keys_tensor"); - keys_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64); - keys_proto.add_dims(2); - keys_proto.add_int64_data(1); - keys_proto.add_int64_data(2); - test.AddAttribute("keys_tensor", keys_proto); - - test.AddAttribute("values_tensor", MakeExternalInt64TensorProto("values_tensor", ext_path, 2)); + ONNX_NAMESPACE::TensorProto values_proto; + values_proto.set_name("values_tensor"); + values_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64); + values_proto.add_dims(2); + values_proto.add_int64_data(10); + values_proto.add_int64_data(20); + test.AddAttribute("values_tensor", values_proto); ONNX_NAMESPACE::TensorProto default_proto; default_proto.set_name("default_tensor"); @@ -864,7 +866,7 @@ TEST(LabelEncoder, RejectsExternalDataInValuesTensorOpset4) { test.AddInput("X", {1, 2}, {1, 2}); test.AddOutput("Y", {1, 2}, {10, 20}); - test.Run(OpTester::ExpectResult::kExpectFailure, "external data is not supported"); + test.Run(OpTester::ExpectResult::kExpectFailure, "in-memory external data reference"); } #endif // !defined(ORT_NO_EXCEPTIONS) diff --git a/onnxruntime/test/providers/cpu/ml/tree_ensembler_test.cc b/onnxruntime/test/providers/cpu/ml/tree_ensembler_test.cc index dbf1ee6f23cc9..3bb6241a1aa03 100644 --- a/onnxruntime/test/providers/cpu/ml/tree_ensembler_test.cc +++ b/onnxruntime/test/providers/cpu/ml/tree_ensembler_test.cc @@ -3,7 +3,8 @@ #include "gtest/gtest.h" #include "test/providers/provider_test_utils.h" -#include "test/util/include/file_util.h" +#include "core/framework/tensorprotoutils.h" +#include "core/platform/path_lib.h" namespace onnxruntime { namespace test { @@ -457,20 +458,11 @@ TEST(MLOpTest, TreeEnsembleIssue25400) { // calls abort() so these tests cannot run. #if !defined(ORT_NO_EXCEPTIONS) -TEST(MLOpTest, TreeEnsembleRejectsExternalDataInTensorAttribute) { - // Create a unique temp file so the ONNX checker passes file-existence validation. - PathString filename(ORT_TSTR("ext_data_XXXXXX")); - FILE* fp = nullptr; - CreateTestFile(fp, filename); - std::vector data(4, 0); // 1 x float - fwrite(data.data(), 1, data.size(), fp); - fclose(fp); - ScopedFileDeleter ext_deleter(filename); - std::string ext_path = ToUTF8String(filename); - +// In-memory external data references in node attributes are rejected during initialization. +TEST(MLOpTest, TreeEnsembleRejectsInMemoryExternalDataInTensorAttribute) { OpTester test("TreeEnsemble", 5, onnxruntime::kMLDomain); - // nodes_splits with external data location (1 node) + // nodes_splits with in-memory external data reference (should be rejected) ONNX_NAMESPACE::TensorProto splits_proto; splits_proto.set_name("nodes_splits"); splits_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT); @@ -478,10 +470,10 @@ TEST(MLOpTest, TreeEnsembleRejectsExternalDataInTensorAttribute) { splits_proto.set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation_EXTERNAL); auto* loc = splits_proto.add_external_data(); loc->set_key("location"); - loc->set_value(ext_path); + loc->set_value(ToUTF8String(onnxruntime::utils::kTensorProtoNativeEndianMemoryAddressTag)); auto* offset = splits_proto.add_external_data(); offset->set_key("offset"); - offset->set_value("0"); + offset->set_value("12345678"); auto* length = splits_proto.add_external_data(); length->set_key("length"); length->set_value("4"); @@ -518,7 +510,7 @@ TEST(MLOpTest, TreeEnsembleRejectsExternalDataInTensorAttribute) { test.AddInput("X", {1, 1}, X); test.AddOutput("Y", {1, 1}, {0.f}); - test.Run(OpTester::ExpectResult::kExpectFailure, "external data is not supported"); + test.Run(OpTester::ExpectResult::kExpectFailure, "in-memory external data reference"); } #endif // !defined(ORT_NO_EXCEPTIONS) diff --git a/onnxruntime/test/providers/cpu/ml/treeregressor_test.cc b/onnxruntime/test/providers/cpu/ml/treeregressor_test.cc index aeb440b05f283..25f08d55209e1 100644 --- a/onnxruntime/test/providers/cpu/ml/treeregressor_test.cc +++ b/onnxruntime/test/providers/cpu/ml/treeregressor_test.cc @@ -3,7 +3,8 @@ #include "gtest/gtest.h" #include "test/providers/provider_test_utils.h" -#include "test/util/include/file_util.h" +#include "core/framework/tensorprotoutils.h" +#include "core/platform/path_lib.h" namespace onnxruntime { namespace test { @@ -1082,21 +1083,11 @@ TEST(MLOpTest, TreeEnsembleRegressorBaseValuesWrongSize) { test.Run(OpTester::ExpectResult::kExpectFailure, "base_values should have 0 or 2 values."); } -// External data in tensor attributes is not supported. The kernel must reject such attributes -// during construction. In no-exceptions builds, ORT_ENFORCE/ORT_THROW_IF_ERROR calls abort(). +// In-memory external data references in node attributes are rejected during initialization. +// In no-exceptions builds, ORT_RETURN_IF calls abort(). #if !defined(ORT_NO_EXCEPTIONS) -TEST(MLOpTest, TreeEnsembleRegressorRejectsExternalDataInTensorAttribute) { - // Create a unique temp file so the ONNX checker passes file-existence validation. - PathString filename(ORT_TSTR("ext_data_XXXXXX")); - FILE* fp = nullptr; - CreateTestFile(fp, filename); - std::vector data(12, 0); // 3 x float - fwrite(data.data(), 1, data.size(), fp); - fclose(fp); - ScopedFileDeleter ext_deleter(filename); - std::string ext_path = ToUTF8String(filename); - +TEST(MLOpTest, TreeEnsembleRegressorRejectsInMemoryExternalDataInTensorAttribute) { OpTester test("TreeEnsembleRegressor", 3, onnxruntime::kMLDomain); // Minimal valid tree structure @@ -1119,7 +1110,7 @@ TEST(MLOpTest, TreeEnsembleRegressorRejectsExternalDataInTensorAttribute) { test.AddAttribute("target_weights", std::vector{1.f, 2.f}); test.AddAttribute("n_targets", static_cast(1)); - // Use nodes_values_as_tensor (without setting nodes_values) with external data location + // nodes_values_as_tensor with in-memory external data reference (should be rejected) ONNX_NAMESPACE::TensorProto values_proto; values_proto.set_name("nodes_values_as_tensor"); values_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT); @@ -1127,10 +1118,10 @@ TEST(MLOpTest, TreeEnsembleRegressorRejectsExternalDataInTensorAttribute) { values_proto.set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation_EXTERNAL); auto* loc = values_proto.add_external_data(); loc->set_key("location"); - loc->set_value(ext_path); + loc->set_value(ToUTF8String(onnxruntime::utils::kTensorProtoNativeEndianMemoryAddressTag)); auto* offset = values_proto.add_external_data(); offset->set_key("offset"); - offset->set_value("0"); + offset->set_value("12345678"); auto* length = values_proto.add_external_data(); length->set_key("length"); length->set_value("12"); @@ -1140,7 +1131,7 @@ TEST(MLOpTest, TreeEnsembleRegressorRejectsExternalDataInTensorAttribute) { test.AddInput("X", {1, 1}, X); test.AddOutput("Y", {1, 1}, {0.f}); - test.Run(OpTester::ExpectResult::kExpectFailure, "external data is not supported"); + test.Run(OpTester::ExpectResult::kExpectFailure, "in-memory external data reference"); } #endif // !defined(ORT_NO_EXCEPTIONS) From 9a1b6ea2de2e7e6d5bee122513c7403d056440a2 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 26 Jun 2026 15:05:39 -0700 Subject: [PATCH 8/9] Fix expected error messages for in-memory external data rejection tests The ONNX checker (checker::check_node) runs during Graph::Resolve() and validates external data file paths before our inlining code runs. It rejects the in-memory tag as 'not regular file'. Update expected error substrings and add comments explaining this. --- onnxruntime/test/providers/cpu/ml/tree_ensembler_test.cc | 5 ++++- onnxruntime/test/providers/cpu/ml/treeregressor_test.cc | 7 +++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/onnxruntime/test/providers/cpu/ml/tree_ensembler_test.cc b/onnxruntime/test/providers/cpu/ml/tree_ensembler_test.cc index 3bb6241a1aa03..da802d459b1f2 100644 --- a/onnxruntime/test/providers/cpu/ml/tree_ensembler_test.cc +++ b/onnxruntime/test/providers/cpu/ml/tree_ensembler_test.cc @@ -459,6 +459,9 @@ TEST(MLOpTest, TreeEnsembleIssue25400) { #if !defined(ORT_NO_EXCEPTIONS) // In-memory external data references in node attributes are rejected during initialization. +// The ONNX checker (checker::check_node) runs during Graph::Resolve() before our +// inlining code and validates that external data locations are regular files. +// There is no way to disable this check. The error message comes from the ONNX checker. TEST(MLOpTest, TreeEnsembleRejectsInMemoryExternalDataInTensorAttribute) { OpTester test("TreeEnsemble", 5, onnxruntime::kMLDomain); @@ -510,7 +513,7 @@ TEST(MLOpTest, TreeEnsembleRejectsInMemoryExternalDataInTensorAttribute) { test.AddInput("X", {1, 1}, X); test.AddOutput("Y", {1, 1}, {0.f}); - test.Run(OpTester::ExpectResult::kExpectFailure, "in-memory external data reference"); + test.Run(OpTester::ExpectResult::kExpectFailure, "is not regular file"); } #endif // !defined(ORT_NO_EXCEPTIONS) diff --git a/onnxruntime/test/providers/cpu/ml/treeregressor_test.cc b/onnxruntime/test/providers/cpu/ml/treeregressor_test.cc index 25f08d55209e1..8e1ca9caf8c98 100644 --- a/onnxruntime/test/providers/cpu/ml/treeregressor_test.cc +++ b/onnxruntime/test/providers/cpu/ml/treeregressor_test.cc @@ -1084,6 +1084,9 @@ TEST(MLOpTest, TreeEnsembleRegressorBaseValuesWrongSize) { } // In-memory external data references in node attributes are rejected during initialization. +// The ONNX checker (checker::check_node) runs during Graph::Resolve() before our +// inlining code and validates that external data locations are regular files. +// There is no way to disable this check. The error message comes from the ONNX checker. // In no-exceptions builds, ORT_RETURN_IF calls abort(). #if !defined(ORT_NO_EXCEPTIONS) @@ -1118,7 +1121,7 @@ TEST(MLOpTest, TreeEnsembleRegressorRejectsInMemoryExternalDataInTensorAttribute values_proto.set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation_EXTERNAL); auto* loc = values_proto.add_external_data(); loc->set_key("location"); - loc->set_value(ToUTF8String(onnxruntime::utils::kTensorProtoNativeEndianMemoryAddressTag)); + loc->set_value(ToUTF8String(utils::kTensorProtoNativeEndianMemoryAddressTag)); auto* offset = values_proto.add_external_data(); offset->set_key("offset"); offset->set_value("12345678"); @@ -1131,7 +1134,7 @@ TEST(MLOpTest, TreeEnsembleRegressorRejectsInMemoryExternalDataInTensorAttribute test.AddInput("X", {1, 1}, X); test.AddOutput("Y", {1, 1}, {0.f}); - test.Run(OpTester::ExpectResult::kExpectFailure, "in-memory external data reference"); + test.Run(OpTester::ExpectResult::kExpectFailure, "is not regular file"); } #endif // !defined(ORT_NO_EXCEPTIONS) From 83e018f5094d0f1e8e2fd5a4b4e442bad80def8a Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 26 Jun 2026 15:13:40 -0700 Subject: [PATCH 9/9] Address PR review comments - Add node/attribute context to ValidateExternalDataPath and UnpackInitializerData error messages in graph.cc - Update test comments to accurately describe ONNX checker behavior (fail_check aborts in no-exceptions builds, not ORT_ENFORCE/ORT_RETURN_IF) - Add ORT_ENFORCE checks on fwrite/fclose in CreateExternalDataFile - Add inline comments next to expected error messages explaining they originate from the ONNX checker during Graph::Resolve() --- onnxruntime/core/graph/graph.cc | 32 ++++++++++++++++--- .../providers/cpu/ml/label_encoder_test.cc | 16 ++++++---- .../providers/cpu/ml/tree_ensembler_test.cc | 12 +++---- .../providers/cpu/ml/treeregressor_test.cc | 10 +++--- 4 files changed, 49 insertions(+), 21 deletions(-) diff --git a/onnxruntime/core/graph/graph.cc b/onnxruntime/core/graph/graph.cc index 8d91603fc0429..4af88ca51fc1f 100644 --- a/onnxruntime/core/graph/graph.cc +++ b/onnxruntime/core/graph/graph.cc @@ -3926,13 +3926,25 @@ Status Graph::ConvertInitializersIntoOrtValues() { const auto& location = external_data_info->GetRelPath(); if (validated_external_data_paths.count(location) == 0) { - ORT_RETURN_IF_ERROR(utils::ValidateExternalDataPath(model_path, location)); + auto path_status = utils::ValidateExternalDataPath(model_path, location); + if (!path_status.IsOK()) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "Node '", node.Name(), "' attribute '", attr_name, + "': ", path_status.ErrorMessage()); + } validated_external_data_paths.insert(location); } // Read external data and inline it into the TensorProto. std::vector buffer; - ORT_RETURN_IF_ERROR(utils::UnpackInitializerData(*tensor_proto, model_path, buffer)); + { + auto unpack_status = utils::UnpackInitializerData(*tensor_proto, model_path, buffer); + if (!unpack_status.IsOK()) { + return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, + "Node '", node.Name(), "' attribute '", attr_name, + "': ", unpack_status.ErrorMessage()); + } + } tensor_proto->clear_external_data(); tensor_proto->set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation_DEFAULT); utils::SetRawDataInTensorProto(*tensor_proto, buffer.data(), buffer.size()); @@ -3951,12 +3963,24 @@ Status Graph::ConvertInitializersIntoOrtValues() { const auto& location = external_data_info->GetRelPath(); if (validated_external_data_paths.count(location) == 0) { - ORT_RETURN_IF_ERROR(utils::ValidateExternalDataPath(model_path, location)); + auto path_status = utils::ValidateExternalDataPath(model_path, location); + if (!path_status.IsOK()) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "Node '", node.Name(), "' attribute '", attr_name, + "': ", path_status.ErrorMessage()); + } validated_external_data_paths.insert(location); } std::vector buffer; - ORT_RETURN_IF_ERROR(utils::UnpackInitializerData(tensor_proto, model_path, buffer)); + { + auto unpack_status = utils::UnpackInitializerData(tensor_proto, model_path, buffer); + if (!unpack_status.IsOK()) { + return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, + "Node '", node.Name(), "' attribute '", attr_name, + "': ", unpack_status.ErrorMessage()); + } + } tensor_proto.clear_external_data(); tensor_proto.set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation_DEFAULT); utils::SetRawDataInTensorProto(tensor_proto, buffer.data(), buffer.size()); diff --git a/onnxruntime/test/providers/cpu/ml/label_encoder_test.cc b/onnxruntime/test/providers/cpu/ml/label_encoder_test.cc index f6cd4582f6e49..8ed2dc744a29c 100644 --- a/onnxruntime/test/providers/cpu/ml/label_encoder_test.cc +++ b/onnxruntime/test/providers/cpu/ml/label_encoder_test.cc @@ -758,9 +758,10 @@ TEST(LabelEncoder, EmptyInputOpset4) { test.Run(); } -// External data in tensor attributes is not supported. The kernel must reject such attributes -// during construction. These tests verify the rejection. -// In no-exceptions builds, ORT_ENFORCE calls abort() so these tests cannot run. +// External data in tensor attributes: file-based external data is validated and inlined +// during session initialization. In-memory references are rejected by the ONNX checker +// during Graph::Resolve() (it validates that external data locations are regular files). +// In no-exceptions builds, the ONNX checker's fail_check calls abort() so these tests cannot run. #if !defined(ORT_NO_EXCEPTIONS) // RAII helper that creates a unique dummy binary file and removes it on destruction. @@ -768,8 +769,9 @@ static std::pair CreateExternalDataFile(const vo PathString filename(ORT_TSTR("ext_data_XXXXXX")); FILE* fp = nullptr; CreateTestFile(fp, filename); - fwrite(data, 1, num_bytes, fp); - fclose(fp); + size_t written = fwrite(data, 1, num_bytes, fp); + ORT_ENFORCE(written == num_bytes, "Failed to write external data file"); + ORT_ENFORCE(fclose(fp) == 0, "Failed to close external data file"); return {ToUTF8String(filename), ScopedFileDeleter(filename)}; } @@ -866,7 +868,9 @@ TEST(LabelEncoder, RejectsInMemoryExternalDataInKeysTensorOpset4) { test.AddInput("X", {1, 2}, {1, 2}); test.AddOutput("Y", {1, 2}, {10, 20}); - test.Run(OpTester::ExpectResult::kExpectFailure, "in-memory external data reference"); + // Error originates from the ONNX checker (checker::check_node) during Graph::Resolve(). + // There is no way to disable this check. + test.Run(OpTester::ExpectResult::kExpectFailure, "is not regular file"); } #endif // !defined(ORT_NO_EXCEPTIONS) diff --git a/onnxruntime/test/providers/cpu/ml/tree_ensembler_test.cc b/onnxruntime/test/providers/cpu/ml/tree_ensembler_test.cc index da802d459b1f2..76b4b5f478ddf 100644 --- a/onnxruntime/test/providers/cpu/ml/tree_ensembler_test.cc +++ b/onnxruntime/test/providers/cpu/ml/tree_ensembler_test.cc @@ -454,14 +454,12 @@ TEST(MLOpTest, TreeEnsembleIssue25400) { test.Run(); } -// External data in tensor attributes is not supported. In no-exceptions builds, the enforcement -// calls abort() so these tests cannot run. +// In-memory external data references in node attributes are rejected by the ONNX checker +// during Graph::Resolve() (it validates that external data locations are regular files). +// In no-exceptions builds, the ONNX checker's fail_check calls abort() so these tests cannot run. #if !defined(ORT_NO_EXCEPTIONS) -// In-memory external data references in node attributes are rejected during initialization. -// The ONNX checker (checker::check_node) runs during Graph::Resolve() before our -// inlining code and validates that external data locations are regular files. -// There is no way to disable this check. The error message comes from the ONNX checker. +// In-memory external data references in node attributes are rejected during Graph::Resolve(). TEST(MLOpTest, TreeEnsembleRejectsInMemoryExternalDataInTensorAttribute) { OpTester test("TreeEnsemble", 5, onnxruntime::kMLDomain); @@ -513,6 +511,8 @@ TEST(MLOpTest, TreeEnsembleRejectsInMemoryExternalDataInTensorAttribute) { test.AddInput("X", {1, 1}, X); test.AddOutput("Y", {1, 1}, {0.f}); + // Error originates from the ONNX checker (checker::check_node) during Graph::Resolve(). + // There is no way to disable this check. test.Run(OpTester::ExpectResult::kExpectFailure, "is not regular file"); } diff --git a/onnxruntime/test/providers/cpu/ml/treeregressor_test.cc b/onnxruntime/test/providers/cpu/ml/treeregressor_test.cc index 8e1ca9caf8c98..988da6e68c69b 100644 --- a/onnxruntime/test/providers/cpu/ml/treeregressor_test.cc +++ b/onnxruntime/test/providers/cpu/ml/treeregressor_test.cc @@ -1083,11 +1083,9 @@ TEST(MLOpTest, TreeEnsembleRegressorBaseValuesWrongSize) { test.Run(OpTester::ExpectResult::kExpectFailure, "base_values should have 0 or 2 values."); } -// In-memory external data references in node attributes are rejected during initialization. -// The ONNX checker (checker::check_node) runs during Graph::Resolve() before our -// inlining code and validates that external data locations are regular files. -// There is no way to disable this check. The error message comes from the ONNX checker. -// In no-exceptions builds, ORT_RETURN_IF calls abort(). +// In-memory external data references in node attributes are rejected by the ONNX checker +// during Graph::Resolve() (it validates that external data locations are regular files). +// In no-exceptions builds, the ONNX checker's fail_check calls abort() so these tests cannot run. #if !defined(ORT_NO_EXCEPTIONS) TEST(MLOpTest, TreeEnsembleRegressorRejectsInMemoryExternalDataInTensorAttribute) { @@ -1134,6 +1132,8 @@ TEST(MLOpTest, TreeEnsembleRegressorRejectsInMemoryExternalDataInTensorAttribute test.AddInput("X", {1, 1}, X); test.AddOutput("Y", {1, 1}, {0.f}); + // Error originates from the ONNX checker (checker::check_node) during Graph::Resolve(). + // There is no way to disable this check. test.Run(OpTester::ExpectResult::kExpectFailure, "is not regular file"); }