From 1cacf9890778affa98a2ad608abd8396d909d30c Mon Sep 17 00:00:00 2001
From: Daniil Ivanik <61067749+divanik@users.noreply.github.com>
Date: Tue, 7 Apr 2026 14:53:33 +0000
Subject: [PATCH 1/2] Merge pull request #100645 from
 ClickHouse/divanik/add_bytes_and_rows_count_to_iceberg_data_object

Parse record_count and size_bytes fields from iceberg manifest file
---
 src/Core/ProtocolDefines.h                         |   3 +-
 .../Common/AvroForIcebergDeserializer.cpp          |  15 +-
 .../DataLakes/Iceberg/Constant.h                   |   2 +
 .../Iceberg/IcebergDataObjectInfo.cpp              |  52 +++++-
 .../DataLakes/Iceberg/IcebergDataObjectInfo.h      |   2 +
 .../DataLakes/Iceberg/ManifestFile.h               |  10 +-
 .../StorageObjectStorageSource.cpp                 |  20 +++
 .../gtest_datalake_table_state_serde.cpp           |  96 ++++++++++
 .../test_file_stats_logging.py                     | 159 ++++++++++++++++++
 9 files changed, 353 insertions(+), 6 deletions(-)
 create mode 100644 tests/integration/test_storage_iceberg_with_spark/test_file_stats_logging.py

diff --git a/src/Core/ProtocolDefines.h b/src/Core/ProtocolDefines.h
index 45e6463a4225..ae7cff5d0e38 100644
--- a/src/Core/ProtocolDefines.h
+++ b/src/Core/ProtocolDefines.h
@@ -38,7 +38,8 @@ static constexpr auto DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION_WITH_DATA_LAKE_ME
 static constexpr auto DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION_WITH_ICEBERG_METADATA = 3;
 static constexpr auto DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION_WITH_FILE_BUCKETS_INFO = 4;
 static constexpr auto DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION_WITH_EXCLUDED_ROWS = 5;
-static constexpr auto DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION = DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION_WITH_EXCLUDED_ROWS;
+static constexpr auto DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION_WITH_ICEBERG_FILE_STATS = 6;
+static constexpr auto DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION = DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION_WITH_ICEBERG_FILE_STATS;
 
 static constexpr auto DATA_LAKE_TABLE_STATE_SNAPSHOT_PROTOCOL_VERSION = 1;
 
diff --git a/src/Storages/ObjectStorage/DataLakes/Common/AvroForIcebergDeserializer.cpp b/src/Storages/ObjectStorage/DataLakes/Common/AvroForIcebergDeserializer.cpp
index c27bb2ac7117..fb295c895d50 100644
--- a/src/Storages/ObjectStorage/DataLakes/Common/AvroForIcebergDeserializer.cpp
+++ b/src/Storages/ObjectStorage/DataLakes/Common/AvroForIcebergDeserializer.cpp
@@ -236,6 +236,9 @@ ParsedManifestFileEntryPtr AvroForIcebergDeserializer::createParsedManifestFileE
         sort_order_id = sort_order_id_value.safeGet<Int32>();
     }
 
+    const auto record_count = getValueFromRowByName(row_index, c_data_file_record_count, TypeIndex::Int64).safeGet<Int64>();
+    const auto file_size_in_bytes = getValueFromRowByName(row_index, c_data_file_file_size_in_bytes, TypeIndex::Int64).safeGet<Int64>();
+
     switch (content_type)
     {
         case FileContentType::DATA: {
@@ -253,7 +256,9 @@ ParsedManifestFileEntryPtr AvroForIcebergDeserializer::createParsedManifestFileE
                 /*lower_reference_data_file_path_ = */ std::nullopt,
                 /*upper_reference_data_file_path_ = */ std::nullopt,
                 /*equality_ids*/ std::nullopt,
-                sort_order_id);
+                sort_order_id,
+                record_count,
+                file_size_in_bytes);
         }
         case FileContentType::POSITION_DELETE: {
             /// reference_file_path can be absent in schema for some reason, though it is present in specification: https://iceberg.apache.org/spec/#manifests
@@ -296,7 +301,9 @@ ParsedManifestFileEntryPtr AvroForIcebergDeserializer::createParsedManifestFileE
                 lower_reference_data_file_path,
                 upper_reference_data_file_path,
                 /*equality_ids*/ std::nullopt,
-                /*sort_order_id = */ std::nullopt);
+                /*sort_order_id = */ std::nullopt,
+                record_count,
+                file_size_in_bytes);
         }
         case FileContentType::EQUALITY_DELETE: {
             std::vector<Int32> equality_ids;
@@ -325,7 +332,9 @@ ParsedManifestFileEntryPtr AvroForIcebergDeserializer::createParsedManifestFileE
                 /*lower_reference_data_file_path_ = */ std::nullopt,
                 /*upper_reference_data_file_path_ = */ std::nullopt,
                 equality_ids,
-                /*sort_order_id = */ std::nullopt);
+                /*sort_order_id = */ std::nullopt,
+                record_count,
+                file_size_in_bytes);
         }
     }
 }
diff --git a/src/Storages/ObjectStorage/DataLakes/Iceberg/Constant.h b/src/Storages/ObjectStorage/DataLakes/Iceberg/Constant.h
index 6f1e455e351a..23f205736a5d 100644
--- a/src/Storages/ObjectStorage/DataLakes/Iceberg/Constant.h
+++ b/src/Storages/ObjectStorage/DataLakes/Iceberg/Constant.h
@@ -149,6 +149,8 @@ DEFINE_ICEBERG_FIELD_COMPOUND(data_file, lower_bounds);
 DEFINE_ICEBERG_FIELD_COMPOUND(data_file, upper_bounds);
 DEFINE_ICEBERG_FIELD_COMPOUND(data_file, referenced_data_file);
 DEFINE_ICEBERG_FIELD_COMPOUND(data_file, sort_order_id);
+DEFINE_ICEBERG_FIELD_COMPOUND(data_file, record_count);
+DEFINE_ICEBERG_FIELD_COMPOUND(data_file, file_size_in_bytes);
 
 /// Fallback defaults for snapshot retention policy when table properties are absent.
 /// These values follow the Java reference implementation; the Iceberg spec does not
diff --git a/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergDataObjectInfo.cpp b/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergDataObjectInfo.cpp
index 9a963feb3d3f..7046483523d5 100644
--- a/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergDataObjectInfo.cpp
+++ b/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergDataObjectInfo.cpp
@@ -43,7 +43,9 @@ IcebergDataObjectInfo::IcebergDataObjectInfo(
         data_manifest_file_entry_->sequence_number,
         data_manifest_file_entry_->parsed_entry->file_format,
         /* position_deletes_objects */ {},
-        /* equality_deletes_objects */ {}}
+        /* equality_deletes_objects */ {},
+        data_manifest_file_entry_->parsed_entry->record_count,
+        data_manifest_file_entry_->parsed_entry->file_size_in_bytes}
 {
 }
 
@@ -137,6 +139,27 @@ void IcebergObjectSerializableInfo::serializeForClusterFunctionProtocol(WriteBuf
             }
         }
     }
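+    /// Each optional stat is framed as a varUInt presence flag (0 or 1),
+    /// followed by the varInt-encoded value when the flag is set.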
+    if (protocol_version >= DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION_WITH_ICEBERG_FILE_STATS)
+    {
+        if (record_count.has_value())
+        {
+            writeVarUInt(1, out);
+            writeVarInt(*record_count, out);
+        }
+        else
+        {
+            writeVarUInt(0, out);
+        }
+        if (file_size_in_bytes.has_value())
+        {
+            writeVarUInt(1, out);
+            writeVarInt(*file_size_in_bytes, out);
+        }
+        else
+        {
+            writeVarUInt(0, out);
+        }
+    }
 }
 
 void IcebergObjectSerializableInfo::deserializeForClusterFunctionProtocol(ReadBuffer & in, size_t protocol_version)
@@ -193,6 +216,33 @@ void IcebergObjectSerializableInfo::deserializeForClusterFunctionProtocol(ReadBu
             }
         }
     }
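+    /// Mirror of the serializer above: read the presence flag first, then the value only if it is set.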
+    if (protocol_version >= DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION_WITH_ICEBERG_FILE_STATS)
+    {
+        size_t has_record_count = 0;
+        readVarUInt(has_record_count, in);
+        if (has_record_count)
+        {
+            Int64 value = 0;
+            readVarInt(value, in);
+            record_count = value;
+        }
+        else
+        {
+            record_count = std::nullopt;
+        }
+        size_t has_file_size = 0;
+        readVarUInt(has_file_size, in);
+        if (has_file_size)
+        {
+            Int64 value = 0;
+            readVarInt(value, in);
+            file_size_in_bytes = value;
+        }
+        else
+        {
+            file_size_in_bytes = std::nullopt;
+        }
+    }
 }
 
 void IcebergObjectSerializableInfo::checkVersion(size_t protocol_version) const
diff --git a/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergDataObjectInfo.h b/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergDataObjectInfo.h
index ce8e551d6e32..75c1bb9fba3d 100644
--- a/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergDataObjectInfo.h
+++ b/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergDataObjectInfo.h
@@ -22,6 +22,8 @@ struct IcebergObjectSerializableInfo
     String file_format;
     std::vector position_deletes_objects;
    std::vector equality_deletes_objects;
+    std::optional<Int64> record_count;
+    std::optional<Int64> file_size_in_bytes;
 
     void serializeForClusterFunctionProtocol(WriteBuffer & out, size_t protocol_version) const;
     void deserializeForClusterFunctionProtocol(ReadBuffer & in, size_t protocol_version);
diff --git a/src/Storages/ObjectStorage/DataLakes/Iceberg/ManifestFile.h b/src/Storages/ObjectStorage/DataLakes/Iceberg/ManifestFile.h
index 3a1bccc53411..3a1b100a5e66 100644
--- a/src/Storages/ObjectStorage/DataLakes/Iceberg/ManifestFile.h
+++ b/src/Storages/ObjectStorage/DataLakes/Iceberg/ManifestFile.h
@@ -96,6 +96,10 @@ struct ParsedManifestFileEntry : boost::noncopyable
     /// Data file is sorted with this sort_order_id (can be read from metadata.json)
     std::optional<Int32> sort_order_id;
 
+    /// File-level statistics from the Iceberg manifest (required fields per the spec)
+    Int64 record_count;
+    Int64 file_size_in_bytes;
+
     ParsedManifestFileEntry(
         FileContentType content_type_,
         String file_path_key_,
@@ -110,7 +114,9 @@ struct ParsedManifestFileEntry : boost::noncopyable
         std::optional<String> lower_reference_data_file_path_,
         std::optional<String> upper_reference_data_file_path_,
         std::optional<std::vector<Int32>> equality_ids_,
-        std::optional<Int32> sort_order_id_)
+        std::optional<Int32> sort_order_id_,
+        Int64 record_count_,
+        Int64 file_size_in_bytes_)
         : content_type(content_type_)
         , file_path_key(std::move(file_path_key_))
         , row_number(row_number_)
@@ -125,6 +131,8 @@ struct ParsedManifestFileEntry : boost::noncopyable
         , upper_reference_data_file_path(std::move(upper_reference_data_file_path_))
         , equality_ids(std::move(equality_ids_))
         , sort_order_id(sort_order_id_)
+        , record_count(record_count_)
+        , file_size_in_bytes(file_size_in_bytes_)
     {
     }
 };
diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp
index a6567a237634..374795064495 100644
--- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp
+++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp
@@ -32,6 +32,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -97,6 +98,23 @@ namespace ErrorCodes
     extern const int FILE_DOESNT_EXIST;
 }
 
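+/// Log per-file Iceberg statistics at TEST level so integration tests can grep them from the server log.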
+void logIcebergFileStats(const ObjectInfoPtr & object_info, const LoggerPtr & log)
+{
+#if USE_AVRO
+    if (auto iceberg_object = std::dynamic_pointer_cast<IcebergDataObjectInfo>(object_info))
+    {
+        const auto & info = iceberg_object->info;
+        if (info.record_count.has_value())
+            LOG_TEST(log, "Iceberg record_count for '{}': {}", object_info->getPath(), *info.record_count);
+        if (info.file_size_in_bytes.has_value())
+            LOG_TEST(log, "Iceberg file_size_in_bytes for '{}': {}", object_info->getPath(), *info.file_size_in_bytes);
+    }
+#else
+    UNUSED(object_info);
+    UNUSED(log);
+#endif
+}
+
 StorageObjectStorageSource::StorageObjectStorageSource(
     String name_,
     ObjectStoragePtr object_storage_,
@@ -878,6 +896,8 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade
         object_info->getObjectMetadata()->size_bytes,
         object_info->getFileFormat().value_or(configuration->getFormat()));
 
+    logIcebergFileStats(object_info, log);
+
     bool use_native_reader_v3 = format_settings.has_value()
         ? format_settings->parquet.use_native_reader_v3
         : context_->getSettingsRef()[Setting::input_format_parquet_use_native_reader_v3];
diff --git a/src/Storages/ObjectStorage/tests/gtest_datalake_table_state_serde.cpp b/src/Storages/ObjectStorage/tests/gtest_datalake_table_state_serde.cpp
index 1b5e3aa2c233..cf5b8befdaf2 100644
--- a/src/Storages/ObjectStorage/tests/gtest_datalake_table_state_serde.cpp
+++ b/src/Storages/ObjectStorage/tests/gtest_datalake_table_state_serde.cpp
@@ -2,7 +2,9 @@
 #include
 #include
 #include
+#include
 #include
+#include
 
 using namespace DB;
 
@@ -60,3 +62,97 @@ TEST(DatalakeStateSerde, DataLakeStateSerde)
     }
 }
 
+
+TEST(DatalakeStateSerde, IcebergObjectSerializableInfoRoundTrip)
+{
+    Iceberg::IcebergObjectSerializableInfo info;
+    info.data_object_file_path_key = DB::Iceberg::IcebergPathFromMetadata::deserialize("s3://bucket/path/to/file.parquet");
+    info.underlying_format_read_schema_id = 42;
+    info.schema_id_relevant_to_iterator = 7;
+    info.sequence_number = 123456;
+    info.file_format = "PARQUET";
+    info.position_deletes_objects = {{"s3://bucket/deletes/pos1.parquet", "PARQUET", "s3://bucket/path/to/file.parquet"}};
+    info.equality_deletes_objects = {{"s3://bucket/deletes/eq1.parquet", "PARQUET", std::vector<Int32>{1, 2, 3}, 42}};
+    info.record_count = 100500;
+    info.file_size_in_bytes = 999888777;
+
+    String str;
+    {
+        WriteBufferFromString write_buffer{str};
+        info.serializeForClusterFunctionProtocol(write_buffer, DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION);
+    }
+    ReadBufferFromMemory read_buffer(str.data(), str.size());
+    Iceberg::IcebergObjectSerializableInfo deserialized;
+    deserialized.deserializeForClusterFunctionProtocol(read_buffer, DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION);
+
+    ASSERT_TRUE(read_buffer.eof());
+    ASSERT_EQ(deserialized.data_object_file_path_key, info.data_object_file_path_key);
+    ASSERT_EQ(deserialized.underlying_format_read_schema_id, info.underlying_format_read_schema_id);
+    ASSERT_EQ(deserialized.schema_id_relevant_to_iterator, info.schema_id_relevant_to_iterator);
+    ASSERT_EQ(deserialized.sequence_number, info.sequence_number);
+    ASSERT_EQ(deserialized.file_format, info.file_format);
+    ASSERT_EQ(deserialized.position_deletes_objects.size(), 1);
+    ASSERT_EQ(deserialized.equality_deletes_objects.size(), 1);
+    ASSERT_TRUE(deserialized.record_count.has_value());
+    ASSERT_EQ(*deserialized.record_count, 100500);
+    ASSERT_TRUE(deserialized.file_size_in_bytes.has_value());
+    ASSERT_EQ(*deserialized.file_size_in_bytes, 999888777);
+}
+
+
+TEST(DatalakeStateSerde, IcebergObjectSerializableInfoWithoutFileStats)
+{
+    Iceberg::IcebergObjectSerializableInfo info;
+    info.data_object_file_path_key = DB::Iceberg::IcebergPathFromMetadata::deserialize("s3://bucket/path/to/file.parquet");
+    info.underlying_format_read_schema_id = 1;
+    info.schema_id_relevant_to_iterator = 1;
+    info.sequence_number = 0;
+    info.file_format = "PARQUET";
+    info.record_count = 42;
+    info.file_size_in_bytes = 1024;
+
+    /// Serialize at protocol version 5 (before file stats were added)
+    String str;
+    {
+        WriteBufferFromString write_buffer{str};
+        info.serializeForClusterFunctionProtocol(write_buffer, DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION_WITH_EXCLUDED_ROWS);
+    }
+
+    /// Deserialize at protocol version 5 — the new fields should be absent
+    ReadBufferFromMemory read_buffer(str.data(), str.size());
+    Iceberg::IcebergObjectSerializableInfo deserialized;
+    deserialized.deserializeForClusterFunctionProtocol(read_buffer, DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION_WITH_EXCLUDED_ROWS);
+
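+    /// The v5 stream must be fully consumed: nothing extra may have been written for the new fields.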
+    ASSERT_TRUE(read_buffer.eof());
+    ASSERT_EQ(deserialized.data_object_file_path_key, info.data_object_file_path_key);
+    ASSERT_FALSE(deserialized.record_count.has_value());
+    ASSERT_FALSE(deserialized.file_size_in_bytes.has_value());
+}
+
+
+TEST(DatalakeStateSerde, IcebergObjectSerializableInfoNulloptFileStats)
+{
+    /// Test round-trip when the fields are explicitly nullopt
+    Iceberg::IcebergObjectSerializableInfo info;
+    info.data_object_file_path_key = DB::Iceberg::IcebergPathFromMetadata::deserialize("s3://bucket/path/to/file.parquet");
+    info.underlying_format_read_schema_id = 1;
+    info.schema_id_relevant_to_iterator = 1;
+    info.sequence_number = 0;
+    info.file_format = "PARQUET";
+    info.record_count = std::nullopt;
+    info.file_size_in_bytes = std::nullopt;
+
+    String str;
+    {
+        WriteBufferFromString write_buffer{str};
+        info.serializeForClusterFunctionProtocol(write_buffer, DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION);
+    }
+    ReadBufferFromMemory read_buffer(str.data(), str.size());
+    Iceberg::IcebergObjectSerializableInfo deserialized;
+    deserialized.deserializeForClusterFunctionProtocol(read_buffer, DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION);
+
+    ASSERT_TRUE(read_buffer.eof());
+    ASSERT_FALSE(deserialized.record_count.has_value());
+    ASSERT_FALSE(deserialized.file_size_in_bytes.has_value());
+}
diff --git a/tests/integration/test_storage_iceberg_with_spark/test_file_stats_logging.py b/tests/integration/test_storage_iceberg_with_spark/test_file_stats_logging.py
new file mode 100644
index 000000000000..742560fc4716
--- /dev/null
+++ b/tests/integration/test_storage_iceberg_with_spark/test_file_stats_logging.py
@@ -0,0 +1,159 @@
+import logging
+import pytest
+import re
+
+from helpers.iceberg_utils import (
+    default_upload_directory,
+    write_iceberg_from_df,
+    generate_data,
+    get_creation_expression,
+    get_uuid_str,
+)
+
+from helpers.config_cluster import minio_secret_key
+
+
+def parse_logged_file_stats(instance, query_ids):
+    """
+    Parse Iceberg file stats from the server log, filtering by any of the given query_ids.
+    Returns a dict: {file_path: {"record_count": int, "file_size_in_bytes": int}}.
+    """
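+    # A matching log line looks like (the path value here is illustrative):
+    #   Iceberg record_count for 'data/00000-0-file.parquet': 100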
+ """ + instance = started_cluster_iceberg_with_spark.instances["node1"] + spark = started_cluster_iceberg_with_spark.spark_session + bucket = started_cluster_iceberg_with_spark.minio_bucket + + cluster_suffix = "_cluster" if run_on_cluster else "" + TABLE_NAME = ( + "test_file_stats_logging_" + + format_version + + "_" + + storage_type + + cluster_suffix + + "_" + + get_uuid_str() + ) + + NUM_ROWS = 100 + + write_iceberg_from_df( + spark, + generate_data(spark, 0, NUM_ROWS), + TABLE_NAME, + mode="overwrite", + format_version=format_version, + ) + + default_upload_directory( + started_cluster_iceberg_with_spark, + storage_type, + f"/iceberg_data/default/{TABLE_NAME}/", + f"/iceberg_data/default/{TABLE_NAME}/", + ) + + table_function_expr = get_creation_expression( + storage_type, + TABLE_NAME, + started_cluster_iceberg_with_spark, + table_function=True, + run_on_cluster=run_on_cluster, + ) + + query_id = TABLE_NAME + "_query_id" + result = instance.query( + f"SELECT * FROM {table_function_expr}", query_id=query_id + ) + assert len(result.strip().split("\n")) == NUM_ROWS + + # Collect all query_ids related to this query across all nodes + # (in cluster mode, worker nodes get their own query_id but share initial_query_id) + all_query_ids = {query_id} + for node_name, node_instance in started_cluster_iceberg_with_spark.instances.items(): + node_instance.query("SYSTEM FLUSH LOGS") + related_ids = node_instance.query( + f"SELECT query_id FROM system.query_log WHERE initial_query_id = '{query_id}'" + ).strip() + for qid in related_ids.splitlines(): + if qid: + all_query_ids.add(qid) + logging.info(f"All related query_ids: {all_query_ids}") + + # Collect file stats from all nodes, filtered by any related query_id + all_stats = {} + for node_name, node_instance in started_cluster_iceberg_with_spark.instances.items(): + node_stats = parse_logged_file_stats(node_instance, all_query_ids) + logging.info(f"[{node_name}] Parsed file stats from logs: {node_stats}") + all_stats.update(node_stats) + + assert len(all_stats) > 0, "Expected at least one file with logged stats" + + for file_path, file_stats in all_stats.items(): + assert "record_count" in file_stats, f"Missing record_count for {file_path}" + assert ( + "file_size_in_bytes" in file_stats + ), f"Missing file_size_in_bytes for {file_path}" + + logged_record_count = file_stats["record_count"] + logged_file_size = file_stats["file_size_in_bytes"] + + # Verify record_count matches actual row count by reading the parquet file directly + actual_row_count = int( + instance.query( + f"SELECT count() FROM s3('http://minio1:9001/{bucket}/{file_path}', 'minio', '{minio_secret_key}', 'Parquet')" + ).strip() + ) + assert logged_record_count == actual_row_count, ( + f"record_count mismatch for {file_path}: " + f"logged={logged_record_count}, actual={actual_row_count}" + ) + + # Verify file_size_in_bytes matches actual object size in S3 + actual_size = started_cluster_iceberg_with_spark.minio_client.stat_object( + bucket, file_path + ).size + assert logged_file_size == actual_size, ( + f"file_size_in_bytes mismatch for {file_path}: " + f"logged={logged_file_size}, actual={actual_size}" + ) + + # Verify total record_count sums to at least NUM_ROWS + total_record_count = sum(s.get("record_count", 0) for s in all_stats.values()) + assert total_record_count >= NUM_ROWS, ( + f"Total record_count ({total_record_count}) < expected ({NUM_ROWS})" + ) From 3cb64e52ee67212a5319b9d529e7172317c75434 Mon Sep 17 00:00:00 2001 From: Mikhail Koviazin Date: Sat, 9 May 2026 
From 3cb64e52ee67212a5319b9d529e7172317c75434 Mon Sep 17 00:00:00 2001
From: Mikhail Koviazin
Date: Sat, 9 May 2026 14:51:31 +0200
Subject: [PATCH 2/2] fix gtest to use string instead of IcebergPath

---
 .../tests/gtest_datalake_table_state_serde.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/Storages/ObjectStorage/tests/gtest_datalake_table_state_serde.cpp b/src/Storages/ObjectStorage/tests/gtest_datalake_table_state_serde.cpp
index cf5b8befdaf2..7b962a491af1 100644
--- a/src/Storages/ObjectStorage/tests/gtest_datalake_table_state_serde.cpp
+++ b/src/Storages/ObjectStorage/tests/gtest_datalake_table_state_serde.cpp
@@ -66,7 +66,7 @@ TEST(DatalakeStateSerde, DataLakeStateSerde)
 TEST(DatalakeStateSerde, IcebergObjectSerializableInfoRoundTrip)
 {
     Iceberg::IcebergObjectSerializableInfo info;
-    info.data_object_file_path_key = DB::Iceberg::IcebergPathFromMetadata::deserialize("s3://bucket/path/to/file.parquet");
+    info.data_object_file_path_key = "s3://bucket/path/to/file.parquet";
     info.underlying_format_read_schema_id = 42;
     info.schema_id_relevant_to_iterator = 7;
     info.sequence_number = 123456;
@@ -103,7 +103,7 @@ TEST(DatalakeStateSerde, IcebergObjectSerializableInfoRoundTrip)
 TEST(DatalakeStateSerde, IcebergObjectSerializableInfoWithoutFileStats)
 {
     Iceberg::IcebergObjectSerializableInfo info;
-    info.data_object_file_path_key = DB::Iceberg::IcebergPathFromMetadata::deserialize("s3://bucket/path/to/file.parquet");
+    info.data_object_file_path_key = "s3://bucket/path/to/file.parquet";
     info.underlying_format_read_schema_id = 1;
     info.schema_id_relevant_to_iterator = 1;
     info.sequence_number = 0;
@@ -134,7 +134,7 @@ TEST(DatalakeStateSerde, IcebergObjectSerializableInfoNulloptFileStats)
 {
     /// Test round-trip when the fields are explicitly nullopt
     Iceberg::IcebergObjectSerializableInfo info;
-    info.data_object_file_path_key = DB::Iceberg::IcebergPathFromMetadata::deserialize("s3://bucket/path/to/file.parquet");
+    info.data_object_file_path_key = "s3://bucket/path/to/file.parquet";
     info.underlying_format_read_schema_id = 1;
     info.schema_id_relevant_to_iterator = 1;
     info.sequence_number = 0;