From 18ce68945093e42f53d96f0b92cc4f2afe8c28bb Mon Sep 17 00:00:00 2001
From: Konstantin Vedernikov <75157521+scanhex12@users.noreply.github.com>
Date: Wed, 1 Apr 2026 21:56:11 +0000
Subject: [PATCH 1/2] Merge pull request #101278 from Desel72/fix/issue-100565

Fix Iceberg `ALTER TABLE UPDATE` exception on partitioned tables.
---
 .../DataLakes/Iceberg/Mutations.cpp           |  6 ++++
 ...update_partition_key_empty_block.reference |  1 +
 ...ceberg_update_partition_key_empty_block.sh | 34 +++++++++++++++++++
 3 files changed, 41 insertions(+)
 create mode 100644 tests/queries/0_stateless/04061_iceberg_update_partition_key_empty_block.reference
 create mode 100755 tests/queries/0_stateless/04061_iceberg_update_partition_key_empty_block.sh

diff --git a/src/Storages/ObjectStorage/DataLakes/Iceberg/Mutations.cpp b/src/Storages/ObjectStorage/DataLakes/Iceberg/Mutations.cpp
index 9b4e242087eb..eccb127d42b6 100644
--- a/src/Storages/ObjectStorage/DataLakes/Iceberg/Mutations.cpp
+++ b/src/Storages/ObjectStorage/DataLakes/Iceberg/Mutations.cpp
@@ -171,6 +171,9 @@ static std::optional writeDataFiles(
     bool has_any_rows = false;
     while (executor.pull(block))
     {
+        if (block.rows() == 0)
+            continue;
+
         has_any_rows = true;
         Chunk chunk(block.getColumns(), block.rows());
         auto partition_result = getPartitionedChunks(chunk, chunk_partitioner);
@@ -277,6 +280,9 @@ static std::optional writeDataFiles(
     Block block;
     while (executor.pull(block))
     {
+        if (block.rows() == 0)
+            continue;
+
         auto data_block = getNonVirtualColumns(block);
         Chunk chunk(data_block.getColumns(), data_block.rows());
         auto partition_result = getPartitionedChunks(chunk, chunk_partitioner);
diff --git a/tests/queries/0_stateless/04061_iceberg_update_partition_key_empty_block.reference b/tests/queries/0_stateless/04061_iceberg_update_partition_key_empty_block.reference
new file mode 100644
index 000000000000..f2ad6c76f011
--- /dev/null
+++ b/tests/queries/0_stateless/04061_iceberg_update_partition_key_empty_block.reference
@@ -0,0 +1 @@
+c
diff --git a/tests/queries/0_stateless/04061_iceberg_update_partition_key_empty_block.sh b/tests/queries/0_stateless/04061_iceberg_update_partition_key_empty_block.sh
new file mode 100755
index 000000000000..c078efef4ec8
--- /dev/null
+++ b/tests/queries/0_stateless/04061_iceberg_update_partition_key_empty_block.sh
@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+# Tags: no-fasttest
+
+# Regression test for https://github.com/ClickHouse/ClickHouse/issues/100565
+# Two consecutive ALTER TABLE UPDATE on a partitioned Iceberg table should not
+# cause a "Logical error: 'partitions_count > 0'" exception. The second UPDATE
+# reads the original data file whose rows were position-deleted by the first
+# UPDATE, producing an empty block that must be skipped during partitioning.
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+TABLE="t_${CLICKHOUSE_DATABASE}_${RANDOM}"
+TABLE_PATH="${USER_FILES_PATH}/${TABLE}/"
+
+${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS ${TABLE}"
+${CLICKHOUSE_CLIENT} --query "
+    CREATE TABLE ${TABLE} (c0 String)
+    ENGINE = IcebergLocal('${TABLE_PATH}', 'Parquet')
+    PARTITION BY (c0)
+"
+
+${CLICKHOUSE_CLIENT} --allow_insert_into_iceberg=1 --query "INSERT INTO ${TABLE} VALUES ('a')"
+
+${CLICKHOUSE_CLIENT} --allow_insert_into_iceberg=1 --query "ALTER TABLE ${TABLE} UPDATE c0 = 'b' WHERE TRUE"
+
+# This second UPDATE previously triggered: Logical error: 'partitions_count > 0'
+${CLICKHOUSE_CLIENT} --allow_insert_into_iceberg=1 --query "ALTER TABLE ${TABLE} UPDATE c0 = 'c' WHERE TRUE"
+
+${CLICKHOUSE_CLIENT} --query "SELECT c0 FROM ${TABLE}"
+
+${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS ${TABLE}"
+rm -rf "${TABLE_PATH}"
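The heart of the first patch is the two `if (block.rows() == 0) continue;` guards: after the first UPDATE position-deletes every row of a data file, the second UPDATE pulls an empty block from the pipeline, and handing that block to the chunk partitioner trips the `partitions_count > 0` check. The sketch below illustrates the guard with toy stand-ins (`Block`, `partitionChunk`) invented for illustration; it is a simplified model of the pattern, not ClickHouse's pipeline code.

#include <cstddef>
#include <iostream>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>

// Toy stand-in for a pipeline block: one partition-key value per row.
struct Block
{
    std::vector<std::string> partition_keys;
    size_t rows() const { return partition_keys.size(); }
};

// Toy stand-in for the chunk partitioner. Like the real one, it assumes it is
// never given empty input ("partitions_count > 0").
std::map<std::string, size_t> partitionChunk(const Block & block)
{
    if (block.rows() == 0)
        throw std::logic_error("Logical error: 'partitions_count > 0'");
    std::map<std::string, size_t> rows_per_partition;
    for (const auto & key : block.partition_keys)
        ++rows_per_partition[key];
    return rows_per_partition;
}

int main()
{
    // The second UPDATE re-reads a data file whose rows were all
    // position-deleted by the first UPDATE, so one pulled block is empty.
    std::vector<Block> pulled_blocks;
    pulled_blocks.push_back({{"a", "a", "b"}});
    pulled_blocks.push_back({}); // empty block
    pulled_blocks.push_back({{"c"}});

    bool has_any_rows = false;
    for (const auto & block : pulled_blocks)
    {
        if (block.rows() == 0)
            continue; // the fix: never hand an empty block to the partitioner

        has_any_rows = true;
        for (const auto & [key, count] : partitionChunk(block))
            std::cout << key << ": " << count << " row(s)\n";
    }
    std::cout << (has_any_rows ? "data files written\n" : "nothing to write\n");
    return 0;
}

With the guard in place, the empty block is simply skipped and the partitioner only ever sees non-empty input, which is exactly what the regression test above exercises.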
"$CURDIR"/../shell_config.sh + +TABLE="t_${CLICKHOUSE_DATABASE}_${RANDOM}" +TABLE_PATH="${USER_FILES_PATH}/${TABLE}/" + +${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS ${TABLE}" +${CLICKHOUSE_CLIENT} --query " + CREATE TABLE ${TABLE} (c0 String) + ENGINE = IcebergLocal('${TABLE_PATH}', 'Parquet') + PARTITION BY (c0) +" + +${CLICKHOUSE_CLIENT} --allow_insert_into_iceberg=1 --query "INSERT INTO ${TABLE} VALUES ('a')" + +${CLICKHOUSE_CLIENT} --allow_insert_into_iceberg=1 --query "ALTER TABLE ${TABLE} UPDATE c0 = 'b' WHERE TRUE" + +# This second UPDATE previously triggered: Logical error: 'partitions_count > 0' +${CLICKHOUSE_CLIENT} --allow_insert_into_iceberg=1 --query "ALTER TABLE ${TABLE} UPDATE c0 = 'c' WHERE TRUE" + +${CLICKHOUSE_CLIENT} --query "SELECT c0 FROM ${TABLE}" + +${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS ${TABLE}" +rm -rf "${TABLE_PATH}" From 241666c7689d9ee56bfb3bfeb7ddd4316eea20db Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 13 Apr 2026 10:59:24 +0000 Subject: [PATCH 2/2] Merge pull request #102337 from Desel72/fix/issue-101916 Fix Iceberg mutation crash: unwrap LowCardinality/Nullable before Avro serialization --- .../DataLakes/Iceberg/Mutations.cpp | 25 +++++++++-- ...2_iceberg_update_low_cardinality.reference | 4 ++ .../04092_iceberg_update_low_cardinality.sh | 42 +++++++++++++++++++ 3 files changed, 68 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/04092_iceberg_update_low_cardinality.reference create mode 100755 tests/queries/0_stateless/04092_iceberg_update_low_cardinality.sh diff --git a/src/Storages/ObjectStorage/DataLakes/Iceberg/Mutations.cpp b/src/Storages/ObjectStorage/DataLakes/Iceberg/Mutations.cpp index eccb127d42b6..40c57700c668 100644 --- a/src/Storages/ObjectStorage/DataLakes/Iceberg/Mutations.cpp +++ b/src/Storages/ObjectStorage/DataLakes/Iceberg/Mutations.cpp @@ -1,7 +1,10 @@ +#include #include #include #include #include +#include +#include #include #include #include @@ -99,7 +102,7 @@ static Block getPositionDeleteFileSampleBlock() return Block(delete_file_columns_desc); } -static Block getNonVirtualColumns(const Block & block) +static Block getNonVirtualColumns(const Block & block, bool remove_low_cardinality = false) { auto virtual_columns_desc = VirtualColumnUtils::getVirtualNamesForFileLikeStorage(); std::unordered_set virtual_columns; @@ -110,7 +113,14 @@ static Block getNonVirtualColumns(const Block & block) { if (virtual_columns.contains(block.getNames()[i])) continue; - columns.push_back(ColumnWithTypeAndName(block.getColumns()[i], block.getDataTypes()[i], block.getNames()[i])); + auto col_type = block.getDataTypes()[i]; + auto col_data = block.getColumns()[i]; + if (remove_low_cardinality) + { + col_type = removeLowCardinality(col_type); + col_data = col_data->convertToFullColumnIfLowCardinality(); + } + columns.push_back(ColumnWithTypeAndName(col_data, col_type, block.getNames()[i])); } return Block(columns); } @@ -218,12 +228,19 @@ static std::optional writeDataFiles( col_data_filename.column = partition_chunk.getColumns()[col_data_filename_index]; col_position.column = partition_chunk.getColumns()[col_position_index]; + /// The virtual column `_iceberg_metadata_file_path` may arrive as + /// LowCardinality(String) from the pipeline, but the position delete + /// file format expects plain String. Unwrap it. 
diff --git a/tests/queries/0_stateless/04092_iceberg_update_low_cardinality.reference b/tests/queries/0_stateless/04092_iceberg_update_low_cardinality.reference
new file mode 100644
index 000000000000..77f818794703
--- /dev/null
+++ b/tests/queries/0_stateless/04092_iceberg_update_low_cardinality.reference
@@ -0,0 +1,4 @@
+2
+3
+10
+OK
diff --git a/tests/queries/0_stateless/04092_iceberg_update_low_cardinality.sh b/tests/queries/0_stateless/04092_iceberg_update_low_cardinality.sh
new file mode 100755
index 000000000000..fdda2c322580
--- /dev/null
+++ b/tests/queries/0_stateless/04092_iceberg_update_low_cardinality.sh
@@ -0,0 +1,42 @@
+#!/usr/bin/env bash
+# Tags: no-fasttest
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+ICEBERG_TABLE_PATH="${CLICKHOUSE_USER_FILES}/lakehouses/${CLICKHOUSE_DATABASE}_t0"
+
+# Cleanup
+rm -rf "${ICEBERG_TABLE_PATH}"
+
+# Test 1: ALTER UPDATE on IcebergLocal with default (Parquet) format.
+# The position delete file writer used to crash because `_iceberg_metadata_file_path`
+# arrives as LowCardinality(String) from the pipeline, while the Avro/Parquet serializer
+# expects plain String.
+${CLICKHOUSE_CLIENT} --query "
+    SET allow_experimental_insert_into_iceberg = 1;
+    CREATE TABLE t0 (c0 Int) ENGINE = IcebergLocal('${ICEBERG_TABLE_PATH}');
+    INSERT INTO t0 VALUES (1), (2), (3);
+    ALTER TABLE t0 UPDATE c0 = 10 WHERE c0 = 1;
+    SELECT c0 FROM t0 ORDER BY c0;
+"
+
+# Cleanup
+${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS t0"
+rm -rf "${ICEBERG_TABLE_PATH}"
+
+# Test 2: ALTER UPDATE on IcebergLocal with Avro format.
+# This was the exact reproduction case from the issue.
+${CLICKHOUSE_CLIENT} --query "
+    SET allow_experimental_insert_into_iceberg = 1;
+    CREATE TABLE t0 (c0 Int) ENGINE = IcebergLocal('${ICEBERG_TABLE_PATH}', 'Avro');
+    INSERT INTO t0 VALUES (1);
+    ALTER TABLE t0 UPDATE c0 = 1 WHERE TRUE;
+"
+
+echo "OK"
+
+# Cleanup
+${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS t0"
+rm -rf "${ICEBERG_TABLE_PATH}"
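The Nullable half of the second patch works the same way for `_row_number`: the column may arrive wrapped in Nullable, the position delete writer requires plain integers, so the code verifies the null map is all zeros and then takes the nested column and type. Below is a self-contained sketch of that validate-then-unwrap step; `NullableUInt64Column` and `unwrapRowNumbers` are illustrative names, not the real ClickHouse classes.

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <stdexcept>
#include <vector>

// Toy stand-in for Nullable(UInt64): plain values plus a null map (1 = NULL).
struct NullableUInt64Column
{
    std::vector<uint64_t> values;
    std::vector<uint8_t> null_map;
};

// Refuse any NULL row number, then return the nested (plain) values,
// mirroring the LOGICAL_ERROR check and getNestedColumnPtr() in the patch.
std::vector<uint64_t> unwrapRowNumbers(const NullableUInt64Column & column)
{
    if (std::any_of(column.null_map.begin(), column.null_map.end(), [](uint8_t x) { return x != 0; }))
        throw std::logic_error("Unexpected null _row_number");
    return column.values;
}

int main()
{
    NullableUInt64Column row_numbers{{0, 2, 5}, {0, 0, 0}};
    for (uint64_t position : unwrapRowNumbers(row_numbers))
        std::cout << position << '\n';
    return 0;
}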