diff --git a/ci/workflows/backport_branches.py b/ci/workflows/backport_branches.py
index 86172c171eb4..e476f4b4c15e 100644
--- a/ci/workflows/backport_branches.py
+++ b/ci/workflows/backport_branches.py
@@ -49,7 +49,7 @@
     enable_job_filtering_by_changes=True,
     enable_cache=True,
     enable_report=True,
-    enable_automerge=True,
+    enable_automerge=False,
     enable_cidb=True,
     enable_commit_status_on_failure=True,
     pre_hooks=[
diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt
index cbff830dc5e5..2a1297acb22c 100644
--- a/cmake/autogenerated_versions.txt
+++ b/cmake/autogenerated_versions.txt
@@ -2,13 +2,13 @@
 
 # NOTE: VERSION_REVISION has nothing common with DBMS_TCP_PROTOCOL_VERSION,
 # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
-SET(VERSION_REVISION 54522)
+SET(VERSION_REVISION 54523)
 SET(VERSION_MAJOR 25)
 SET(VERSION_MINOR 8)
-SET(VERSION_PATCH 22)
-SET(VERSION_GITHASH 099badce0f7744623716d4efa2971d3e1c63d1cf)
-SET(VERSION_DESCRIBE v25.8.22.20001.altinityantalya)
-SET(VERSION_STRING 25.8.22.20001.altinityantalya)
+SET(VERSION_PATCH 23)
+SET(VERSION_GITHASH 6d29497525664acca46a1d1cd0d5787e9ad0857d)
+SET(VERSION_DESCRIBE v25.8.23.20001.altinityantalya)
+SET(VERSION_STRING 25.8.23.20001.altinityantalya)
 # end of autochange
 
 SET(VERSION_TWEAK 20001)
diff --git a/docker/keeper/Dockerfile.distroless b/docker/keeper/Dockerfile.distroless
index 37491d02f883..1adc6a362498 100644
--- a/docker/keeper/Dockerfile.distroless
+++ b/docker/keeper/Dockerfile.distroless
@@ -3,8 +3,8 @@
 # The entrypoint is the compiled `clickhouse docker-init --keeper` subcommand.
 #
 # Build targets:
-#   production — gcr.io/distroless/cc-debian12:nonroot (default)
-#   debug      — gcr.io/distroless/cc-debian12:debug-nonroot (includes busybox shell)
+#   production — gcr.io/distroless/cc-debian13:nonroot (default)
+#   debug      — gcr.io/distroless/cc-debian13:debug-nonroot (includes busybox shell)
 #
 # Usage:
 #   docker build -f Dockerfile.distroless --target production -t clickhouse/clickhouse-keeper:distroless .
@@ -80,10 +80,17 @@ RUN clickhouse local -q 'SELECT 1' >/dev/null 2>&1 && exit 0 || : \
     && apt-get install --yes --no-install-recommends dirmngr gnupg2 \
     && mkdir -p /etc/apt/sources.list.d \
     && GNUPGHOME=$(mktemp -d) \
-    && GNUPGHOME="$GNUPGHOME" gpg --batch --no-default-keyring \
-        --keyring /usr/share/keyrings/clickhouse-keyring.gpg \
-        --keyserver hkp://keyserver.ubuntu.com:80 \
-        --recv-keys 3a9ea1193a97b548be1457d48919f6bd2b48d754 \
+    && ( set +e; \
+        for KEYSERVER in \
+            hkp://keys.openpgp.org:80 \
+            hkp://pgp.mit.edu:80 \
+            hkp://keyserver.ubuntu.com:80; do \
+            GNUPGHOME="$GNUPGHOME" gpg --batch --no-default-keyring \
+                --keyring /usr/share/keyrings/clickhouse-keyring.gpg \
+                --keyserver "$KEYSERVER" \
+                --recv-keys 3a9ea1193a97b548be1457d48919f6bd2b48d754 && break; \
+        done || exit 1 \
+    ) \
     && rm -rf "$GNUPGHOME" \
     && chmod +r /usr/share/keyrings/clickhouse-keyring.gpg \
     && echo "${REPOSITORY}" > /etc/apt/sources.list.d/clickhouse.list \
@@ -138,8 +145,8 @@ RUN mkdir -p \
 
 # ──────────────────────────────────────────────────────────────────────────────
 # Stage 2: Production distroless image.
 # ──────────────────────────────────────────────────────────────────────────────
-# Pinned 2026-03-10. Refresh: docker pull gcr.io/distroless/cc-debian12:nonroot && docker inspect --format='{{index .RepoDigests 0}}' gcr.io/distroless/cc-debian12:nonroot
-FROM gcr.io/distroless/cc-debian12:nonroot@sha256:7e5b8df2f4d36f5599ef4ab856d7d444922531709becb03f3368c6d797d0a5eb AS production
+# Pinned 2026-04-26. Refresh: docker pull gcr.io/distroless/cc-debian13:nonroot && docker inspect --format='{{index .RepoDigests 0}}' gcr.io/distroless/cc-debian13:nonroot
+FROM gcr.io/distroless/cc-debian13:nonroot@sha256:8f960b7fc6a5d6e28bb07f982655925d6206678bd9a6cde2ad00ddb5e2077d78 AS production
 
 COPY --from=ch-builder /output/ /
@@ -161,8 +168,8 @@ ENTRYPOINT ["/usr/bin/clickhouse", "docker-init", "--keeper"]
 # Stage 3: Debug image — same as production but includes the busybox shell
 # at /busybox/sh for interactive troubleshooting.
 # ──────────────────────────────────────────────────────────────────────────────
-# Pinned 2026-03-10. Refresh: docker pull gcr.io/distroless/cc-debian12:debug-nonroot && docker inspect --format='{{index .RepoDigests 0}}' gcr.io/distroless/cc-debian12:debug-nonroot
-FROM gcr.io/distroless/cc-debian12:debug-nonroot@sha256:641f055b21555d5e4f77b7f1f3caca80840a76c4f7ae5df36a159a436941d2c2 AS debug
+# Pinned 2026-04-26. Refresh: docker pull gcr.io/distroless/cc-debian13:debug-nonroot && docker inspect --format='{{index .RepoDigests 0}}' gcr.io/distroless/cc-debian13:debug-nonroot
+FROM gcr.io/distroless/cc-debian13:debug-nonroot@sha256:55dd32378f7562c890342098a04726f4ef386bb86c87bec3db6ed4eef27d99fb AS debug
 
 COPY --from=ch-builder /output/ /
diff --git a/docker/server/Dockerfile.distroless b/docker/server/Dockerfile.distroless
index bf6dbb903ebc..bfadfe523c36 100644
--- a/docker/server/Dockerfile.distroless
+++ b/docker/server/Dockerfile.distroless
@@ -3,8 +3,8 @@
 # The entrypoint is the compiled `clickhouse docker-init` subcommand.
 #
 # Build targets:
-#   production — gcr.io/distroless/cc-debian12:nonroot (default)
-#   debug      — gcr.io/distroless/cc-debian12:debug-nonroot (includes busybox shell)
+#   production — gcr.io/distroless/cc-debian13:nonroot (default)
+#   debug      — gcr.io/distroless/cc-debian13:debug-nonroot (includes busybox shell)
 #
 # Usage:
 #   docker build -f Dockerfile.distroless --target production -t clickhouse/clickhouse-server:distroless .
@@ -87,10 +87,17 @@ RUN clickhouse local -q 'SELECT 1' >/dev/null 2>&1 && exit 0 || : \
     && apt-get install --yes --no-install-recommends dirmngr gnupg2 \
     && mkdir -p /etc/apt/sources.list.d \
     && GNUPGHOME=$(mktemp -d) \
-    && GNUPGHOME="$GNUPGHOME" gpg --batch --no-default-keyring \
-        --keyring /usr/share/keyrings/clickhouse-keyring.gpg \
-        --keyserver hkp://keyserver.ubuntu.com:80 \
-        --recv-keys 3a9ea1193a97b548be1457d48919f6bd2b48d754 \
+    && ( set +e; \
+        for KEYSERVER in \
+            hkp://keys.openpgp.org:80 \
+            hkp://pgp.mit.edu:80 \
+            hkp://keyserver.ubuntu.com:80; do \
+            GNUPGHOME="$GNUPGHOME" gpg --batch --no-default-keyring \
+                --keyring /usr/share/keyrings/clickhouse-keyring.gpg \
+                --keyserver "$KEYSERVER" \
+                --recv-keys 3a9ea1193a97b548be1457d48919f6bd2b48d754 && break; \
+        done || exit 1 \
+    ) \
     && rm -rf "$GNUPGHOME" \
     && chmod +r /usr/share/keyrings/clickhouse-keyring.gpg \
     && echo "${REPOSITORY}" > /etc/apt/sources.list.d/clickhouse.list \
@@ -156,8 +163,8 @@ RUN { \
 
 # ──────────────────────────────────────────────────────────────────────────────
 # Stage 2: Production distroless image.
 # ──────────────────────────────────────────────────────────────────────────────
-# Pinned 2026-03-10. Refresh: docker pull gcr.io/distroless/cc-debian12:nonroot && docker inspect --format='{{index .RepoDigests 0}}' gcr.io/distroless/cc-debian12:nonroot
-FROM gcr.io/distroless/cc-debian12:nonroot@sha256:7e5b8df2f4d36f5599ef4ab856d7d444922531709becb03f3368c6d797d0a5eb AS production
+# Pinned 2026-04-26. Refresh: docker pull gcr.io/distroless/cc-debian13:nonroot && docker inspect --format='{{index .RepoDigests 0}}' gcr.io/distroless/cc-debian13:nonroot
+FROM gcr.io/distroless/cc-debian13:nonroot@sha256:8f960b7fc6a5d6e28bb07f982655925d6206678bd9a6cde2ad00ddb5e2077d78 AS production
 
 COPY --from=ch-builder /output/ /
@@ -179,8 +186,8 @@ ENTRYPOINT ["/usr/bin/clickhouse", "docker-init"]
 # Stage 3: Debug image — same as production but includes the busybox shell
 # at /busybox/sh for interactive troubleshooting.
 # ──────────────────────────────────────────────────────────────────────────────
-# Pinned 2026-03-10. Refresh: docker pull gcr.io/distroless/cc-debian12:debug-nonroot && docker inspect --format='{{index .RepoDigests 0}}' gcr.io/distroless/cc-debian12:debug-nonroot
-FROM gcr.io/distroless/cc-debian12:debug-nonroot@sha256:641f055b21555d5e4f77b7f1f3caca80840a76c4f7ae5df36a159a436941d2c2 AS debug
+# Pinned 2026-04-26. Refresh: docker pull gcr.io/distroless/cc-debian13:debug-nonroot && docker inspect --format='{{index .RepoDigests 0}}' gcr.io/distroless/cc-debian13:debug-nonroot
+FROM gcr.io/distroless/cc-debian13:debug-nonroot@sha256:55dd32378f7562c890342098a04726f4ef386bb86c87bec3db6ed4eef27d99fb AS debug
 
 COPY --from=ch-builder /output/ /
diff --git a/src/Columns/ColumnObject.cpp b/src/Columns/ColumnObject.cpp
index d5d6d8876817..7e6a1e356c19 100644
--- a/src/Columns/ColumnObject.cpp
+++ b/src/Columns/ColumnObject.cpp
@@ -1505,16 +1505,26 @@ bool ColumnObject::isFinalized() const
 
 void ColumnObject::getExtremes(DB::Field & min, DB::Field & max) const
 {
-    if (empty())
+    min = Object();
+    max = Object();
+    if (empty())
+        return;
+
+    size_t min_idx = 0;
+    size_t max_idx = 0;
+
+    size_t end = size();
+    for (size_t i = 1; i < end; ++i)
     {
-        min = Object();
-        max = Object();
-    }
-    else
-    {
-        get(0, min);
-        get(0, max);
+        if (compareAt(i, min_idx, *this, /* nan_direction_hint = */ 1) < 0)
+            min_idx = i;
+        else if (compareAt(i, max_idx, *this, /* nan_direction_hint = */ -1) > 0)
+            max_idx = i;
     }
+
+    get(min_idx, min);
+    get(max_idx, max);
 }
 
 void ColumnObject::prepareForSquashing(const std::vector<IColumn::Ptr> & source_columns, size_t factor)
diff --git a/src/Common/FailPoint.cpp b/src/Common/FailPoint.cpp
index 216407b8907d..d467b17a2e3c 100644
--- a/src/Common/FailPoint.cpp
+++ b/src/Common/FailPoint.cpp
@@ -132,7 +132,8 @@ static struct InitFiu
     REGULAR(patch_parts_reverse_column_order) \
     ONCE(smt_commit_exception_before_op) \
     ONCE(backup_add_empty_memory_table) \
-    REGULAR(refresh_task_stop_racing_for_running_refresh)
+    REGULAR(refresh_task_stop_racing_for_running_refresh) \
+    REGULAR(wide_part_writer_fail_in_add_streams)
 
 
 namespace FailPoints
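
Note on the ColumnObject::getExtremes rewrite above: it is a single pass that tracks argmin/argmax row indexes via compareAt and defers the comparatively expensive Field materialization to the very end. A minimal standalone sketch of the same pattern, with illustrative names that are not part of this patch:

    #include <cassert>
    #include <cstddef>
    #include <utility>

    /// Returns (min_idx, max_idx) over rows [0, size) using only an
    /// index-based three-way comparator in the spirit of IColumn::compareAt.
    template <typename Compare>
    std::pair<size_t, size_t> findExtremeRows(size_t size, Compare compare)
    {
        assert(size > 0);
        size_t min_idx = 0;
        size_t max_idx = 0;
        for (size_t i = 1; i < size; ++i)
        {
            if (compare(i, min_idx) < 0)
                min_idx = i;
            else if (compare(i, max_idx) > 0)
                max_idx = i;
        }
        return {min_idx, max_idx};
    }

The else-if is sound under a total order: a row that compares below the current minimum cannot simultaneously compare above the current maximum, so one comparison per row can be skipped.
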
diff --git a/src/DataTypes/Serializations/SerializationDynamic.cpp b/src/DataTypes/Serializations/SerializationDynamic.cpp
index 5864b6a6e585..e21836f408bf 100644
--- a/src/DataTypes/Serializations/SerializationDynamic.cpp
+++ b/src/DataTypes/Serializations/SerializationDynamic.cpp
@@ -160,7 +160,7 @@ void SerializationDynamic::serializeBinaryBulkStatePrefix(
     for (const auto & type : flattened_column.types)
     {
         if (settings.native_format && settings.format_settings && settings.format_settings->native.encode_types_in_binary_format)
-            encodeDataType(type);
+            encodeDataType(type, *stream);
         else
             writeStringBinary(type->getName(), *stream);
     }
diff --git a/src/DataTypes/Serializations/SerializationDynamicHelpers.cpp b/src/DataTypes/Serializations/SerializationDynamicHelpers.cpp
index 7736a875540d..8c4f0dd67f1b 100644
--- a/src/DataTypes/Serializations/SerializationDynamicHelpers.cpp
+++ b/src/DataTypes/Serializations/SerializationDynamicHelpers.cpp
@@ -14,6 +14,7 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int LOGICAL_ERROR;
+    extern const int INCORRECT_DATA;
 }
 
 namespace
@@ -157,6 +158,17 @@ void fillDynamicColumn(
     for (size_t i = 0; i != indexes_data.size(); ++i)
     {
         auto index = indexes_data[i];
+        if (index > null_index)
+            throw Exception(
+                ErrorCodes::INCORRECT_DATA,
+                "Incorrect index {} in indexes column of flattened Dynamic column at row {}: "
+                "the index should be in range [0, {}] (there are {} types, index {} is reserved for NULL values)",
+                static_cast<UInt64>(index),
+                i,
+                null_index,
+                flattened_column.types.size(),
+                null_index);
+
         if (index == null_index)
         {
             local_discriminators.push_back(ColumnVariant::NULL_DISCRIMINATOR);
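
Note on the INCORRECT_DATA check above: a flattened Dynamic column with N types serializes indexes 0..N-1 for the types and reserves N for NULL, so any index above N can only come from corrupted input and must be rejected before it is used as a subscript into the types array. A self-contained sketch of that invariant, with std::runtime_error standing in for DB::Exception:

    #include <cstdint>
    #include <stdexcept>
    #include <string>

    /// Validates a serialized flattened-Dynamic index; num_types itself is
    /// the reserved NULL marker, anything larger is corrupted input.
    size_t checkFlattenedDynamicIndex(uint64_t index, size_t num_types)
    {
        const uint64_t null_index = num_types;
        if (index > null_index)
            throw std::runtime_error(
                "Incorrect index " + std::to_string(index) + ": expected [0, "
                + std::to_string(null_index) + "], where "
                + std::to_string(null_index) + " means NULL");
        return static_cast<size_t>(index);
    }
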
diff --git a/src/IO/parseDateTimeBestEffort.cpp b/src/IO/parseDateTimeBestEffort.cpp
index 2b48109804cb..694dca66a262 100644
--- a/src/IO/parseDateTimeBestEffort.cpp
+++ b/src/IO/parseDateTimeBestEffort.cpp
@@ -793,6 +793,15 @@ ReturnType parseDateTimeBestEffortImpl(
         }
         res = *res_maybe;
         adjust_time_zone();
+
+        /// After timezone adjustment, the value may have shifted outside the valid range.
+        /// For example, "2106-02-07 06:28:15-01:00" is within range before adjustment,
+        /// but after converting to UTC it exceeds UINT32_MAX.
+        if constexpr (!is_64)
+        {
+            if (res < 0 || static_cast<UInt64>(res) > UINT32_MAX)
+                return false;
+        }
     }
     else
     {
diff --git a/src/Planner/Utils.cpp b/src/Planner/Utils.cpp
index 08db59bfdb44..1e8e0d8e5119 100644
--- a/src/Planner/Utils.cpp
+++ b/src/Planner/Utils.cpp
@@ -39,6 +39,7 @@
 #include
 #include
 #include
+#include <Analyzer/Passes/LogicalExpressionOptimizerPass.h>
 #include
 #include
 
@@ -536,6 +537,12 @@ FilterDAGInfo buildFilterInfo(ASTPtr filter_expression,
     QueryAnalysisPass query_analysis_pass(table_expression);
     query_analysis_pass.run(filter_query_tree, query_context);
 
+    /// Optimize logical expressions in the filter, e.g. convert OR-chains of
+    /// equalities into IN (important for row policies that produce many
+    /// permissive conditions like `x = 1 OR x = 2 OR ... OR x = N`).
+    LogicalExpressionOptimizerPass logical_expression_optimizer_pass;
+    logical_expression_optimizer_pass.run(filter_query_tree, query_context);
+
     return buildFilterInfo(std::move(filter_query_tree), table_expression, planner_context, std::move(table_expression_required_names_without_filter));
 }
 
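
Note on the parseDateTimeBestEffortImpl hunk above: the bug was one of ordering, since the 32-bit range was validated before the UTC offset was applied, and the adjustment can push an in-range wall-clock value past UINT32_MAX or below zero. The shape of the corrected check in isolation (a hypothetical helper, not the patch's code):

    #include <cstdint>
    #include <optional>

    /// Converts local seconds plus a UTC offset to a 32-bit DateTime value,
    /// or nullopt when the adjusted value leaves [0, UINT32_MAX] and the
    /// caller should fall back to DateTime64.
    std::optional<uint32_t> toDateTime32(int64_t local_seconds, int64_t offset_seconds)
    {
        int64_t utc = local_seconds - offset_seconds;
        if (utc < 0 || utc > static_cast<int64_t>(UINT32_MAX))
            return std::nullopt;
        return static_cast<uint32_t>(utc);
    }

For '2106-02-07 06:28:15-01:00' the wall-clock value is exactly UINT32_MAX and the offset is -3600, so the adjusted value is UINT32_MAX + 3600 and the helper correctly declines.
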
diff --git a/src/Processors/Formats/Impl/Parquet/Write.cpp b/src/Processors/Formats/Impl/Parquet/Write.cpp
index 0177940c1118..49fabdcfc503 100644
--- a/src/Processors/Formats/Impl/Parquet/Write.cpp
+++ b/src/Processors/Formats/Impl/Parquet/Write.cpp
@@ -221,14 +221,12 @@ struct StatisticsStringRef
         parq::Statistics s;
         if (min.ptr == nullptr)
             return s;
-        if (static_cast<size_t>(min.len) <= options.max_statistics_size)
+        if (static_cast<size_t>(min.len) <= options.max_statistics_size
+            && static_cast<size_t>(max.len) <= options.max_statistics_size)
         {
             s.__set_min_value(std::string(reinterpret_cast<const char *>(min.ptr), static_cast<size_t>(min.len)));
-            s.__set_is_min_value_exact(true);
-        }
-        if (static_cast<size_t>(max.len) <= options.max_statistics_size)
-        {
             s.__set_max_value(std::string(reinterpret_cast<const char *>(max.ptr), static_cast<size_t>(max.len)));
+            s.__set_is_min_value_exact(true);
             s.__set_is_max_value_exact(true);
         }
         return s;
@@ -251,7 +249,7 @@ struct StatisticsStringRef
         int t = memcmp(a.ptr, b.ptr, std::min(a.len, b.len));
         if (t != 0)
             return t;
-        return a.len - b.len;
+        return int(a.len) - int(b.len);
     }
 };
 
@@ -834,6 +832,10 @@ void writeColumnImpl(
     if (options.write_page_index)
     {
         bool all_null_page = data_count == 0;
+        bool has_stats = page_stats.__isset.min_value && page_stats.__isset.max_value;
+        if (!all_null_page && !has_stats)
+            s.indexes.column_index_valid = false;
+
         s.indexes.column_index.min_values.push_back(page_stats.min_value);
         s.indexes.column_index.max_values.push_back(page_stats.max_value);
         if (has_null_count)
@@ -1276,6 +1278,9 @@ static void writePageIndex(FileWriteState & file, WriteBuffer & out)
             chassert(rg.column_indexes.size() == rg.row_group.columns.size());
             for (size_t j = 0; j < rg.column_indexes.size(); ++j)
             {
+                if (!rg.column_indexes.at(j).column_index_valid)
+                    continue;
+
                 auto & column = rg.row_group.columns.at(j);
                 column.__set_column_index_offset(file.offset);
                 size_t length = serializeThriftStruct(rg.column_indexes.at(j).column_index, out);
diff --git a/src/Processors/Formats/Impl/Parquet/Write.h b/src/Processors/Formats/Impl/Parquet/Write.h
index 1571cada52d6..7db9118333ba 100644
--- a/src/Processors/Formats/Impl/Parquet/Write.h
+++ b/src/Processors/Formats/Impl/Parquet/Write.h
@@ -76,6 +76,9 @@ struct ColumnChunkIndexes
 {
     parq::ColumnIndex column_index; // if write_page_index
     parq::OffsetIndex offset_index; // if write_page_index
+    /// Set to false when a non-null page has stats dropped (e.g. value exceeded max_statistics_size).
+    /// When false, the column index must not be written because it would contain invalid bounds.
+    bool column_index_valid = true;
     parq::BloomFilterHeader bloom_filter_header;
     PODArray<char> bloom_filter_data; // if write_bloom_filter, and not flushed yet
 };
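
Note on the Write.cpp statistics change above: Parquet column-index bounds are only meaningful as a pair, so the writer now emits min and max together or not at all. The old per-field ifs could emit a real min next to a dropped max, producing an index where min_value > max_value, which strict readers treat as corrupt. The rule in isolation, with illustrative types rather than the parq:: thrift structs:

    #include <optional>
    #include <string>
    #include <string_view>
    #include <utility>

    /// Returns both page bounds, or neither; never just one of them.
    std::optional<std::pair<std::string, std::string>>
    pageBounds(std::string_view min, std::string_view max, size_t max_statistics_size)
    {
        if (min.size() <= max_statistics_size && max.size() <= max_statistics_size)
            return std::make_pair(std::string(min), std::string(max));
        return std::nullopt;
    }

When a page gets no bounds, the new column_index_valid flag suppresses the whole column index while the offset index is still written, which is exactly what the regression test below asserts.
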
diff --git a/src/Processors/tests/gtest_write_parquet_page_index.cpp b/src/Processors/tests/gtest_write_parquet_page_index.cpp
index 789f3beca67a..9674703dcbfd 100644
--- a/src/Processors/tests/gtest_write_parquet_page_index.cpp
+++ b/src/Processors/tests/gtest_write_parquet_page_index.cpp
@@ -367,5 +367,47 @@ TEST(Parquet, WriteParquetPageIndexArrowEncoder)
         /// arrow doesn't write statistics to data page headers
         /*expect_statistics_in_page_headers*/ false);
 }
+
+/// Regression test for https://github.com/ClickHouse/ClickHouse/issues/103039
+/// When a page has a short min and a long max (exceeding max_statistics_size=4096),
+/// the column index must not be written because it would contain invalid bounds
+/// (e.g. min_value="a", max_value="" which violates min <= max).
+TEST(Parquet, WriteParquetPageIndexOversizedStringStats)
+{
+    FormatSettings format_settings;
+    format_settings.parquet.row_group_rows = 10000;
+    format_settings.parquet.use_custom_encoder = true;
+    format_settings.parquet.parallel_encoding = false;
+    format_settings.parquet.write_page_index = true;
+    format_settings.parquet.data_page_size = 32;
+
+    std::vector<std::vector<String>> values;
+    std::vector<String> col;
+    col.push_back("a");
+    col.push_back(String(5000, 'z'));
+    values.push_back(col);
+
+    auto source = multiColumnsSource(
+        {std::make_shared<DataTypeString>()}, values, 1);
+    String path = "/tmp/test_oversized_stats.parquet";
+    writeParquet(source, format_settings, path);
+
+    auto reader = parquet::ParquetFileReader::OpenFile(path);
+    auto metadata = reader->metadata();
+
+    ASSERT_EQ(metadata->num_row_groups(), 1);
+    auto row_group = metadata->RowGroup(0);
+    ASSERT_EQ(row_group->num_columns(), 1);
+
+    auto column_chunk = row_group->ColumnChunk(0);
+    auto column_index_location = column_chunk->GetColumnIndexLocation();
+    auto offset_index_location = column_chunk->GetOffsetIndexLocation();
+
+    ASSERT_FALSE(column_index_location.has_value());
+
+    ASSERT_TRUE(offset_index_location.has_value());
+    ASSERT_GT(offset_index_location.value().offset, 0);
+    ASSERT_GT(offset_index_location.value().length, 0);
+}
 }
 
 #endif
diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp
index 15b555466233..1a786d469330 100644
--- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp
@@ -13,6 +13,7 @@
 #include
 #include
 #include
+#include <Common/FailPoint.h>
 #include
 
 namespace DB
@@ -28,6 +29,12 @@ namespace ErrorCodes
 {
     extern const int LOGICAL_ERROR;
     extern const int INCORRECT_FILE_NAME;
+    extern const int FAULT_INJECTED;
+}
+
+namespace FailPoints
+{
+    extern const char wide_part_writer_fail_in_add_streams[];
 }
 
 namespace
@@ -191,7 +198,12 @@ void MergeTreeDataPartWriterWide::addStreams(
     query_write_settings.use_adaptive_write_buffer = settings.use_adaptive_write_buffer_for_dynamic_subcolumns && ISerialization::isDynamicSubcolumn(substream_path, substream_path.size());
     query_write_settings.adaptive_write_buffer_initial_size = settings.adaptive_write_buffer_initial_size;
 
-    column_streams[stream_name] = std::make_unique<Stream<false>>(
+    fiu_do_on(FailPoints::wide_part_writer_fail_in_add_streams,
+    {
+        throw Exception(ErrorCodes::FAULT_INJECTED, "Injected failure in Wide part writer addStreams");
+    });
+
+    column_streams.emplace(stream_name, std::make_unique<Stream<false>>(
         stream_name,
         data_part_storage,
         stream_name, DATA_FILE_EXTENSION,
@@ -200,7 +212,7 @@ void MergeTreeDataPartWriterWide::addStreams(
         max_compress_block_size,
         marks_compression_codec,
         settings.marks_compress_block_size,
-        query_write_settings);
+        query_write_settings));
 
     if (columns_to_load_marks.contains(name_and_type.name))
         cached_marks.emplace(stream_name, std::make_unique<MarksInCompressedFile::PlainArray>());
@@ -384,7 +396,7 @@ void MergeTreeDataPartWriterWide::writeSingleMark(
 
 void MergeTreeDataPartWriterWide::flushMarkToFile(const StreamNameAndMark & stream_with_mark, size_t rows_in_mark)
 {
-    auto & stream = *column_streams[stream_with_mark.stream_name];
+    auto & stream = *column_streams.at(stream_with_mark.stream_name);
     WriteBuffer & marks_out = stream.compress_marks ? stream.marks_compressed_hashing : stream.marks_hashing;
 
     writeBinaryLittleEndian(stream_with_mark.mark.offset_in_compressed_file, marks_out);
@@ -423,7 +435,7 @@ StreamsWithMarks MergeTreeDataPartWriterWide::getCurrentMarksForColumn(
         if (is_offsets && offset_columns.contains(stream_name))
             return;
 
-        auto & stream = *column_streams[stream_name];
+        auto & stream = *column_streams.at(stream_name);
 
         /// There could already be enough data to compress into the new block.
         if (stream.compressed_hashing.offset() >= min_compress_block_size)
@@ -818,8 +830,9 @@ void MergeTreeDataPartWriterWide::finish(bool sync)
 
 void MergeTreeDataPartWriterWide::cancel() noexcept
 {
-    for (auto & stream : column_streams)
-        stream.second->cancel();
+    for (auto & stream : column_streams)
+        if (stream.second)
+            stream.second->cancel();
 
     column_streams.clear();
     serialization_states.clear();
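
Note on the two hardening patterns above: addStreams now inserts with emplace(), so an exception (here injected via the failpoint) cannot leave a default-constructed entry behind, and the lookup sites switched from operator[] to at(). The distinction matters because std::map::operator[] on a missing key silently inserts a value-initialized, i.e. null, unique_ptr. A minimal illustration with a hypothetical Stream stub:

    #include <map>
    #include <memory>
    #include <string>

    struct Stream { void cancel() {} };
    using ColumnStreams = std::map<std::string, std::unique_ptr<Stream>>;

    void flushOne(ColumnStreams & streams, const std::string & name)
    {
        /// auto & s = *streams[name];  /// missing key: inserts nullptr, then UB
        auto & s = *streams.at(name);   /// missing key: throws std::out_of_range
        s.cancel();
    }

The null check added to cancel() is the final line of defense for entries that are present but empty.
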
diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp
index 064736620152..d85cb2413c66 100644
--- a/src/Storages/System/StorageSystemContributors.generated.cpp
+++ b/src/Storages/System/StorageSystemContributors.generated.cpp
@@ -1089,6 +1089,7 @@ const char * auto_contributors[] {
     "Rafael Acevedo",
     "Rafael David Tinoco",
     "Rafael Roquetto",
+    "Rahul",
     "Rajakavitha Kodhandapani",
     "Rajkumar",
     "Rajkumar Varada",
diff --git a/tests/queries/0_stateless/02735_parquet_encoder.reference b/tests/queries/0_stateless/02735_parquet_encoder.reference
index e44ce99ed46f..e0b2f5655db0 100644
--- a/tests/queries/0_stateless/02735_parquet_encoder.reference
+++ b/tests/queries/0_stateless/02735_parquet_encoder.reference
@@ -43,7 +43,7 @@ datetime Nullable(DateTime64(3, \'UTC\'))
 (1000000,NULL,NULL,'0','-1294970296')
 (1000000,NULL,NULL,'-2147483296','2147481000')
 (100000,900000,NULL,'100009','999999')
-[(2,NULL,NULL,'','[]')]
+[(2,NULL,NULL,NULL,NULL)]
 1 1 0
 1
 5090915589685802007
diff --git a/tests/queries/0_stateless/04076_json_in_min_max_index_bug.reference b/tests/queries/0_stateless/04076_json_in_min_max_index_bug.reference
new file mode 100644
index 000000000000..702c2663b6d3
--- /dev/null
+++ b/tests/queries/0_stateless/04076_json_in_min_max_index_bug.reference
@@ -0,0 +1,51 @@
+3
+Expression (Project names)
+  Sorting (Sorting for ORDER BY)
+    Expression ((Before ORDER BY + Projection))
+      Expression ((WHERE + Change column names to column identifiers))
+        ReadFromMergeTree (default.t_json_minmax_idx)
+        Indexes:
+          PrimaryKey
+            Condition: true
+            Parts: 1/1
+            Granules: 3/3
+          Skip
+            Name: idx_j
+            Description: minmax GRANULARITY 1
+            Parts: 1/1
+            Granules: 1/3
+            Ranges: 1
+2
+Expression (Project names)
+  Sorting (Sorting for ORDER BY)
+    Expression ((Before ORDER BY + Projection))
+      Expression ((WHERE + Change column names to column identifiers))
+        ReadFromMergeTree (default.t_json_minmax_idx)
+        Indexes:
+          PrimaryKey
+            Condition: true
+            Parts: 1/1
+            Granules: 3/3
+          Skip
+            Name: idx_j
+            Description: minmax GRANULARITY 1
+            Parts: 1/1
+            Granules: 1/3
+            Ranges: 1
+1
+Expression (Project names)
+  Sorting (Sorting for ORDER BY)
+    Expression ((Before ORDER BY + Projection))
+      Expression ((WHERE + Change column names to column identifiers))
+        ReadFromMergeTree (default.t_json_minmax_idx)
+        Indexes:
+          PrimaryKey
+            Condition: true
+            Parts: 1/1
+            Granules: 3/3
+          Skip
+            Name: idx_j
+            Description: minmax GRANULARITY 1
+            Parts: 1/1
+            Granules: 1/3
+            Ranges: 1
diff --git a/tests/queries/0_stateless/04076_json_in_min_max_index_bug.sql b/tests/queries/0_stateless/04076_json_in_min_max_index_bug.sql
new file mode 100644
index 000000000000..cb754f3faa1e
--- /dev/null
+++ b/tests/queries/0_stateless/04076_json_in_min_max_index_bug.sql
@@ -0,0 +1,21 @@
+-- Tags: no-parallel-replicas
+-- Tag no-parallel-replicas: output of explain is different
+
+DROP TABLE IF EXISTS t_json_minmax_idx;
+
+CREATE TABLE t_json_minmax_idx (id UInt32, j JSON, INDEX idx_j j TYPE minmax GRANULARITY 1) ENGINE = MergeTree() ORDER BY id SETTINGS index_granularity=1;
+
+INSERT INTO t_json_minmax_idx VALUES (1, '{"a":"1"}'), (2, '{"a":"2"}'), (3, '{"a":"3"}');
+
+SET enable_analyzer = 1;
+SET optimize_move_to_prewhere = 1;
+SET query_plan_optimize_prewhere = 1;
+
+SELECT id FROM t_json_minmax_idx WHERE j > '{"a":"2"}'::JSON ORDER BY id;
+EXPLAIN indexes=1 SELECT id FROM t_json_minmax_idx WHERE j > '{"a":"2"}'::JSON ORDER BY id;
+SELECT id FROM t_json_minmax_idx WHERE j = '{"a":"2"}'::JSON ORDER BY id;
+EXPLAIN indexes=1 SELECT id FROM t_json_minmax_idx WHERE j = '{"a":"2"}'::JSON ORDER BY id;
+SELECT id FROM t_json_minmax_idx WHERE j < '{"a":"2"}'::JSON ORDER BY id;
+EXPLAIN indexes=1 SELECT id FROM t_json_minmax_idx WHERE j < '{"a":"2"}'::JSON ORDER BY id;
+
+DROP TABLE IF EXISTS t_json_minmax_idx;
diff --git a/tests/queries/0_stateless/04077_wide_part_writer_cancel_on_exception.reference b/tests/queries/0_stateless/04077_wide_part_writer_cancel_on_exception.reference
new file mode 100644
index 000000000000..08839f6bb296
--- /dev/null
+++ b/tests/queries/0_stateless/04077_wide_part_writer_cancel_on_exception.reference
@@ -0,0 +1 @@
+200
diff --git a/tests/queries/0_stateless/04077_wide_part_writer_cancel_on_exception.sql b/tests/queries/0_stateless/04077_wide_part_writer_cancel_on_exception.sql
new file mode 100644
index 000000000000..88a001fc33f0
--- /dev/null
+++ b/tests/queries/0_stateless/04077_wide_part_writer_cancel_on_exception.sql
@@ -0,0 +1,28 @@
+-- Tags: no-parallel, no-random-merge-tree-settings
+-- Regression test: MergeTreeDataPartWriterWide::cancel must not SIGSEGV
+-- when addStreams fails mid-way, leaving null entries in column_streams.
+
+DROP TABLE IF EXISTS t_wide_cancel;
+
+CREATE TABLE t_wide_cancel (a UInt64, b String, c Float64)
+ENGINE = MergeTree ORDER BY a
+SETTINGS min_bytes_for_wide_part = 0, min_rows_for_wide_part = 0;
+
+-- Prevent background merges from racing with the failpoint.
+SYSTEM STOP MERGES t_wide_cancel;
+
+INSERT INTO t_wide_cancel SELECT number, toString(number), number FROM numbers(100);
+INSERT INTO t_wide_cancel SELECT number, toString(number), number FROM numbers(100, 100);
+
+-- Force the Wide writer's addStreams to throw during OPTIMIZE (merge).
+SYSTEM ENABLE FAILPOINT wide_part_writer_fail_in_add_streams;
+SYSTEM START MERGES t_wide_cancel;
+
+OPTIMIZE TABLE t_wide_cancel FINAL; -- {serverError FAULT_INJECTED}
+
+SYSTEM DISABLE FAILPOINT wide_part_writer_fail_in_add_streams;
+
+-- The server must still be alive and the table readable.
+SELECT count() FROM t_wide_cancel;
+
+DROP TABLE t_wide_cancel;
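
Note for readers new to the failpoint machinery the 04077 test drives: the name is registered once in FailPoint.cpp (the REGULAR(...) hunk earlier in this patch), fired at the injection site with fiu_do_on, and toggled from SQL with SYSTEM ENABLE/DISABLE FAILPOINT. A toy model of the control flow, deliberately not using the real libfiu API and with illustrative names:

    #include <stdexcept>
    #include <string>
    #include <unordered_map>

    /// SYSTEM ENABLE FAILPOINT <name> conceptually flips a named flag.
    std::unordered_map<std::string, bool> enabled_failpoints;

    /// Stand-in for fiu_do_on: throws exactly when the test has enabled the
    /// failpoint, turning a hard-to-reproduce mid-write failure into a
    /// deterministic one.
    void failPointDoOn(const std::string & name)
    {
        auto it = enabled_failpoints.find(name);
        if (it != enabled_failpoints.end() && it->second)
            throw std::runtime_error("Injected failure: " + name);
    }
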
diff --git a/tests/queries/0_stateless/04093_best_effort_datetime_timezone_boundary_range_check.reference b/tests/queries/0_stateless/04093_best_effort_datetime_timezone_boundary_range_check.reference
new file mode 100644
index 000000000000..2634ecb23695
--- /dev/null
+++ b/tests/queries/0_stateless/04093_best_effort_datetime_timezone_boundary_range_check.reference
@@ -0,0 +1,4 @@
+2106-02-07 07:28:15.000000000 Nullable(DateTime64(9))
+1969-12-31 23:00:00.000000000 Nullable(DateTime64(9))
+2106-02-07 05:28:15 Nullable(DateTime)
+1970-01-01 00:00:00 Nullable(DateTime)
diff --git a/tests/queries/0_stateless/04093_best_effort_datetime_timezone_boundary_range_check.sql b/tests/queries/0_stateless/04093_best_effort_datetime_timezone_boundary_range_check.sql
new file mode 100644
index 000000000000..ec4567169f5b
--- /dev/null
+++ b/tests/queries/0_stateless/04093_best_effort_datetime_timezone_boundary_range_check.sql
@@ -0,0 +1,20 @@
+-- Tags: no-fasttest
+-- Test for missing range check after adjust_time_zone in parseDateTimeBestEffortImpl
+-- https://github.com/ClickHouse/ClickHouse/issues/102601
+
+SET session_timezone = 'UTC';
+SET date_time_input_format = 'best_effort';
+
+-- Upper boundary: 2106-02-07 06:28:15 UTC is exactly UINT32_MAX.
+-- With -01:00 offset, UTC time is 2106-02-07 07:28:15, which exceeds UINT32_MAX.
+-- Should be inferred as DateTime64, not DateTime with wrap-around.
+SELECT d, toTypeName(d) FROM format(JSONEachRow, '{"d" : "2106-02-07 06:28:15-01:00"}');
+
+-- Lower boundary: 1970-01-01 00:00:00 UTC is timestamp 0.
+-- With +01:00 offset, UTC time is 1969-12-31 23:00:00, which is negative.
+-- Should be inferred as DateTime64, not DateTime with clamped value.
+SELECT d, toTypeName(d) FROM format(JSONEachRow, '{"d" : "1970-01-01 00:00:00+01:00"}');
+
+-- Control: values that SHOULD remain DateTime (timezone adjustment stays within range)
+SELECT d, toTypeName(d) FROM format(JSONEachRow, '{"d" : "2106-02-07 06:28:15+01:00"}');
+SELECT d, toTypeName(d) FROM format(JSONEachRow, '{"d" : "1970-01-01 01:00:00+01:00"}');
diff --git a/tests/queries/0_stateless/04094_flattened_dynamic_native_encode_types_binary.reference b/tests/queries/0_stateless/04094_flattened_dynamic_native_encode_types_binary.reference
new file mode 100644
index 000000000000..664388937d73
--- /dev/null
+++ b/tests/queries/0_stateless/04094_flattened_dynamic_native_encode_types_binary.reference
@@ -0,0 +1,3 @@
+42 Int64
+hello String
+2020-01-01 Date
diff --git a/tests/queries/0_stateless/04094_flattened_dynamic_native_encode_types_binary.sh b/tests/queries/0_stateless/04094_flattened_dynamic_native_encode_types_binary.sh
new file mode 100755
index 000000000000..fa66b56af53b
--- /dev/null
+++ b/tests/queries/0_stateless/04094_flattened_dynamic_native_encode_types_binary.sh
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+# Test: flattened Dynamic serialization with encode_types_in_binary_format=1.
+# encodeDataType at SerializationDynamic.cpp must use the two-arg overload
+# that writes to the stream, not the one-arg overload that returns a String.
+
+$CLICKHOUSE_LOCAL -m -q "
+    CREATE TABLE test (d Dynamic(max_types=2)) ENGINE=Memory;
+    INSERT INTO test VALUES (42::Int64), ('hello'), ('2020-01-01'::Date);
+    SELECT * FROM test FORMAT Native SETTINGS
+        output_format_native_use_flattened_dynamic_and_json_serialization=1,
+        output_format_native_encode_types_in_binary_format=1;
+" | $CLICKHOUSE_LOCAL --table test --input-format Native --input_format_native_decode_types_in_binary_format=1 -q "SELECT d, dynamicType(d) FROM test"
diff --git a/tests/queries/0_stateless/04098_row_policy_disjunction_optimization.reference b/tests/queries/0_stateless/04098_row_policy_disjunction_optimization.reference
new file mode 100644
index 000000000000..30678f7b7969
--- /dev/null
+++ b/tests/queries/0_stateless/04098_row_policy_disjunction_optimization.reference
@@ -0,0 +1,8 @@
+old analyzer
+Filter column: in(id, (1, 3, 5)) (removed)
+new analyzer
+Filter column: in(id, __set_UInt64_) (removed)
+result
+1
+3
+5
diff --git a/tests/queries/0_stateless/04098_row_policy_disjunction_optimization.sql b/tests/queries/0_stateless/04098_row_policy_disjunction_optimization.sql
new file mode 100644
index 000000000000..2e8f5a9b76bc
--- /dev/null
+++ b/tests/queries/0_stateless/04098_row_policy_disjunction_optimization.sql
@@ -0,0 +1,36 @@
+-- Tags: no-parallel, no-parallel-replicas
+
+DROP TABLE IF EXISTS t_row_policy_or;
+CREATE TABLE t_row_policy_or (id UInt64, value String) ENGINE = MergeTree ORDER BY id;
+INSERT INTO t_row_policy_or SELECT number, toString(number) FROM numbers(10);
+
+DROP ROW POLICY IF EXISTS 04098_p1 ON t_row_policy_or;
+DROP ROW POLICY IF EXISTS 04098_p2 ON t_row_policy_or;
+DROP ROW POLICY IF EXISTS 04098_p3 ON t_row_policy_or;
+
+CREATE ROW POLICY 04098_p1 ON t_row_policy_or USING id = 1 AS permissive TO ALL;
+CREATE ROW POLICY 04098_p2 ON t_row_policy_or USING id = 3 AS permissive TO ALL;
+CREATE ROW POLICY 04098_p3 ON t_row_policy_or USING id = 5 AS permissive TO ALL;
+
+-- With the old analyzer the OR chain is converted to IN by LogicalExpressionsOptimizer.
+-- With the new analyzer the same should happen via LogicalExpressionOptimizerPass
+-- applied to the row policy filter in buildFilterInfo.
+
+SET enable_analyzer = 0;
+SELECT 'old analyzer';
+SELECT trim(BOTH ' ' FROM explain) FROM (EXPLAIN actions = 1 SELECT id FROM t_row_policy_or SETTINGS optimize_move_to_prewhere = 0)
+WHERE explain LIKE '%Filter column: in(%';
+
+SET enable_analyzer = 1;
+SELECT 'new analyzer';
+SELECT trim(BOTH ' ' FROM replaceRegexpOne(explain, '__set_UInt64_\\d+_\\d+', '__set_UInt64_'))
+FROM (EXPLAIN actions = 1 SELECT id FROM t_row_policy_or SETTINGS optimize_move_to_prewhere = 0)
+WHERE explain LIKE '%Filter column: in(%';
+
+SELECT 'result';
+SELECT id FROM t_row_policy_or ORDER BY id;
+
+DROP ROW POLICY 04098_p1 ON t_row_policy_or;
+DROP ROW POLICY 04098_p2 ON t_row_policy_or;
+DROP ROW POLICY 04098_p3 ON t_row_policy_or;
+DROP TABLE t_row_policy_or;
diff --git a/tests/queries/0_stateless/04108_dynamic_flattened_malformed_index.reference b/tests/queries/0_stateless/04108_dynamic_flattened_malformed_index.reference
new file mode 100644
index 000000000000..336d78135719
--- /dev/null
+++ b/tests/queries/0_stateless/04108_dynamic_flattened_malformed_index.reference
@@ -0,0 +1 @@
+Incorrect index 10 in indexes column of flattened Dynamic column at row 2: the index should be in range [0, 2] (there are 2 types, index 2 is reserved for NULL values)
diff --git a/tests/queries/0_stateless/04108_dynamic_flattened_malformed_index.sh b/tests/queries/0_stateless/04108_dynamic_flattened_malformed_index.sh
new file mode 100755
index 000000000000..027009815edb
--- /dev/null
+++ b/tests/queries/0_stateless/04108_dynamic_flattened_malformed_index.sh
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+# Tags: no-fasttest
+
+CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CUR_DIR"/../shell_config.sh
+
+# Test that reading a Native file with a corrupted index in flattened Dynamic
+# serialization produces an informative error message instead of a crash.
+#
+# The file data_native/dynamic_flattened_bad_index.native is a pregenerated
+# Native file with flattened Dynamic serialization for Dynamic(max_types=2)
+# containing 3 rows: (42::Int64, 'hello', NULL). The NULL index byte (value 2)
+# was changed to 10, which exceeds the valid range [0, 2].
+
+$CLICKHOUSE_LOCAL --table test --input-format Native -q "SELECT * FROM test" < "${CUR_DIR}/data_native/dynamic_flattened_bad_index.native" 2>&1 | grep -o 'Incorrect index.*reserved for NULL values)'
diff --git a/tests/queries/0_stateless/data_native/dynamic_flattened_bad_index.native b/tests/queries/0_stateless/data_native/dynamic_flattened_bad_index.native
new file mode 100644
index 000000000000..787e73ad1e27
Binary files /dev/null and b/tests/queries/0_stateless/data_native/dynamic_flattened_bad_index.native differ