From 803fcbb9d647878431f8d0bbcb23591e281e68fb Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 1 Jul 2026 18:00:30 -0400 Subject: [PATCH] test(parquet): cover selective list padding --- parquet/src/arrow/arrow_reader/read_plan.rs | 4 ++-- parquet/src/arrow/arrow_writer/mod.rs | 17 ++++++++++++++++ .../arrow/record_reader/definition_levels.rs | 20 +++++++++++++++++++ 3 files changed, 39 insertions(+), 2 deletions(-) diff --git a/parquet/src/arrow/arrow_reader/read_plan.rs b/parquet/src/arrow/arrow_reader/read_plan.rs index 7ca54f961e2a..3d80526c8282 100644 --- a/parquet/src/arrow/arrow_reader/read_plan.rs +++ b/parquet/src/arrow/arrow_reader/read_plan.rs @@ -541,13 +541,13 @@ mod tests { let data: Vec<i32> = (0..TOTAL_ROWS as i32).collect(); let levels = vec![0; TOTAL_ROWS]; - let leaf = make_int32_page_reader(&data, &levels, &levels, 0, 0); + let leaf = make_int32_page_reader(&data, &levels, &levels, 0, 0, None); let struct_type = ArrowType::Struct(Fields::from(vec![Field::new( "c0", ArrowType::Int32, false, )])); - let struct_reader = StructArrayReader::new(struct_type, vec![leaf], 0, 0, false); + let struct_reader = StructArrayReader::new(struct_type, vec![leaf], 0, 0, false, None); let mut predicate = ArrowPredicateFn::new(ProjectionMask::all(), |batch| { Ok((0..batch.num_rows()) diff --git a/parquet/src/arrow/arrow_writer/mod.rs b/parquet/src/arrow/arrow_writer/mod.rs index 063d5abcf1c4..d558886ba5fa 100644 --- a/parquet/src/arrow/arrow_writer/mod.rs +++ b/parquet/src/arrow/arrow_writer/mod.rs @@ -3860,6 +3860,23 @@ mod tests { one_column_roundtrip(Arc::new(list), true); } + #[test] + fn list_utf8_view_selective_padding_roundtrip() { + let item = Arc::new(Field::new_list_field(DataType::Utf8View, true)); + let mut builder = ListBuilder::new(StringViewBuilder::new()).with_field(item); + builder.values().append_value("a"); + builder.values().append_null(); + builder.append(true); + // The null parent list covers selective padding dropping values below + // the 
list definition level while preserving the preceding item null. + builder.append(false); + // The long string covers the non-inlined Utf8View buffer path. + builder.values().append_value("large payload over 12 bytes"); + builder.append(true); + + one_column_roundtrip(Arc::new(builder.finish()), true); + } + #[test] fn struct_single_column() { let a_values = Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); diff --git a/parquet/src/arrow/record_reader/definition_levels.rs b/parquet/src/arrow/record_reader/definition_levels.rs index 0720c6cdbe0d..a104c93a2c13 100644 --- a/parquet/src/arrow/record_reader/definition_levels.rs +++ b/parquet/src/arrow/record_reader/definition_levels.rs @@ -504,6 +504,26 @@ mod tests { use crate::encodings::rle::RleEncoder; use rand::{Rng, rng}; + #[test] + fn test_build_validity_bitmap_unfiltered_word_chunk() { + // 65 levels forces the unfiltered path to process one full u64 word + // with append_word, plus a remainder bit. + let def_levels = (0..65) + .map(|i| if i % 3 == 0 { 2 } else { 1 }) + .collect::<Vec<_>>(); + let mut bitmap = BooleanBufferBuilder::new(0); + + assert_eq!( + build_filtered_validity_bitmap(&def_levels, None, None, 2, &mut bitmap), + def_levels.len() + ); + + let bitmap = bitmap.finish(); + for (idx, def) in def_levels.iter().enumerate() { + assert_eq!(bitmap.value(idx), *def >= 2); + } + } + #[test] fn test_packed_decoder() { let mut rng = rng();