diff --git a/parquet/src/arrow/arrow_writer/mod.rs b/parquet/src/arrow/arrow_writer/mod.rs index e985c183ed20..95d433a4619c 100644 --- a/parquet/src/arrow/arrow_writer/mod.rs +++ b/parquet/src/arrow/arrow_writer/mod.rs @@ -3862,6 +3862,23 @@ mod tests { one_column_roundtrip(Arc::new(list), true); } + #[test] + fn list_utf8_view_selective_padding_roundtrip() { + let item = Arc::new(Field::new_list_field(DataType::Utf8View, true)); + let mut builder = ListBuilder::new(StringViewBuilder::new()).with_field(item); + builder.values().append_value("a"); + builder.values().append_null(); + builder.append(true); + // The null parent list covers selective padding dropping values below + // the list definition level while preserving the preceding item null. + builder.append(false); + // The long string covers the non-inlined Utf8View buffer path. + builder.values().append_value("large payload over 12 bytes"); + builder.append(true); + + one_column_roundtrip(Arc::new(builder.finish()), true); + } + #[test] fn struct_single_column() { let a_values = Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); diff --git a/parquet/src/arrow/record_reader/definition_levels.rs b/parquet/src/arrow/record_reader/definition_levels.rs index 0720c6cdbe0d..a104c93a2c13 100644 --- a/parquet/src/arrow/record_reader/definition_levels.rs +++ b/parquet/src/arrow/record_reader/definition_levels.rs @@ -504,6 +504,26 @@ mod tests { use crate::encodings::rle::RleEncoder; use rand::{Rng, rng}; + #[test] + fn test_build_validity_bitmap_unfiltered_word_chunk() { + // 65 levels forces the unfiltered path to process one full u64 word + // with append_word, plus a remainder bit. 
+ let def_levels = (0..65) + .map(|i| if i % 3 == 0 { 2 } else { 1 }) + .collect::<Vec<_>>(); + let mut bitmap = BooleanBufferBuilder::new(0); + + assert_eq!( + build_filtered_validity_bitmap(&def_levels, None, None, 2, &mut bitmap), + def_levels.len() + ); + + let bitmap = bitmap.finish(); + for (idx, def) in def_levels.iter().enumerate() { + assert_eq!(bitmap.value(idx), *def >= 2); + } + } + #[test] fn test_packed_decoder() { let mut rng = rng();