From 7cab9b6be2d1e038d7cf9ec5cf64223b0e38f0e0 Mon Sep 17 00:00:00 2001 From: Jefffrey Date: Mon, 29 Jun 2026 11:38:43 +0900 Subject: [PATCH] fix: read `Dict(FixedSizeBinary)` from parquet without dict encoding --- parquet/src/arrow/arrow_writer/mod.rs | 18 ++++++------------ parquet/src/arrow/buffer/dictionary_buffer.rs | 12 +++++++++++- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/parquet/src/arrow/arrow_writer/mod.rs b/parquet/src/arrow/arrow_writer/mod.rs index 063d5abcf1c4..271afedd08a5 100644 --- a/parquet/src/arrow/arrow_writer/mod.rs +++ b/parquet/src/arrow/arrow_writer/mod.rs @@ -2933,16 +2933,6 @@ mod tests { K::Native: FromPrimitive + ToPrimitive + TryFrom, <::Native as TryFrom>::Error: std::fmt::Debug, { - let field = Field::new( - "a", - DataType::Dictionary( - Box::new(K::DATA_TYPE), - Box::new(DataType::FixedSizeBinary(4)), - ), - false, - ); - let schema = Schema::new(vec![field]); - let keys: Vec = vec![ K::Native::try_from(0u8).unwrap(), K::Native::try_from(0u8).unwrap(), @@ -2954,8 +2944,12 @@ mod tests { ) .unwrap(); - let data = DictionaryArray::::new(keys, Arc::new(values)); - let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(data)]).unwrap(); + let data = Arc::new(DictionaryArray::::new(keys, Arc::new(values))) as ArrayRef; + one_column_roundtrip(Arc::clone(&data), true); + + let field = Field::new("a", data.data_type().clone(), false); + let schema = Schema::new(vec![field]); + let batch = RecordBatch::try_new(Arc::new(schema), vec![data]).unwrap(); roundtrip(batch, None); } diff --git a/parquet/src/arrow/buffer/dictionary_buffer.rs b/parquet/src/arrow/buffer/dictionary_buffer.rs index abf76530296c..ba9390746bed 100644 --- a/parquet/src/arrow/buffer/dictionary_buffer.rs +++ b/parquet/src/arrow/buffer/dictionary_buffer.rs @@ -185,8 +185,18 @@ impl DictionaryBuffer { ArrowType::Dictionary(k, v) => (k, v.as_ref().clone()), _ => unreachable!(), }; + let array = if let ArrowType::FixedSizeBinary(size) = value_type { + let array = values.into_array(null_buffer, ArrowType::Binary); + let array = array.as_binary::(); + Arc::new(FixedSizeBinaryArray::new( + size, + array.values().clone(), + array.nulls().cloned(), + )) as _ + } else { + values.into_array(null_buffer, value_type) + }; - let array = values.into_array(null_buffer, value_type); pack_values(key_type, &array) } }