From 2ba8294f8b2d4aa251a42f743db18ee8b5560ff4 Mon Sep 17 00:00:00 2001 From: Raz Luvaton <16746759+rluvaton@users.noreply.github.com> Date: Thu, 2 Jul 2026 20:05:22 +0300 Subject: [PATCH 1/8] fix(arrow-array): disallow creating `MapArray` with nullable key field --- arrow-array/src/array/map_array.rs | 32 ++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/arrow-array/src/array/map_array.rs b/arrow-array/src/array/map_array.rs index 0f7c43543046..c3b809083ecd 100644 --- a/arrow-array/src/array/map_array.rs +++ b/arrow-array/src/array/map_array.rs @@ -61,6 +61,7 @@ impl MapArray { /// * `entries.null_count() != 0` /// * `entries.columns().len() != 2` /// * `field.data_type() != entries.data_type()` + /// * the keys field is nullable pub fn try_new( field: FieldRef, offsets: OffsetBuffer, @@ -109,6 +110,16 @@ impl MapArray { ))); } + // The Arrow spec requires the "key" field to be non-nullable + // + if entries.fields()[0].is_nullable() { + return Err(ArrowError::InvalidArgumentError( + "MapArray keys field cannot be nullable".to_string(), + )); + } + // No need to verify if key contain nulls since `StructArray` + // already disallow nulls for non nullable fields + Ok(Self { data_type: DataType::Map(field, ordered), nulls, @@ -931,6 +942,27 @@ mod tests { ); } + #[test] + fn test_try_new_nullable_keys_field() { + // https://github.com/apache/arrow-rs/issues/10268 + let keys = Int32Array::from(vec![Some(1), None]); + let values = Int32Array::from(vec![None, Some(2)]); + let fields = Fields::from(vec![ + Field::new("keys", DataType::Int32, true), + Field::new("values", DataType::Int32, true), + ]); + let entries = + StructArray::new(fields.clone(), vec![Arc::new(keys), Arc::new(values)], None); + let field = Arc::new(Field::new("entries", DataType::Struct(fields), false)); + + let err = MapArray::try_new(field, OffsetBuffer::from_lengths([2]), entries, None, false) + .unwrap_err(); + assert_eq!( + err.to_string(), + "Invalid argument error: MapArray keys field cannot be nullable" + ); + } + #[test] fn test_from_vec_of_maps() { for ordered in [true, false] { From cc9ab6a4f0be38ce3ee196c7a40a342a084344c7 Mon Sep 17 00:00:00 2001 From: Raz Luvaton <16746759+rluvaton@users.noreply.github.com> Date: Sat, 4 Jul 2026 22:15:21 +0300 Subject: [PATCH 2/8] fix in array data as well + more validations --- arrow-data/src/data.rs | 413 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 412 insertions(+), 1 deletion(-) diff --git a/arrow-data/src/data.rs b/arrow-data/src/data.rs index 0ce98aa090df..d2e115b88d29 100644 --- a/arrow-data/src/data.rs +++ b/arrow-data/src/data.rs @@ -975,6 +975,14 @@ impl ArrayData { ))); } } + DataType::Map(f, _) => { + if f.is_nullable() { + return Err(ArrowError::InvalidArgumentError( + "The nullable should be set to false for the map entries field." + .to_string(), + )); + } + } _ => {} }; @@ -1124,7 +1132,7 @@ impl ArrayData { /// Validates the layout of `child_data` ArrayData structures fn validate_child_data(&self) -> Result<(), ArrowError> { match &self.data_type { - DataType::List(field) | DataType::Map(field, _) => { + DataType::List(field) => { let values_data = self.get_single_valid_child_data(field.data_type())?; self.validate_offsets::(values_data.len)?; Ok(()) @@ -1134,6 +1142,30 @@ impl ArrayData { self.validate_offsets::(values_data.len)?; Ok(()) } + DataType::Map(field, _) => { + let DataType::Struct(entries_fields) = field.data_type() else { + return Err(ArrowError::InvalidArgumentError(format!( + "Map field should be a entries struct data type, got {:?} instead", + field.data_type() + ))); + }; + if entries_fields.len() != 2 { + return Err(ArrowError::InvalidArgumentError(format!( + "Map entries data type should be a struct containing 2 fields, got {} fields", + entries_fields.len() + ))); + } + + // Key field + if entries_fields[0].is_nullable() { + return Err(ArrowError::InvalidArgumentError( + "Map key field must not be nullable".to_string(), + )); + } + let values_data = self.get_single_valid_child_data(field.data_type())?; + self.validate_offsets::(values_data.len)?; + Ok(()) + } DataType::ListView(field) => { let values_data = self.get_single_valid_child_data(field.data_type())?; self.validate_offsets_and_sizes::(values_data.len)?; @@ -2287,6 +2319,7 @@ pub(crate) fn get_fixed_size_binary_width(data_type: &DataType) -> usize { #[cfg(test)] mod tests { use super::*; + use arrow_buffer::{OffsetBuffer, ScalarBuffer}; use arrow_schema::{Field, Fields}; // See arrow/tests/array_data_validation.rs for test of array validation @@ -2727,4 +2760,382 @@ mod tests { assert!(array.is_null(i)); } } + + #[test] + fn should_fail_validation_when_having_map_field_type_is_not_struct() { + let map_field = Field::new("key", DataType::Int32, false); + + let map_field_data = { + let builder = ArrayDataBuilder::new(DataType::Int32) + .len(2) + .nulls(None) + .buffers(vec![ScalarBuffer::::from(vec![1, 2]).into_inner()]); + + builder.build().unwrap() + }; + let results = test_both_builder_and_array_data( + DataType::Map(map_field.into(), false), + 1, + None, + 0, + vec![ + OffsetBuffer::::from_lengths(vec![2]) + .into_inner() + .into(), + ], + vec![map_field_data], + ); + + for result in results { + let array_data_err = result.expect_err("should fail for non struct field"); + + match array_data_err { + ArrowError::InvalidArgumentError(msg) => { + assert_eq!( + msg, + "Map field should be a entries struct data type, got Int32 instead" + ) + } + _ => panic!("unexpected error type {array_data_err}"), + }; + } + } + + #[test] + fn should_fail_validation_when_having_map_entries_only_have_1_field() { + let struct_data_type = DataType::Struct(Fields::from(vec![Field::new( + "key", + DataType::Int32, + false, + )])); + + let key_array_data = { + let builder = ArrayDataBuilder::new(DataType::Int32) + .len(2) + .nulls(None) + .buffers(vec![ScalarBuffer::::from(vec![1, 2]).into_inner()]); + + builder.build().unwrap() + }; + + let struct_data = { + let builder = ArrayDataBuilder::new(struct_data_type.clone()) + .len(2) + .nulls(None) + .child_data(vec![key_array_data]); + + builder.build().unwrap() + }; + + let results = test_both_builder_and_array_data( + DataType::Map(Field::new("entries", struct_data_type, false).into(), false), + 1, + None, + 0, + vec![ + OffsetBuffer::::from_lengths(vec![2]) + .into_inner() + .into(), + ], + vec![struct_data], + ); + + for result in results { + let array_data_err = result.expect_err("should fail for nullable key"); + + match array_data_err { + ArrowError::InvalidArgumentError(msg) => { + assert_eq!( + msg, + "Map entries data type should be a struct containing 2 fields, got 1 fields" + ) + } + _ => panic!("unexpected error type {array_data_err}"), + }; + } + } + + #[test] + fn should_fail_validation_when_having_map_entries_have_3_fields() { + let struct_data_type = DataType::Struct(Fields::from(vec![ + Field::new("key", DataType::Int32, false), + Field::new("values", DataType::Utf8, true), + Field::new("other", DataType::Int64, true), + ])); + + let key_array_data = { + let builder = ArrayDataBuilder::new(DataType::Int32) + .len(2) + .nulls(None) + .buffers(vec![ScalarBuffer::::from(vec![1, 2]).into_inner()]); + + builder.build().unwrap() + }; + + let values_array_data = { + let offsets = OffsetBuffer::::from_lengths(vec![0, 0]) + .into_inner() + .into_inner(); + let empty_bytes = Buffer::default(); + + let builder = ArrayDataBuilder::new(DataType::Utf8) + .len(2) + .buffers(vec![offsets, empty_bytes]) + .nulls(None); + + builder.build().unwrap() + }; + + let other_array_data = { + let builder = ArrayDataBuilder::new(DataType::Int64) + .len(2) + .nulls(None) + .buffers(vec![ScalarBuffer::::from(vec![1, 2]).into_inner()]); + + builder.build().unwrap() + }; + + let struct_data = { + let builder = ArrayDataBuilder::new(struct_data_type.clone()) + .len(2) + .nulls(None) + .child_data(vec![key_array_data, values_array_data, other_array_data]); + + builder.build().unwrap() + }; + + let results = test_both_builder_and_array_data( + DataType::Map(Field::new("entries", struct_data_type, false).into(), false), + 1, + None, + 0, + vec![ + OffsetBuffer::::from_lengths(vec![2]) + .into_inner() + .into(), + ], + vec![struct_data], + ); + + for result in results { + let array_data_err = result.expect_err("should fail for nullable key"); + + match array_data_err { + ArrowError::InvalidArgumentError(msg) => { + assert_eq!( + msg, + "Map entries data type should be a struct containing 2 fields, got 3 fields" + ) + } + _ => panic!("unexpected error type {array_data_err}"), + }; + } + } + + #[test] + fn should_fail_validation_when_having_nullable_map_keys() { + let struct_data_type = DataType::Struct(Fields::from(vec![ + Field::new("key", DataType::Int32, true), + Field::new("values", DataType::Utf8, true), + ])); + + let key_array_data = { + let builder = ArrayDataBuilder::new(DataType::Int32) + .len(2) + .nulls(None) + .buffers(vec![ScalarBuffer::::from(vec![1, 2]).into_inner()]); + + builder.build().unwrap() + }; + + let values_array_data = { + let offsets = OffsetBuffer::::from_lengths(vec![0, 0]) + .into_inner() + .into_inner(); + let empty_bytes = Buffer::default(); + + let builder = ArrayDataBuilder::new(DataType::Utf8) + .len(2) + .buffers(vec![offsets, empty_bytes]) + .nulls(None); + + builder.build().unwrap() + }; + + let struct_data = { + let builder = ArrayDataBuilder::new(struct_data_type.clone()) + .len(2) + .nulls(None) + .child_data(vec![key_array_data, values_array_data]); + + builder.build().unwrap() + }; + + let results = test_both_builder_and_array_data( + DataType::Map(Field::new("entries", struct_data_type, false).into(), false), + 1, + None, + 0, + vec![ + OffsetBuffer::::from_lengths(vec![2]) + .into_inner() + .into(), + ], + vec![struct_data], + ); + + for result in results { + let array_data_err = result.expect_err("should fail for nullable key"); + + match array_data_err { + ArrowError::InvalidArgumentError(msg) => { + assert_eq!(msg, "Map key field must not be nullable") + } + _ => panic!("unexpected error type {array_data_err}"), + }; + } + } + + #[test] + fn should_fail_validation_when_having_entries_is_nullable_for_map() { + let struct_data_type = DataType::Struct(Fields::from(vec![ + Field::new("key", DataType::Int32, false), + Field::new("values", DataType::Utf8, true), + ])); + + let key_array_data = { + let builder = ArrayDataBuilder::new(DataType::Int32) + .len(2) + .nulls(None) + .buffers(vec![ScalarBuffer::::from(vec![1, 2]).into_inner()]); + + builder.build().unwrap() + }; + + let values_array_data = { + let offsets = OffsetBuffer::::from_lengths(vec![0, 0]) + .into_inner() + .into_inner(); + let empty_bytes = Buffer::default(); + + let builder = ArrayDataBuilder::new(DataType::Utf8) + .len(2) + .buffers(vec![offsets, empty_bytes]) + .nulls(None); + + builder.build().unwrap() + }; + + let struct_data = { + let builder = ArrayDataBuilder::new(struct_data_type.clone()) + .len(2) + .nulls(None) + .child_data(vec![key_array_data, values_array_data]); + + builder.build().unwrap() + }; + + let results = test_both_builder_and_array_data( + DataType::Map(Field::new("entries", struct_data_type, true).into(), false), + 1, + None, + 0, + vec![ + OffsetBuffer::::from_lengths(vec![2]) + .into_inner() + .into(), + ], + vec![struct_data], + ); + + for result in results { + let array_data_err = result.expect_err("should fail for nullable entries"); + + match array_data_err { + ArrowError::InvalidArgumentError(msg) => assert_eq!( + msg, + "The nullable should be set to false for the map entries field." + ), + _ => panic!("unexpected error type {array_data_err}"), + }; + } + } + #[test] + fn should_allow_to_create_map_from_data() { + let struct_data_type = DataType::Struct(Fields::from(vec![ + Field::new("key", DataType::Int32, false), + Field::new("values", DataType::Utf8, true), + ])); + + let key_array_data = { + let builder = ArrayDataBuilder::new(DataType::Int32) + .len(2) + .nulls(None) + .buffers(vec![ScalarBuffer::::from(vec![1, 2]).into_inner()]); + + builder.build().unwrap() + }; + + let values_array_data = { + let offsets = OffsetBuffer::::from_lengths(vec![0, 0]) + .into_inner() + .into_inner(); + let empty_bytes = Buffer::default(); + + let builder = ArrayDataBuilder::new(DataType::Utf8) + .len(2) + .buffers(vec![offsets, empty_bytes]) + .nulls(None); + + builder.build().unwrap() + }; + + let struct_data = { + let builder = ArrayDataBuilder::new(struct_data_type.clone()) + .len(2) + .nulls(None) + .child_data(vec![key_array_data, values_array_data]); + + builder.build().unwrap() + }; + + let results = test_both_builder_and_array_data( + DataType::Map(Field::new("entries", struct_data_type, false).into(), false), + 1, + None, + 0, + vec![ + OffsetBuffer::::from_lengths(vec![2]) + .into_inner() + .into(), + ], + vec![struct_data], + ); + + for result in results { + result.expect("should be able to create map ArrayData"); + } + } + + fn test_both_builder_and_array_data( + data_type: DataType, + len: usize, + null_bit_buffer: Option, + offset: usize, + buffers: Vec, + child_data: Vec, + ) -> [Result; 2] { + let from_builder_res = ArrayData::builder(data_type.clone()) + .len(len) + .add_buffers(buffers.clone()) + .null_bit_buffer(null_bit_buffer.clone()) + .offset(offset) + .child_data(child_data.clone()) + .build(); + + let from_try_new_res = + ArrayData::try_new(data_type, len, null_bit_buffer, offset, buffers, child_data); + + [from_builder_res, from_try_new_res] + } } From 83a1ce4ff81b876941756bf5519550c0222c52d4 Mon Sep 17 00:00:00 2001 From: Raz Luvaton <16746759+rluvaton@users.noreply.github.com> Date: Sat, 4 Jul 2026 22:17:53 +0300 Subject: [PATCH 3/8] add 2 more tests --- arrow-data/src/data.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/arrow-data/src/data.rs b/arrow-data/src/data.rs index d2e115b88d29..2a7e17637741 100644 --- a/arrow-data/src/data.rs +++ b/arrow-data/src/data.rs @@ -3117,6 +3117,25 @@ mod tests { } } + #[test] + fn empty_and_null_map_array_should_pass_validation() { + let dt = DataType::Map( + Field::new( + "entries", + DataType::Struct(Fields::from(vec![ + Field::new("key", DataType::Int32, false), + Field::new("values", DataType::Utf8, true), + ])), + false, + ) + .into(), + false, + ); + + ArrayData::new_empty(&dt).validate_full().unwrap(); + ArrayData::new_null(&dt, 1).validate_full().unwrap(); + } + fn test_both_builder_and_array_data( data_type: DataType, len: usize, From 112bfd24dbf2df02a26a269927c2885216dfbd89 Mon Sep 17 00:00:00 2001 From: Raz Luvaton <16746759+rluvaton@users.noreply.github.com> Date: Sat, 4 Jul 2026 22:21:03 +0300 Subject: [PATCH 4/8] lint --- arrow-data/src/data.rs | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/arrow-data/src/data.rs b/arrow-data/src/data.rs index 2a7e17637741..570deded054e 100644 --- a/arrow-data/src/data.rs +++ b/arrow-data/src/data.rs @@ -975,13 +975,10 @@ impl ArrayData { ))); } } - DataType::Map(f, _) => { - if f.is_nullable() { - return Err(ArrowError::InvalidArgumentError( - "The nullable should be set to false for the map entries field." - .to_string(), - )); - } + DataType::Map(f, _) if f.is_nullable() => { + return Err(ArrowError::InvalidArgumentError( + "The nullable should be set to false for the map entries field.".to_string(), + )); } _ => {} }; From 20f5ef84bff592487decc8fd236345977cff11de Mon Sep 17 00:00:00 2001 From: Raz Luvaton <16746759+rluvaton@users.noreply.github.com> Date: Sat, 4 Jul 2026 22:45:18 +0300 Subject: [PATCH 5/8] fix: when force_validate feature is true, skip panic --- arrow-data/src/data.rs | 67 ++++++++++++++++++++++++++---------------- 1 file changed, 42 insertions(+), 25 deletions(-) diff --git a/arrow-data/src/data.rs b/arrow-data/src/data.rs index 570deded054e..a19c2727b526 100644 --- a/arrow-data/src/data.rs +++ b/arrow-data/src/data.rs @@ -294,24 +294,18 @@ impl ArrayData { buffers: Vec, child_data: Vec, ) -> Self { - let mut skip_validation = UnsafeFlag::new(); - // SAFETY: caller responsible for ensuring data is valid - unsafe { skip_validation.set(true) }; - - ArrayDataBuilder { + let builder = Self::inner_new_builder( data_type, len, null_count, null_bit_buffer, - nulls: None, offset, buffers, child_data, - align_buffers: false, - skip_validation, - } - .build() - .unwrap() + ); + + // SAFETY: caller responsible for ensuring data is valid + unsafe { builder.build_unchecked() } } /// Create a new ArrayData, validating that the provided buffers form a valid @@ -349,25 +343,47 @@ impl ArrayData { ))); } } - // Safety justification: `validate_full` is called below - let new_self = unsafe { - Self::new_unchecked( - data_type, - len, - None, - null_bit_buffer, - offset, - buffers, - child_data, - ) - }; + + let builder = Self::inner_new_builder( + data_type, + len, + None, + null_bit_buffer, + offset, + buffers, + child_data, + ); + + assert!(!builder.skip_validation.get()); // As the data is not trusted, do a full validation of its contents // We don't need to validate children as we can assume that the // [`ArrayData`] in `child_data` have already been validated through // a call to `ArrayData::try_new` or created using unsafe - new_self.validate_data()?; - Ok(new_self) + builder.build() + } + + fn inner_new_builder( + data_type: DataType, + len: usize, + null_count: Option, + null_bit_buffer: Option, + offset: usize, + buffers: Vec, + child_data: Vec, + ) -> ArrayDataBuilder { + ArrayDataBuilder { + data_type, + len, + null_count, + null_bit_buffer, + nulls: None, + offset, + buffers, + child_data, + align_buffers: false, + skip_validation: UnsafeFlag::new(), + } } /// Return the constituent parts of this ArrayData @@ -3057,6 +3073,7 @@ mod tests { }; } } + #[test] fn should_allow_to_create_map_from_data() { let struct_data_type = DataType::Struct(Fields::from(vec![ From 7dac9e5b4cbdd6b35d06fa6c0e04476d8c4fe851 Mon Sep 17 00:00:00 2001 From: Raz Luvaton <16746759+rluvaton@users.noreply.github.com> Date: Sat, 4 Jul 2026 23:14:29 +0300 Subject: [PATCH 6/8] fix tests to not have nulls in keys --- arrow-ipc/src/reader.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arrow-ipc/src/reader.rs b/arrow-ipc/src/reader.rs index 6cb1d7e6b021..5ee7da20153c 100644 --- a/arrow-ipc/src/reader.rs +++ b/arrow-ipc/src/reader.rs @@ -2742,14 +2742,14 @@ mod tests { let value_dict_keys = Int8Array::from_iter_values([0, 1, 1, 2, 3, 1]); let value_dict_array = DictionaryArray::new(value_dict_keys, values.clone()); - let key_dict_keys = Int8Array::from_iter_values([0, 0, 2, 1, 1, 3]); + let key_dict_keys = Int8Array::from_iter_values([0, 0, 2, 2, 2, 3]); let key_dict_array = DictionaryArray::new(key_dict_keys, values); #[allow(deprecated)] let keys_field = Arc::new(Field::new_dict( "keys", DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8)), - true, // It is technically not legal for this field to be null. + false, 1, false, )); @@ -2954,13 +2954,13 @@ mod tests { let bin_view_array = Arc::new(BinaryViewArray::from_iter(bin_values)); let utf8_view_array = Arc::new(StringViewArray::from_iter(utf8_values)); - let key_dict_keys = Int8Array::from_iter_values([0, 0, 1, 2, 0, 1, 3]); + let key_dict_keys = Int8Array::from_iter_values([0, 0, 2, 2, 0, 2, 3]); let key_dict_array = DictionaryArray::new(key_dict_keys, utf8_view_array.clone()); #[allow(deprecated)] let keys_field = Arc::new(Field::new_dict( "keys", DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8View)), - true, + false, 1, false, )); From b13d6c59c0dfdb7fe0f9c50865520886a62196e0 Mon Sep 17 00:00:00 2001 From: Raz Luvaton <16746759+rluvaton@users.noreply.github.com> Date: Sat, 4 Jul 2026 23:20:34 +0300 Subject: [PATCH 7/8] remove bad test --- arrow-row/src/lib.rs | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/arrow-row/src/lib.rs b/arrow-row/src/lib.rs index 83ebe79f726e..05e20be56ffd 100644 --- a/arrow-row/src/lib.rs +++ b/arrow-row/src/lib.rs @@ -6394,34 +6394,6 @@ mod tests { assert_eq!(&map, &back[0]); } - // Test Map - both keys and values are Null type - #[test] - fn test_map_null_keys_and_null_values() { - let null_keys = Arc::new(NullArray::new(3)) as ArrayRef; - let null_values = Arc::new(NullArray::new(3)) as ArrayRef; - - let offsets = OffsetBuffer::new(vec![0, 1, 1, 3].into()); - let entries_fields = vec![ - Arc::new(Field::new("keys", DataType::Null, true)), - Arc::new(Field::new("values", DataType::Null, true)), - ]; - let struct_field = Arc::new(Field::new( - "entries", - DataType::Struct(entries_fields.clone().into()), - false, - )); - let entries = StructArray::new(entries_fields.into(), vec![null_keys, null_values], None); - - let map: ArrayRef = Arc::new(MapArray::new(struct_field, offsets, entries, None, false)); - - let converter = RowConverter::new(vec![SortField::new(map.data_type().clone())]).unwrap(); - let rows = converter.convert_columns(&[Arc::clone(&map)]).unwrap(); - let back = converter.convert_rows(&rows).unwrap(); - assert_eq!(back.len(), 1); - back[0].to_data().validate_full().unwrap(); - assert_eq!(&map, &back[0]); - } - // Test Map all empty maps #[test] fn test_map_null_all_empty() { From a7f5c12f47d78b433afb5ee674b71c8ffa8d7fba Mon Sep 17 00:00:00 2001 From: Raz Luvaton <16746759+rluvaton@users.noreply.github.com> Date: Sat, 4 Jul 2026 23:28:07 +0300 Subject: [PATCH 8/8] extract helpers --- arrow-data/src/data.rs | 147 +++++++++++------------------------------ 1 file changed, 37 insertions(+), 110 deletions(-) diff --git a/arrow-data/src/data.rs b/arrow-data/src/data.rs index a19c2727b526..72dc09c7bd83 100644 --- a/arrow-data/src/data.rs +++ b/arrow-data/src/data.rs @@ -2778,14 +2778,8 @@ mod tests { fn should_fail_validation_when_having_map_field_type_is_not_struct() { let map_field = Field::new("key", DataType::Int32, false); - let map_field_data = { - let builder = ArrayDataBuilder::new(DataType::Int32) - .len(2) - .nulls(None) - .buffers(vec![ScalarBuffer::::from(vec![1, 2]).into_inner()]); + let map_field_data = valid_non_nullable_int32_array_data(2); - builder.build().unwrap() - }; let results = test_both_builder_and_array_data( DataType::Map(map_field.into(), false), 1, @@ -2822,14 +2816,7 @@ mod tests { false, )])); - let key_array_data = { - let builder = ArrayDataBuilder::new(DataType::Int32) - .len(2) - .nulls(None) - .buffers(vec![ScalarBuffer::::from(vec![1, 2]).into_inner()]); - - builder.build().unwrap() - }; + let key_array_data = valid_non_nullable_int32_array_data(2); let struct_data = { let builder = ArrayDataBuilder::new(struct_data_type.clone()) @@ -2873,40 +2860,14 @@ mod tests { let struct_data_type = DataType::Struct(Fields::from(vec![ Field::new("key", DataType::Int32, false), Field::new("values", DataType::Utf8, true), - Field::new("other", DataType::Int64, true), + Field::new("other", DataType::Int32, true), ])); - let key_array_data = { - let builder = ArrayDataBuilder::new(DataType::Int32) - .len(2) - .nulls(None) - .buffers(vec![ScalarBuffer::::from(vec![1, 2]).into_inner()]); - - builder.build().unwrap() - }; - - let values_array_data = { - let offsets = OffsetBuffer::::from_lengths(vec![0, 0]) - .into_inner() - .into_inner(); - let empty_bytes = Buffer::default(); - - let builder = ArrayDataBuilder::new(DataType::Utf8) - .len(2) - .buffers(vec![offsets, empty_bytes]) - .nulls(None); - - builder.build().unwrap() - }; + let key_array_data = valid_non_nullable_int32_array_data(2); - let other_array_data = { - let builder = ArrayDataBuilder::new(DataType::Int64) - .len(2) - .nulls(None) - .buffers(vec![ScalarBuffer::::from(vec![1, 2]).into_inner()]); + let values_array_data = valid_string_array_data(2); - builder.build().unwrap() - }; + let other_array_data = key_array_data.clone(); let struct_data = { let builder = ArrayDataBuilder::new(struct_data_type.clone()) @@ -2952,28 +2913,8 @@ mod tests { Field::new("values", DataType::Utf8, true), ])); - let key_array_data = { - let builder = ArrayDataBuilder::new(DataType::Int32) - .len(2) - .nulls(None) - .buffers(vec![ScalarBuffer::::from(vec![1, 2]).into_inner()]); - - builder.build().unwrap() - }; - - let values_array_data = { - let offsets = OffsetBuffer::::from_lengths(vec![0, 0]) - .into_inner() - .into_inner(); - let empty_bytes = Buffer::default(); - - let builder = ArrayDataBuilder::new(DataType::Utf8) - .len(2) - .buffers(vec![offsets, empty_bytes]) - .nulls(None); - - builder.build().unwrap() - }; + let key_array_data = valid_non_nullable_int32_array_data(2); + let values_array_data = valid_string_array_data(2); let struct_data = { let builder = ArrayDataBuilder::new(struct_data_type.clone()) @@ -3016,28 +2957,9 @@ mod tests { Field::new("values", DataType::Utf8, true), ])); - let key_array_data = { - let builder = ArrayDataBuilder::new(DataType::Int32) - .len(2) - .nulls(None) - .buffers(vec![ScalarBuffer::::from(vec![1, 2]).into_inner()]); - - builder.build().unwrap() - }; - - let values_array_data = { - let offsets = OffsetBuffer::::from_lengths(vec![0, 0]) - .into_inner() - .into_inner(); - let empty_bytes = Buffer::default(); - - let builder = ArrayDataBuilder::new(DataType::Utf8) - .len(2) - .buffers(vec![offsets, empty_bytes]) - .nulls(None); + let key_array_data = valid_non_nullable_int32_array_data(2); - builder.build().unwrap() - }; + let values_array_data = valid_string_array_data(2); let struct_data = { let builder = ArrayDataBuilder::new(struct_data_type.clone()) @@ -3081,28 +3003,8 @@ mod tests { Field::new("values", DataType::Utf8, true), ])); - let key_array_data = { - let builder = ArrayDataBuilder::new(DataType::Int32) - .len(2) - .nulls(None) - .buffers(vec![ScalarBuffer::::from(vec![1, 2]).into_inner()]); - - builder.build().unwrap() - }; - - let values_array_data = { - let offsets = OffsetBuffer::::from_lengths(vec![0, 0]) - .into_inner() - .into_inner(); - let empty_bytes = Buffer::default(); - - let builder = ArrayDataBuilder::new(DataType::Utf8) - .len(2) - .buffers(vec![offsets, empty_bytes]) - .nulls(None); - - builder.build().unwrap() - }; + let key_array_data = valid_non_nullable_int32_array_data(2); + let values_array_data = valid_string_array_data(2); let struct_data = { let builder = ArrayDataBuilder::new(struct_data_type.clone()) @@ -3131,6 +3033,31 @@ mod tests { } } + fn valid_string_array_data(length: usize) -> ArrayData { + let offsets = OffsetBuffer::::from_lengths(vec![0; length]) + .into_inner() + .into_inner(); + let empty_bytes = Buffer::default(); + + let builder = ArrayDataBuilder::new(DataType::Utf8) + .len(length) + .buffers(vec![offsets, empty_bytes]) + .nulls(None); + + builder.build().unwrap() + } + + fn valid_non_nullable_int32_array_data(length: usize) -> ArrayData { + let builder = ArrayDataBuilder::new(DataType::Int32) + .len(length) + .nulls(None) + .buffers(vec![ + ScalarBuffer::::from(vec![1; length]).into_inner(), + ]); + + builder.build().unwrap() + } + #[test] fn empty_and_null_map_array_should_pass_validation() { let dt = DataType::Map(