diff --git a/arrow-data/src/data.rs b/arrow-data/src/data.rs index 0ce98aa090df..19e3037846ab 100644 --- a/arrow-data/src/data.rs +++ b/arrow-data/src/data.rs @@ -294,24 +294,18 @@ impl ArrayData { buffers: Vec, child_data: Vec, ) -> Self { - let mut skip_validation = UnsafeFlag::new(); - // SAFETY: caller responsible for ensuring data is valid - unsafe { skip_validation.set(true) }; - - ArrayDataBuilder { + let builder = Self::inner_new_builder( data_type, len, null_count, null_bit_buffer, - nulls: None, offset, buffers, child_data, - align_buffers: false, - skip_validation, - } - .build() - .unwrap() + ); + + // SAFETY: caller responsible for ensuring data is valid + unsafe { builder.build_unchecked() } } /// Create a new ArrayData, validating that the provided buffers form a valid @@ -337,7 +331,7 @@ impl ArrayData { ) -> Result { // we must check the length of `null_bit_buffer` first // because we use this buffer to calculate `null_count` - // in `Self::new_unchecked`. + // in `ArrayDataBuilder::build`. if let Some(null_bit_buffer) = null_bit_buffer.as_ref() { let len_plus_offset = checked_len_plus_offset(&data_type, len, offset)?; let needed_len = bit_util::ceil(len_plus_offset, 8); @@ -349,25 +343,47 @@ impl ArrayData { ))); } } - // Safety justification: `validate_full` is called below - let new_self = unsafe { - Self::new_unchecked( - data_type, - len, - None, - null_bit_buffer, - offset, - buffers, - child_data, - ) - }; + + let builder = Self::inner_new_builder( + data_type, + len, + None, + null_bit_buffer, + offset, + buffers, + child_data, + ); + + assert!(!builder.skip_validation.get()); // As the data is not trusted, do a full validation of its contents // We don't need to validate children as we can assume that the // [`ArrayData`] in `child_data` have already been validated through // a call to `ArrayData::try_new` or created using unsafe - new_self.validate_data()?; - Ok(new_self) + builder.build() + } + + fn inner_new_builder( + data_type: DataType, + len: usize, + null_count: Option, + null_bit_buffer: Option, + offset: usize, + buffers: Vec, + child_data: Vec, + ) -> ArrayDataBuilder { + ArrayDataBuilder { + data_type, + len, + null_count, + null_bit_buffer, + nulls: None, + offset, + buffers, + child_data, + align_buffers: false, + skip_validation: UnsafeFlag::new(), + } } /// Return the constituent parts of this ArrayData @@ -2727,4 +2743,27 @@ mod tests { assert!(array.is_null(i)); } } + + // Even when `force_validate` feature is on + #[test] + fn test_dont_panic_on_bad_input_when_using_try_new() { + let empty_bytes = Buffer::default(); + + let array_data = ArrayData::try_new( + DataType::Utf8, + 1, // len + None, + 0, + // the offsets says that we have 2 bytes but the buffer is empty + vec![Buffer::from_vec(vec![0i32, 2i32]), empty_bytes], + vec![], + ); + + let res = array_data.expect_err("should get error"); + + assert_eq!( + res.to_string(), + format!("Invalid argument error: Last offset 2 of Utf8 is larger than values length 0",) + ); + } }