Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 65 additions & 26 deletions arrow-data/src/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -294,24 +294,18 @@ impl ArrayData {
buffers: Vec<Buffer>,
child_data: Vec<ArrayData>,
) -> Self {
let mut skip_validation = UnsafeFlag::new();
// SAFETY: caller responsible for ensuring data is valid
unsafe { skip_validation.set(true) };

ArrayDataBuilder {
let builder = Self::inner_new_builder(
data_type,
len,
null_count,
null_bit_buffer,
nulls: None,
offset,
buffers,
child_data,
align_buffers: false,
skip_validation,
}
.build()
.unwrap()
);

// SAFETY: caller responsible for ensuring data is valid
unsafe { builder.build_unchecked() }
}

/// Create a new ArrayData, validating that the provided buffers form a valid
Expand All @@ -337,7 +331,7 @@ impl ArrayData {
) -> Result<Self, ArrowError> {
// we must check the length of `null_bit_buffer` first
// because we use this buffer to calculate `null_count`
// in `Self::new_unchecked`.
// in `ArrayDataBuilder::build`.
if let Some(null_bit_buffer) = null_bit_buffer.as_ref() {
let len_plus_offset = checked_len_plus_offset(&data_type, len, offset)?;
let needed_len = bit_util::ceil(len_plus_offset, 8);
Expand All @@ -349,25 +343,47 @@ impl ArrayData {
)));
}
}
// Safety justification: `validate_full` is called below
let new_self = unsafe {
Self::new_unchecked(
data_type,
len,
None,
null_bit_buffer,
offset,
buffers,
child_data,
)
};

let builder = Self::inner_new_builder(
data_type,
len,
None,
null_bit_buffer,
offset,
buffers,
child_data,
);

assert!(!builder.skip_validation.get());

// As the data is not trusted, do a full validation of its contents
// We don't need to validate children as we can assume that the
// [`ArrayData`] in `child_data` have already been validated through
// a call to `ArrayData::try_new` or created using unsafe
new_self.validate_data()?;
Ok(new_self)
builder.build()
}

fn inner_new_builder(
data_type: DataType,
len: usize,
null_count: Option<usize>,
null_bit_buffer: Option<Buffer>,
offset: usize,
buffers: Vec<Buffer>,
child_data: Vec<ArrayData>,
) -> ArrayDataBuilder {
ArrayDataBuilder {
data_type,
len,
null_count,
null_bit_buffer,
nulls: None,
offset,
buffers,
child_data,
align_buffers: false,
skip_validation: UnsafeFlag::new(),
}
}

/// Return the constituent parts of this ArrayData
Expand Down Expand Up @@ -2727,4 +2743,27 @@ mod tests {
assert!(array.is_null(i));
}
}

// Even when `force_validate` feature is on
#[test]
fn test_dont_panic_on_bad_input_when_using_try_new() {
let empty_bytes = Buffer::default();

let array_data = ArrayData::try_new(
DataType::Utf8,
1, // len
None,
0,
// the offsets says that we have 2 bytes but the buffer is empty
vec![Buffer::from_vec(vec![0i32, 2i32]), empty_bytes],
vec![],
);

let res = array_data.expect_err("should get error");

assert_eq!(
res.to_string(),
format!("Invalid argument error: Last offset 2 of Utf8 is larger than values length 0",)
);
}
}
Loading