-
Notifications
You must be signed in to change notification settings - Fork 1.2k
fix(arrow-avro): bound untrusted OCF block size and item counts #10237
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
b8be5a6
6672301
d2bb616
83f3909
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2286,35 +2286,58 @@ fn process_blockwise( | |
| match block_count.cmp(&0) { | ||
| Ordering::Equal => break, | ||
| Ordering::Less => { | ||
| let count = (-block_count) as usize; | ||
| // `unsigned_abs` avoids overflowing `-block_count` for `i64::MIN` (#10235) | ||
| let count = block_count.unsigned_abs() as usize; | ||
| // A negative count is followed by a long of the size in bytes | ||
| let size_in_bytes = buf.get_long()? as usize; | ||
| let raw_size = buf.get_long()?; | ||
| let size_in_bytes = usize::try_from(raw_size).map_err(|_| { | ||
| AvroError::ParseError(format!("Block size cannot be negative, got {raw_size}")) | ||
| })?; | ||
| match negative_behavior { | ||
| NegativeBlockBehavior::ProcessItems => { | ||
| // Process items one-by-one after reading size | ||
| for _ in 0..count { | ||
| on_item(buf)?; | ||
| } | ||
| total = process_block_items(buf, count, total, &mut on_item)?; | ||
| } | ||
| NegativeBlockBehavior::SkipBySize => { | ||
| // Skip the entire block payload at once | ||
| let _ = buf.get_fixed(size_in_bytes)?; | ||
| total = total.saturating_add(count); | ||
| } | ||
| } | ||
| total += count; | ||
| } | ||
| Ordering::Greater => { | ||
| let count = block_count as usize; | ||
| for _ in 0..count { | ||
| on_item(buf)?; | ||
| } | ||
| total += count; | ||
| total = process_block_items(buf, count, total, &mut on_item)?; | ||
| } | ||
| } | ||
| } | ||
| Ok(total) | ||
| } | ||
|
|
||
| /// Decode `count` items, capping the running total at `i32::MAX` (the largest index | ||
| /// an Arrow list/map offset holds). Otherwise a crafted `i64::MAX` count of a zero-byte | ||
| /// item like `null` spins the loop forever (#10235); byte-consuming items self-terminate | ||
| /// on cursor exhaustion, so valid blocks (including `array<null>`) are unaffected. | ||
| #[inline] | ||
| fn process_block_items( | ||
| buf: &mut AvroCursor, | ||
| count: usize, | ||
| total: usize, | ||
| on_item: &mut impl FnMut(&mut AvroCursor) -> Result<(), AvroError>, | ||
| ) -> Result<usize, AvroError> { | ||
| let new_total = total.checked_add(count).filter(|&t| t <= i32::MAX as usize); | ||
|
Comment on lines
+2317
to
+2328
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i'm not sure about hard limiting to
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I went through the decode paths of the other implementations. None of them bound the count by remaining bytes - the negative-block byte size is read and discarded everywhere - so that original check was wrong (a valid On LargeList: fair point it is coupled to the offset type, but the reader only materializes into If you would rather decouple it, I can pull it into a named |
||
| let Some(new_total) = new_total else { | ||
| return Err(AvroError::ParseError(format!( | ||
| "Array/map item count {count} exceeds the maximum {} addressable by 32-bit offsets", | ||
| i32::MAX | ||
| ))); | ||
| }; | ||
| for _ in 0..count { | ||
| on_item(buf)?; | ||
| } | ||
| Ok(new_total) | ||
| } | ||
|
|
||
| #[inline] | ||
| fn flush_values<T>(values: &mut Vec<T>) -> Vec<T> { | ||
| std::mem::replace(values, Vec::with_capacity(DEFAULT_CAPACITY)) | ||
|
|
@@ -3434,6 +3457,62 @@ mod tests { | |
| assert_eq!(values.value(2), 3); | ||
| } | ||
|
|
||
| /// Zig-zag + unsigned-LEB128 encode, correct for all `i64` including `MIN`/`MAX` | ||
| /// (`encode_avro_long` loops forever on those two values). | ||
| fn encode_avro_long_extreme(value: i64) -> Vec<u8> { | ||
| let mut n = ((value << 1) ^ (value >> 63)) as u64; | ||
| let mut out = Vec::new(); | ||
| while n >= 0x80 { | ||
| out.push((n as u8) | 0x80); | ||
| n >>= 7; | ||
| } | ||
| out.push(n as u8); | ||
| out | ||
| } | ||
|
|
||
| // `array<null>` is the worst case: items consume no bytes, so an unbounded | ||
| // `block_count` spins the item loop without ever advancing the cursor (#10235). | ||
| fn array_of_null_decoder() -> Decoder { | ||
| let list_dt = avro_from_codec(Codec::List(Arc::new(avro_from_codec(Codec::Null)))); | ||
| Decoder::try_new(&list_dt).unwrap() | ||
| } | ||
|
|
||
| #[test] | ||
| fn test_array_of_null_decodes() { | ||
| // A valid `array<null>` still decodes: zero-byte items make the count exceed the | ||
| // bytes left, so the bound must not reject it (#10235 review). | ||
| let mut decoder = array_of_null_decoder(); | ||
| let mut data = encode_avro_long(3); // three null items | ||
| data.extend_from_slice(&encode_avro_long(0)); // empty-block terminator | ||
| decoder.decode(&mut AvroCursor::new(&data)).unwrap(); | ||
| } | ||
|
|
||
| #[test] | ||
| fn test_array_block_count_i64_max_errors() { | ||
| // A positive `i64::MAX` block count must error rather than spin the item loop. | ||
| let mut decoder = array_of_null_decoder(); | ||
| let mut data = encode_avro_long_extreme(i64::MAX); // item count | ||
| data.extend_from_slice(&encode_avro_long(0)); // empty-block terminator | ||
| let err = decoder.decode(&mut AvroCursor::new(&data)).unwrap_err(); | ||
| assert!( | ||
| err.to_string().contains("exceeds the maximum"), | ||
| "unexpected error: {err}", | ||
| ); | ||
| } | ||
|
|
||
| #[test] | ||
| fn test_array_block_count_i64_min_errors() { | ||
| // `i64::MIN` previously overflowed `-block_count` before spinning the loop. | ||
| let mut decoder = array_of_null_decoder(); | ||
| let mut data = encode_avro_long_extreme(i64::MIN); // negative item count | ||
| data.extend_from_slice(&encode_avro_long(0)); // block size in bytes | ||
| let err = decoder.decode(&mut AvroCursor::new(&data)).unwrap_err(); | ||
| assert!( | ||
| err.to_string().contains("exceeds the maximum"), | ||
| "unexpected error: {err}", | ||
| ); | ||
| } | ||
|
|
||
| #[test] | ||
| fn test_nested_array_decoding() { | ||
| let inner_ty = avro_from_codec(Codec::List(Arc::new(avro_from_codec(Codec::Int32)))); | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.