diff --git a/Cargo.toml b/Cargo.toml index 6b9104efcced..08d7e013472c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -82,7 +82,7 @@ include = [ "NOTICE.txt", ] edition = "2024" -rust-version = "1.85" +rust-version = "1.88" [workspace.dependencies] arrow = { version = "59.0.0", path = "./arrow", default-features = false } diff --git a/arrow-array/src/arithmetic.rs b/arrow-array/src/arithmetic.rs index 52708da7810f..08f0b968ab82 100644 --- a/arrow-array/src/arithmetic.rs +++ b/arrow-array/src/arithmetic.rs @@ -413,40 +413,19 @@ native_type_float_op!( f16::from_bits(-1 as _), f16::from_bits(i16::MAX as _) ); -// from_bits is not yet stable as const fn, see https://github.com/rust-lang/rust/issues/72447 native_type_float_op!( f32, 0., 1., - unsafe { - // Need to allow in clippy because - // current MSRV (Minimum Supported Rust Version) is `1.85.0` but this item is stable since `1.87.0` - #[allow(unnecessary_transmutes)] - std::mem::transmute(-1_i32) - }, - unsafe { - // Need to allow in clippy because - // current MSRV (Minimum Supported Rust Version) is `1.85.0` but this item is stable since `1.87.0` - #[allow(unnecessary_transmutes)] - std::mem::transmute(i32::MAX) - } + f32::from_bits(-1_i32 as _), + f32::from_bits(i32::MAX as _) ); native_type_float_op!( f64, 0., 1., - unsafe { - // Need to allow in clippy because - // current MSRV (Minimum Supported Rust Version) is `1.85.0` but this item is stable since `1.87.0` - #[allow(unnecessary_transmutes)] - std::mem::transmute(-1_i64) - }, - unsafe { - // Need to allow in clippy because - // current MSRV (Minimum Supported Rust Version) is `1.85.0` but this item is stable since `1.87.0` - #[allow(unnecessary_transmutes)] - std::mem::transmute(i64::MAX) - } + f64::from_bits(-1_i64 as _), + f64::from_bits(i64::MAX as _) ); #[cfg(test)] diff --git a/arrow-array/src/array/byte_array.rs b/arrow-array/src/array/byte_array.rs index 93924ac76bb2..e58296817405 100644 --- a/arrow-array/src/array/byte_array.rs +++ b/arrow-array/src/array/byte_array.rs @@ -135,15 +135,15 @@ impl GenericByteArray { // Verify that each pair of offsets is a valid slices of values T::validate(&offsets, &values)?; - if let Some(n) = nulls.as_ref() { - if n.len() != len { - return Err(ArrowError::InvalidArgumentError(format!( - "Incorrect length of null buffer for {}{}Array, expected {len} got {}", - T::Offset::PREFIX, - T::PREFIX, - n.len(), - ))); - } + if let Some(n) = nulls.as_ref() + && n.len() != len + { + return Err(ArrowError::InvalidArgumentError(format!( + "Incorrect length of null buffer for {}{}Array, expected {len} got {}", + T::Offset::PREFIX, + T::PREFIX, + n.len(), + ))); } Ok(Self { diff --git a/arrow-array/src/array/byte_view_array.rs b/arrow-array/src/array/byte_view_array.rs index be6e79922183..d6feed625dd4 100644 --- a/arrow-array/src/array/byte_view_array.rs +++ b/arrow-array/src/array/byte_view_array.rs @@ -213,15 +213,15 @@ impl GenericByteViewArray { T::validate(&views, &buffers)?; - if let Some(n) = nulls.as_ref() { - if n.len() != views.len() { - return Err(ArrowError::InvalidArgumentError(format!( - "Incorrect length of null buffer for {}ViewArray, expected {} got {}", - T::PREFIX, - views.len(), - n.len(), - ))); - } + if let Some(n) = nulls.as_ref() + && n.len() != views.len() + { + return Err(ArrowError::InvalidArgumentError(format!( + "Incorrect length of null buffer for {}ViewArray, expected {} got {}", + T::PREFIX, + views.len(), + n.len(), + ))); } Ok(Self { diff --git a/arrow-array/src/array/fixed_size_binary_array.rs b/arrow-array/src/array/fixed_size_binary_array.rs index 9bf3b6ef38a0..f3d994cc2b13 100644 --- a/arrow-array/src/array/fixed_size_binary_array.rs +++ b/arrow-array/src/array/fixed_size_binary_array.rs @@ -150,14 +150,14 @@ impl FixedSizeBinaryArray { let len = match values.len().checked_div(value_size) { Some(len) => { - if let Some(n) = nulls.as_ref() { - if n.len() != len { - return Err(ArrowError::InvalidArgumentError(format!( - "Incorrect length of null buffer for FixedSizeBinaryArray, expected {} got {}", - len, - n.len(), - ))); - } + if let Some(n) = nulls.as_ref() + && n.len() != len + { + return Err(ArrowError::InvalidArgumentError(format!( + "Incorrect length of null buffer for FixedSizeBinaryArray, expected {} got {}", + len, + n.len(), + ))); } len diff --git a/arrow-array/src/array/fixed_size_list_array.rs b/arrow-array/src/array/fixed_size_list_array.rs index 01ccafd9cc57..e3817dbd54f4 100644 --- a/arrow-array/src/array/fixed_size_list_array.rs +++ b/arrow-array/src/array/fixed_size_list_array.rs @@ -175,7 +175,7 @@ impl FixedSizeListArray { Self::try_new_with_length(field, size, values, nulls, len) } else { - if values.len() % s != 0 { + if !values.len().is_multiple_of(s) { return Err(ArrowError::InvalidArgumentError(format!( "Incorrect length of values buffer for FixedSizeListArray, \ expected a multiple of {s} got {}", @@ -186,15 +186,15 @@ impl FixedSizeListArray { let len = values.len() / s; // Check that the null buffer length is correct (if it exists). - if let Some(null_buffer) = &nulls { - if s * null_buffer.len() != values.len() { - return Err(ArrowError::InvalidArgumentError(format!( - "Incorrect length of values buffer for FixedSizeListArray, \ + if let Some(null_buffer) = &nulls + && s * null_buffer.len() != values.len() + { + return Err(ArrowError::InvalidArgumentError(format!( + "Incorrect length of values buffer for FixedSizeListArray, \ expected {} got {}", - s * null_buffer.len(), - values.len(), - ))); - } + s * null_buffer.len(), + values.len(), + ))); } Self::try_new_with_length(field, size, values, nulls, len) @@ -227,13 +227,13 @@ impl FixedSizeListArray { ArrowError::InvalidArgumentError(format!("Size cannot be negative, got {size}")) })?; - if let Some(null_buffer) = &nulls { - if null_buffer.len() != len { - return Err(ArrowError::InvalidArgumentError(format!( - "Invalid null buffer for FixedSizeListArray, expected {len} found {}", - null_buffer.len() - ))); - } + if let Some(null_buffer) = &nulls + && null_buffer.len() != len + { + return Err(ArrowError::InvalidArgumentError(format!( + "Invalid null buffer for FixedSizeListArray, expected {len} found {}", + null_buffer.len() + ))); } if s == 0 && !values.is_empty() { diff --git a/arrow-array/src/array/list_array.rs b/arrow-array/src/array/list_array.rs index f74ff7d3f364..37c0d7df1dd9 100644 --- a/arrow-array/src/array/list_array.rs +++ b/arrow-array/src/array/list_array.rs @@ -223,14 +223,14 @@ impl GenericListArray { ))); } - if let Some(n) = nulls.as_ref() { - if n.len() != len { - return Err(ArrowError::InvalidArgumentError(format!( - "Incorrect length of null buffer for {}ListArray, expected {len} got {}", - OffsetSize::PREFIX, - n.len(), - ))); - } + if let Some(n) = nulls.as_ref() + && n.len() != len + { + return Err(ArrowError::InvalidArgumentError(format!( + "Incorrect length of null buffer for {}ListArray, expected {len} got {}", + OffsetSize::PREFIX, + n.len(), + ))); } if !field.is_nullable() && values.is_nullable() { return Err(ArrowError::InvalidArgumentError(format!( diff --git a/arrow-array/src/array/list_view_array.rs b/arrow-array/src/array/list_view_array.rs index a032d8715a90..8e7765be703b 100644 --- a/arrow-array/src/array/list_view_array.rs +++ b/arrow-array/src/array/list_view_array.rs @@ -146,14 +146,14 @@ impl GenericListViewArray { nulls: Option, ) -> Result { let len = offsets.len(); - if let Some(n) = nulls.as_ref() { - if n.len() != len { - return Err(ArrowError::InvalidArgumentError(format!( - "Incorrect length of null buffer for {}ListViewArray, expected {len} got {}", - OffsetSize::PREFIX, - n.len(), - ))); - } + if let Some(n) = nulls.as_ref() + && n.len() != len + { + return Err(ArrowError::InvalidArgumentError(format!( + "Incorrect length of null buffer for {}ListViewArray, expected {len} got {}", + OffsetSize::PREFIX, + n.len(), + ))); } if len != sizes.len() { return Err(ArrowError::InvalidArgumentError(format!( diff --git a/arrow-array/src/array/map_array.rs b/arrow-array/src/array/map_array.rs index 0f7c43543046..4497ac2e7f09 100644 --- a/arrow-array/src/array/map_array.rs +++ b/arrow-array/src/array/map_array.rs @@ -79,13 +79,13 @@ impl MapArray { ))); } - if let Some(n) = nulls.as_ref() { - if n.len() != len { - return Err(ArrowError::InvalidArgumentError(format!( - "Incorrect length of null buffer for MapArray, expected {len} got {}", - n.len(), - ))); - } + if let Some(n) = nulls.as_ref() + && n.len() != len + { + return Err(ArrowError::InvalidArgumentError(format!( + "Incorrect length of null buffer for MapArray, expected {len} got {}", + n.len(), + ))); } if field.is_nullable() || entries.null_count() != 0 { return Err(ArrowError::InvalidArgumentError( diff --git a/arrow-array/src/array/primitive_array.rs b/arrow-array/src/array/primitive_array.rs index b51f5f518668..a1fe18951d1a 100644 --- a/arrow-array/src/array/primitive_array.rs +++ b/arrow-array/src/array/primitive_array.rs @@ -655,14 +655,14 @@ impl PrimitiveArray { values: ScalarBuffer, nulls: Option, ) -> Result { - if let Some(n) = nulls.as_ref() { - if n.len() != values.len() { - return Err(ArrowError::InvalidArgumentError(format!( - "Incorrect length of null buffer for PrimitiveArray, expected {} got {}", - values.len(), - n.len(), - ))); - } + if let Some(n) = nulls.as_ref() + && n.len() != values.len() + { + return Err(ArrowError::InvalidArgumentError(format!( + "Incorrect length of null buffer for PrimitiveArray, expected {} got {}", + values.len(), + n.len(), + ))); } Ok(Self { diff --git a/arrow-array/src/array/struct_array.rs b/arrow-array/src/array/struct_array.rs index 59513b9ff012..bf7dc934dcf0 100644 --- a/arrow-array/src/array/struct_array.rs +++ b/arrow-array/src/array/struct_array.rs @@ -138,13 +138,13 @@ impl StructArray { ))); } - if let Some(n) = nulls.as_ref() { - if n.len() != len { - return Err(ArrowError::InvalidArgumentError(format!( - "Incorrect number of nulls for StructArray, expected {len} got {}", - n.len(), - ))); - } + if let Some(n) = nulls.as_ref() + && n.len() != len + { + return Err(ArrowError::InvalidArgumentError(format!( + "Incorrect number of nulls for StructArray, expected {len} got {}", + n.len(), + ))); } for (f, a) in fields.iter().zip(&arrays) { @@ -166,17 +166,15 @@ impl StructArray { ))); } - if !f.is_nullable() { - if let Some(a) = a.logical_nulls() { - if !nulls.as_ref().map(|n| n.contains(&a)).unwrap_or_default() - && a.null_count() > 0 - { - return Err(ArrowError::InvalidArgumentError(format!( - "Found unmasked nulls for non-nullable StructArray field {:?}", - f.name() - ))); - } - } + if !f.is_nullable() + && let Some(a) = a.logical_nulls() + && !nulls.as_ref().map(|n| n.contains(&a)).unwrap_or_default() + && a.null_count() > 0 + { + return Err(ArrowError::InvalidArgumentError(format!( + "Found unmasked nulls for non-nullable StructArray field {:?}", + f.name() + ))); } } diff --git a/arrow-array/src/builder/generic_bytes_view_builder.rs b/arrow-array/src/builder/generic_bytes_view_builder.rs index 2d60187344cf..4886d56e7ffa 100644 --- a/arrow-array/src/builder/generic_bytes_view_builder.rs +++ b/arrow-array/src/builder/generic_bytes_view_builder.rs @@ -359,36 +359,34 @@ impl GenericByteViewBuilder { .max_deduplication_len .map(|max_length| length <= max_length) .unwrap_or(true); - if can_deduplicate { - if let Some((mut ht, hasher)) = self.string_tracker.take() { - let hash_val = hasher.hash_one(v); - let hasher_fn = |v: &_| hasher.hash_one(v); - - let entry = ht.entry( - hash_val, - |idx| { - let stored_value = self.get_value(*idx); - v == stored_value - }, - hasher_fn, - ); - match entry { - Entry::Occupied(occupied) => { - // If the string already exists, we will directly use the view - let idx = occupied.get(); - self.views_buffer.push(self.views_buffer[*idx]); - self.null_buffer_builder.append_non_null(); - self.string_tracker = Some((ht, hasher)); - return Ok(()); - } - Entry::Vacant(vacant) => { - // o.w. we insert the (string hash -> view index) - // the idx is current length of views_builder, as we are inserting a new view - vacant.insert(self.views_buffer.len()); - } + if can_deduplicate && let Some((mut ht, hasher)) = self.string_tracker.take() { + let hash_val = hasher.hash_one(v); + let hasher_fn = |v: &_| hasher.hash_one(v); + + let entry = ht.entry( + hash_val, + |idx| { + let stored_value = self.get_value(*idx); + v == stored_value + }, + hasher_fn, + ); + match entry { + Entry::Occupied(occupied) => { + // If the string already exists, we will directly use the view + let idx = occupied.get(); + self.views_buffer.push(self.views_buffer[*idx]); + self.null_buffer_builder.append_non_null(); + self.string_tracker = Some((ht, hasher)); + return Ok(()); + } + Entry::Vacant(vacant) => { + // o.w. we insert the (string hash -> view index) + // the idx is current length of views_builder, as we are inserting a new view + vacant.insert(self.views_buffer.len()); } - self.string_tracker = Some((ht, hasher)); } + self.string_tracker = Some((ht, hasher)); } let required_cap = self.in_progress.len() + v.len(); diff --git a/arrow-avro/src/codec.rs b/arrow-avro/src/codec.rs index fc101ae0b53f..975b906cf78c 100644 --- a/arrow-avro/src/codec.rs +++ b/arrow-avro/src/codec.rs @@ -218,12 +218,10 @@ impl AvroDataType { /// Returns an arrow [`Field`] with the given name pub(crate) fn field_with_name(&self, name: &str) -> Field { let mut nullable = self.nullability.is_some(); - if !nullable { - if let Codec::Union(children, _, _) = self.codec() { - // If any encoded branch is `null`, mark field as nullable - if children.iter().any(|c| matches!(c.codec(), Codec::Null)) { - nullable = true; - } + if !nullable && let Codec::Union(children, _, _) = self.codec() { + // If any encoded branch is `null`, mark field as nullable + if children.iter().any(|c| matches!(c.codec(), Codec::Null)) { + nullable = true; } } let data_type = self.codec.data_type(); @@ -282,13 +280,13 @@ impl AvroDataType { } out.push(cp as u8); } - if let Some(len) = expected_len { - if out.len() != len { - return Err(ArrowError::SchemaError(format!( - "Default length {} does not match expected fixed size {len}", - out.len(), - ))); - } + if let Some(len) = expected_len + && out.len() != len + { + return Err(ArrowError::SchemaError(format!( + "Default length {} does not match expected fixed size {len}", + out.len(), + ))); } Ok(out) } @@ -1362,16 +1360,16 @@ fn union_first_duplicate<'a>( ) -> Option { let mut seen = HashSet::with_capacity(branches.len()); for schema in branches { - if let Some(key) = branch_key_of(schema, enclosing_ns) { - if !seen.insert(key.clone()) { - let msg = match key { - UnionBranchKey::Named(full) => format!("named type {full}"), - UnionBranchKey::Primitive(p) => format!("primitive {}", p.as_ref()), - UnionBranchKey::Array => "array".to_string(), - UnionBranchKey::Map => "map".to_string(), - }; - return Some(msg); - } + if let Some(key) = branch_key_of(schema, enclosing_ns) + && !seen.insert(key.clone()) + { + let msg = match key { + UnionBranchKey::Named(full) => format!("named type {full}"), + UnionBranchKey::Primitive(p) => format!("primitive {}", p.as_ref()), + UnionBranchKey::Array => "array".to_string(), + UnionBranchKey::Map => "map".to_string(), + }; + return Some(msg); } } None @@ -1777,17 +1775,15 @@ impl<'a> Maker<'a> { } (None, _) => {} } - if matches!(field.codec, Codec::Int64) { - if let Some(unit) = t + if matches!(field.codec, Codec::Int64) + && let Some(unit) = t .attributes .additional .get("arrowTimeUnit") .and_then(|v| v.as_str()) - { - if unit == "nanosecond" { - field.codec = Codec::TimestampNanos(Some(self.tz)); - } - } + && unit == "nanosecond" + { + field.codec = Codec::TimestampNanos(Some(self.tz)); } if !t.attributes.additional.is_empty() { for (k, v) in &t.attributes.additional { diff --git a/arrow-avro/src/reader/mod.rs b/arrow-avro/src/reader/mod.rs index c83739816492..7f5e713c8a77 100644 --- a/arrow-avro/src/reader/mod.rs +++ b/arrow-avro/src/reader/mod.rs @@ -1721,10 +1721,10 @@ mod test { } Value::Array(arr) => { for b in arr.iter_mut() { - if let Value::Object(map) = b { - if matches!(map.get("type"), Some(Value::String(t)) if t == "enum") { - map.insert("symbols".to_string(), symbols.clone()); - } + if let Value::Object(map) = b + && matches!(map.get("type"), Some(Value::String(t)) if t == "enum") + { + map.insert("symbols".to_string(), symbols.clone()); } } } @@ -8317,42 +8317,41 @@ mod test { } } fn reverse_items_union(f: &mut Value) { - if let Some(obj) = f.get_mut("type").and_then(|t| t.as_object_mut()) { - if let Some(items) = obj.get_mut("items").and_then(|v| v.as_array_mut()) { - items.reverse(); - } + if let Some(obj) = f.get_mut("type").and_then(|t| t.as_object_mut()) + && let Some(items) = obj.get_mut("items").and_then(|v| v.as_array_mut()) + { + items.reverse(); } } fn reverse_map_values_union(f: &mut Value) { - if let Some(obj) = f.get_mut("type").and_then(|t| t.as_object_mut()) { - if let Some(values) = obj.get_mut("values").and_then(|v| v.as_array_mut()) { - values.reverse(); - } + if let Some(obj) = f.get_mut("type").and_then(|t| t.as_object_mut()) + && let Some(values) = obj.get_mut("values").and_then(|v| v.as_array_mut()) + { + values.reverse(); } } fn reverse_nested_union_in_record(f: &mut Value, field_name: &str) { - if let Some(obj) = f.get_mut("type").and_then(|t| t.as_object_mut()) { - if let Some(fields) = obj.get_mut("fields").and_then(|v| v.as_array_mut()) { - for ff in fields.iter_mut() { - if ff.get("name").and_then(|n| n.as_str()) == Some(field_name) { - if let Some(ty) = ff.get_mut("type") { - if let Some(arr) = ty.as_array_mut() { - arr.reverse(); - } - } - } + if let Some(obj) = f.get_mut("type").and_then(|t| t.as_object_mut()) + && let Some(fields) = obj.get_mut("fields").and_then(|v| v.as_array_mut()) + { + for ff in fields.iter_mut() { + if ff.get("name").and_then(|n| n.as_str()) == Some(field_name) + && let Some(ty) = ff.get_mut("type") + && let Some(arr) = ty.as_array_mut() + { + arr.reverse(); } } } } fn rename_nested_field_with_alias(f: &mut Value, old: &str, new: &str) { - if let Some(obj) = f.get_mut("type").and_then(|t| t.as_object_mut()) { - if let Some(fields) = obj.get_mut("fields").and_then(|v| v.as_array_mut()) { - for ff in fields.iter_mut() { - if ff.get("name").and_then(|n| n.as_str()) == Some(old) { - ff["name"] = Value::String(new.to_string()); - ff["aliases"] = Value::Array(vec![Value::String(old.to_string())]); - } + if let Some(obj) = f.get_mut("type").and_then(|t| t.as_object_mut()) + && let Some(fields) = obj.get_mut("fields").and_then(|v| v.as_array_mut()) + { + for ff in fields.iter_mut() { + if ff.get("name").and_then(|n| n.as_str()) == Some(old) { + ff["name"] = Value::String(new.to_string()); + ff["aliases"] = Value::Array(vec![Value::String(old.to_string())]); } } } diff --git a/arrow-avro/src/reader/record.rs b/arrow-avro/src/reader/record.rs index 306c77718234..24561440b55e 100644 --- a/arrow-avro/src/reader/record.rs +++ b/arrow-avro/src/reader/record.rs @@ -281,19 +281,20 @@ enum Decoder { impl Decoder { fn try_new(data_type: &AvroDataType) -> Result { - if let Some(ResolutionInfo::Union(info)) = data_type.resolution.as_ref() { - if info.writer_is_union && !info.reader_is_union { - let mut clone = data_type.clone(); - clone.resolution = None; // Build target base decoder without Union resolution - let target = Self::try_new_internal(&clone)?; - let decoder = Self::Union( - UnionDecoderBuilder::new() - .with_resolved_union(info.clone()) - .with_target(target) - .build()?, - ); - return Ok(decoder); - } + if let Some(ResolutionInfo::Union(info)) = data_type.resolution.as_ref() + && info.writer_is_union + && !info.reader_is_union + { + let mut clone = data_type.clone(); + clone.resolution = None; // Build target base decoder without Union resolution + let target = Self::try_new_internal(&clone)?; + let decoder = Self::Union( + UnionDecoderBuilder::new() + .with_resolved_union(info.clone()) + .with_target(target) + .build()?, + ); + return Ok(decoder); } Self::try_new_internal(data_type) } @@ -550,10 +551,10 @@ impl Decoder { let mut builder = UnionDecoderBuilder::new() .with_fields(fields.clone()) .with_branches(decoders); - if let Some(ResolutionInfo::Union(info)) = data_type.resolution.as_ref() { - if info.reader_is_union { - builder = builder.with_resolved_union(info.clone()); - } + if let Some(ResolutionInfo::Union(info)) = data_type.resolution.as_ref() + && info.reader_is_union + { + builder = builder.with_resolved_union(info.clone()); } Self::Union(builder.build()?) } @@ -1631,13 +1632,13 @@ impl Decoder { ))); } let final_len = moff.len() - 1; - if let Some(n) = &nulls { - if n.len() != final_len { - return Err(AvroError::InvalidArgument(format!( - "Map array null buffer length {} != final map length {final_len}", - n.len() - ))); - } + if let Some(n) = &nulls + && n.len() != final_len + { + return Err(AvroError::InvalidArgument(format!( + "Map array null buffer length {} != final map length {final_len}", + n.len() + ))); } let entries_fields = match map_field.data_type() { DataType::Struct(fields) => fields.clone(), diff --git a/arrow-avro/src/schema.rs b/arrow-avro/src/schema.rs index 1b0c2e26f773..28f21e463adf 100644 --- a/arrow-avro/src/schema.rs +++ b/arrow-avro/src/schema.rs @@ -517,10 +517,8 @@ impl AvroSchema { let opts = options.unwrap_or_default(); let order = opts.null_order.unwrap_or_default(); let strip = opts.strip_metadata; - if !strip { - if let Some(json) = schema.metadata.get(SCHEMA_METADATA_KEY) { - return Ok(AvroSchema::new(json.clone())); - } + if !strip && let Some(json) = schema.metadata.get(SCHEMA_METADATA_KEY) { + return Ok(AvroSchema::new(json.clone())); } let mut name_gen = NameGenerator::default(); let fields_json = schema diff --git a/arrow-buffer/src/buffer/boolean.rs b/arrow-buffer/src/buffer/boolean.rs index 52f1e3fbb510..591227a5c061 100644 --- a/arrow-buffer/src/buffer/boolean.rs +++ b/arrow-buffer/src/buffer/boolean.rs @@ -1035,7 +1035,7 @@ mod tests { .map(|i| (i as u8).wrapping_mul(37).wrapping_add(11)) .collect::>(); let base = bytes.as_ptr() as usize; - let shift = (0..8).find(|s| (base + s) % 8 != 0).unwrap(); + let shift = (0..8).find(|s| !(base + s).is_multiple_of(8)).unwrap(); let misaligned = &bytes[shift..]; // Case 1: fallback path with `remainder.is_empty() == true` diff --git a/arrow-buffer/src/buffer/immutable.rs b/arrow-buffer/src/buffer/immutable.rs index 9c7fc1a90169..bfccdf55382e 100644 --- a/arrow-buffer/src/buffer/immutable.rs +++ b/arrow-buffer/src/buffer/immutable.rs @@ -209,19 +209,19 @@ impl Buffer { // For realloc to work, we cannot free the elements before the offset offset + self.len() }; - if desired_capacity < self.capacity() { - if let Some(bytes) = Arc::get_mut(&mut self.data) { - if bytes.try_realloc(desired_capacity).is_ok() { - // Realloc complete - update our pointer into `bytes`: - self.ptr = if is_empty { - bytes.as_ptr() - } else { - // SAFETY: we kept all elements leading up to the offset - unsafe { bytes.as_ptr().add(offset) } - } + if desired_capacity < self.capacity() + && let Some(bytes) = Arc::get_mut(&mut self.data) + { + if bytes.try_realloc(desired_capacity).is_ok() { + // Realloc complete - update our pointer into `bytes`: + self.ptr = if is_empty { + bytes.as_ptr() } else { - // Failure to reallocate is fine; we just failed to free up memory. + // SAFETY: we kept all elements leading up to the offset + unsafe { bytes.as_ptr().add(offset) } } + } else { + // Failure to reallocate is fine; we just failed to free up memory. } } } @@ -328,7 +328,7 @@ impl Buffer { /// If the offset is byte-aligned the returned buffer is a shallow clone, /// otherwise a new buffer is allocated and filled with a copy of the bits in the range. pub fn bit_slice(&self, offset: usize, len: usize) -> Self { - if offset % 8 == 0 { + if offset.is_multiple_of(8) { return self.slice_with_length(offset / 8, bit_util::ceil(len, 8)); } @@ -1047,7 +1047,7 @@ mod tests { // (since the `offset` value inside a Buffer is byte-granular, not bit-granular), so // checking the offset should always return 0 if so. If the offset IS byte-aligned, we // want to make sure it doesn't unnecessarily create a deep copy. - if offset % 8 == 0 { + if offset.is_multiple_of(8) { assert_eq!(new_buf.ptr_offset(), offset / 8); } else { assert_eq!(new_buf.ptr_offset(), 0); diff --git a/arrow-csv/src/reader/mod.rs b/arrow-csv/src/reader/mod.rs index aae6f66cf5bc..37e1720f82a1 100644 --- a/arrow-csv/src/reader/mod.rs +++ b/arrow-csv/src/reader/mod.rs @@ -393,10 +393,10 @@ impl Format { // Note since we may be looking at a sample of the data, we make the safe assumption that // they could be nullable for (i, column_type) in column_types.iter_mut().enumerate().take(header_length) { - if let Some(string) = record.get(i) { - if !self.null_regex.is_null(string) { - column_type.update(string) - } + if let Some(string) = record.get(i) + && !self.null_regex.is_null(string) + { + column_type.update(string) } } } diff --git a/arrow-data/src/data.rs b/arrow-data/src/data.rs index 0ce98aa090df..7399184776c8 100644 --- a/arrow-data/src/data.rs +++ b/arrow-data/src/data.rs @@ -819,10 +819,10 @@ impl ArrayData { pub fn align_buffers(&mut self) { let layout = layout(&self.data_type); for (buffer, spec) in self.buffers.iter_mut().zip(&layout.buffers) { - if let BufferSpec::FixedWidth { alignment, .. } = spec { - if buffer.as_ptr().align_offset(*alignment) != 0 { - *buffer = Buffer::from_slice_ref(buffer.as_ref()); - } + if let BufferSpec::FixedWidth { alignment, .. } = spec + && buffer.as_ptr().align_offset(*alignment) != 0 + { + *buffer = Buffer::from_slice_ref(buffer.as_ref()); } } // align children data recursively diff --git a/arrow-data/src/equal/boolean.rs b/arrow-data/src/equal/boolean.rs index 64b7125e3688..cbd18ad789b2 100644 --- a/arrow-data/src/equal/boolean.rs +++ b/arrow-data/src/equal/boolean.rs @@ -37,10 +37,10 @@ pub(super) fn boolean_equal( if !contains_nulls { // Optimize performance for starting offset at u8 boundary. - if lhs_start % 8 == 0 - && rhs_start % 8 == 0 - && lhs.offset() % 8 == 0 - && rhs.offset() % 8 == 0 + if lhs_start.is_multiple_of(8) + && rhs_start.is_multiple_of(8) + && lhs.offset().is_multiple_of(8) + && rhs.offset().is_multiple_of(8) { let quot = len / 8; if quot > 0 diff --git a/arrow-flight/src/sql/client.rs b/arrow-flight/src/sql/client.rs index 5476d4ede9a4..60ef575d1fd7 100644 --- a/arrow-flight/src/sql/client.rs +++ b/arrow-flight/src/sql/client.rs @@ -587,10 +587,9 @@ where .await? .message() .await? + && let Some(handle) = self.unpack_prepared_statement_handle(&result)? { - if let Some(handle) = self.unpack_prepared_statement_handle(&result)? { - self.handle = handle; - } + self.handle = handle; } } Ok(()) diff --git a/arrow-flight/tests/common/trailers_layer.rs b/arrow-flight/tests/common/trailers_layer.rs index 0ccb7df86c74..f78d1120c57f 100644 --- a/arrow-flight/tests/common/trailers_layer.rs +++ b/arrow-flight/tests/common/trailers_layer.rs @@ -105,10 +105,10 @@ impl http_body::Body for WrappedBody { ) -> Poll, Self::Error>>> { let mut result = ready!(self.project().inner.poll_frame(cx)); - if let Some(Ok(frame)) = &mut result { - if let Some(trailers) = frame.trailers_mut() { - trailers.insert("test-trailer", HeaderValue::from_static("trailer_val")); - } + if let Some(Ok(frame)) = &mut result + && let Some(trailers) = frame.trailers_mut() + { + trailers.insert("test-trailer", HeaderValue::from_static("trailer_val")); } Poll::Ready(result) diff --git a/arrow-ipc/src/convert.rs b/arrow-ipc/src/convert.rs index 16e61deadb0f..e3f89b58fcc7 100644 --- a/arrow-ipc/src/convert.rs +++ b/arrow-ipc/src/convert.rs @@ -215,10 +215,10 @@ pub fn fb_to_schema(fb: crate::Schema) -> Schema { let kv = md_fields.get(i); let k_str = kv.key(); let v_str = kv.value(); - if let Some(k) = k_str { - if let Some(v) = v_str { - metadata.insert(k.to_string(), v.to_string()); - } + if let Some(k) = k_str + && let Some(v) = v_str + { + metadata.insert(k.to_string(), v.to_string()); } } } @@ -294,25 +294,22 @@ pub fn try_schema_from_ipc_buffer(buffer: &[u8]) -> Result { /// Get the Arrow data type from the flatbuffer Field table pub(crate) fn get_data_type(field: crate::Field, may_be_dictionary: bool) -> DataType { - if let Some(dictionary) = field.dictionary() { - if may_be_dictionary { - let int = dictionary.indexType().unwrap(); - let index_type = match (int.bitWidth(), int.is_signed()) { - (8, true) => DataType::Int8, - (8, false) => DataType::UInt8, - (16, true) => DataType::Int16, - (16, false) => DataType::UInt16, - (32, true) => DataType::Int32, - (32, false) => DataType::UInt32, - (64, true) => DataType::Int64, - (64, false) => DataType::UInt64, - _ => panic!("Unexpected bitwidth and signed"), - }; - return DataType::Dictionary( - Box::new(index_type), - Box::new(get_data_type(field, false)), - ); - } + if let Some(dictionary) = field.dictionary() + && may_be_dictionary + { + let int = dictionary.indexType().unwrap(); + let index_type = match (int.bitWidth(), int.is_signed()) { + (8, true) => DataType::Int8, + (8, false) => DataType::UInt8, + (16, true) => DataType::Int16, + (16, false) => DataType::UInt16, + (32, true) => DataType::Int32, + (32, false) => DataType::UInt32, + (64, true) => DataType::Int64, + (64, false) => DataType::UInt64, + _ => panic!("Unexpected bitwidth and signed"), + }; + return DataType::Dictionary(Box::new(index_type), Box::new(get_data_type(field, false))); } match field.type_type() { diff --git a/arrow-ipc/src/writer.rs b/arrow-ipc/src/writer.rs index 6ae64843731f..973cd62d5b15 100644 --- a/arrow-ipc/src/writer.rs +++ b/arrow-ipc/src/writer.rs @@ -1829,7 +1829,7 @@ pub fn write_message( write_options: &IpcWriteOptions, ) -> Result<(usize, usize), ArrowError> { let arrow_data_len = encoded.arrow_data.len(); - if arrow_data_len % usize::from(write_options.alignment) != 0 { + if !arrow_data_len.is_multiple_of(usize::from(write_options.alignment)) { return Err(ArrowError::MemoryError( "Arrow data not aligned".to_string(), )); diff --git a/arrow-json/src/reader/value_iter.rs b/arrow-json/src/reader/value_iter.rs index ebaba695adf3..e4fbc14a8379 100644 --- a/arrow-json/src/reader/value_iter.rs +++ b/arrow-json/src/reader/value_iter.rs @@ -66,10 +66,10 @@ impl Iterator for ValueIter { type Item = Result; fn next(&mut self) -> Option { - if let Some(max) = self.max_read_records { - if self.record_count >= max { - return None; - } + if let Some(max) = self.max_read_records + && self.record_count >= max + { + return None; } loop { diff --git a/arrow-json/src/writer/encoder.rs b/arrow-json/src/writer/encoder.rs index 45055c5a36a8..de87ada57610 100644 --- a/arrow-json/src/writer/encoder.rs +++ b/arrow-json/src/writer/encoder.rs @@ -317,10 +317,10 @@ pub fn make_encoder<'a>( }}; } - if let Some(factory) = options.encoder_factory() { - if let Some(encoder) = factory.make_default_encoder(field, array, options)? { - return Ok(encoder); - } + if let Some(factory) = options.encoder_factory() + && let Some(encoder) = factory.make_default_encoder(field, array, options)? + { + return Ok(encoder); } let nulls = array.nulls().cloned(); diff --git a/arrow-pyarrow-integration-testing/Cargo.toml b/arrow-pyarrow-integration-testing/Cargo.toml index 46abd8ad3d00..b5762484eca0 100644 --- a/arrow-pyarrow-integration-testing/Cargo.toml +++ b/arrow-pyarrow-integration-testing/Cargo.toml @@ -25,7 +25,7 @@ authors = ["Apache Arrow "] license = "Apache-2.0" keywords = ["arrow"] edition = "2024" -rust-version = "1.85" +rust-version = "1.88" publish = false [lib] diff --git a/arrow-pyarrow-testing/Cargo.toml b/arrow-pyarrow-testing/Cargo.toml index c5a15b5cc923..217e2b144800 100644 --- a/arrow-pyarrow-testing/Cargo.toml +++ b/arrow-pyarrow-testing/Cargo.toml @@ -40,7 +40,7 @@ authors = ["Apache Arrow "] license = "Apache-2.0" keywords = ["arrow"] edition = "2024" -rust-version = "1.85" +rust-version = "1.88" publish = false diff --git a/arrow-row/src/lib.rs b/arrow-row/src/lib.rs index 182481f42b4d..e90deb338a77 100644 --- a/arrow-row/src/lib.rs +++ b/arrow-row/src/lib.rs @@ -2254,13 +2254,13 @@ unsafe fn decode_column( let null_row_bytes: &[u8] = &null_rows[field_idx].data; for idx in 0..len { - if let Some((next_idx, bytes)) = field_row_iter.peek() { - if *next_idx == idx { - sparse_data.push(*bytes); + if let Some((next_idx, bytes)) = field_row_iter.peek() + && *next_idx == idx + { + sparse_data.push(*bytes); - field_row_iter.next(); - continue; - } + field_row_iter.next(); + continue; } sparse_data.push(null_row_bytes); } diff --git a/arrow-schema/src/schema.rs b/arrow-schema/src/schema.rs index 0c7db39dfbb5..32e384756574 100644 --- a/arrow-schema/src/schema.rs +++ b/arrow-schema/src/schema.rs @@ -300,13 +300,13 @@ impl Schema { // merge metadata for (key, value) in metadata.into_iter() { - if let Some(old_val) = out_meta.get(&key) { - if old_val != &value { - return Err(ArrowError::SchemaError(format!( - "Fail to merge schema due to conflicting metadata. \ - Key '{key}' has different values '{old_val}' and '{value}'" - ))); - } + if let Some(old_val) = out_meta.get(&key) + && old_val != &value + { + return Err(ArrowError::SchemaError(format!( + "Fail to merge schema due to conflicting metadata. \ + Key '{key}' has different values '{old_val}' and '{value}'" + ))); } out_meta.insert(key, value); } diff --git a/arrow-select/src/coalesce.rs b/arrow-select/src/coalesce.rs index 0f1086be82aa..44aa6f217cfe 100644 --- a/arrow-select/src/coalesce.rs +++ b/arrow-select/src/coalesce.rs @@ -449,30 +449,30 @@ impl BatchCoalescer { } // Large batch optimization: bypass coalescing for oversized batches - if let Some(limit) = self.biggest_coalesce_batch_size { - if batch_size > limit { - // Case 1: No buffered data - emit large batch directly - // Example: [] + [1200] → output [1200], buffer [] - if self.buffered_rows == 0 { - self.completed.push_back(batch); - return Ok(()); - } - - // Case 2: Buffer too large - flush then emit to avoid oversized merge - // Example: [850] + [1200] → output [850], then output [1200] - // This prevents creating batches much larger than both target_batch_size - // and biggest_coalesce_batch_size, which could cause memory issues - if self.buffered_rows > limit { - self.finish_buffered_batch()?; - self.completed.push_back(batch); - return Ok(()); - } + if let Some(limit) = self.biggest_coalesce_batch_size + && batch_size > limit + { + // Case 1: No buffered data - emit large batch directly + // Example: [] + [1200] → output [1200], buffer [] + if self.buffered_rows == 0 { + self.completed.push_back(batch); + return Ok(()); + } - // Case 3: Small buffer - proceed with normal coalescing - // Example: [300] + [1200] → split and merge normally - // This ensures small batches still get properly coalesced - // while allowing some controlled growth beyond the limit + // Case 2: Buffer too large - flush then emit to avoid oversized merge + // Example: [850] + [1200] → output [850], then output [1200] + // This prevents creating batches much larger than both target_batch_size + // and biggest_coalesce_batch_size, which could cause memory issues + if self.buffered_rows > limit { + self.finish_buffered_batch()?; + self.completed.push_back(batch); + return Ok(()); } + + // Case 3: Small buffer - proceed with normal coalescing + // Example: [300] + [1200] → split and merge normally + // This ensures small batches still get properly coalesced + // while allowing some controlled growth beyond the limit } let (_schema, arrays, mut num_rows) = batch.into_parts(); diff --git a/arrow-select/src/dictionary.rs b/arrow-select/src/dictionary.rs index 5b32f4e761f8..2f0418e1dc0f 100644 --- a/arrow-select/src/dictionary.rs +++ b/arrow-select/src/dictionary.rs @@ -82,9 +82,7 @@ pub fn garbage_collect_dictionary( pub fn garbage_collect_any_dictionary( dictionary: &dyn AnyDictionaryArray, ) -> Result { - // FIXME: this is a workaround for MSRV Rust versions below 1.86 where trait upcasting is not stable. - // From 1.86 onward, `&dyn AnyDictionaryArray` can be directly passed to `downcast_dictionary_array!`. - let dictionary = &*dictionary.slice(0, dictionary.len()); + let dictionary: &dyn Array = dictionary; downcast_dictionary_array!( dictionary => garbage_collect_dictionary(dictionary).map(|dict| Arc::new(dict) as ArrayRef), _ => unreachable!("have a dictionary array") diff --git a/arrow-string/src/concat_elements.rs b/arrow-string/src/concat_elements.rs index 8def0d5abb10..72898793b8bd 100644 --- a/arrow-string/src/concat_elements.rs +++ b/arrow-string/src/concat_elements.rs @@ -343,14 +343,14 @@ where self, null_buffer: Option, ) -> Result, ArrowError> { - if let Some(ref nulls) = null_buffer { - if nulls.len() != self.views.len() { - return Err(ArrowError::ComputeError(format!( - "Null buffer length ({}) must match row count ({})", - nulls.len(), - self.views.len() - ))); - } + if let Some(ref nulls) = null_buffer + && nulls.len() != self.views.len() + { + return Err(ArrowError::ComputeError(format!( + "Null buffer length ({}) must match row count ({})", + nulls.len(), + self.views.len() + ))); } let buffers = if self.data.is_empty() { diff --git a/arrow/src/tensor.rs b/arrow/src/tensor.rs index 3b65ea7b52f9..812a7f897265 100644 --- a/arrow/src/tensor.rs +++ b/arrow/src/tensor.rs @@ -177,20 +177,20 @@ impl<'a, T: ArrowPrimitiveType> Tensor<'a, T> { } Some(ref s) => { - if let Some(ref st) = strides { - if st.len() != s.len() { - return Err(ArrowError::InvalidArgumentError( - "shape and stride dimensions differ".to_string(), - )); - } + if let Some(ref st) = strides + && st.len() != s.len() + { + return Err(ArrowError::InvalidArgumentError( + "shape and stride dimensions differ".to_string(), + )); } - if let Some(ref n) = names { - if n.len() != s.len() { - return Err(ArrowError::InvalidArgumentError( - "number of dimensions and number of dimension names differ".to_string(), - )); - } + if let Some(ref n) = names + && n.len() != s.len() + { + return Err(ArrowError::InvalidArgumentError( + "number of dimensions and number of dimension names differ".to_string(), + )); } let total_elements: usize = s.iter().product(); diff --git a/parquet-variant-compute/src/variant_array.rs b/parquet-variant-compute/src/variant_array.rs index 82cda7867f50..71e23820a5e7 100644 --- a/parquet-variant-compute/src/variant_array.rs +++ b/parquet-variant-compute/src/variant_array.rs @@ -1110,10 +1110,10 @@ fn typed_value_to_variant<'a>( /// verify that all data types in the struct are legal for a variant array. fn canonicalize_shredded_types(array: &dyn Array) -> Result { let new_type = canonicalize_and_verify_data_type(array.data_type())?; - if let Cow::Borrowed(_) = new_type { - if let Some(array) = array.as_struct_opt() { - return Ok(Arc::new(array.clone())); // bypass the unnecessary cast - } + if let Cow::Borrowed(_) = new_type + && let Some(array) = array.as_struct_opt() + { + return Ok(Arc::new(array.clone())); // bypass the unnecessary cast } cast(array, new_type.as_ref()) } diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs index fc01ca8d3b20..7d24d1514b99 100644 --- a/parquet-variant-compute/src/variant_get.rs +++ b/parquet-variant-compute/src/variant_get.rs @@ -337,26 +337,26 @@ fn shredded_get_path( // // For shredded/partially-shredded targets (`typed_value` present), recurse into each field // separately to take advantage of deeper shredding in child fields. - if !as_field.has_valid_extension_type::() { - if let DataType::Struct(fields) = as_field.data_type() { - if target.typed_value_column().is_none() { - return shred_basic_variant(target, VariantPath::default(), Some(as_field)); - } - - let children = fields - .iter() - .map(|field| { - let path = &[VariantPathElement::from(field.name().as_str())]; - shredded_get_path(&target, path, Some(field), cast_options) - }) - .collect::>>()?; - - return Ok(Arc::new(StructArray::try_new( - fields.clone(), - children, - target.nulls().cloned(), - )?)); + if !as_field.has_valid_extension_type::() + && let DataType::Struct(fields) = as_field.data_type() + { + if target.typed_value_column().is_none() { + return shred_basic_variant(target, VariantPath::default(), Some(as_field)); } + + let children = fields + .iter() + .map(|field| { + let path = &[VariantPathElement::from(field.name().as_str())]; + shredded_get_path(&target, path, Some(field), cast_options) + }) + .collect::>>()?; + + return Ok(Arc::new(StructArray::try_new( + fields.clone(), + children, + target.nulls().cloned(), + )?)); } // Not a struct, so directly shred the variant as the requested type diff --git a/parquet-variant/src/variant.rs b/parquet-variant/src/variant.rs index c9f175c3a610..dc1d83370377 100644 --- a/parquet-variant/src/variant.rs +++ b/parquet-variant/src/variant.rs @@ -1772,7 +1772,7 @@ impl From for Variant<'_, '_> { impl From> for Variant<'_, '_> { fn from(value: DateTime) -> Self { - if value.nanosecond() % 1000 > 0 { + if !value.nanosecond().is_multiple_of(1000) { Variant::TimestampNanos(value) } else { Variant::TimestampMicros(value) @@ -1782,7 +1782,7 @@ impl From> for Variant<'_, '_> { impl From for Variant<'_, '_> { fn from(value: NaiveDateTime) -> Self { - if value.nanosecond() % 1000 > 0 { + if !value.nanosecond().is_multiple_of(1000) { Variant::TimestampNtzNanos(value) } else { Variant::TimestampNtzMicros(value) diff --git a/parquet-variant/src/variant/metadata.rs b/parquet-variant/src/variant/metadata.rs index d5d08d204c83..a1226a8197f7 100644 --- a/parquet-variant/src/variant/metadata.rs +++ b/parquet-variant/src/variant/metadata.rs @@ -297,12 +297,12 @@ impl<'m> VariantMetadata<'m> { }, )?; - if let Some(prev_val) = prev_value { - if current_value <= prev_val { - return Err(ArrowError::InvalidArgumentError( - "dictionary values are not unique and ordered".to_string(), - )); - } + if let Some(prev_val) = prev_value + && current_value <= prev_val + { + return Err(ArrowError::InvalidArgumentError( + "dictionary values are not unique and ordered".to_string(), + )); } prev_value = Some(current_value); diff --git a/parquet-variant/src/variant/object.rs b/parquet-variant/src/variant/object.rs index bb91584cefa6..cf563d9e3575 100644 --- a/parquet-variant/src/variant/object.rs +++ b/parquet-variant/src/variant/object.rs @@ -266,12 +266,12 @@ impl<'m, 'v> VariantObject<'m, 'v> { for field_id in field_ids_iter { let next_field_name = self.metadata.get(field_id)?; - if let Some(current_name) = current_field_name { - if next_field_name < current_name { - return Err(ArrowError::InvalidArgumentError( - "field names not sorted".to_string(), - )); - } + if let Some(current_name) = current_field_name + && next_field_name < current_name + { + return Err(ArrowError::InvalidArgumentError( + "field names not sorted".to_string(), + )); } current_field_name = Some(next_field_name); } diff --git a/parquet/src/arrow/array_reader/fixed_len_byte_array.rs b/parquet/src/arrow/array_reader/fixed_len_byte_array.rs index e895f7992148..91e5076f45bf 100644 --- a/parquet/src/arrow/array_reader/fixed_len_byte_array.rs +++ b/parquet/src/arrow/array_reader/fixed_len_byte_array.rs @@ -462,15 +462,13 @@ impl ColumnValueDecoder for ValueDecoder { Some(x) => assert_eq!(x, self.byte_length), None => { out.byte_length = Some(self.byte_length); - // TODO: collapse to a let-chain once MSRV ≥ 1.88 - // (`if out.buffer.is_empty() && let Some(cap) = out.values_capacity.take()`) - if out.buffer.is_empty() { - if let Some(values_capacity) = out.values_capacity.take() { - // now that the byte length per output element is known, - // allocate the actual needed space. - let byte_capacity = values_capacity.saturating_mul(self.byte_length); - out.buffer = Vec::with_capacity(byte_capacity); - } + if out.buffer.is_empty() + && let Some(values_capacity) = out.values_capacity.take() + { + // now that the byte length per output element is known, + // allocate the actual needed space. + let byte_capacity = values_capacity.saturating_mul(self.byte_length); + out.buffer = Vec::with_capacity(byte_capacity); } } } diff --git a/parquet/src/arrow/arrow_reader/read_plan.rs b/parquet/src/arrow/arrow_reader/read_plan.rs index 7ca54f961e2a..0c6b9af3bccc 100644 --- a/parquet/src/arrow/arrow_reader/read_plan.rs +++ b/parquet/src/arrow/arrow_reader/read_plan.rs @@ -268,11 +268,11 @@ impl ReadPlanBuilder { // reader would have produced — rows past the early break are marked // "not selected". When no limit is set the loop always exhausts and // no padding is needed. - if let Some(expected) = expected_rows { - if processed_rows < expected { - let pad_len = expected - processed_rows; - filters.push(BooleanArray::new(BooleanBuffer::new_unset(pad_len), None)); - } + if let Some(expected) = expected_rows + && processed_rows < expected + { + let pad_len = expected - processed_rows; + filters.push(BooleanArray::new(BooleanBuffer::new_unset(pad_len), None)); } // If the predicate selected all rows and there is no prior selection, @@ -541,13 +541,13 @@ mod tests { let data: Vec = (0..TOTAL_ROWS as i32).collect(); let levels = vec![0; TOTAL_ROWS]; - let leaf = make_int32_page_reader(&data, &levels, &levels, 0, 0); + let leaf = make_int32_page_reader(&data, &levels, &levels, 0, 0, None); let struct_type = ArrowType::Struct(Fields::from(vec![Field::new( "c0", ArrowType::Int32, false, )])); - let struct_reader = StructArrayReader::new(struct_type, vec![leaf], 0, 0, false); + let struct_reader = StructArrayReader::new(struct_type, vec![leaf], 0, 0, false, None); let mut predicate = ArrowPredicateFn::new(ProjectionMask::all(), |batch| { Ok((0..batch.num_rows()) diff --git a/parquet/src/arrow/arrow_writer/levels.rs b/parquet/src/arrow/arrow_writer/levels.rs index 8abdbcc77da0..75a006b4f4af 100644 --- a/parquet/src/arrow/arrow_writer/levels.rs +++ b/parquet/src/arrow/arrow_writer/levels.rs @@ -870,11 +870,12 @@ impl LevelInfoBuilder { let len = range.end - range.start; // Fast path: entire leaf array is null - if let Some(nulls) = &info.logical_nulls { - if !matches!(info.def_levels, LevelData::Absent) && nulls.null_count() == nulls.len() { - info.extend_uniform_levels(info.max_def_level - 1, info.max_rep_level, len); - return; - } + if let Some(nulls) = &info.logical_nulls + && !matches!(info.def_levels, LevelData::Absent) + && nulls.null_count() == nulls.len() + { + info.extend_uniform_levels(info.max_def_level - 1, info.max_rep_level, len); + return; } if matches!(info.def_levels, LevelData::Absent) { diff --git a/parquet/src/arrow/arrow_writer/mod.rs b/parquet/src/arrow/arrow_writer/mod.rs index e985c183ed20..85f6ba930d21 100644 --- a/parquet/src/arrow/arrow_writer/mod.rs +++ b/parquet/src/arrow/arrow_writer/mod.rs @@ -368,45 +368,45 @@ impl ArrowWriter { ), }; - if let Some(max_rows) = self.max_row_group_row_count { - if in_progress.buffered_rows + batch.num_rows() > max_rows { - let to_write = max_rows - in_progress.buffered_rows; - let a = batch.slice(0, to_write); - let b = batch.slice(to_write, batch.num_rows() - to_write); - self.write(&a)?; - return self.write(&b); - } + if let Some(max_rows) = self.max_row_group_row_count + && in_progress.buffered_rows + batch.num_rows() > max_rows + { + let to_write = max_rows - in_progress.buffered_rows; + let a = batch.slice(0, to_write); + let b = batch.slice(to_write, batch.num_rows() - to_write); + self.write(&a)?; + return self.write(&b); } // Check byte limit: if we have buffered data, use measured average row size // to split batch proactively before exceeding byte limit - if let Some(max_bytes) = self.max_row_group_bytes { - if in_progress.buffered_rows > 0 { - let current_bytes = in_progress.get_estimated_total_bytes(); + if let Some(max_bytes) = self.max_row_group_bytes + && in_progress.buffered_rows > 0 + { + let current_bytes = in_progress.get_estimated_total_bytes(); - if current_bytes >= max_bytes { - self.flush()?; - return self.write(batch); - } + if current_bytes >= max_bytes { + self.flush()?; + return self.write(batch); + } - if let Some(avg_row_bytes) = current_bytes - .checked_div(in_progress.buffered_rows) - .filter(|avg_row_bytes| *avg_row_bytes > 0) - { - // At this point, `current_bytes < max_bytes` (checked above) - let remaining_bytes = max_bytes - current_bytes; - let rows_that_fit = remaining_bytes.checked_div(avg_row_bytes).unwrap_or(0); - - if batch.num_rows() > rows_that_fit { - if rows_that_fit > 0 { - let a = batch.slice(0, rows_that_fit); - let b = batch.slice(rows_that_fit, batch.num_rows() - rows_that_fit); - self.write(&a)?; - return self.write(&b); - } else { - self.flush()?; - return self.write(batch); - } + if let Some(avg_row_bytes) = current_bytes + .checked_div(in_progress.buffered_rows) + .filter(|avg_row_bytes| *avg_row_bytes > 0) + { + // At this point, `current_bytes < max_bytes` (checked above) + let remaining_bytes = max_bytes - current_bytes; + let rows_that_fit = remaining_bytes.checked_div(avg_row_bytes).unwrap_or(0); + + if batch.num_rows() > rows_that_fit { + if rows_that_fit > 0 { + let a = batch.slice(0, rows_that_fit); + let b = batch.slice(rows_that_fit, batch.num_rows() - rows_that_fit); + self.write(&a)?; + return self.write(&b); + } else { + self.flush()?; + return self.write(batch); } } } diff --git a/parquet/src/arrow/async_reader/store.rs b/parquet/src/arrow/async_reader/store.rs index d47ca744d8f6..a23840368b1e 100644 --- a/parquet/src/arrow/async_reader/store.rs +++ b/parquet/src/arrow/async_reader/store.rs @@ -225,14 +225,13 @@ impl AsyncFileReader for ParquetObjectReader { // When page_index_policy is Skip (default), use the reader's preload flags. // When page_index_policy is Optional or Required, override the preload flags // to ensure the specified policy takes precedence. - if let Some(options) = options { - if options.column_index_policy() != PageIndexPolicy::Skip - || options.offset_index_policy() != PageIndexPolicy::Skip - { - metadata = metadata - .with_column_index_policy(options.column_index_policy()) - .with_offset_index_policy(options.offset_index_policy()); - } + if let Some(options) = options + && (options.column_index_policy() != PageIndexPolicy::Skip + || options.offset_index_policy() != PageIndexPolicy::Skip) + { + metadata = metadata + .with_column_index_policy(options.column_index_policy()) + .with_offset_index_policy(options.offset_index_policy()); } let metadata = if let Some(file_size) = self.file_size { diff --git a/parquet/src/arrow/buffer/offset_buffer.rs b/parquet/src/arrow/buffer/offset_buffer.rs index ab67694c2977..1f940f7cfd29 100644 --- a/parquet/src/arrow/buffer/offset_buffer.rs +++ b/parquet/src/arrow/buffer/offset_buffer.rs @@ -62,15 +62,13 @@ impl OffsetBuffer { /// UTF-8. This should be done by calling [`Self::check_valid_utf8`] after /// all data has been written pub fn try_push(&mut self, data: &[u8], validate_utf8: bool) -> Result<()> { - if validate_utf8 { - if let Some(&b) = data.first() { - // A valid code-point iff it does not start with 0b10xxxxxx - // Bit-magic taken from `std::str::is_char_boundary` - if (b as i8) < -0x40 { - return Err(ParquetError::General( - "encountered non UTF-8 data".to_string(), - )); - } + if validate_utf8 && let Some(&b) = data.first() { + // A valid code-point iff it does not start with 0b10xxxxxx + // Bit-magic taken from `std::str::is_char_boundary` + if (b as i8) < -0x40 { + return Err(ParquetError::General( + "encountered non UTF-8 data".to_string(), + )); } } diff --git a/parquet/src/arrow/push_decoder/reader_builder/mod.rs b/parquet/src/arrow/push_decoder/reader_builder/mod.rs index dacf1a2caad9..23cddd389abe 100644 --- a/parquet/src/arrow/push_decoder/reader_builder/mod.rs +++ b/parquet/src/arrow/push_decoder/reader_builder/mod.rs @@ -158,10 +158,10 @@ impl RowBudget { *offset = offset.saturating_sub(rows_before_budget - rows_after_budget); } - if rows_after_budget != 0 { - if let Some(limit) = &mut self.limit { - *limit -= rows_after_budget; - } + if rows_after_budget != 0 + && let Some(limit) = &mut self.limit + { + *limit -= rows_after_budget; } self diff --git a/parquet/src/column/chunker/cdc.rs b/parquet/src/column/chunker/cdc.rs index b40dd74a8d83..3f5b6afeceb0 100644 --- a/parquet/src/column/chunker/cdc.rs +++ b/parquet/src/column/chunker/cdc.rs @@ -811,17 +811,17 @@ mod arrow_tests { let arr: BooleanArray = (0..length) .map(|i| { let val = test_hash(seed, i as u64); - if val % 10 == 0 { + if val.is_multiple_of(10) { None } else { - Some(val % 2 == 0) + Some(val.is_multiple_of(2)) } }) .collect(); Arc::new(arr) } else { let arr: BooleanArray = (0..length) - .map(|i| Some(test_hash(seed, i as u64) % 2 == 0)) + .map(|i| Some(test_hash(seed, i as u64).is_multiple_of(2))) .collect(); Arc::new(arr) } @@ -839,7 +839,7 @@ mod arrow_tests { (0..length) .map(|i| { let val = test_hash(seed, i as u64); - if val % 10 == 0 { + if val.is_multiple_of(10) { None } else { Some(format!("str_{val}")) @@ -858,7 +858,7 @@ mod arrow_tests { (0..length) .map(|i| { let val = test_hash(seed, i as u64); - if val % 10 == 0 { + if val.is_multiple_of(10) { None } else { Some(format!("bin_{val}").into_bytes()) @@ -877,7 +877,7 @@ mod arrow_tests { let mut builder = arrow_array::builder::FixedSizeBinaryBuilder::new(size); for i in 0..length { let val = test_hash(seed, i as u64); - if nullable && val % 10 == 0 { + if nullable && val.is_multiple_of(10) { builder.append_null(); } else { let s = format!("bin_{val}"); diff --git a/parquet/src/column/reader.rs b/parquet/src/column/reader.rs index 81960abbe496..0bf5dd822dbf 100644 --- a/parquet/src/column/reader.rs +++ b/parquet/src/column/reader.rs @@ -333,12 +333,12 @@ where .then_some(metadata.num_levels)? }); - if let Some(rows) = rows { - if rows <= remaining_records { - self.page_reader.skip_next_page()?; - remaining_records -= rows; - continue; - } + if let Some(rows) = rows + && rows <= remaining_records + { + self.page_reader.skip_next_page()?; + remaining_records -= rows; + continue; } // because self.num_buffered_values == self.num_decoded_values means // we need reads a new page and set up the decoders for levels diff --git a/parquet/src/encodings/decoding.rs b/parquet/src/encodings/decoding.rs index 0fb960412295..1e117799adcf 100644 --- a/parquet/src/encodings/decoding.rs +++ b/parquet/src/encodings/decoding.rs @@ -703,14 +703,14 @@ where self.first_value = Some(T::T::from_i64(first_value).ok_or_else(|| general_err!("first value too large"))?); - if self.block_size % 128 != 0 { + if !self.block_size.is_multiple_of(128) { return Err(general_err!( "'block_size' must be a multiple of 128, got {}", self.block_size )); } - if self.block_size % self.mini_blocks_per_block != 0 { + if !self.block_size.is_multiple_of(self.mini_blocks_per_block) { return Err(general_err!( "'block_size' must be a multiple of 'mini_blocks_per_block' got {} and {}", self.block_size, @@ -724,7 +724,7 @@ where self.mini_block_remaining = 0; self.mini_block_bit_widths.clear(); - if self.values_per_mini_block % 32 != 0 { + if !self.values_per_mini_block.is_multiple_of(32) { return Err(general_err!( "'values_per_mini_block' must be a multiple of 32 got {}", self.values_per_mini_block diff --git a/parquet/src/encodings/decoding/byte_stream_split_decoder.rs b/parquet/src/encodings/decoding/byte_stream_split_decoder.rs index b72ae8f62c34..2e26e4d3aae6 100644 --- a/parquet/src/encodings/decoding/byte_stream_split_decoder.rs +++ b/parquet/src/encodings/decoding/byte_stream_split_decoder.rs @@ -160,7 +160,7 @@ impl VariableWidthByteStreamSplitDecoder { impl Decoder for VariableWidthByteStreamSplitDecoder { fn set_data(&mut self, data: Bytes, num_values: usize) -> Result<()> { // Rough check that all data elements are the same length - if data.len() % self.type_width != 0 { + if !data.len().is_multiple_of(self.type_width) { return Err(general_err!( "Input data length is not a multiple of type width {}", self.type_width diff --git a/parquet/src/file/metadata/reader.rs b/parquet/src/file/metadata/reader.rs index 844ae747c7c9..d695610f20e2 100644 --- a/parquet/src/file/metadata/reader.rs +++ b/parquet/src/file/metadata/reader.rs @@ -569,10 +569,10 @@ impl ParquetMetaDataReader { /// file footer (8 bytes). Otherwise returns `8`. #[cfg(all(feature = "async", feature = "arrow"))] fn get_prefetch_size(&self) -> usize { - if let Some(prefetch) = self.prefetch_hint { - if prefetch > FOOTER_SIZE { - return prefetch; - } + if let Some(prefetch) = self.prefetch_hint + && prefetch > FOOTER_SIZE + { + return prefetch; } FOOTER_SIZE } diff --git a/parquet/src/file/metadata/thrift/mod.rs b/parquet/src/file/metadata/thrift/mod.rs index d5a0112a5e1a..65ad0f51d99b 100644 --- a/parquet/src/file/metadata/thrift/mod.rs +++ b/parquet/src/file/metadata/thrift/mod.rs @@ -223,15 +223,15 @@ fn convert_stats( }; fn check_len(min: &Option<&[u8]>, max: &Option<&[u8]>, len: usize) -> Result<()> { - if let Some(min) = min { - if min.len() < len { - return Err(general_err!("Insufficient bytes to parse min statistic",)); - } + if let Some(min) = min + && min.len() < len + { + return Err(general_err!("Insufficient bytes to parse min statistic",)); } - if let Some(max) = max { - if max.len() < len { - return Err(general_err!("Insufficient bytes to parse max statistic",)); - } + if let Some(max) = max + && max.len() < len + { + return Err(general_err!("Insufficient bytes to parse max statistic",)); } Ok(()) } diff --git a/parquet/src/file/page_index/column_index.rs b/parquet/src/file/page_index/column_index.rs index 2f90b3d8e565..842e9cde9872 100644 --- a/parquet/src/file/page_index/column_index.rs +++ b/parquet/src/file/page_index/column_index.rs @@ -113,27 +113,29 @@ impl PrimitiveColumnIndex { max_bytes.len() ))); } - if let Some(ref nc) = null_counts { - if nc.len() != len { - return Err(ParquetError::General(format!( - "ColumnIndex null_counts length mismatch: expected {len}, got {}", - nc.len() - ))); - } + if let Some(ref nc) = null_counts + && nc.len() != len + { + return Err(ParquetError::General(format!( + "ColumnIndex null_counts length mismatch: expected {len}, got {}", + nc.len() + ))); } - if let Some(ref rep) = repetition_level_histograms { - if len != 0 && rep.len() % len != 0 { - return Err(ParquetError::General( - "Invalid repetition_level_histograms length".to_string(), - )); - } + if let Some(ref rep) = repetition_level_histograms + && len != 0 + && rep.len() % len != 0 + { + return Err(ParquetError::General( + "Invalid repetition_level_histograms length".to_string(), + )); } - if let Some(ref def) = definition_level_histograms { - if len != 0 && def.len() % len != 0 { - return Err(ParquetError::General( - "Invalid definition_level_histograms length".to_string(), - )); - } + if let Some(ref def) = definition_level_histograms + && len != 0 + && def.len() % len != 0 + { + return Err(ParquetError::General( + "Invalid definition_level_histograms length".to_string(), + )); } let mut min_values = Vec::with_capacity(len); @@ -322,27 +324,29 @@ impl ByteArrayColumnIndex { max_values.len() ))); } - if let Some(ref nc) = null_counts { - if nc.len() != len { - return Err(ParquetError::General(format!( - "ColumnIndex null_counts length mismatch: expected {len}, got {}", - nc.len() - ))); - } - } - if let Some(ref rep) = repetition_level_histograms { - if len != 0 && rep.len() % len != 0 { - return Err(ParquetError::General( - "Invalid repetition_level_histograms length".to_string(), - )); - } + if let Some(ref nc) = null_counts + && nc.len() != len + { + return Err(ParquetError::General(format!( + "ColumnIndex null_counts length mismatch: expected {len}, got {}", + nc.len() + ))); } - if let Some(ref def) = definition_level_histograms { - if len != 0 && def.len() % len != 0 { - return Err(ParquetError::General( - "Invalid definition_level_histograms length".to_string(), - )); - } + if let Some(ref rep) = repetition_level_histograms + && len != 0 + && rep.len() % len != 0 + { + return Err(ParquetError::General( + "Invalid repetition_level_histograms length".to_string(), + )); + } + if let Some(ref def) = definition_level_histograms + && len != 0 + && def.len() % len != 0 + { + return Err(ParquetError::General( + "Invalid definition_level_histograms length".to_string(), + )); } let min_len = min_values.iter().map(|&v| v.len()).sum(); diff --git a/parquet/src/file/properties.rs b/parquet/src/file/properties.rs index 19191f601846..42b0e124a5dc 100644 --- a/parquet/src/file/properties.rs +++ b/parquet/src/file/properties.rs @@ -1755,10 +1755,10 @@ impl ColumnProperties { /// If bloom filter is enabled and NDV was not explicitly set, resolve it to the /// given `default_ndv` (typically derived from `max_row_group_row_count`). fn resolve_bloom_filter_ndv(&mut self, default_ndv: u64) { - if !self.bloom_filter_ndv_is_set { - if let Some(ref mut bf) = self.bloom_filter_properties { - bf.ndv = default_ndv; - } + if !self.bloom_filter_ndv_is_set + && let Some(ref mut bf) = self.bloom_filter_properties + { + bf.ndv = default_ndv; } } } diff --git a/parquet/src/file/serialized_reader.rs b/parquet/src/file/serialized_reader.rs index 113e5203c94b..7c9b33132e69 100644 --- a/parquet/src/file/serialized_reader.rs +++ b/parquet/src/file/serialized_reader.rs @@ -283,10 +283,10 @@ impl SerializedFileReader { fn get_midpoint_offset(meta: &RowGroupMetaData) -> i64 { let col = meta.column(0); let mut offset = col.data_page_offset(); - if let Some(dic_offset) = col.dictionary_page_offset() { - if offset > dic_offset { - offset = dic_offset - } + if let Some(dic_offset) = col.dictionary_page_offset() + && offset > dic_offset + { + offset = dic_offset }; offset + meta.compressed_size() / 2 } diff --git a/parquet/src/file/statistics.rs b/parquet/src/file/statistics.rs index 9682fd54b8df..20c7c7a888db 100644 --- a/parquet/src/file/statistics.rs +++ b/parquet/src/file/statistics.rs @@ -155,19 +155,19 @@ pub(crate) fn from_thrift_page_stats( }; fn check_len(min: &Option>, max: &Option>, len: usize) -> Result<()> { - if let Some(min) = min { - if min.len() < len { - return Err(ParquetError::General( - "Insufficient bytes to parse min statistic".to_string(), - )); - } + if let Some(min) = min + && min.len() < len + { + return Err(ParquetError::General( + "Insufficient bytes to parse min statistic".to_string(), + )); } - if let Some(max) = max { - if max.len() < len { - return Err(ParquetError::General( - "Insufficient bytes to parse max statistic".to_string(), - )); - } + if let Some(max) = max + && max.len() < len + { + return Err(ParquetError::General( + "Insufficient bytes to parse max statistic".to_string(), + )); } Ok(()) } diff --git a/parquet/src/file/writer.rs b/parquet/src/file/writer.rs index 8ec16ba36739..45eae1dd12a3 100644 --- a/parquet/src/file/writer.rs +++ b/parquet/src/file/writer.rs @@ -1028,10 +1028,10 @@ impl PageWriter for SerializedPageWriter<'_, W> { spec.bytes_written = self.sink.bytes_written() as u64 - start_pos; spec.num_values = page.num_values(); - if let Some(page_encryptor) = self.page_encryptor_mut() { - if page.compressed_page().is_data_page() { - page_encryptor.increment_page(); - } + if let Some(page_encryptor) = self.page_encryptor_mut() + && page.compressed_page().is_data_page() + { + page_encryptor.increment_page(); } Ok(spec) } diff --git a/parquet/src/schema/types.rs b/parquet/src/schema/types.rs index 1f9b8590fcf6..84fe17a70d1a 100644 --- a/parquet/src/schema/types.rs +++ b/parquet/src/schema/types.rs @@ -1279,12 +1279,15 @@ fn build_tree<'a>( /// Checks if the logical type is valid. fn check_logical_type(logical_type: &Option) -> Result<()> { - if let Some(LogicalType::Integer(IntType { bit_width, .. })) = logical_type { - if *bit_width != 8 && *bit_width != 16 && *bit_width != 32 && *bit_width != 64 { - return Err(general_err!( - "Bit width must be 8, 16, 32, or 64 for Integer logical type" - )); - } + if let Some(LogicalType::Integer(IntType { bit_width, .. })) = logical_type + && *bit_width != 8 + && *bit_width != 16 + && *bit_width != 32 + && *bit_width != 64 + { + return Err(general_err!( + "Bit width must be 8, 16, 32, or 64 for Integer logical type" + )); } Ok(()) } diff --git a/parquet/src/util/bit_util.rs b/parquet/src/util/bit_util.rs index d5f5945a12a9..b8ee65967879 100644 --- a/parquet/src/util/bit_util.rs +++ b/parquet/src/util/bit_util.rs @@ -1371,7 +1371,7 @@ mod tests { StandardUniform: Distribution, { assert!(num_bits <= 32); - assert!(total % 2 == 0); + assert!(total.is_multiple_of(2)); let aligned_value_byte_width = std::mem::size_of::(); let value_byte_width = ceil(num_bits, 8); diff --git a/parquet_derive/src/parquet_field.rs b/parquet_derive/src/parquet_field.rs index e332ea21aa4a..7501700868b8 100644 --- a/parquet_derive/src/parquet_field.rs +++ b/parquet_derive/src/parquet_field.rs @@ -617,17 +617,17 @@ impl Type { match leaf_type { Type::Array(first_type, _length) => { - if let Type::TypePath(_) = **first_type { - if last_part == "u8" { - return BasicType::FIXED_LEN_BYTE_ARRAY; - } + if let Type::TypePath(_) = **first_type + && last_part == "u8" + { + return BasicType::FIXED_LEN_BYTE_ARRAY; } } Type::Vec(first_type) | Type::Slice(first_type) => { - if let Type::TypePath(_) = **first_type { - if last_part == "u8" { - return BasicType::BYTE_ARRAY; - } + if let Type::TypePath(_) = **first_type + && last_part == "u8" + { + return BasicType::BYTE_ARRAY; } } _ => (), @@ -658,12 +658,11 @@ impl Type { let leaf_type = self.leaf_type_recursive(); // `[u8; N]` => Some(N) - if let Type::Array(first_type, length) = leaf_type { - if let Type::TypePath(_) = **first_type { - if last_part == "u8" { - return Some(length.clone()); - } - } + if let Type::Array(first_type, length) = leaf_type + && let Type::TypePath(_) = **first_type + && last_part == "u8" + { + return Some(length.clone()); } match last_part.trim() { @@ -679,17 +678,17 @@ impl Type { match leaf_type { Type::Array(first_type, _length) => { - if let Type::TypePath(_) = **first_type { - if last_part == "u8" { - return quote! { None }; - } + if let Type::TypePath(_) = **first_type + && last_part == "u8" + { + return quote! { None }; } } Type::Vec(first_type) | Type::Slice(first_type) => { - if let Type::TypePath(_) = **first_type { - if last_part == "u8" { - return quote! { None }; - } + if let Type::TypePath(_) = **first_type + && last_part == "u8" + { + return quote! { None }; } } _ => (),