From e8110ec460fa0c1ea5dd00bfdd086e20b0719e83 Mon Sep 17 00:00:00 2001 From: Richard Date: Wed, 1 Jul 2026 18:21:33 -0400 Subject: [PATCH 1/2] create dictionary reader config and default unsafeflag to false --- arrow-flight/src/decode.rs | 3 +- .../integration_test.rs | 2 +- .../integration_test.rs | 2 +- arrow-ipc/src/reader.rs | 32 ++++++++++++++++--- 4 files changed, 32 insertions(+), 7 deletions(-) diff --git a/arrow-flight/src/decode.rs b/arrow-flight/src/decode.rs index 1f64924932d4..cc93ad7ab008 100644 --- a/arrow-flight/src/decode.rs +++ b/arrow-flight/src/decode.rs @@ -313,7 +313,8 @@ impl FlightDataDecoder { dictionary_batch, &state.schema, &mut state.dictionaries_by_field, - &message.version(), + arrow_ipc::reader::DictionaryConfig::new(message.version()) + .with_skip_validation(self.skip_validation.clone()), ) .map_err(|e| { FlightError::DecodeError(format!("Error decoding ipc dictionary: {e}")) diff --git a/arrow-integration-testing/src/flight_client_scenarios/integration_test.rs b/arrow-integration-testing/src/flight_client_scenarios/integration_test.rs index 05ca5627ecd8..dbd83902de08 100644 --- a/arrow-integration-testing/src/flight_client_scenarios/integration_test.rs +++ b/arrow-integration-testing/src/flight_client_scenarios/integration_test.rs @@ -308,7 +308,7 @@ async fn receive_batch_flight_data( .expect("Error parsing dictionary"), &schema, dictionaries_by_id, - &message.version(), + reader::DictionaryConfig::new(message.version()), ) .expect("Error reading dictionary"); diff --git a/arrow-integration-testing/src/flight_server_scenarios/integration_test.rs b/arrow-integration-testing/src/flight_server_scenarios/integration_test.rs index ae316886381a..975420479ea4 100644 --- a/arrow-integration-testing/src/flight_server_scenarios/integration_test.rs +++ b/arrow-integration-testing/src/flight_server_scenarios/integration_test.rs @@ -356,7 +356,7 @@ async fn dictionary_from_message( ipc_batch, &schema_ref, dictionaries_by_id, - &message.version(), + reader::DictionaryConfig::new(message.version()), ); dictionary_batch_result .map_err(|e| Status::internal(format!("Could not convert to Dictionary: {e:?}"))) diff --git a/arrow-ipc/src/reader.rs b/arrow-ipc/src/reader.rs index e6f45c4cd075..d89eb1fdc0b2 100644 --- a/arrow-ipc/src/reader.rs +++ b/arrow-ipc/src/reader.rs @@ -778,23 +778,47 @@ pub fn read_record_batch( .read_record_batch() } -/// Read the dictionary from the buffer and provided metadata, +/// Configuration for [`read_dictionary`] +pub struct DictionaryConfig { + /// The IPC metadata version + pub metadata: MetadataVersion, + /// Whether to skip validation of the dictionary data + pub skip_validation: UnsafeFlag, +} + +impl DictionaryConfig { + /// Create a new `DictionaryConfig` with the given metadata version and validation enabled + pub fn new(metadata: MetadataVersion) -> Self { + Self { + metadata, + skip_validation: UnsafeFlag::new(), + } + } + + /// Set the skip validation flag + pub fn with_skip_validation(mut self, skip_validation: UnsafeFlag) -> Self { + self.skip_validation = skip_validation; + self + } +} + +/// Read the dictionary from the buffer and provided config, /// updating the `dictionaries_by_id` with the resulting dictionary pub fn read_dictionary( buf: &Buffer, batch: crate::DictionaryBatch, schema: &Schema, dictionaries_by_id: &mut HashMap, - metadata: &MetadataVersion, + config: DictionaryConfig, ) -> Result<(), ArrowError> { read_dictionary_impl( buf, batch, schema, dictionaries_by_id, - metadata, + &config.metadata, false, - UnsafeFlag::new(), + config.skip_validation, ) } From 82fd3b896620c9a418dd0509e35f46346cfbe7d0 Mon Sep 17 00:00:00 2001 From: Richard Date: Wed, 1 Jul 2026 18:36:11 -0400 Subject: [PATCH 2/2] make method public approach --- arrow-flight/src/decode.rs | 7 ++-- .../integration_test.rs | 2 +- .../integration_test.rs | 2 +- arrow-ipc/src/reader.rs | 35 ++++--------------- 4 files changed, 12 insertions(+), 34 deletions(-) diff --git a/arrow-flight/src/decode.rs b/arrow-flight/src/decode.rs index cc93ad7ab008..3460bf7080e9 100644 --- a/arrow-flight/src/decode.rs +++ b/arrow-flight/src/decode.rs @@ -308,13 +308,14 @@ impl FlightDataDecoder { ) })?; - arrow_ipc::reader::read_dictionary( + arrow_ipc::reader::read_dictionary_impl( &buffer, dictionary_batch, &state.schema, &mut state.dictionaries_by_field, - arrow_ipc::reader::DictionaryConfig::new(message.version()) - .with_skip_validation(self.skip_validation.clone()), + &message.version(), + false, + self.skip_validation.clone(), ) .map_err(|e| { FlightError::DecodeError(format!("Error decoding ipc dictionary: {e}")) diff --git a/arrow-integration-testing/src/flight_client_scenarios/integration_test.rs b/arrow-integration-testing/src/flight_client_scenarios/integration_test.rs index dbd83902de08..05ca5627ecd8 100644 --- a/arrow-integration-testing/src/flight_client_scenarios/integration_test.rs +++ b/arrow-integration-testing/src/flight_client_scenarios/integration_test.rs @@ -308,7 +308,7 @@ async fn receive_batch_flight_data( .expect("Error parsing dictionary"), &schema, dictionaries_by_id, - reader::DictionaryConfig::new(message.version()), + &message.version(), ) .expect("Error reading dictionary"); diff --git a/arrow-integration-testing/src/flight_server_scenarios/integration_test.rs b/arrow-integration-testing/src/flight_server_scenarios/integration_test.rs index 975420479ea4..ae316886381a 100644 --- a/arrow-integration-testing/src/flight_server_scenarios/integration_test.rs +++ b/arrow-integration-testing/src/flight_server_scenarios/integration_test.rs @@ -356,7 +356,7 @@ async fn dictionary_from_message( ipc_batch, &schema_ref, dictionaries_by_id, - reader::DictionaryConfig::new(message.version()), + &message.version(), ); dictionary_batch_result .map_err(|e| Status::internal(format!("Could not convert to Dictionary: {e:?}"))) diff --git a/arrow-ipc/src/reader.rs b/arrow-ipc/src/reader.rs index d89eb1fdc0b2..dda655504658 100644 --- a/arrow-ipc/src/reader.rs +++ b/arrow-ipc/src/reader.rs @@ -778,51 +778,28 @@ pub fn read_record_batch( .read_record_batch() } -/// Configuration for [`read_dictionary`] -pub struct DictionaryConfig { - /// The IPC metadata version - pub metadata: MetadataVersion, - /// Whether to skip validation of the dictionary data - pub skip_validation: UnsafeFlag, -} - -impl DictionaryConfig { - /// Create a new `DictionaryConfig` with the given metadata version and validation enabled - pub fn new(metadata: MetadataVersion) -> Self { - Self { - metadata, - skip_validation: UnsafeFlag::new(), - } - } - - /// Set the skip validation flag - pub fn with_skip_validation(mut self, skip_validation: UnsafeFlag) -> Self { - self.skip_validation = skip_validation; - self - } -} - -/// Read the dictionary from the buffer and provided config, +/// Read the dictionary from the buffer and provided metadata, /// updating the `dictionaries_by_id` with the resulting dictionary pub fn read_dictionary( buf: &Buffer, batch: crate::DictionaryBatch, schema: &Schema, dictionaries_by_id: &mut HashMap, - config: DictionaryConfig, + metadata: &MetadataVersion, ) -> Result<(), ArrowError> { read_dictionary_impl( buf, batch, schema, dictionaries_by_id, - &config.metadata, + metadata, false, - config.skip_validation, + UnsafeFlag::new(), ) } -fn read_dictionary_impl( +/// Low-level version of [`read_dictionary`] with alignment and validation controls +pub fn read_dictionary_impl( buf: &Buffer, batch: crate::DictionaryBatch, schema: &Schema,