8181 expression_to_pyarrow ,
8282 parquet_path_to_id_mapping ,
8383 schema_to_pyarrow ,
84+ write_file ,
8485)
8586from pyiceberg .manifest import DataFile , DataFileContent , FileFormat
8687from pyiceberg .partitioning import PartitionField , PartitionSpec
@@ -2744,7 +2745,10 @@ def test__to_requested_schema_timestamptz_to_timestamp_projection() -> None:
27442745 # table schema expects timestamp without timezone
27452746 table_schema = Schema (NestedField (1 , "ts_field" , TimestampType (), required = False ))
27462747
2747- actual_result = _to_requested_schema (table_schema , file_schema , batch , downcast_ns_timestamp_to_us = True )
2748+ # allow_timestamp_tz_mismatch=True enables reading timestamptz as timestamp
2749+ actual_result = _to_requested_schema (
2750+ table_schema , file_schema , batch , downcast_ns_timestamp_to_us = True , allow_timestamp_tz_mismatch = True
2751+ )
27482752 expected = pa .record_batch (
27492753 [
27502754 pa .array (
@@ -2762,6 +2766,66 @@ def test__to_requested_schema_timestamptz_to_timestamp_projection() -> None:
27622766 assert expected .equals (actual_result )
27632767
27642768
def test__to_requested_schema_timestamptz_to_timestamp_write_rejects() -> None:
    """Verify the default (write-path) projection refuses timestamptz -> timestamp.

    With allow_timestamp_tz_mismatch=False, projecting a timestamptz file column
    onto a timestamp table column must raise, so writes keep the Iceberg spec
    distinction between the two types. The read path can be more permissive
    (like Spark) by passing allow_timestamp_tz_mismatch=True.
    """
    # Data as the file holds it: a tz-aware (UTC) microsecond timestamp column.
    tz_aware_values = pa.array(
        [
            datetime(2025, 8, 14, 12, 0, 0, tzinfo=timezone.utc),
            datetime(2025, 8, 14, 13, 0, 0, tzinfo=timezone.utc),
        ],
        type=pa.timestamp("us", tz="UTC"),
    )
    batch = pa.record_batch([tz_aware_values], names=["ts_field"])
    file_schema = Schema(NestedField(1, "ts_field", TimestamptzType(), required=False))

    # The table declares the same field id as a timestamp without timezone.
    table_schema = Schema(NestedField(1, "ts_field", TimestampType(), required=False))

    # The flag is passed explicitly as False to pin the strict behaviour.
    with pytest.raises(ValueError, match="Unsupported schema projection"):
        _to_requested_schema(
            table_schema,
            file_schema,
            batch,
            downcast_ns_timestamp_to_us=True,
            allow_timestamp_tz_mismatch=False,
        )
2798+
2799+
def test_write_file_rejects_timestamptz_to_timestamp(tmp_path: Path) -> None:
    """Verify write_file raises when timestamptz data targets a timestamp column."""
    from pyiceberg.table import WriteTask

    # Incoming data is tz-aware, and the task schema describes it as timestamptz.
    utc_noon = datetime(2025, 8, 14, 12, 0, 0, tzinfo=timezone.utc)
    arrow_data = pa.table({"ts_field": [utc_noon]})
    task_schema = Schema(NestedField(1, "ts_field", TimestamptzType(), required=False))

    # The table itself declares the field as a timestamp without timezone.
    table_schema = Schema(NestedField(1, "ts_field", TimestampType(), required=False))
    table_metadata = TableMetadataV2(
        location=f"file://{tmp_path}",
        last_column_id=1,
        format_version=2,
        schemas=[table_schema],
        partition_specs=[PartitionSpec()],
    )

    task = WriteTask(
        write_uuid=uuid.uuid4(),
        task_id=0,
        record_batches=arrow_data.to_batches(),
        schema=task_schema,
    )

    with pytest.raises(ValueError, match="Unsupported schema projection"):
        # Both the call and the consumption of its result stay inside the
        # raises block, so the error is caught whichever of the two raises it.
        written = write_file(io=PyArrowFileIO(), table_metadata=table_metadata, tasks=iter([task]))
        list(written)
2827+
2828+
27652829def test__to_requested_schema_timestamps (
27662830 arrow_table_schema_with_all_timestamp_precisions : pa .Schema ,
27672831 arrow_table_with_all_timestamp_precisions : pa .Table ,
0 commit comments