Skip to content

Commit 7615c87

Browse files
committed
Add mapping
1 parent 20a138e commit 7615c87

2 files changed

Lines changed: 10 additions & 7 deletions

File tree

pyiceberg/avro/codecs/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,3 +47,6 @@
4747
"zstandard": ZStandardCodec,
4848
"deflate": DeflateCodec,
4949
}
50+
51+
# Map to convert the naming from Iceberg to Avro
52+
CODEC_MAPPING_ICEBERG_TO_AVRO: Dict[str, str] = {"gzip": "deflate", "zstd": "zstandard"}

pyiceberg/avro/file.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
TypeVar,
3636
)
3737

38-
from pyiceberg.avro.codecs import AVRO_CODEC_KEY, KNOWN_CODECS
38+
from pyiceberg.avro.codecs import AVRO_CODEC_KEY, CODEC_MAPPING_ICEBERG_TO_AVRO, KNOWN_CODECS
3939
from pyiceberg.avro.codecs.codec import Codec
4040
from pyiceberg.avro.decoder import BinaryDecoder, new_decoder
4141
from pyiceberg.avro.encoder import BinaryEncoder
@@ -279,13 +279,13 @@ def __exit__(
279279
def _write_header(self) -> None:
280280
from pyiceberg.table import TableProperties
281281

282-
codec = self.metadata.get(AVRO_CODEC_KEY, TableProperties.WRITE_AVRO_COMPRESSION_DEFAULT)
283-
if codec == "gzip":
284-
codec = "deflate"
282+
codec_name = self.metadata.get(AVRO_CODEC_KEY, TableProperties.WRITE_AVRO_COMPRESSION_DEFAULT)
283+
if avro_codec_name := CODEC_MAPPING_ICEBERG_TO_AVRO.get(codec_name):
284+
codec_name = avro_codec_name
285285

286286
json_schema = json.dumps(AvroSchemaConversion().iceberg_to_avro(self.file_schema, schema_name=self.schema_name))
287287

288-
meta = {**self.metadata, _SCHEMA_KEY: json_schema, AVRO_CODEC_KEY: codec}
288+
meta = {**self.metadata, _SCHEMA_KEY: json_schema, AVRO_CODEC_KEY: codec_name}
289289
header = AvroFileHeader(MAGIC, meta, self.sync_bytes)
290290
construct_writer(META_SCHEMA).write(self.encoder, header)
291291

@@ -299,8 +299,8 @@ def compression_codec(self) -> Optional[Type[Codec]]:
299299

300300
codec_name = self.metadata.get(AVRO_CODEC_KEY, TableProperties.WRITE_AVRO_COMPRESSION_DEFAULT)
301301

302-
if codec_name == "gzip":
303-
codec_name = "deflate"
302+
if avro_codec_name := CODEC_MAPPING_ICEBERG_TO_AVRO.get(codec_name):
303+
codec_name = avro_codec_name
304304

305305
if codec_name not in KNOWN_CODECS:
306306
raise ValueError(f"Unsupported codec: {codec_name}")

0 commit comments

Comments
 (0)