Skip to content

Commit 0763831

Browse files
committed
Merge branch 'main' of github.com:apache/iceberg-python into fd-add-deletion-vectors
2 parents f0254d3 + 278f764 commit 0763831

32 files changed

Lines changed: 1383 additions & 495 deletions

.github/dependabot.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,9 @@ updates:
2222
- package-ecosystem: "pip"
2323
directory: "/"
2424
schedule:
25-
interval: "daily"
25+
interval: "weekly"
2626
open-pull-requests-limit: 50
2727
- package-ecosystem: "github-actions"
2828
directory: "/"
2929
schedule:
30-
interval: "daily"
30+
interval: "weekly"

.github/workflows/pypi-build-artifacts.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ jobs:
6262
if: startsWith(matrix.os, 'ubuntu')
6363

6464
- name: Build wheels
65-
uses: pypa/cibuildwheel@v2.23.1
65+
uses: pypa/cibuildwheel@v2.23.2
6666
with:
6767
output-dir: wheelhouse
6868
config-file: "pyproject.toml"

.github/workflows/svn-build-artifacts.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ jobs:
5757
if: startsWith(matrix.os, 'ubuntu')
5858

5959
- name: Build wheels
60-
uses: pypa/cibuildwheel@v2.23.1
60+
uses: pypa/cibuildwheel@v2.23.2
6161
with:
6262
output-dir: wheelhouse
6363
config-file: "pyproject.toml"

dev/provision.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -339,6 +339,7 @@
339339
CREATE TABLE {catalog_name}.default.test_table_empty_list_and_map (
340340
col_list array<int>,
341341
col_map map<int, int>,
342+
col_struct struct<test:int>,
342343
col_list_with_struct array<struct<test:int>>
343344
)
344345
USING iceberg
@@ -351,8 +352,8 @@
351352
spark.sql(
352353
f"""
353354
INSERT INTO {catalog_name}.default.test_table_empty_list_and_map
354-
VALUES (null, null, null),
355-
(array(), map(), array(struct(1)))
355+
VALUES (null, null, null, null),
356+
(array(), map(), struct(1), array(struct(1)))
356357
"""
357358
)
358359

poetry.lock

Lines changed: 399 additions & 387 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyiceberg/avro/reader.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,14 @@ class TimestampReader(IntegerReader):
175175
"""
176176

177177

178+
class TimestampNanoReader(IntegerReader):
179+
"""Reads a nanosecond granularity timestamp from the stream.
180+
181+
Long is decoded as python integer which represents
182+
the number of nanoseconds from the unix epoch, 1 January 1970.
183+
"""
184+
185+
178186
class TimestamptzReader(IntegerReader):
179187
"""Reads a microsecond granularity timestamptz from the stream.
180188
@@ -185,6 +193,16 @@ class TimestamptzReader(IntegerReader):
185193
"""
186194

187195

196+
class TimestamptzNanoReader(IntegerReader):
197+
"""Reads a microsecond granularity timestamptz from the stream.
198+
199+
Long is decoded as python integer which represents
200+
the number of nanoseconds from the unix epoch, 1 January 1970.
201+
202+
Adjusted to UTC.
203+
"""
204+
205+
188206
class StringReader(Reader):
189207
def read(self, decoder: BinaryDecoder) -> str:
190208
return decoder.read_utf8()

pyiceberg/avro/resolver.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,9 @@
4444
StringReader,
4545
StructReader,
4646
TimeReader,
47+
TimestampNanoReader,
4748
TimestampReader,
49+
TimestamptzNanoReader,
4850
TimestamptzReader,
4951
UnknownReader,
5052
UUIDReader,
@@ -64,6 +66,8 @@
6466
OptionWriter,
6567
StringWriter,
6668
StructWriter,
69+
TimestampNanoWriter,
70+
TimestamptzNanoWriter,
6771
TimestamptzWriter,
6872
TimestampWriter,
6973
TimeWriter,
@@ -99,7 +103,9 @@
99103
PrimitiveType,
100104
StringType,
101105
StructType,
106+
TimestampNanoType,
102107
TimestampType,
108+
TimestamptzNanoType,
103109
TimestamptzType,
104110
TimeType,
105111
UnknownType,
@@ -184,9 +190,15 @@ def visit_time(self, time_type: TimeType) -> Writer:
184190
def visit_timestamp(self, timestamp_type: TimestampType) -> Writer:
185191
return TimestampWriter()
186192

193+
def visit_timestamp_ns(self, timestamp_ns_type: TimestampNanoType) -> Writer:
194+
return TimestampNanoWriter()
195+
187196
def visit_timestamptz(self, timestamptz_type: TimestamptzType) -> Writer:
188197
return TimestamptzWriter()
189198

199+
def visit_timestamptz_ns(self, timestamptz_ns_type: TimestamptzNanoType) -> Writer:
200+
return TimestamptzNanoWriter()
201+
190202
def visit_string(self, string_type: StringType) -> Writer:
191203
return StringWriter()
192204

@@ -332,9 +344,15 @@ def visit_time(self, time_type: TimeType, partner: Optional[IcebergType]) -> Wri
332344
def visit_timestamp(self, timestamp_type: TimestampType, partner: Optional[IcebergType]) -> Writer:
333345
return TimestampWriter()
334346

347+
def visit_timestamp_ns(self, timestamp_ns_type: TimestampNanoType, partner: Optional[IcebergType]) -> Writer:
348+
return TimestampNanoWriter()
349+
335350
def visit_timestamptz(self, timestamptz_type: TimestamptzType, partner: Optional[IcebergType]) -> Writer:
336351
return TimestamptzWriter()
337352

353+
def visit_timestamptz_ns(self, timestamptz_ns_type: TimestamptzNanoType, partner: Optional[IcebergType]) -> Writer:
354+
return TimestamptzNanoWriter()
355+
338356
def visit_string(self, string_type: StringType, partner: Optional[IcebergType]) -> Writer:
339357
return StringWriter()
340358

@@ -465,9 +483,15 @@ def visit_time(self, time_type: TimeType, partner: Optional[IcebergType]) -> Rea
465483
def visit_timestamp(self, timestamp_type: TimestampType, partner: Optional[IcebergType]) -> Reader:
466484
return TimestampReader()
467485

486+
def visit_timestamp_ns(self, timestamp_ns_type: TimestampNanoType, partner: Optional[IcebergType]) -> Reader:
487+
return TimestampNanoReader()
488+
468489
def visit_timestamptz(self, timestamptz_type: TimestamptzType, partner: Optional[IcebergType]) -> Reader:
469490
return TimestamptzReader()
470491

492+
def visit_timestamptz_ns(self, timestamptz_ns_type: TimestamptzNanoType, partner: Optional[IcebergType]) -> Reader:
493+
return TimestamptzNanoReader()
494+
471495
def visit_string(self, string_type: StringType, partner: Optional[IcebergType]) -> Reader:
472496
return StringReader()
473497

pyiceberg/avro/writer.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,12 +95,24 @@ def write(self, encoder: BinaryEncoder, val: int) -> None:
9595
encoder.write_int(val)
9696

9797

98+
@dataclass(frozen=True)
99+
class TimestampNanoWriter(Writer):
100+
def write(self, encoder: BinaryEncoder, val: int) -> None:
101+
encoder.write_int(val)
102+
103+
98104
@dataclass(frozen=True)
99105
class TimestamptzWriter(Writer):
100106
def write(self, encoder: BinaryEncoder, val: int) -> None:
101107
encoder.write_int(val)
102108

103109

110+
@dataclass(frozen=True)
111+
class TimestamptzNanoWriter(Writer):
112+
def write(self, encoder: BinaryEncoder, val: int) -> None:
113+
encoder.write_int(val)
114+
115+
104116
@dataclass(frozen=True)
105117
class StringWriter(Writer):
106118
def write(self, encoder: BinaryEncoder, val: Any) -> None:

pyiceberg/catalog/rest.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -370,7 +370,7 @@ def _fetch_access_token(self, session: Session, credential: str) -> str:
370370
except HTTPError as exc:
371371
self._handle_non_200_response(exc, {400: OAuthError, 401: OAuthError})
372372

373-
return TokenResponse(**response.json()).access_token
373+
return TokenResponse.model_validate_json(response.text).access_token
374374

375375
def _fetch_config(self) -> None:
376376
params = {}
@@ -383,7 +383,7 @@ def _fetch_config(self) -> None:
383383
response.raise_for_status()
384384
except HTTPError as exc:
385385
self._handle_non_200_response(exc, {})
386-
config_response = ConfigResponse(**response.json())
386+
config_response = ConfigResponse.model_validate_json(response.text)
387387

388388
config = config_response.defaults
389389
config.update(self.properties)
@@ -443,14 +443,14 @@ def _handle_non_200_response(self, exc: HTTPError, error_handler: Dict[int, Type
443443
try:
444444
if exception == OAuthError:
445445
# The OAuthErrorResponse has a different format
446-
error = OAuthErrorResponse(**exc.response.json())
446+
error = OAuthErrorResponse.model_validate_json(exc.response.text)
447447
response = str(error.error)
448448
if description := error.error_description:
449449
response += f": {description}"
450450
if uri := error.error_uri:
451451
response += f" ({uri})"
452452
else:
453-
error = ErrorResponse(**exc.response.json()).error
453+
error = ErrorResponse.model_validate_json(exc.response.text).error
454454
response = f"{error.type}: {error.message}"
455455
except JSONDecodeError:
456456
# In the case we don't have a proper response
@@ -588,7 +588,7 @@ def _create_table(
588588
response.raise_for_status()
589589
except HTTPError as exc:
590590
self._handle_non_200_response(exc, {409: TableAlreadyExistsError})
591-
return TableResponse(**response.json())
591+
return TableResponse.model_validate_json(response.text)
592592

593593
@retry(**_RETRY_ARGS)
594594
def create_table(
@@ -662,7 +662,7 @@ def register_table(self, identifier: Union[str, Identifier], metadata_location:
662662
except HTTPError as exc:
663663
self._handle_non_200_response(exc, {409: TableAlreadyExistsError})
664664

665-
table_response = TableResponse(**response.json())
665+
table_response = TableResponse.model_validate_json(response.text)
666666
return self._response_to_table(self.identifier_to_tuple(identifier), table_response)
667667

668668
@retry(**_RETRY_ARGS)
@@ -674,7 +674,7 @@ def list_tables(self, namespace: Union[str, Identifier]) -> List[Identifier]:
674674
response.raise_for_status()
675675
except HTTPError as exc:
676676
self._handle_non_200_response(exc, {404: NoSuchNamespaceError})
677-
return [(*table.namespace, table.name) for table in ListTablesResponse(**response.json()).identifiers]
677+
return [(*table.namespace, table.name) for table in ListTablesResponse.model_validate_json(response.text).identifiers]
678678

679679
@retry(**_RETRY_ARGS)
680680
def load_table(self, identifier: Union[str, Identifier]) -> Table:
@@ -684,7 +684,7 @@ def load_table(self, identifier: Union[str, Identifier]) -> Table:
684684
except HTTPError as exc:
685685
self._handle_non_200_response(exc, {404: NoSuchTableError})
686686

687-
table_response = TableResponse(**response.json())
687+
table_response = TableResponse.model_validate_json(response.text)
688688
return self._response_to_table(self.identifier_to_tuple(identifier), table_response)
689689

690690
@retry(**_RETRY_ARGS)
@@ -735,7 +735,7 @@ def list_views(self, namespace: Union[str, Identifier]) -> List[Identifier]:
735735
response.raise_for_status()
736736
except HTTPError as exc:
737737
self._handle_non_200_response(exc, {404: NoSuchNamespaceError})
738-
return [(*view.namespace, view.name) for view in ListViewsResponse(**response.json()).identifiers]
738+
return [(*view.namespace, view.name) for view in ListViewsResponse.model_validate_json(response.text).identifiers]
739739

740740
@retry(**_RETRY_ARGS)
741741
def commit_table(
@@ -781,7 +781,7 @@ def commit_table(
781781
504: CommitStateUnknownException,
782782
},
783783
)
784-
return CommitTableResponse(**response.json())
784+
return CommitTableResponse.model_validate_json(response.text)
785785

786786
@retry(**_RETRY_ARGS)
787787
def create_namespace(self, namespace: Union[str, Identifier], properties: Properties = EMPTY_DICT) -> None:
@@ -818,7 +818,7 @@ def list_namespaces(self, namespace: Union[str, Identifier] = ()) -> List[Identi
818818
except HTTPError as exc:
819819
self._handle_non_200_response(exc, {})
820820

821-
return ListNamespaceResponse(**response.json()).namespaces
821+
return ListNamespaceResponse.model_validate_json(response.text).namespaces
822822

823823
@retry(**_RETRY_ARGS)
824824
def load_namespace_properties(self, namespace: Union[str, Identifier]) -> Properties:
@@ -830,7 +830,7 @@ def load_namespace_properties(self, namespace: Union[str, Identifier]) -> Proper
830830
except HTTPError as exc:
831831
self._handle_non_200_response(exc, {404: NoSuchNamespaceError})
832832

833-
return NamespaceResponse(**response.json()).properties
833+
return NamespaceResponse.model_validate_json(response.text).properties
834834

835835
@retry(**_RETRY_ARGS)
836836
def update_namespace_properties(
@@ -844,7 +844,7 @@ def update_namespace_properties(
844844
response.raise_for_status()
845845
except HTTPError as exc:
846846
self._handle_non_200_response(exc, {404: NoSuchNamespaceError})
847-
parsed_response = UpdateNamespacePropertiesResponse(**response.json())
847+
parsed_response = UpdateNamespacePropertiesResponse.model_validate_json(response.text)
848848
return PropertiesUpdateSummary(
849849
removed=parsed_response.removed,
850850
updated=parsed_response.updated,

0 commit comments

Comments
 (0)