Skip to content

Commit b076077

Browse files
committed
Merge branch 'main' of github.com:apache/iceberg-python into fd-use-iceberg-rust
2 parents 3bc74ee + 7a6a7c8 commit b076077

11 files changed

Lines changed: 49 additions & 5849 deletions

File tree

.github/workflows/pypi-build-artifacts.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ jobs:
6262
if: startsWith(matrix.os, 'ubuntu')
6363

6464
- name: Build wheels
65-
uses: pypa/cibuildwheel@v2.23.1
65+
uses: pypa/cibuildwheel@v2.23.2
6666
with:
6767
output-dir: wheelhouse
6868
config-file: "pyproject.toml"

.github/workflows/svn-build-artifacts.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ jobs:
5757
if: startsWith(matrix.os, 'ubuntu')
5858

5959
- name: Build wheels
60-
uses: pypa/cibuildwheel@v2.23.1
60+
uses: pypa/cibuildwheel@v2.23.2
6161
with:
6262
output-dir: wheelhouse
6363
config-file: "pyproject.toml"

dev/provision.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,7 @@
328328
CREATE TABLE {catalog_name}.default.test_table_empty_list_and_map (
329329
col_list array<int>,
330330
col_map map<int, int>,
331+
col_struct struct<test:int>,
331332
col_list_with_struct array<struct<test:int>>
332333
)
333334
USING iceberg
@@ -340,8 +341,8 @@
340341
spark.sql(
341342
f"""
342343
INSERT INTO {catalog_name}.default.test_table_empty_list_and_map
343-
VALUES (null, null, null),
344-
(array(), map(), array(struct(1)))
344+
VALUES (null, null, null, null),
345+
(array(), map(), struct(1), array(struct(1)))
345346
"""
346347
)
347348

poetry.lock

Lines changed: 0 additions & 5840 deletions
This file was deleted.

pyiceberg/io/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@
6363
S3_PROXY_URI = "s3.proxy-uri"
6464
S3_CONNECT_TIMEOUT = "s3.connect-timeout"
6565
S3_REQUEST_TIMEOUT = "s3.request-timeout"
66+
S3_SIGNER = "s3.signer"
6667
S3_SIGNER_URI = "s3.signer.uri"
6768
S3_SIGNER_ENDPOINT = "s3.signer.endpoint"
6869
S3_SIGNER_ENDPOINT_DEFAULT = "v1/aws/s3/sign"

pyiceberg/io/fsspec.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@
6767
S3_REQUEST_TIMEOUT,
6868
S3_SECRET_ACCESS_KEY,
6969
S3_SESSION_TOKEN,
70+
S3_SIGNER,
7071
S3_SIGNER_ENDPOINT,
7172
S3_SIGNER_ENDPOINT_DEFAULT,
7273
S3_SIGNER_URI,
@@ -137,7 +138,7 @@ def _s3(properties: Properties) -> AbstractFileSystem:
137138
config_kwargs = {}
138139
register_events: Dict[str, Callable[[Properties], None]] = {}
139140

140-
if signer := properties.get("s3.signer"):
141+
if signer := properties.get(S3_SIGNER):
141142
logger.info("Loading signer %s", signer)
142143
if signer_func := SIGNERS.get(signer):
143144
signer_func_with_properties = partial(signer_func, properties)

pyiceberg/schema.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1240,6 +1240,7 @@ class _BuildPositionAccessors(SchemaVisitor[Dict[Position, Accessor]]):
12401240
... 1: Accessor(position=1, inner=None),
12411241
... 5: Accessor(position=2, inner=Accessor(position=0, inner=None)),
12421242
... 6: Accessor(position=2, inner=Accessor(position=1, inner=None))
1243+
... 3: Accessor(position=2, inner=None),
12431244
... }
12441245
>>> result == expected
12451246
True
@@ -1255,8 +1256,7 @@ def struct(self, struct: StructType, field_results: List[Dict[Position, Accessor
12551256
if field_results[position]:
12561257
for inner_field_id, acc in field_results[position].items():
12571258
result[inner_field_id] = Accessor(position, inner=acc)
1258-
else:
1259-
result[field.field_id] = Accessor(position)
1259+
result[field.field_id] = Accessor(position)
12601260

12611261
return result
12621262

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ mkdocs = "1.6.1"
106106
griffe = "1.6.2"
107107
jinja2 = "3.1.6"
108108
mkdocstrings = "0.29.0"
109-
mkdocstrings-python = "1.16.7"
109+
mkdocstrings-python = "1.16.8"
110110
mkdocs-literate-nav = "0.6.2"
111111
mkdocs-autorefs = "1.4.1"
112112
mkdocs-gen-files = "0.5.0"

tests/expressions/test_expressions.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,23 @@ def test_notnull_bind_required() -> None:
168168
assert NotNull(Reference("a")).bind(schema) == AlwaysTrue()
169169

170170

171+
def test_notnull_bind_top_struct() -> None:
172+
schema = Schema(
173+
NestedField(
174+
3,
175+
"struct_col",
176+
required=False,
177+
field_type=StructType(
178+
NestedField(1, "id", IntegerType(), required=True),
179+
NestedField(2, "cost", DecimalType(38, 18), required=False),
180+
),
181+
),
182+
schema_id=1,
183+
)
184+
bound = BoundNotNull(BoundReference(schema.find_field(3), schema.accessor_for_field(3)))
185+
assert NotNull(Reference("struct_col")).bind(schema) == bound
186+
187+
171188
def test_isnan_inverse() -> None:
172189
assert ~IsNaN(Reference("f")) == NotNaN(Reference("f"))
173190

tests/integration/test_reads.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
LessThan,
4242
NotEqualTo,
4343
NotNaN,
44+
NotNull,
4445
)
4546
from pyiceberg.io import PYARROW_USE_LARGE_TYPES_ON_READ
4647
from pyiceberg.io.pyarrow import (
@@ -667,6 +668,24 @@ def test_filter_case_insensitive(catalog: Catalog) -> None:
667668
assert arrow_table["b"].to_pylist() == ["2"]
668669

669670

671+
@pytest.mark.integration
672+
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
673+
def test_filters_on_top_level_struct(catalog: Catalog) -> None:
674+
test_empty_struct = catalog.load_table("default.test_table_empty_list_and_map")
675+
676+
arrow_table = test_empty_struct.scan().to_arrow()
677+
assert None in arrow_table["col_struct"].to_pylist()
678+
679+
arrow_table = test_empty_struct.scan(row_filter=NotNull("col_struct")).to_arrow()
680+
assert arrow_table["col_struct"].to_pylist() == [{"test": 1}]
681+
682+
arrow_table = test_empty_struct.scan(row_filter="col_struct is not null", case_sensitive=False).to_arrow()
683+
assert arrow_table["col_struct"].to_pylist() == [{"test": 1}]
684+
685+
arrow_table = test_empty_struct.scan(row_filter="COL_STRUCT is null", case_sensitive=False).to_arrow()
686+
assert arrow_table["col_struct"].to_pylist() == [None]
687+
688+
670689
@pytest.mark.integration
671690
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
672691
def test_upgrade_table_version(catalog: Catalog) -> None:

0 commit comments

Comments (0)