Skip to content

Commit 54537c2

Browse files
committed
Merge branch 'main' into bugfix/upsert-complex-type
2 parents 0719ecf + 1588701 commit 54537c2

9 files changed

Lines changed: 206 additions & 158 deletions

File tree

dev/Dockerfile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,20 +39,20 @@ WORKDIR ${SPARK_HOME}
 # Remember to also update `tests/conftest`'s spark setting
 ENV SPARK_VERSION=3.5.4
 ENV ICEBERG_SPARK_RUNTIME_VERSION=3.5_2.12
-ENV ICEBERG_VERSION=1.8.0
+ENV ICEBERG_VERSION=1.9.0-SNAPSHOT
 ENV PYICEBERG_VERSION=0.9.0

 RUN curl --retry 5 -s -C - https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop3.tgz -o spark-${SPARK_VERSION}-bin-hadoop3.tgz \
  && tar xzf spark-${SPARK_VERSION}-bin-hadoop3.tgz --directory /opt/spark --strip-components 1 \
  && rm -rf spark-${SPARK_VERSION}-bin-hadoop3.tgz

 # Download iceberg spark runtime
-RUN curl --retry 5 -s https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}/${ICEBERG_VERSION}/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar \
+RUN curl --retry 5 -s https://repository.apache.org/content/groups/snapshots/org/apache/iceberg/iceberg-spark-runtime-3.5_2.12/1.9.0-SNAPSHOT/iceberg-spark-runtime-3.5_2.12-1.9.0-20250408.001846-43.jar \
     -Lo /opt/spark/jars/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar

 # Download AWS bundle
-RUN curl --retry 5 -s https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-aws-bundle/${ICEBERG_VERSION}/iceberg-aws-bundle-${ICEBERG_VERSION}.jar \
+RUN curl --retry 5 -s https://repository.apache.org/content/groups/snapshots/org/apache/iceberg/iceberg-aws-bundle/1.9.0-SNAPSHOT/iceberg-aws-bundle-1.9.0-20250408.002722-86.jar \
     -Lo /opt/spark/jars/iceberg-aws-bundle-${ICEBERG_VERSION}.jar

 COPY spark-defaults.conf /opt/spark/conf

poetry.lock

Lines changed: 159 additions & 144 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyiceberg/expressions/parser.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,10 @@
     DelimitedList,
     Group,
     MatchFirst,
+    ParseException,
     ParserElement,
     ParseResults,
+    QuotedString,
     Suppress,
     Word,
     alphanums,
@@ -79,7 +81,16 @@
 LIKE = CaselessKeyword("like")

 unquoted_identifier = Word(alphas + "_", alphanums + "_$")
-quoted_identifier = Suppress('"') + unquoted_identifier + Suppress('"')
+quoted_identifier = QuotedString('"', escChar="\\", unquoteResults=True)
+
+
+@quoted_identifier.set_parse_action
+def validate_quoted_identifier(result: ParseResults) -> str:
+    if "." in result[0]:
+        raise ParseException("Expected '\"', found '.'")
+    return result[0]
+
+
 identifier = MatchFirst([unquoted_identifier, quoted_identifier]).set_results_name("identifier")
 column = DelimitedList(identifier, delim=".", combine=False).set_results_name("column")

pyiceberg/io/pyarrow.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1441,11 +1441,15 @@ def _task_to_record_batches(

         # Apply the user filter
         if pyarrow_filter is not None:
-            current_batch = current_batch.filter(pyarrow_filter)
+            # Temporary fix until PyArrow 21 is released ( https://github.com/apache/arrow/pull/46057 )
+            table = pa.Table.from_batches([current_batch])
+            table = table.filter(pyarrow_filter)
             # skip empty batches
-            if current_batch.num_rows == 0:
+            if table.num_rows == 0:
                 continue

+            current_batch = table.combine_chunks().to_batches()[0]
+
         result_batch = _to_requested_schema(
             projected_schema,
             file_project_schema,

pyiceberg/table/__init__.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -438,6 +438,15 @@ def update_snapshot(self, snapshot_properties: Dict[str, str] = EMPTY_DICT) -> U
         """
         return UpdateSnapshot(self, io=self._table.io, snapshot_properties=snapshot_properties)

+    def update_statistics(self) -> UpdateStatistics:
+        """
+        Create a new UpdateStatistics to update the statistics of the table.
+
+        Returns:
+            A new UpdateStatistics
+        """
+        return UpdateStatistics(transaction=self)
+
     def append(self, df: pa.Table, snapshot_properties: Dict[str, str] = EMPTY_DICT) -> None:
         """
         Shorthand API for appending a PyArrow table to a table transaction.

pyiceberg/transforms.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -234,8 +234,8 @@ class BucketTransform(Transform[S, int]):
     _num_buckets: PositiveInt = PrivateAttr()

     def __init__(self, num_buckets: int, **data: Any) -> None:
-        self._num_buckets = num_buckets
         super().__init__(f"bucket[{num_buckets}]", **data)
+        self._num_buckets = num_buckets

     @property
     def num_buckets(self) -> int:

pyproject.toml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ fastavro = "1.10.0"
 coverage = { version = "^7.4.2", extras = ["toml"] }
 requests-mock = "1.12.1"
 moto = { version = "^5.0.2", extras = ["server"] }
-typing-extensions = "4.12.2"
+typing-extensions = "4.13.1"
 pytest-mock = "3.14.0"
 pyspark = "3.5.5"
 cython = "3.0.12"
@@ -104,16 +104,16 @@ docutils = "!=0.21.post1" # https://github.com/python-poetry/poetry/issues/924
 [tool.poetry.group.docs.dependencies]
 # for mkdocs
 mkdocs = "1.6.1"
-griffe = "1.7.1"
+griffe = "1.7.2"
 jinja2 = "3.1.6"
 mkdocstrings = "0.29.1"
-mkdocstrings-python = "1.16.8"
+mkdocstrings-python = "1.16.10"
 mkdocs-literate-nav = "0.6.2"
 mkdocs-autorefs = "1.4.1"
 mkdocs-gen-files = "0.5.0"
-mkdocs-material = "9.6.10"
+mkdocs-material = "9.6.11"
 mkdocs-material-extensions = "1.3.1"
-mkdocs-section-index = "0.3.9"
+mkdocs-section-index = "0.3.10"

 [[tool.mypy.overrides]]
 module = "pytest_mock.*"

tests/expressions/test_parser.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -230,9 +230,11 @@ def test_quoted_column_with_dots() -> None:
     with pytest.raises(ParseException) as exc_info:
         parser.parse("\"foo.bar\".baz = 'data'")

-    assert "Expected '\"', found '.'" in str(exc_info.value)
-
     with pytest.raises(ParseException) as exc_info:
         parser.parse("'foo.bar'.baz = 'data'")

     assert "Expected <= | <> | < | >= | > | == | = | !=, found '.'" in str(exc_info.value)
+
+
+def test_quoted_column_with_spaces() -> None:
+    assert EqualTo("Foo Bar", "data") == parser.parse("\"Foo Bar\" = 'data'")

tests/integration/test_statistics_operations.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,3 +82,10 @@ def create_statistics_file(snapshot_id: int, type_name: str) -> StatisticsFile:
         update.remove_statistics(add_snapshot_id_1)

     assert len(tbl.metadata.statistics) == 1
+
+    with tbl.transaction() as txn:
+        with txn.update_statistics() as update:
+            update.set_statistics(statistics_file_snap_1)
+            update.set_statistics(statistics_file_snap_2)
+
+    assert len(tbl.metadata.statistics) == 2

0 commit comments

Comments (0)