Skip to content

Commit 3fece3a

Browse files
authored
Merge branch 'apache:main' into feature/add-files-branch
2 parents 505d35c + 7dbe25c commit 3fece3a

26 files changed

Lines changed: 610 additions & 585 deletions

.github/workflows/license_check.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ on: pull_request
2222

2323
jobs:
2424
rat:
25-
runs-on: ubuntu-22.04
25+
runs-on: ubuntu-latest
2626
steps:
2727
- uses: actions/checkout@v5
2828
- run: dev/check-license

.github/workflows/pypi-build-artifacts.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ jobs:
3232
runs-on: ${{ matrix.os }}
3333
strategy:
3434
matrix:
35-
os: [ ubuntu-22.04, windows-2022, macos-13, macos-14 ]
35+
os: [ ubuntu-latest, windows-latest, macos-latest ]
3636

3737
steps:
3838
- uses: actions/checkout@v5
@@ -62,7 +62,7 @@ jobs:
6262
if: startsWith(matrix.os, 'ubuntu')
6363

6464
- name: Build wheels
65-
uses: pypa/cibuildwheel@v3.1.4
65+
uses: pypa/cibuildwheel@v3.2.0
6666
with:
6767
output-dir: wheelhouse
6868
config-file: "pyproject.toml"

.github/workflows/python-ci-docs.yml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -32,15 +32,15 @@ concurrency:
3232

3333
jobs:
3434
docs:
35-
runs-on: ubuntu-22.04
35+
runs-on: ubuntu-latest
3636

3737
steps:
3838
- uses: actions/checkout@v5
39-
- name: Install poetry
40-
run: make install-poetry
4139
- uses: actions/setup-python@v6
4240
with:
4341
python-version: 3.12
42+
- name: Install poetry
43+
run: make install-poetry
4444
- name: Install
4545
run: make docs-install
4646
- name: Build docs

.github/workflows/python-ci.yml

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -44,18 +44,18 @@ concurrency:
4444

4545
jobs:
4646
lint-and-test:
47-
runs-on: ubuntu-22.04
47+
runs-on: ubuntu-latest
4848
strategy:
4949
matrix:
5050
python: ['3.9', '3.10', '3.11', '3.12']
5151

5252
steps:
5353
- uses: actions/checkout@v5
54-
- name: Install poetry
55-
run: make install-poetry
5654
- uses: actions/setup-python@v6
5755
with:
5856
python-version: ${{ matrix.python }}
57+
- name: Install poetry
58+
run: make install-poetry
5959
- name: Install system dependencies
6060
run: sudo apt-get update && sudo apt-get install -y libkrb5-dev # for kerberos
6161
- name: Install
@@ -68,7 +68,7 @@ jobs:
6868
run: COVERAGE_FAIL_UNDER=85 make coverage-report
6969

7070
integration-test:
71-
runs-on: ubuntu-22.04
71+
runs-on: ubuntu-latest
7272
strategy:
7373
matrix:
7474
python: ['3.9', '3.10', '3.11', '3.12']

.github/workflows/python-release-docs.yml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -27,15 +27,15 @@ concurrency:
2727

2828
jobs:
2929
docs:
30-
runs-on: ubuntu-22.04
30+
runs-on: ubuntu-latest
3131

3232
steps:
3333
- uses: actions/checkout@v5
34-
- name: Install poetry
35-
run: make install-poetry
3634
- uses: actions/setup-python@v6
3735
with:
3836
python-version: ${{ matrix.python }}
37+
- name: Install poetry
38+
run: make install-poetry
3939
- name: Install docs
4040
run: make docs-install
4141
- name: Build docs

.github/workflows/stale.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@ permissions:
2929
jobs:
3030
stale:
3131
if: github.repository_owner == 'apache'
32-
runs-on: ubuntu-22.04
32+
runs-on: ubuntu-latest
3333
steps:
3434
- uses: actions/stale@v10.0.0
3535
with:

.github/workflows/svn-build-artifacts.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ jobs:
3232
runs-on: ${{ matrix.os }}
3333
strategy:
3434
matrix:
35-
os: [ ubuntu-22.04, windows-2022, macos-13, macos-14 ]
35+
os: [ ubuntu-latest, windows-latest, macos-latest ]
3636

3737
steps:
3838
- uses: actions/checkout@v5
@@ -57,7 +57,7 @@ jobs:
5757
if: startsWith(matrix.os, 'ubuntu')
5858

5959
- name: Build wheels
60-
uses: pypa/cibuildwheel@v3.1.4
60+
uses: pypa/cibuildwheel@v3.2.0
6161
with:
6262
output-dir: wheelhouse
6363
config-file: "pyproject.toml"

.gitignore

Lines changed: 0 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -37,13 +37,6 @@ coverage.xml
3737
bin/
3838
.vscode/
3939

40-
# Hive/metastore files
41-
metastore_db/
42-
43-
# Spark/metastore files
44-
spark-warehouse/
45-
derby.log
46-
4740
# Python stuff
4841
.mypy_cache/
4942
htmlcov

Makefile

Lines changed: 34 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -18,17 +18,27 @@
1818
# Configuration Variables
1919
# ========================
2020

21-
PYTEST_ARGS ?= -v # Override with e.g. PYTEST_ARGS="-vv --tb=short"
21+
PYTEST_ARGS ?= -v -x # Override with e.g. PYTEST_ARGS="-vv --tb=short"
2222
COVERAGE ?= 0 # Set COVERAGE=1 to enable coverage: make test COVERAGE=1
2323
COVERAGE_FAIL_UNDER ?= 85 # Minimum coverage % to pass: make coverage-report COVERAGE_FAIL_UNDER=70
24+
KEEP_COMPOSE ?= 0 # Set KEEP_COMPOSE=1 to keep containers after integration tests
25+
26+
PIP = python -m pip
27+
28+
POETRY_VERSION = 2.1.4
29+
POETRY = python -m poetry
2430

2531
ifeq ($(COVERAGE),1)
26-
TEST_RUNNER = poetry run coverage run --parallel-mode --source=pyiceberg -m
32+
TEST_RUNNER = $(POETRY) run coverage run --parallel-mode --source=pyiceberg -m
2733
else
28-
TEST_RUNNER = poetry run
34+
TEST_RUNNER = $(POETRY) run
2935
endif
3036

31-
POETRY_VERSION = 2.1.4
37+
ifeq ($(KEEP_COMPOSE),1)
38+
CLEANUP_COMMAND = echo "Keeping containers running for debugging (KEEP_COMPOSE=1)"
39+
else
40+
CLEANUP_COMMAND = docker compose -f dev/docker-compose-integration.yml down -v --remove-orphans --timeout 0 2>/dev/null || true
41+
endif
3242

3343
# ============
3444
# Help Section
@@ -46,21 +56,21 @@ help: ## Display this help message
4656
##@ Setup
4757

4858
install-poetry: ## Ensure Poetry is installed at the specified version
49-
@if ! command -v poetry &> /dev/null; then \
59+
@if ! command -v ${POETRY} &> /dev/null; then \
5060
echo "Poetry not found. Installing..."; \
51-
pip install --user poetry==$(POETRY_VERSION); \
61+
${PIP} install poetry==$(POETRY_VERSION); \
5262
else \
53-
INSTALLED_VERSION=$$(pip show poetry | grep Version | awk '{print $$2}'); \
63+
INSTALLED_VERSION=$$(${PIP} show poetry | grep Version | awk '{print $$2}'); \
5464
if [ "$$INSTALLED_VERSION" != "$(POETRY_VERSION)" ]; then \
5565
echo "Updating Poetry to version $(POETRY_VERSION)..."; \
56-
pip install --user --upgrade poetry==$(POETRY_VERSION); \
66+
${PIP} install --upgrade poetry==$(POETRY_VERSION); \
5767
else \
5868
echo "Poetry version $(POETRY_VERSION) already installed."; \
5969
fi; \
6070
fi
6171

6272
install-dependencies: ## Install all dependencies including extras
63-
poetry install --all-extras
73+
$(POETRY) install --all-extras
6474

6575
install: install-poetry install-dependencies ## Install Poetry and dependencies
6676

@@ -74,7 +84,7 @@ check-license: ## Check license headers
7484
./dev/check-license
7585

7686
lint: ## Run code linters via pre-commit
77-
poetry run pre-commit run --all-files
87+
$(POETRY) run pre-commit run --all-files
7888

7989
# ===============
8090
# Testing Section
@@ -85,7 +95,7 @@ lint: ## Run code linters via pre-commit
8595
test: ## Run all unit tests (excluding integration)
8696
$(TEST_RUNNER) pytest tests/ -m "(unmarked or parametrize) and not integration" $(PYTEST_ARGS)
8797

88-
test-integration: test-integration-setup test-integration-exec ## Run integration tests
98+
test-integration: test-integration-setup test-integration-exec test-integration-cleanup ## Run integration tests
8999

90100
test-integration-setup: ## Start Docker services for integration tests
91101
docker compose -f dev/docker-compose-integration.yml kill
@@ -98,6 +108,12 @@ test-integration-setup: ## Start Docker services for integration tests
98108
test-integration-exec: ## Run integration tests (excluding provision)
99109
$(TEST_RUNNER) pytest tests/ -m integration $(PYTEST_ARGS)
100110

111+
test-integration-cleanup: ## Clean up integration test environment
112+
@if [ "${KEEP_COMPOSE}" != "1" ]; then \
113+
echo "Cleaning up Docker containers..."; \
114+
fi
115+
$(CLEANUP_COMMAND)
116+
101117
test-integration-rebuild: ## Rebuild integration Docker services from scratch
102118
docker compose -f dev/docker-compose-integration.yml kill
103119
docker compose -f dev/docker-compose-integration.yml rm -f
@@ -119,10 +135,10 @@ test-coverage: COVERAGE=1
119135
test-coverage: test test-integration test-s3 test-adls test-gcs coverage-report ## Run all tests with coverage and report
120136

121137
coverage-report: ## Combine and report coverage
122-
poetry run coverage combine
123-
poetry run coverage report -m --fail-under=$(COVERAGE_FAIL_UNDER)
124-
poetry run coverage html
125-
poetry run coverage xml
138+
${POETRY} run coverage combine
139+
${POETRY} run coverage report -m --fail-under=$(COVERAGE_FAIL_UNDER)
140+
${POETRY} run coverage html
141+
${POETRY} run coverage xml
126142

127143
# ================
128144
# Documentation
@@ -131,13 +147,13 @@ coverage-report: ## Combine and report coverage
131147
##@ Documentation
132148

133149
docs-install: ## Install docs dependencies
134-
poetry install --with docs
150+
${POETRY} install --with docs
135151

136152
docs-serve: ## Serve local docs preview (hot reload)
137-
poetry run mkdocs serve -f mkdocs/mkdocs.yml
153+
${POETRY} run mkdocs serve -f mkdocs/mkdocs.yml
138154

139155
docs-build: ## Build the static documentation site
140-
poetry run mkdocs build -f mkdocs/mkdocs.yml --strict
156+
${POETRY} run mkdocs build -f mkdocs/mkdocs.yml --strict
141157

142158
# ===================
143159
# Project Maintenance

dev/Dockerfile

Lines changed: 33 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -36,25 +36,51 @@ ENV PYTHONPATH=$SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.9.7-src.zip:$
3636
RUN mkdir -p ${HADOOP_HOME} && mkdir -p ${SPARK_HOME} && mkdir -p /home/iceberg/spark-events
3737
WORKDIR ${SPARK_HOME}
3838

39-
# Remember to also update `tests/conftest`'s spark setting
4039
ENV SPARK_VERSION=3.5.6
41-
ENV ICEBERG_SPARK_RUNTIME_VERSION=3.5_2.12
42-
ENV ICEBERG_VERSION=1.9.1
40+
ENV SCALA_VERSION=2.12
41+
ENV ICEBERG_SPARK_RUNTIME_VERSION=3.5_${SCALA_VERSION}
42+
ENV ICEBERG_VERSION=1.10.0
4343
ENV PYICEBERG_VERSION=0.10.0
44+
ENV HADOOP_VERSION=3.3.4
45+
ENV AWS_SDK_VERSION=1.12.753
4446

45-
RUN curl --retry 5 -s -C - https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop3.tgz -o spark-${SPARK_VERSION}-bin-hadoop3.tgz \
46-
&& tar xzf spark-${SPARK_VERSION}-bin-hadoop3.tgz --directory /opt/spark --strip-components 1 \
47-
&& rm -rf spark-${SPARK_VERSION}-bin-hadoop3.tgz
47+
# Try the primary Apache mirror (downloads.apache.org) first, then fall back to the archive
48+
RUN set -eux; \
49+
FILE=spark-${SPARK_VERSION}-bin-hadoop3.tgz; \
50+
URLS="https://downloads.apache.org/spark/spark-${SPARK_VERSION}/${FILE} https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${FILE}"; \
51+
for url in $URLS; do \
52+
echo "Attempting download: $url"; \
53+
if curl --retry 3 --retry-delay 5 -f -s -C - "$url" -o "$FILE"; then \
54+
echo "Downloaded from: $url"; \
55+
break; \
56+
else \
57+
echo "Failed to download from: $url"; \
58+
fi; \
59+
done; \
60+
if [ ! -f "$FILE" ]; then echo "Failed to download Spark from all mirrors" >&2; exit 1; fi; \
61+
tar xzf "$FILE" --directory /opt/spark --strip-components 1; \
62+
rm -rf "$FILE"
63+
64+
# Download Spark Connect server JAR
65+
RUN curl --retry 5 -s -L https://repo1.maven.org/maven2/org/apache/spark/spark-connect_${SCALA_VERSION}/${SPARK_VERSION}/spark-connect_${SCALA_VERSION}-${SPARK_VERSION}.jar \
66+
-Lo /opt/spark/jars/spark-connect_${SCALA_VERSION}-${SPARK_VERSION}.jar
4867

4968
# Download iceberg spark runtime
5069
RUN curl --retry 5 -s https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}/${ICEBERG_VERSION}/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar \
5170
-Lo /opt/spark/jars/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar
5271

53-
5472
# Download AWS bundle
5573
RUN curl --retry 5 -s https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-aws-bundle/${ICEBERG_VERSION}/iceberg-aws-bundle-${ICEBERG_VERSION}.jar \
5674
-Lo /opt/spark/jars/iceberg-aws-bundle-${ICEBERG_VERSION}.jar
5775

76+
# Download hadoop-aws (required for S3 support)
77+
RUN curl --retry 5 -s https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/${HADOOP_VERSION}/hadoop-aws-${HADOOP_VERSION}.jar \
78+
-Lo /opt/spark/jars/hadoop-aws-${HADOOP_VERSION}.jar
79+
80+
# Download AWS SDK bundle
81+
RUN curl --retry 5 -s https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/${AWS_SDK_VERSION}/aws-java-sdk-bundle-${AWS_SDK_VERSION}.jar \
82+
-Lo /opt/spark/jars/aws-java-sdk-bundle-${AWS_SDK_VERSION}.jar
83+
5884
COPY spark-defaults.conf /opt/spark/conf
5985
ENV PATH="/opt/spark/sbin:/opt/spark/bin:${PATH}"
6086

0 commit comments

Comments (0)