
Commit fa9dcc5

use spark connect for provision.py

1 parent 2bff5ef

3 files changed: 8 additions & 18 deletions
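
In short, dev/provision.py stops constructing a local SparkSession and instead attaches to the Spark Connect server exposed by the docker-compose stack. A minimal sketch of the connection pattern, assuming pyspark is installed with the connect extra (pyspark[connect]):

    from pyspark.sql import SparkSession

    # Attach to a Spark Connect server instead of starting a local JVM;
    # 15002 is the default Spark Connect port and the one provision.py uses.
    spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()

    # The query executes on the remote server; only results return here.
    spark.sql("SELECT 1 AS ok").show()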

Makefile (1 addition & 2 deletions)
@@ -102,8 +102,7 @@ test-integration-setup: ## Start Docker services for integration tests
 	docker compose -f dev/docker-compose-integration.yml rm -f
 	docker compose -f dev/docker-compose-integration.yml up -d
 	sleep 10
-	docker compose -f dev/docker-compose-integration.yml cp ./dev/provision.py spark-iceberg:/opt/spark/provision.py
-	docker compose -f dev/docker-compose-integration.yml exec -T spark-iceberg ipython ./provision.py
+	${TEST_RUNNER} python dev/provision.py

 test-integration-exec: ## Run integration tests (excluding provision)
 	$(TEST_RUNNER) pytest tests/ -m integration $(PYTEST_ARGS)
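
Note that `sleep 10` is the only guard against the Spark Connect server not yet accepting connections when the host-side provision run starts. A small sketch, not part of this commit, of a port poll that could replace the fixed sleep (assumption: the server publishes port 15002 on localhost, per the provision.py change below):

    import socket
    import time

    def wait_for_port(host: str, port: int, timeout: float = 60.0) -> None:
        # Poll until the TCP port accepts connections, or raise after `timeout`.
        deadline = time.monotonic() + timeout
        while True:
            try:
                with socket.create_connection((host, port), timeout=2):
                    return  # port is open; the Spark Connect server is listening
            except OSError:
                if time.monotonic() > deadline:
                    raise TimeoutError(f"{host}:{port} not reachable")
                time.sleep(1)

    wait_for_port("localhost", 15002)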

dev/Dockerfile (0 additions & 1 deletion)
@@ -92,7 +92,6 @@ RUN pip3 install -q ipython
 RUN pip3 install "pyiceberg[s3fs,hive,pyarrow]==${PYICEBERG_VERSION}"

 COPY entrypoint.sh .
-COPY provision.py .

 ENTRYPOINT ["./entrypoint.sh"]
 CMD ["notebook"]

dev/provision.py (7 additions & 15 deletions)
@@ -23,25 +23,17 @@
 from pyiceberg.schema import Schema
 from pyiceberg.types import FixedType, NestedField, UUIDType

-# The configuration is important, otherwise we get many small
-# parquet files with a single row. When a positional delete
-# hits the Parquet file with one row, the parquet file gets
-# dropped instead of having a merge-on-read delete file.
-spark = (
-    SparkSession
-    .builder
-    .config("spark.sql.shuffle.partitions", "1")
-    .config("spark.default.parallelism", "1")
-    .getOrCreate()
-)
+# Create SparkSession against the remote Spark Connect server
+spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()
+

 catalogs = {
     'rest': load_catalog(
         "rest",
         **{
             "type": "rest",
-            "uri": "http://rest:8181",
-            "s3.endpoint": "http://minio:9000",
+            "uri": "http://localhost:8181",
+            "s3.endpoint": "http://localhost:9000",
             "s3.access-key-id": "admin",
             "s3.secret-access-key": "password",
         },
@@ -50,8 +42,8 @@
         "hive",
         **{
             "type": "hive",
-            "uri": "thrift://hive:9083",
-            "s3.endpoint": "http://minio:9000",
+            "uri": "thrift://localhost:9083",
+            "s3.endpoint": "http://localhost:9000",
             "s3.access-key-id": "admin",
             "s3.secret-access-key": "password",
         },
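
Because the script now runs on the host rather than inside the spark-iceberg container, each service is addressed through its published localhost port instead of its compose-network hostname (rest, minio, hive). A quick sanity check built only from the configuration shown above (a sketch; list_namespaces is the standard pyiceberg catalog call):

    from pyiceberg.catalog import load_catalog

    # Same REST catalog configuration as in the diff above; the endpoints
    # point at localhost because this runs on the host, not in a container.
    catalog = load_catalog(
        "rest",
        **{
            "type": "rest",
            "uri": "http://localhost:8181",
            "s3.endpoint": "http://localhost:9000",
            "s3.access-key-id": "admin",
            "s3.secret-access-key": "password",
        },
    )

    # If the compose stack is up, this lists the provisioned namespaces.
    print(catalog.list_namespaces())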
