Skip to content

Commit 40eb5d5

Browse files
authored
Merge branch 'main' into kevinjqliu/clean-up-spark
2 parents 1c1d75e + caf02ee commit 40eb5d5

1 file changed

Lines changed: 16 additions & 3 deletions

File tree

dev/Dockerfile

Lines changed: 16 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -44,9 +44,22 @@ ENV PYICEBERG_VERSION=0.10.0
4444
ENV HADOOP_VERSION=3.3.4
4545
ENV AWS_SDK_VERSION=1.12.753
4646

47-
RUN curl --retry 5 -s -C - https://downloads.apache.org/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop3.tgz -o spark-${SPARK_VERSION}-bin-hadoop3.tgz \
48-
&& tar xzf spark-${SPARK_VERSION}-bin-hadoop3.tgz --directory /opt/spark --strip-components 1 \
49-
&& rm -rf spark-${SPARK_VERSION}-bin-hadoop3.tgz
47+
# Download the Spark distribution and unpack it into /opt/spark.
#
# Mirrors are tried in order: the primary Apache mirror (downloads.apache.org)
# first, then the Apache archive as a fallback.
#
# curl flags: -f fails on HTTP errors instead of saving the error page,
# -sS stays quiet but still prints the reason for a failure, and -L follows
# redirects so a 3xx response body is never saved as if it were the archive.
#
# Success is tracked with an explicit flag rather than inferred from the file
# existing: a failed attempt can leave a partial file behind, which is removed
# before the next mirror is tried so it can never be mistaken for a complete
# download.
RUN set -eux; \
    FILE=spark-${SPARK_VERSION}-bin-hadoop3.tgz; \
    URLS="https://downloads.apache.org/spark/spark-${SPARK_VERSION}/${FILE} https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${FILE}"; \
    downloaded=false; \
    for url in $URLS; do \
        echo "Attempting download: $url"; \
        if curl --retry 3 --retry-delay 5 -fsSL -C - "$url" -o "$FILE"; then \
            echo "Downloaded from: $url"; \
            downloaded=true; \
            break; \
        fi; \
        echo "Failed to download from: $url" >&2; \
        rm -f "$FILE"; \
    done; \
    if [ "$downloaded" != "true" ]; then \
        echo "Failed to download Spark from all mirrors" >&2; \
        exit 1; \
    fi; \
    tar xzf "$FILE" --directory /opt/spark --strip-components 1; \
    rm -f "$FILE"
5063

5164
# Download Spark Connect server JAR
5265
RUN curl --retry 5 -s -L https://repo1.maven.org/maven2/org/apache/spark/spark-connect_${SCALA_VERSION}/${SPARK_VERSION}/spark-connect_${SCALA_VERSION}-${SPARK_VERSION}.jar \

0 commit comments

Comments
 (0)