Skip to content

Commit adde347

Browse files
committed
chore: update spark test CI/CD pipeline.
1 parent 7bd1069 commit adde347

1 file changed

Lines changed: 40 additions & 33 deletions

File tree

.github/workflows/tests.yml

Lines changed: 40 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -123,63 +123,70 @@ jobs:
123123

124124
test_spark:
125125
runs-on: ${{ matrix.os }}
126-
continue-on-error: True
126+
continue-on-error: true
127127
strategy:
128128
matrix:
129129
os: [ ubuntu-22.04 ]
130-
python-version: ["3.9", "3.10", "3.11"]
131-
pandas: ["pandas>1.1"]
132-
spark: ["3.4.4", "3.5.5"]
133-
hadoop: [ 3.7 ]
134-
numpy: ["numpy"]
135-
java_home: [ /usr/lib/jvm/java-11-openjdk-amd64 ]
130+
python-version: [ "3.9", "3.10", "3.11" ]
131+
pandas: [ "pandas>1.1" ]
132+
spark: [ "3.4.4", "3.5.0" ]
133+
hadoop: [ "3.3" ]
134+
numpy: [ "numpy" ]
135+
java_home: [ "/usr/lib/jvm/java-11-openjdk-amd64" ]
136+
analytics: [ "false" ]
136137

137138
name: Tests Spark | python ${{ matrix.python-version }}, ${{ matrix.os }}, spark${{ matrix.spark }}, ${{ matrix.pandas }}, ${{ matrix.numpy }}
138139
env:
139140
JAVA_HOME: ${{ matrix.java_home }}
140141
SPARK_VERSION: ${{ matrix.spark }}
141142
HADOOP_VERSION: ${{ matrix.hadoop }}
142-
SPARK_DIRECTORY: ${{ github.workspace }}/../
143-
SPARK_HOME: ${{ github.workspace }}/../spark/
143+
SPARK_HOME: ${{ github.workspace }}/spark
144144
YDATA_PROFILING_NO_ANALYTICS: ${{ matrix.analytics }}
145+
145146
steps:
146147
- uses: actions/checkout@v4
147-
- name: Setup python
148+
149+
- name: Install system dependencies
150+
run: |
151+
sudo apt-get update
152+
sudo apt-get install -y openjdk-11-jdk curl tar
153+
154+
- name: Setup Python
148155
uses: actions/setup-python@v5
149156
with:
150157
python-version: ${{ matrix.python-version }}
151158
architecture: x64
152-
- uses: actions/cache@v4
153-
if: startsWith(runner.os, 'Linux')
159+
160+
- name: Cache pip dependencies
161+
uses: actions/cache@v4
154162
with:
155163
path: ~/.cache/pip
156-
key: ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }}
157-
restore-keys: |
158-
${{ runner.os }}-${{ matrix.pandas }}-pip-\
159-
- uses: actions/cache@v4
160-
if: startsWith(runner.os, 'macOS')
161-
with:
162-
path: ~/Library/Caches/pip
163-
key: ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }}
164+
key: ${{ runner.os }}-pip-${{ hashFiles('requirements/*.txt', 'setup.cfg', 'pyproject.toml') }}
164165
restore-keys: |
165-
${{ runner.os }}-${{ matrix.pandas }}-pip-
166-
- uses: actions/cache@v4
167-
if: startsWith(runner.os, 'Windows')
168-
with:
169-
path: ~\AppData\Local\pip\Cache
170-
key: ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }}
171-
restore-keys: |
172-
${{ runner.os }}-${{ matrix.pandas }}-pip-
173-
- run: |
166+
${{ runner.os }}-pip-
167+
168+
- name: Install Python Dependencies
169+
run: |
174170
pip install --upgrade pip setuptools wheel
175-
pip install pyarrow>4.0.0 pyspark=="${{ matrix.spark }}"
171+
pip install "${{ matrix.pandas }}" "${{ matrix.numpy }}" --no-cache-dir
172+
pip install pyarrow>4.0.0 pyspark=="${{ matrix.spark }}" --no-cache-dir
176173
pip install ".[test]"
177-
pip install "${{ matrix.pandas }}" "${{ matrix.numpy }}"
174+
175+
- name: Download and Install Spark
176+
run: |
177+
SPARK_TGZ="spark-${{ matrix.spark }}-bin-hadoop${{ matrix.hadoop }}.tgz"
178+
SPARK_URL="https://archive.apache.org/dist/spark/spark-${{ matrix.spark }}/${SPARK_TGZ}"
179+
curl -sL "$SPARK_URL" | tar xz
180+
mv spark-* $SPARK_HOME
181+
echo "SPARK_HOME=${SPARK_HOME}" >> $GITHUB_ENV
182+
echo "PATH=${SPARK_HOME}/bin:$PATH" >> $GITHUB_ENV
183+
178184
- if: ${{ matrix.spark != '3.0.1' }}
179185
run: echo "ARROW_PRE_0_15_IPC_FORMAT=1" >> $GITHUB_ENV
186+
180187
- run: echo "SPARK_LOCAL_IP=127.0.0.1" >> $GITHUB_ENV
188+
181189
- run: make install
182-
- run: make install-spark-ci
183-
- run: pip install ".[spark]" # Make sure the proper version of pandas is install after everything
184190
- run: make test_spark
191+
185192

0 commit comments

Comments (0)