Skip to content

Commit cec7199

Browse files
committed
chore: fix spark CI/CD
1 parent 5e96926 commit cec7199

1 file changed

Lines changed: 25 additions & 44 deletions

File tree

.github/workflows/tests.yml

Lines changed: 25 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -122,34 +122,26 @@ jobs:
122122
- run: codecov -F py${{ matrix.python-version }}-${{ matrix.os }}-${{ matrix.pandas }}-${{ matrix.numpy }}
123123

124124
test_spark:
125-
runs-on: ${{ matrix.os }}
126-
continue-on-error: true
125+
runs-on: ubuntu-22.04
126+
continue-on-error: false
127127
strategy:
128128
matrix:
129-
os: [ ubuntu-22.04 ]
130-
python-version: [ "3.9", "3.10", "3.11" ]
131-
pandas: [ "pandas>1.1" ]
132-
spark: [ "3.4.4", "3.5.0" ]
133-
hadoop: [ "3.3" ]
134-
numpy: [ "numpy" ]
135-
java_home: [ "/usr/lib/jvm/java-11-openjdk-amd64" ]
136-
analytics: [ "false" ]
137-
138-
name: Tests Spark | python ${{ matrix.python-version }}, ${{ matrix.os }}, spark${{ matrix.spark }}, ${{ matrix.pandas }}, ${{ matrix.numpy }}
139-
env:
140-
JAVA_HOME: ${{ matrix.java_home }}
141-
SPARK_VERSION: ${{ matrix.spark }}
142-
HADOOP_VERSION: ${{ matrix.hadoop }}
143-
SPARK_HOME: ${{ github.workspace }}/spark
144-
YDATA_PROFILING_NO_ANALYTICS: ${{ matrix.analytics }}
129+
python-version: [ "3.9", "3.10", "3.11", "3.12" ]
130+
pyspark-version: [ "3.4.4", "3.5.0" ]
131+
132+
name: Tests Spark | Python ${{ matrix.python-version }} | PySpark ${{ matrix.pyspark-version }}
145133

146134
steps:
147-
- uses: actions/checkout@v4
135+
- name: Checkout Code
136+
uses: actions/checkout@v4
148137

149-
- name: Install system dependencies
138+
- name: Install Java (OpenJDK 11)
150139
run: |
151140
sudo apt-get update
152-
sudo apt-get install -y openjdk-11-jdk curl tar
141+
sudo apt-get install -y openjdk-11-jdk
142+
echo "JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64" >> $GITHUB_ENV
143+
echo "PATH=$JAVA_HOME/bin:$PATH" >> $GITHUB_ENV
144+
java -version
153145
154146
- name: Setup Python
155147
uses: actions/setup-python@v5
@@ -161,32 +153,21 @@ jobs:
161153
uses: actions/cache@v4
162154
with:
163155
path: ~/.cache/pip
164-
key: ${{ runner.os }}-pip-${{ hashFiles('requirements/*.txt', 'setup.cfg', 'pyproject.toml') }}
156+
key: pip-${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.pyspark-version }}-${{ hashFiles('requirements/*.txt', 'setup.cfg', 'pyproject.toml') }}
165157
restore-keys: |
166-
${{ runner.os }}-pip-
158+
pip-${{ runner.os }}-
167159
168-
- name: Install Python Dependencies
160+
- name: Install Dependencies
169161
run: |
170-
pip install --upgrade pip setuptools wheel
171-
pip install "${{ matrix.pandas }}" "${{ matrix.numpy }}" --no-cache-dir
172-
pip install pyarrow>4.0.0 pyspark=="${{ matrix.spark }}" --no-cache-dir
173-
pip install ".[test]"
162+
python -m pip install --upgrade pip setuptools wheel
163+
pip install pyarrow>4.0.0 pyspark=="${{ matrix.pyspark-version }}" --no-cache-dir
164+
echo "ARROW_PRE_0_15_IPC_FORMAT=1" >> $GITHUB_ENV
165+
echo "SPARK_LOCAL_IP=127.0.0.1" >> $GITHUB_ENV
174166
175-
- name: Download and Install Spark
167+
- name: Run Tests
176168
run: |
177-
SPARK_TGZ="spark-${{ matrix.spark }}-bin-hadoop${{ matrix.hadoop }}.tgz"
178-
SPARK_URL="https://archive.apache.org/dist/spark/spark-${{ matrix.spark }}/${SPARK_TGZ}"
179-
curl -sL "$SPARK_URL" | tar xz
180-
mv spark-* $SPARK_HOME
181-
echo "SPARK_HOME=${SPARK_HOME}" >> $GITHUB_ENV
182-
echo "PATH=${SPARK_HOME}/bin:$PATH" >> $GITHUB_ENV
183-
184-
- if: ${{ matrix.spark != '3.0.1' }}
185-
run: echo "ARROW_PRE_0_15_IPC_FORMAT=1" >> $GITHUB_ENV
186-
187-
- run: echo "SPARK_LOCAL_IP=127.0.0.1" >> $GITHUB_ENV
188-
189-
- run: make install
190-
- run: make test_spark
191-
169+
make install
170+
make install-spark-ci
171+
pip install ".[spark]"
172+
make test_spark
192173

0 commit comments

Comments (0)