@@ -123,63 +123,70 @@ jobs:
123123
124124 test_spark :
125125 runs-on : ${{ matrix.os }}
126- continue-on-error : True
126+ continue-on-error : true
127127 strategy :
128128 matrix :
129129 os : [ ubuntu-22.04 ]
130- python-version : ["3.9", "3.10", "3.11"]
131- pandas : ["pandas>1.1"]
132- spark : ["3.4.4", "3.5.5"]
133- hadoop : [ 3.7 ]
134- numpy : ["numpy"]
135- java_home : [ /usr/lib/jvm/java-11-openjdk-amd64 ]
130+ python-version : [ "3.9", "3.10", "3.11" ]
131+ pandas : [ "pandas>1.1" ]
132+ spark : [ "3.4.4", "3.5.0" ]
133+      hadoop : [ "3" ]
134+ numpy : [ "numpy" ]
135+ java_home : [ "/usr/lib/jvm/java-11-openjdk-amd64" ]
136+ analytics : [ "false" ]
136137
137138 name : Tests Spark | python ${{ matrix.python-version }}, ${{ matrix.os }}, spark${{ matrix.spark }}, ${{ matrix.pandas }}, ${{ matrix.numpy }}
138139 env :
139140 JAVA_HOME : ${{ matrix.java_home }}
140141 SPARK_VERSION : ${{ matrix.spark }}
141142 HADOOP_VERSION : ${{ matrix.hadoop }}
142- SPARK_DIRECTORY : ${{ github.workspace }}/../
143- SPARK_HOME : ${{ github.workspace }}/../spark/
143+ SPARK_HOME : ${{ github.workspace }}/spark
144144 YDATA_PROFILING_NO_ANALYTICS : ${{ matrix.analytics }}
145+
145146 steps :
146147 - uses : actions/checkout@v4
147- - name : Setup python
148+
149+ - name : Install system dependencies
150+ run : |
151+ sudo apt-get update
152+ sudo apt-get install -y openjdk-11-jdk curl tar
153+
154+ - name : Setup Python
148155 uses : actions/setup-python@v5
149156 with :
150157 python-version : ${{ matrix.python-version }}
151158 architecture : x64
152- - uses : actions/cache@v4
153- if : startsWith(runner.os, 'Linux')
159+
160+ - name : Cache pip dependencies
161+ uses : actions/cache@v4
154162 with :
155163 path : ~/.cache/pip
156- key : ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }}
157- restore-keys : |
158- ${{ runner.os }}-${{ matrix.pandas }}-pip-\
159- - uses : actions/cache@v4
160- if : startsWith(runner.os, 'macOS')
161- with :
162- path : ~/Library/Caches/pip
163- key : ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }}
164+ key : ${{ runner.os }}-pip-${{ hashFiles('requirements/*.txt', 'setup.cfg', 'pyproject.toml') }}
164165 restore-keys : |
165- ${{ runner.os }}-${{ matrix.pandas }}-pip-
166- - uses : actions/cache@v4
167- if : startsWith(runner.os, 'Windows')
168- with :
169- path : ~\AppData\Local\pip\Cache
170- key : ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }}
171- restore-keys : |
172- ${{ runner.os }}-${{ matrix.pandas }}-pip-
173- - run : |
166+ ${{ runner.os }}-pip-
167+
168+ - name : Install Python Dependencies
169+ run : |
174170 pip install --upgrade pip setuptools wheel
175- pip install pyarrow>4.0.0 pyspark=="${{ matrix.spark }}"
171+ pip install "${{ matrix.pandas }}" "${{ matrix.numpy }}" --no-cache-dir
172+          pip install "pyarrow>4.0.0" pyspark=="${{ matrix.spark }}" --no-cache-dir
176173 pip install ".[test]"
177- pip install "${{ matrix.pandas }}" "${{ matrix.numpy }}"
174+
175+ - name : Download and Install Spark
176+ run : |
177+ SPARK_TGZ="spark-${{ matrix.spark }}-bin-hadoop${{ matrix.hadoop }}.tgz"
178+ SPARK_URL="https://archive.apache.org/dist/spark/spark-${{ matrix.spark }}/${SPARK_TGZ}"
179+          curl -sSfL "$SPARK_URL" | tar xz
180+          mv "spark-${{ matrix.spark }}-bin-hadoop${{ matrix.hadoop }}" "$SPARK_HOME"
181+ echo "SPARK_HOME=${SPARK_HOME}" >> $GITHUB_ENV
182+          echo "${SPARK_HOME}/bin" >> $GITHUB_PATH
183+
178184 - if : ${{ matrix.spark != '3.0.1' }}
179185 run : echo "ARROW_PRE_0_15_IPC_FORMAT=1" >> $GITHUB_ENV
186+
180187 - run : echo "SPARK_LOCAL_IP=127.0.0.1" >> $GITHUB_ENV
188+
181189 - run : make install
182- - run : make install-spark-ci
183- - run : pip install ".[spark]" # Make sure the proper version of pandas is install after everything
184190 - run : make test_spark
191+
185192
0 commit comments