@@ -122,34 +122,29 @@ jobs:
       - run: codecov -F py${{ matrix.python-version }}-${{ matrix.os }}-${{ matrix.pandas }}-${{ matrix.numpy }}

   test_spark:
-    runs-on: ${{ matrix.os }}
-    continue-on-error: true
+    runs-on: ubuntu-22.04
+    continue-on-error: false
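+    # Spark test failures now fail the build; previously the job was advisory (continue-on-error: true)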
     strategy:
       matrix:
-        os: [ ubuntu-22.04 ]
-        python-version: [ "3.9", "3.10", "3.11" ]
-        pandas: [ "pandas>1.1" ]
-        spark: [ "3.4.4", "3.5.0" ]
-        hadoop: [ "3.3" ]
-        numpy: [ "numpy" ]
-        java_home: [ "/usr/lib/jvm/java-11-openjdk-amd64" ]
-        analytics: [ "false" ]
-
-    name: Tests Spark | python ${{ matrix.python-version }}, ${{ matrix.os }}, spark${{ matrix.spark }}, ${{ matrix.pandas }}, ${{ matrix.numpy }}
-    env:
-      JAVA_HOME: ${{ matrix.java_home }}
-      SPARK_VERSION: ${{ matrix.spark }}
-      HADOOP_VERSION: ${{ matrix.hadoop }}
-      SPARK_HOME: ${{ github.workspace }}/spark
-      YDATA_PROFILING_NO_ANALYTICS: ${{ matrix.analytics }}
+        python-version: [ "3.9", "3.10", "3.11", "3.12" ]
+        pyspark-version: [ "3.4.4", "3.5.0" ]
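+        # 4 Python versions × 2 PySpark versions = 8 matrix jobs, all on ubuntu-22.04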
+
+    name: Tests Spark | Python ${{ matrix.python-version }} | PySpark ${{ matrix.pyspark-version }}

     steps:
-      - uses: actions/checkout@v4
+      - name: Checkout Code
+        uses: actions/checkout@v4

-      - name: Install system dependencies
+      - name: Install Java (OpenJDK 11)
         run: |
           sudo apt-get update
-          sudo apt-get install -y openjdk-11-jdk curl tar
+          sudo apt-get install -y openjdk-11-jdk
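+          # GITHUB_ENV and GITHUB_PATH changes only take effect in subsequent steps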
+          echo "JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64" >> $GITHUB_ENV
+          echo "/usr/lib/jvm/java-11-openjdk-amd64/bin" >> $GITHUB_PATH
+          java -version

       - name: Setup Python
         uses: actions/setup-python@v5
@@ -161,32 +156,25 @@
         uses: actions/cache@v4
         with:
           path: ~/.cache/pip
-          key: ${{ runner.os }}-pip-${{ hashFiles('requirements/*.txt', 'setup.cfg', 'pyproject.toml') }}
+          key: pip-${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.pyspark-version }}-${{ hashFiles('requirements/*.txt', 'setup.cfg', 'pyproject.toml') }}
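+          # Scope the cache to the Python and PySpark versions so matrix jobs do not share stale wheels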
           restore-keys: |
-            ${{ runner.os }}-pip-
+            pip-${{ runner.os }}-

-      - name: Install Python Dependencies
+      - name: Install Dependencies
         run: |
-          pip install --upgrade pip setuptools wheel
-          pip install "${{ matrix.pandas }}" "${{ matrix.numpy }}" --no-cache-dir
-          pip install pyarrow>4.0.0 pyspark=="${{ matrix.spark }}" --no-cache-dir
-          pip install ".[test]"
+          python -m pip install --upgrade pip setuptools wheel
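+          # Quote the pyarrow spec so the shell does not treat ">" as output redirection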
+          pip install "pyarrow>4.0.0" pyspark=="${{ matrix.pyspark-version }}" --no-cache-dir
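+          # Written for later steps: ARROW_PRE_0_15_IPC_FORMAT is a legacy Arrow/Spark compatibility flag; SPARK_LOCAL_IP keeps Spark bound to localhost on the runner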
+          echo "ARROW_PRE_0_15_IPC_FORMAT=1" >> $GITHUB_ENV
+          echo "SPARK_LOCAL_IP=127.0.0.1" >> $GITHUB_ENV

-      - name: Download and Install Spark
+      - name: Run Tests
         run: |
-          SPARK_TGZ="spark-${{ matrix.spark }}-bin-hadoop${{ matrix.hadoop }}.tgz"
-          SPARK_URL="https://archive.apache.org/dist/spark/spark-${{ matrix.spark }}/${SPARK_TGZ}"
-          curl -sL "$SPARK_URL" | tar xz
-          mv spark-* $SPARK_HOME
-          echo "SPARK_HOME=${SPARK_HOME}" >> $GITHUB_ENV
-          echo "PATH=${SPARK_HOME}/bin:$PATH" >> $GITHUB_ENV
-
-      - if: ${{ matrix.spark != '3.0.1' }}
-        run: echo "ARROW_PRE_0_15_IPC_FORMAT=1" >> $GITHUB_ENV
-
-      - run: echo "SPARK_LOCAL_IP=127.0.0.1" >> $GITHUB_ENV
-
-      - run: make install
-      - run: make test_spark
-
+          make install
+          make install-spark-ci
+          pip install ".[spark]"
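+          # make test_spark runs the Spark-specific test target from the project Makefile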
+          make test_spark
