From 652c0902192ce5dbf130cc5d59dbdb82a027f1f0 Mon Sep 17 00:00:00 2001
From: Fabiana <30911746+fabclmnt@users.noreply.github.com>
Date: Mon, 17 Mar 2025 15:37:49 -0700
Subject: [PATCH 01/25] chore: update test pipeline to run higher spark version

---
 .github/workflows/tests.yml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 4f7b054a0..f3cc345c1 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -127,12 +127,12 @@ jobs:
     strategy:
       matrix:
         os: [ ubuntu-22.04 ]
-        python-version: [3.8]
+        python-version: ["3.9", "3.10", "3.11"]
         pandas: ["pandas>1.1"]
-        spark: ["3.0.1"]
-        hadoop: [ 2.7 ]
+        spark: ["3.4.4", "3.5.5"]
+        hadoop: [ 3.7 ]
         numpy: ["numpy"]
-        java_home: [ /usr/lib/jvm/java-8-openjdk-amd64 ]
+        java_home: [ /usr/lib/jvm/java-11-openjdk-amd64 ]
 
     name: Tests Spark | python ${{ matrix.python-version }}, ${{ matrix.os }}, spark${{ matrix.spark }}, ${{ matrix.pandas }}, ${{ matrix.numpy }}
     env:
@@ -172,7 +172,7 @@ jobs:
             ${{ runner.os }}-${{ matrix.pandas }}-pip-
       - run: |
           pip install --upgrade pip setuptools wheel
-          pip install pytest-spark>=0.6.0 pyarrow==1.0.1 pyspark=="${{ matrix.spark }}"
+          pip install pyarrow>4.0.0 pyspark=="${{ matrix.spark }}"
          pip install ".[test]"
           pip install "${{ matrix.pandas }}" "${{ matrix.numpy }}"
       - if: ${{ matrix.spark != '3.0.1' }}
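A note on the run step above before the next patch: in a shell, the unquoted constraint in pip install pyarrow>4.0.0 is parsed as an output redirection to a file named 4.0.0, so pip only ever sees "pyarrow"; the pyspark pin survives because it is quoted. An illustration of the two shell-safe spellings (illustrative only, not part of any patch in this series):

    import subprocess

    # argv form: no shell is involved, so ">" reaches pip as part of the
    # requirement string and the version constraint actually applies.
    subprocess.run(["pip", "install", "pyarrow>4.0.0"], check=True)

    # shell form: the constraint must be quoted; otherwise the command
    # degrades to plain "pip install pyarrow" with stdout redirected to a
    # file literally named "4.0.0".
    subprocess.run('pip install "pyarrow>4.0.0"', shell=True, check=True)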
From 5e969267c49c5d50b5620be628e4e31c6fc830e5 Mon Sep 17 00:00:00 2001
From: Fabiana <30911746+fabclmnt@users.noreply.github.com>
Date: Mon, 17 Mar 2025 15:46:55 -0700
Subject: [PATCH 02/25] chore: update spark test CI/CD pipeline.

---
 .github/workflows/tests.yml | 73 ++++++++++++++++++++-----------------
 1 file changed, 40 insertions(+), 33 deletions(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index f3cc345c1..98bac977f 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -123,63 +123,70 @@ jobs:
 
   test_spark:
     runs-on: ${{ matrix.os }}
-    continue-on-error: True
+    continue-on-error: true
     strategy:
       matrix:
         os: [ ubuntu-22.04 ]
-        python-version: ["3.9", "3.10", "3.11"]
-        pandas: ["pandas>1.1"]
-        spark: ["3.4.4", "3.5.5"]
-        hadoop: [ 3.7 ]
-        numpy: ["numpy"]
-        java_home: [ /usr/lib/jvm/java-11-openjdk-amd64 ]
+        python-version: [ "3.9", "3.10", "3.11" ]
+        pandas: [ "pandas>1.1" ]
+        spark: [ "3.4.4", "3.5.0" ]
+        hadoop: [ "3.3" ]
+        numpy: [ "numpy" ]
+        java_home: [ "/usr/lib/jvm/java-11-openjdk-amd64" ]
+        analytics: [ "false" ]
 
     name: Tests Spark | python ${{ matrix.python-version }}, ${{ matrix.os }}, spark${{ matrix.spark }}, ${{ matrix.pandas }}, ${{ matrix.numpy }}
     env:
       JAVA_HOME: ${{ matrix.java_home }}
       SPARK_VERSION: ${{ matrix.spark }}
       HADOOP_VERSION: ${{ matrix.hadoop }}
-      SPARK_DIRECTORY: ${{ github.workspace }}/../
-      SPARK_HOME: ${{ github.workspace }}/../spark/
+      SPARK_HOME: ${{ github.workspace }}/spark
       YDATA_PROFILING_NO_ANALYTICS: ${{ matrix.analytics }}
+
     steps:
       - uses: actions/checkout@v4
-      - name: Setup python
+
+      - name: Install system dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y openjdk-11-jdk curl tar
+
+      - name: Setup Python
         uses: actions/setup-python@v5
         with:
          python-version: ${{ matrix.python-version }}
           architecture: x64
-      - uses: actions/cache@v4
-        if: startsWith(runner.os, 'Linux')
+
+      - name: Cache pip dependencies
+        uses: actions/cache@v4
         with:
           path: ~/.cache/pip
-          key: ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }}
-          restore-keys: |
-            ${{ runner.os }}-${{ matrix.pandas }}-pip-\
-      - uses: actions/cache@v4
-        if: startsWith(runner.os, 'macOS')
-        with:
-          path: ~/Library/Caches/pip
-          key: ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }}
+          key: ${{ runner.os }}-pip-${{ hashFiles('requirements/*.txt', 'setup.cfg', 'pyproject.toml') }}
           restore-keys: |
-            ${{ runner.os }}-${{ matrix.pandas }}-pip-
-      - uses: actions/cache@v4
-        if: startsWith(runner.os, 'Windows')
-        with:
-          path: ~\AppData\Local\pip\Cache
-          key: ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }}
-          restore-keys: |
-            ${{ runner.os }}-${{ matrix.pandas }}-pip-
-      - run: |
+            ${{ runner.os }}-pip-
+
+      - name: Install Python Dependencies
+        run: |
           pip install --upgrade pip setuptools wheel
-          pip install pyarrow>4.0.0 pyspark=="${{ matrix.spark }}"
+          pip install "${{ matrix.pandas }}" "${{ matrix.numpy }}" --no-cache-dir
+          pip install pyarrow>4.0.0 pyspark=="${{ matrix.spark }}" --no-cache-dir
           pip install ".[test]"
-          pip install "${{ matrix.pandas }}" "${{ matrix.numpy }}"
+
+      - name: Download and Install Spark
+        run: |
+          SPARK_TGZ="spark-${{ matrix.spark }}-bin-hadoop${{ matrix.hadoop }}.tgz"
+          SPARK_URL="https://archive.apache.org/dist/spark/spark-${{ matrix.spark }}/${SPARK_TGZ}"
+          curl -sL "$SPARK_URL" | tar xz
+          mv spark-* $SPARK_HOME
+          echo "SPARK_HOME=${SPARK_HOME}" >> $GITHUB_ENV
+          echo "PATH=${SPARK_HOME}/bin:$PATH" >> $GITHUB_ENV
+
       - if: ${{ matrix.spark != '3.0.1' }}
         run: echo "ARROW_PRE_0_15_IPC_FORMAT=1" >> $GITHUB_ENV
+
       - run: echo "SPARK_LOCAL_IP=127.0.0.1" >> $GITHUB_ENV
+
       - run: make install
-      - run: make install-spark-ci
-      - run: pip install ".[spark]" # Make sure the proper version of pandas is install after everything
       - run: make test_spark
+
From cec71998822a1d4fd3dfb00c8ae5eed391277736 Mon Sep 17 00:00:00 2001
From: Fabiana <30911746+fabclmnt@users.noreply.github.com>
Date: Mon, 17 Mar 2025 16:03:18 -0700
Subject: [PATCH 03/25] chore: fix spark CI/CD

---
 .github/workflows/tests.yml | 69 ++++++++++++++-----------------
 1 file changed, 25 insertions(+), 44 deletions(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 98bac977f..ff4ff3447 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -122,34 +122,26 @@ jobs:
       - run: codecov -F py${{ matrix.python-version }}-${{ matrix.os }}-${{ matrix.pandas }}-${{ matrix.numpy }}
 
   test_spark:
-    runs-on: ${{ matrix.os }}
-    continue-on-error: true
+    runs-on: ubuntu-22.04
+    continue-on-error: false
     strategy:
       matrix:
-        os: [ ubuntu-22.04 ]
-        python-version: [ "3.9", "3.10", "3.11" ]
-        pandas: [ "pandas>1.1" ]
-        spark: [ "3.4.4", "3.5.0" ]
-        hadoop: [ "3.3" ]
-        numpy: [ "numpy" ]
-        java_home: [ "/usr/lib/jvm/java-11-openjdk-amd64" ]
-        analytics: [ "false" ]
-
-    name: Tests Spark | python ${{ matrix.python-version }}, ${{ matrix.os }}, spark${{ matrix.spark }}, ${{ matrix.pandas }}, ${{ matrix.numpy }}
-    env:
-      JAVA_HOME: ${{ matrix.java_home }}
-      SPARK_VERSION: ${{ matrix.spark }}
-      HADOOP_VERSION: ${{ matrix.hadoop }}
-      SPARK_HOME: ${{ github.workspace }}/spark
-      YDATA_PROFILING_NO_ANALYTICS: ${{ matrix.analytics }}
+        python-version: [ "3.9", "3.10", "3.11", "3.12" ]
+        pyspark-version: [ "3.4.4", "3.5.0" ]
+
+    name: Tests Spark | Python ${{ matrix.python-version }} | PySpark ${{ matrix.pyspark-version }}
 
     steps:
-      - uses: actions/checkout@v4
+      - name: Checkout Code
+        uses: actions/checkout@v4
 
-      - name: Install system dependencies
+      - name: Install Java (OpenJDK 11)
         run: |
           sudo apt-get update
-          sudo apt-get install -y openjdk-11-jdk curl tar
+          sudo apt-get install -y openjdk-11-jdk
+          echo "JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64" >> $GITHUB_ENV
+          echo "PATH=$JAVA_HOME/bin:$PATH" >> $GITHUB_ENV
+          java -version
 
       - name: Setup Python
         uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
           architecture: x64
 
       - name: Cache pip dependencies
         uses: actions/cache@v4
         with:
           path: ~/.cache/pip
-          key: ${{ runner.os }}-pip-${{ hashFiles('requirements/*.txt', 'setup.cfg', 'pyproject.toml') }}
+          key: pip-${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.pyspark-version }}-${{ hashFiles('requirements/*.txt', 'setup.cfg', 'pyproject.toml') }}
           restore-keys: |
-            ${{ runner.os }}-pip-
+            pip-${{ runner.os }}-
 
-      - name: Install Python Dependencies
+      - name: Install Dependencies
         run: |
-          pip install --upgrade pip setuptools wheel
-          pip install "${{ matrix.pandas }}" "${{ matrix.numpy }}" --no-cache-dir
-          pip install pyarrow>4.0.0 pyspark=="${{ matrix.spark }}" --no-cache-dir
-          pip install ".[test]"
+          python -m pip install --upgrade pip setuptools wheel
+          pip install pyarrow>4.0.0 pyspark=="${{ matrix.pyspark-version }}" --no-cache-dir
+          echo "ARROW_PRE_0_15_IPC_FORMAT=1" >> $GITHUB_ENV
+          echo "SPARK_LOCAL_IP=127.0.0.1" >> $GITHUB_ENV
 
-      - name: Download and Install Spark
+      - name: Run Tests
         run: |
-          SPARK_TGZ="spark-${{ matrix.spark }}-bin-hadoop${{ matrix.hadoop }}.tgz"
-          SPARK_URL="https://archive.apache.org/dist/spark/spark-${{ matrix.spark }}/${SPARK_TGZ}"
-          curl -sL "$SPARK_URL" | tar xz
-          mv spark-* $SPARK_HOME
-          echo "SPARK_HOME=${SPARK_HOME}" >> $GITHUB_ENV
-          echo "PATH=${SPARK_HOME}/bin:$PATH" >> $GITHUB_ENV
-
-      - if: ${{ matrix.spark != '3.0.1' }}
-        run: echo "ARROW_PRE_0_15_IPC_FORMAT=1" >> $GITHUB_ENV
-
-      - run: echo "SPARK_LOCAL_IP=127.0.0.1" >> $GITHUB_ENV
-
-      - run: make install
-      - run: make test_spark
-
+          make install
+          make install-spark-ci
+          pip install ".[spark]"
+          make test_spark

From 10e1cb9044d77493119791748333b5f95454ac26 Mon Sep 17 00:00:00 2001
From: Fabiana <30911746+fabclmnt@users.noreply.github.com>
Date: Mon, 17 Mar 2025 16:12:59 -0700
Subject: [PATCH 04/25] chore: remove make spark-ci

---
 .github/workflows/tests.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index ff4ff3447..fb3916342 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -167,7 +167,6 @@ jobs:
       - name: Run Tests
         run: |
           make install
-          make install-spark-ci
           pip install ".[spark]"
           make test_spark
 

From ebdb2d4a14ba6237910a68b0cda50b6a7c31c596 Mon Sep 17 00:00:00 2001
From: Fabiana <30911746+fabclmnt@users.noreply.github.com>
Date: Mon, 17 Mar 2025 16:16:57 -0700
Subject: [PATCH 05/25] chore: add pytest to the dependencies

---
 .github/workflows/tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index fb3916342..5c3ab8ac7 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -167,6 +167,6 @@ jobs:
       - name: Run Tests
         run: |
           make install
-          pip install ".[spark]"
+          pip install ".[spark, test]"
           make test_spark
 
"numba>=0.56.0, <1", + "numba>=0.56.0, <=0.61", ] dynamic = ["version"] From 0f085dcdc85f8417a29178cdc9853095f076e1d9 Mon Sep 17 00:00:00 2001 From: Fabiana <30911746+fabclmnt@users.noreply.github.com> Date: Mon, 17 Mar 2025 16:33:03 -0700 Subject: [PATCH 07/25] feat: update pyspark install --- .github/workflows/tests.yml | 2 +- pyproject.toml | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 5c3ab8ac7..08136a8f5 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -167,6 +167,6 @@ jobs: - name: Run Tests run: | make install - pip install ".[spark, test]" + pip install ".[test]" make test_spark diff --git a/pyproject.toml b/pyproject.toml index 83fdd8eea..9f5814c8d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -84,10 +84,10 @@ dev = [ # note that if you are using pyspark 2.3 or 2.4 and pyarrow >= 0.15, you might need to # set ARROW_PRE_0_15_IPC_FORMAT=1 in your conf/spark-env.sh for toPandas functions to work properly spark = [ - "pyspark>=2.3.0", - "pyarrow>=2.0.0", - "pandas>1.1, <2, !=1.4.0", - "numpy>=1.16.0,<1.24", + "pyspark>=3.0.*", + "pyarrow>=4.0.0", + "pandas>1.1", + "numpy>=1.16.0", "visions[type_image_path]>=0.7.5, <0.7.7", ] test = [ From 443e4421032dd11443c828f8ed75de77dc0b2cda Mon Sep 17 00:00:00 2001 From: Fabiana <30911746+fabclmnt@users.noreply.github.com> Date: Mon, 17 Mar 2025 16:36:57 -0700 Subject: [PATCH 08/25] fix: pyproject --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9f5814c8d..508f50c18 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -84,7 +84,7 @@ dev = [ # note that if you are using pyspark 2.3 or 2.4 and pyarrow >= 0.15, you might need to # set ARROW_PRE_0_15_IPC_FORMAT=1 in your conf/spark-env.sh for toPandas functions to work properly spark = [ - "pyspark>=3.0.*", + "pyspark>=3.0", "pyarrow>=4.0.0", "pandas>1.1", "numpy>=1.16.0", From d55d4a65ff50452fb49e6735d69506caf255bd44 Mon Sep 17 00:00:00 2001 From: Fabiana <30911746+fabclmnt@users.noreply.github.com> Date: Mon, 17 Mar 2025 16:40:48 -0700 Subject: [PATCH 09/25] chore: fix makefile to run the tests --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 46a4a28d5..22a69c758 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,7 @@ test: ydata_profiling -h test_spark: - pytest --spark_home=${SPARK_HOME} tests/backends/spark_backend/ + pytest tests/backends/spark_backend/ ydata_profiling -h test_cov: @@ -36,7 +36,7 @@ install-docs: install ### Installs regular and docs dependencies install-spark-ci: sudo apt-get update - sudo apt-get -y install openjdk-8-jdk + sudo apt-get -y install openjdk-11-jdk curl https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz \ --output ${SPARK_DIRECTORY}/spark.tgz cd ${SPARK_DIRECTORY} && tar -xvzf spark.tgz && mv spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION} spark From a881b01ef6487e0d51f98e48f9dd44f480cde49c Mon Sep 17 00:00:00 2001 From: Fabiana <30911746+fabclmnt@users.noreply.github.com> Date: Mon, 17 Mar 2025 17:09:19 -0700 Subject: [PATCH 10/25] chore: tests for pyspark versions bigger than 3.4 --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 08136a8f5..17920d72b 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ 
From d55d4a65ff50452fb49e6735d69506caf255bd44 Mon Sep 17 00:00:00 2001
From: Fabiana <30911746+fabclmnt@users.noreply.github.com>
Date: Mon, 17 Mar 2025 16:40:48 -0700
Subject: [PATCH 09/25] chore: fix makefile to run the tests

---
 Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 46a4a28d5..22a69c758 100644
--- a/Makefile
+++ b/Makefile
@@ -10,7 +10,7 @@ test:
 	ydata_profiling -h
 
 test_spark:
-	pytest --spark_home=${SPARK_HOME} tests/backends/spark_backend/
+	pytest tests/backends/spark_backend/
 	ydata_profiling -h
 
 test_cov:
@@ -36,7 +36,7 @@ install-docs: install ### Installs regular and docs dependencies
 install-spark-ci:
 	sudo apt-get update
-	sudo apt-get -y install openjdk-8-jdk
+	sudo apt-get -y install openjdk-11-jdk
 	curl https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz \
 		--output ${SPARK_DIRECTORY}/spark.tgz
 	cd ${SPARK_DIRECTORY} && tar -xvzf spark.tgz && mv spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION} spark

From a881b01ef6487e0d51f98e48f9dd44f480cde49c Mon Sep 17 00:00:00 2001
From: Fabiana <30911746+fabclmnt@users.noreply.github.com>
Date: Mon, 17 Mar 2025 17:09:19 -0700
Subject: [PATCH 10/25] chore: tests for pyspark versions bigger than 3.4

---
 .github/workflows/tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 08136a8f5..17920d72b 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -127,7 +127,7 @@ jobs:
     strategy:
       matrix:
         python-version: [ "3.9", "3.10", "3.11", "3.12" ]
-        pyspark-version: [ "3.4.4", "3.5.0" ]
+        pyspark-version: [ "3.5.0" ]
 
     name: Tests Spark | Python ${{ matrix.python-version }} | PySpark ${{ matrix.pyspark-version }}

From 7d702f5499288aa29725e27aacd882e69af1f2bc Mon Sep 17 00:00:00 2001
From: Fabiana <30911746+fabclmnt@users.noreply.github.com>
Date: Mon, 17 Mar 2025 21:51:45 -0700
Subject: [PATCH 11/25] fix: add other pyspark versions to the tests

---
 .github/workflows/tests.yml                  |  2 +-
 .../spark_backend/test_descriptions_spark.py | 21 +++++++++++--------
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 17920d72b..ce45742f6 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -127,7 +127,7 @@ jobs:
     strategy:
       matrix:
         python-version: [ "3.9", "3.10", "3.11", "3.12" ]
-        pyspark-version: [ "3.5.0" ]
+        pyspark-version: [ "3.4" , "3.5" ]
 
     name: Tests Spark | Python ${{ matrix.python-version }} | PySpark ${{ matrix.pyspark-version }}

diff --git a/tests/backends/spark_backend/test_descriptions_spark.py b/tests/backends/spark_backend/test_descriptions_spark.py
index c11330017..20a6ac18d 100644
--- a/tests/backends/spark_backend/test_descriptions_spark.py
+++ b/tests/backends/spark_backend/test_descriptions_spark.py
@@ -7,6 +7,7 @@
 
 from ydata_profiling.config import SparkSettings
 from ydata_profiling.model.describe import describe
+from pyspark.sql.types import TimestampType
 
 check_is_NaN = "ydata_profiling.check_is_NaN"
@@ -41,15 +42,15 @@ def describe_data():
         "s1": np.ones(9),
         "s2": ["some constant text $ % value {obj} " for _ in range(1, 10)],
         "somedate": [
-            datetime.datetime(2011, 7, 4),
-            datetime.datetime(2022, 1, 1, 13, 57),
-            datetime.datetime(1990, 12, 9),
+            datetime.date(2011, 7, 4),
+            datetime.date(2011, 7, 2),
+            datetime.date(1990, 12, 9),
             pd.NaT,
-            datetime.datetime(1990, 12, 9),
-            datetime.datetime(1970, 12, 9),
-            datetime.datetime(1972, 1, 2),
-            datetime.datetime(1970, 12, 9),
-            datetime.datetime(1970, 12, 9),
+            datetime.date(1990, 12, 9),
+            datetime.date(1970, 12, 9),
+            datetime.date(1972, 1, 2),
+            datetime.date(1970, 12, 9),
+            datetime.date(1970, 12, 9),
         ],
         "bool_tf": [True, True, False, True, False, True, True, False, True],
         "bool_tf_with_nan": [
@@ -370,13 +371,15 @@ def test_describe_spark_df(
 
     if column == "mixed":
         describe_data[column] = [str(i) for i in describe_data[column]]
-    if column == "bool_tf_with_nan":
+    elif column == "bool_tf_with_nan":
         describe_data[column] = [
             True if i else False for i in describe_data[column]  # noqa: SIM210
         ]
+    pdf = pd.DataFrame({column: describe_data[column]})  # Convert to Pandas DataFrame
     # Ensure NaNs are replaced with None (Spark does not support NaN in non-float columns)
     pdf = pdf.where(pd.notna(pdf), None)
+
     sdf = spark_session.createDataFrame(pdf)
 
     results = describe(cfg, sdf, summarizer_spark, typeset)
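The datetime.date switch and the NaN-to-None scrub in patch 11 both come down to how Spark infers column types from pandas input. A minimal sketch of the conversion path the test now exercises (assuming a local SparkSession; the three-row column is illustrative):

    import datetime

    import pandas as pd
    from pyspark.sql import SparkSession

    spark = SparkSession.builder.master("local[1]").getOrCreate()

    # An object column mixing dates with a missing value (pd.NaT).
    pdf = pd.DataFrame(
        {"somedate": [datetime.date(2011, 7, 4), pd.NaT, datetime.date(1990, 12, 9)]}
    )

    # Spark does not accept NaN/NaT in non-float columns, so missing values
    # are normalized to None before the frame is handed over.
    pdf = pdf.where(pd.notna(pdf), None)

    sdf = spark.createDataFrame(pdf)
    sdf.printSchema()  # somedate: date (nullable = true)
    spark.stop()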
From f8c06e8fb3e6aafddafa49777d701fa72822b44b Mon Sep 17 00:00:00 2001
From: Fabiana <30911746+fabclmnt@users.noreply.github.com>
Date: Mon, 17 Mar 2025 22:01:07 -0700
Subject: [PATCH 12/25] chore: use ubuntu-22.04

---
 .github/workflows/pull-request.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/pull-request.yml b/.github/workflows/pull-request.yml
index 16c628479..e95859fa8 100644
--- a/.github/workflows/pull-request.yml
+++ b/.github/workflows/pull-request.yml
@@ -9,7 +9,7 @@ on:
 jobs:
   commitlint:
     name: Lint commit message
-    runs-on: ubuntu-24.04
+    runs-on: ubuntu-22.04
 
     steps:
       - uses: actions/checkout@v4
@@ -21,7 +21,7 @@ jobs:
   lint:
     if: github.actor != 'renovate[bot]'
     name: Lint source code
-    runs-on: ubuntu-24.04
+    runs-on: ubuntu-22.04
 
     steps:
       - uses: actions/checkout@v4
@@ -85,7 +85,7 @@ jobs:
 
   validate-docs:
     name: Validate Docs
-    runs-on: ubuntu-24.04
+    runs-on: ubuntu-22.04
 
     steps:
       - uses: actions/checkout@v4
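Taken together, the workflow that emerges from these commits runs PySpark installed from pip against a system OpenJDK 11, with no separately downloaded Spark distribution. A hedged smoke test of that combination (the JAVA_HOME path is the one the workflow assumes on ubuntu-22.04; none of this script is part of the repository):

    import os
    import subprocess

    from pyspark.sql import SparkSession

    # Values the updated workflow exports via $GITHUB_ENV (assumed paths).
    os.environ.setdefault("JAVA_HOME", "/usr/lib/jvm/java-11-openjdk-amd64")
    os.environ.setdefault("SPARK_LOCAL_IP", "127.0.0.1")

    subprocess.run(["java", "-version"], check=True)  # expect an OpenJDK 11 build

    spark = SparkSession.builder.master("local[1]").appName("ci-smoke").getOrCreate()
    print(spark.version)  # should match the pinned pyspark, e.g. 3.4.x or 3.5.x
    spark.stop()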
"s2": ["some constant text $ % value {obj} " for _ in range(1, 10)], "somedate": [ - datetime.datetime(2011, 7, 4), - datetime.datetime(2022, 1, 1, 13, 57), - datetime.datetime(1990, 12, 9), + datetime.date(2011, 7, 4), + datetime.date(2011, 7, 2), + datetime.date(1990, 12, 9), pd.NaT, - datetime.datetime(1990, 12, 9), - datetime.datetime(1970, 12, 9), - datetime.datetime(1972, 1, 2), - datetime.datetime(1970, 12, 9), - datetime.datetime(1970, 12, 9), + datetime.date(1990, 12, 9), + datetime.date(1970, 12, 9), + datetime.date(1972, 1, 2), + datetime.date(1970, 12, 9), + datetime.date(1970, 12, 9), ], "bool_tf": [True, True, False, True, False, True, True, False, True], "bool_tf_with_nan": [ @@ -370,13 +371,15 @@ def test_describe_spark_df( if column == "mixed": describe_data[column] = [str(i) for i in describe_data[column]] - if column == "bool_tf_with_nan": + elif column == "bool_tf_with_nan": describe_data[column] = [ True if i else False for i in describe_data[column] # noqa: SIM210 ] + pdf= pd.DataFrame({column: describe_data[column]})# Convert to Pandas DataFrame # Ensure NaNs are replaced with None (Spark does not support NaN in non-float columns) pdf = pdf.where(pd.notna(pdf), None) + sdf = spark_session.createDataFrame(pdf) results = describe(cfg, sdf, summarizer_spark, typeset) From 48cb72f53043082f0cc7c731fc60cc61ceae7700 Mon Sep 17 00:00:00 2001 From: Fabiana <30911746+fabclmnt@users.noreply.github.com> Date: Mon, 17 Mar 2025 22:01:07 -0700 Subject: [PATCH 24/25] chore: use ubuntu-22.04 --- .github/workflows/pull-request.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pull-request.yml b/.github/workflows/pull-request.yml index d51d14932..d93940070 100644 --- a/.github/workflows/pull-request.yml +++ b/.github/workflows/pull-request.yml @@ -9,7 +9,7 @@ on: jobs: commitlint: name: Lint commit message - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v4 @@ -21,7 +21,7 @@ jobs: lint: if: github.actor != 'renovate[bot]' name: Lint source code - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v4 @@ -85,7 +85,7 @@ jobs: validate-docs: name: Validate Docs - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v4 From dfa8987610b80727f8c1823473e6152b01c8ec28 Mon Sep 17 00:00:00 2001 From: Fabiana <30911746+fabclmnt@users.noreply.github.com> Date: Tue, 18 Mar 2025 11:06:15 -0700 Subject: [PATCH 25/25] chore: remove unused timestamp --- tests/backends/spark_backend/test_descriptions_spark.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/backends/spark_backend/test_descriptions_spark.py b/tests/backends/spark_backend/test_descriptions_spark.py index cf684f369..24de9afbb 100644 --- a/tests/backends/spark_backend/test_descriptions_spark.py +++ b/tests/backends/spark_backend/test_descriptions_spark.py @@ -7,7 +7,6 @@ from ydata_profiling.config import SparkSettings from ydata_profiling.model.describe import describe -from pyspark.sql.types import TimestampType check_is_NaN = "ydata_profiling.check_is_NaN"