Skip to content

Commit 243bf17

Browse files
committed
fix: add other pyspark versions to the tests
1 parent 53383c4 commit 243bf17

2 files changed

Lines changed: 13 additions & 10 deletions

File tree

.github/workflows/tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ jobs:
127127
strategy:
128128
matrix:
129129
python-version: [ "3.9", "3.10", "3.11", "3.12" ]
130-
pyspark-version: [ "3.5.0" ]
130+
pyspark-version: [ "3.4" , "3.5" ]
131131

132132
name: Tests Spark | Python ${{ matrix.python-version }} | PySpark ${{ matrix.pyspark-version }}
133133

tests/backends/spark_backend/test_descriptions_spark.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
from ydata_profiling.config import SparkSettings
99
from ydata_profiling.model.describe import describe
10+
from pyspark.sql.types import TimestampType
1011

1112
check_is_NaN = "ydata_profiling.check_is_NaN"
1213

@@ -41,15 +42,15 @@ def describe_data():
4142
"s1": np.ones(9),
4243
"s2": ["some constant text $ % value {obj} " for _ in range(1, 10)],
4344
"somedate": [
44-
datetime.datetime(2011, 7, 4),
45-
datetime.datetime(2022, 1, 1, 13, 57),
46-
datetime.datetime(1990, 12, 9),
45+
datetime.date(2011, 7, 4),
46+
datetime.date(2011, 7, 2),
47+
datetime.date(1990, 12, 9),
4748
pd.NaT,
48-
datetime.datetime(1990, 12, 9),
49-
datetime.datetime(1970, 12, 9),
50-
datetime.datetime(1972, 1, 2),
51-
datetime.datetime(1970, 12, 9),
52-
datetime.datetime(1970, 12, 9),
49+
datetime.date(1990, 12, 9),
50+
datetime.date(1970, 12, 9),
51+
datetime.date(1972, 1, 2),
52+
datetime.date(1970, 12, 9),
53+
datetime.date(1970, 12, 9),
5354
],
5455
"bool_tf": [True, True, False, True, False, True, True, False, True],
5556
"bool_tf_with_nan": [
@@ -370,13 +371,15 @@ def test_describe_spark_df(
370371

371372
if column == "mixed":
372373
describe_data[column] = [str(i) for i in describe_data[column]]
373-
if column == "bool_tf_with_nan":
374+
elif column == "bool_tf_with_nan":
374375
describe_data[column] = [
375376
True if i else False for i in describe_data[column] # noqa: SIM210
376377
]
378+
377379
pdf= pd.DataFrame({column: describe_data[column]})# Convert to Pandas DataFrame
378380
# Ensure NaNs are replaced with None (Spark does not support NaN in non-float columns)
379381
pdf = pdf.where(pd.notna(pdf), None)
382+
380383
sdf = spark_session.createDataFrame(pdf)
381384

382385
results = describe(cfg, sdf, summarizer_spark, typeset)

0 commit comments

Comments (0)