|
7 | 7 |
|
8 | 8 | from ydata_profiling.config import SparkSettings |
9 | 9 | from ydata_profiling.model.describe import describe |
| 10 | +from pyspark.sql.types import TimestampType |
10 | 11 |
|
11 | 12 | check_is_NaN = "ydata_profiling.check_is_NaN" |
12 | 13 |
|
@@ -41,15 +42,15 @@ def describe_data(): |
41 | 42 | "s1": np.ones(9), |
42 | 43 | "s2": ["some constant text $ % value {obj} " for _ in range(1, 10)], |
43 | 44 | "somedate": [ |
44 | | - datetime.datetime(2011, 7, 4), |
45 | | - datetime.datetime(2022, 1, 1, 13, 57), |
46 | | - datetime.datetime(1990, 12, 9), |
| 45 | + datetime.date(2011, 7, 4), |
| 46 | + datetime.date(2011, 7, 2), |
| 47 | + datetime.date(1990, 12, 9), |
47 | 48 | pd.NaT, |
48 | | - datetime.datetime(1990, 12, 9), |
49 | | - datetime.datetime(1970, 12, 9), |
50 | | - datetime.datetime(1972, 1, 2), |
51 | | - datetime.datetime(1970, 12, 9), |
52 | | - datetime.datetime(1970, 12, 9), |
| 49 | + datetime.date(1990, 12, 9), |
| 50 | + datetime.date(1970, 12, 9), |
| 51 | + datetime.date(1972, 1, 2), |
| 52 | + datetime.date(1970, 12, 9), |
| 53 | + datetime.date(1970, 12, 9), |
53 | 54 | ], |
54 | 55 | "bool_tf": [True, True, False, True, False, True, True, False, True], |
55 | 56 | "bool_tf_with_nan": [ |
@@ -370,13 +371,15 @@ def test_describe_spark_df( |
370 | 371 |
|
371 | 372 | if column == "mixed": |
372 | 373 | describe_data[column] = [str(i) for i in describe_data[column]] |
373 | | - if column == "bool_tf_with_nan": |
| 374 | + elif column == "bool_tf_with_nan": |
374 | 375 | describe_data[column] = [ |
375 | 376 | True if i else False for i in describe_data[column] # noqa: SIM210 |
376 | 377 | ] |
| 378 | + |
377 | 379 | pdf= pd.DataFrame({column: describe_data[column]})# Convert to Pandas DataFrame |
378 | 380 | # Ensure NaNs are replaced with None (Spark does not support NaN in non-float columns) |
379 | 381 | pdf = pdf.where(pd.notna(pdf), None) |
| 382 | + |
380 | 383 | sdf = spark_session.createDataFrame(pdf) |
381 | 384 |
|
382 | 385 | results = describe(cfg, sdf, summarizer_spark, typeset) |
|
0 commit comments