Skip to content

Commit 1c94d33

Browse files
Fabiana Clemente (fabclmnt)
authored and committed
fix: fix correlations issues whenever strings are present in the dataset.
1 parent 9027235 commit 1c94d33

1 file changed

Lines changed: 7 additions & 4 deletions

File tree

src/ydata_profiling/model/pandas/correlations_pandas.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,21 +26,24 @@
2626
def pandas_spearman_compute(
2727
config: Settings, df: pd.DataFrame, summary: dict
2828
) -> Optional[pd.DataFrame]:
29-
return df.corr(method="spearman")
29+
df_aux = df.select_dtypes(include="number").copy()
30+
return df_aux.corr(method="spearman")
3031

3132

3233
@Pearson.compute.register(Settings, pd.DataFrame, dict)
3334
def pandas_pearson_compute(
3435
config: Settings, df: pd.DataFrame, summary: dict
3536
) -> Optional[pd.DataFrame]:
36-
return df.corr(method="pearson")
37+
df_aux = df.select_dtypes(include="number").copy()
38+
return df_aux.corr(method="pearson")
3739

3840

3941
@Kendall.compute.register(Settings, pd.DataFrame, dict)
4042
def pandas_kendall_compute(
4143
config: Settings, df: pd.DataFrame, summary: dict
4244
) -> Optional[pd.DataFrame]:
43-
return df.corr(method="kendall")
45+
df_aux = df.select_dtypes(include="number").copy()
46+
return df_aux.corr(method="kendall")
4447

4548

4649
def _cramers_corrected_stat(confusion_matrix: pd.DataFrame, correction: bool) -> float:
@@ -195,7 +198,7 @@ def pandas_auto_compute(
195198

196199
method = (
197200
_pairwise_spearman
198-
if col_1_name and col_2_name not in categorical_columns
201+
if any(elem in categorical_columns for elem in [col_1_name, col_2_name]) is False
199202
else _pairwise_cramers
200203
)
201204

0 commit comments

Comments
 (0)