|
1 | 1 | """Correlations between variables.""" |
| 2 | + |
2 | 3 | import itertools |
3 | 4 | import warnings |
4 | 5 | from typing import Callable, Optional |
|
20 | 21 | DiscretizationType, |
21 | 22 | Discretizer, |
22 | 23 | ) |
| 24 | +from ydata_profiling.model.var_description.default import VarDescription |
23 | 25 |
|
24 | 26 |
|
25 | 27 | @Spearman.compute.register(Settings, pd.DataFrame, dict) |
@@ -87,9 +89,9 @@ def _pairwise_cramers(col_1: pd.Series, col_2: pd.Series) -> float: |
87 | 89 | return _cramers_corrected_stat(pd.crosstab(col_1, col_2), correction=True) |
88 | 90 |
|
89 | 91 |
|
90 | | -@Cramers.compute.register(Settings, pd.DataFrame, dict) |
| 92 | +@Cramers.compute.register(Settings, pd.DataFrame, dict[str, VarDescription]) |
91 | 93 | def pandas_cramers_compute( |
92 | | - config: Settings, df: pd.DataFrame, summary: dict |
| 94 | + config: Settings, df: pd.DataFrame, summary: dict[str, VarDescription] |
93 | 95 | ) -> Optional[pd.DataFrame]: |
94 | 96 | threshold = config.categorical_maximum_correlation_distinct |
95 | 97 |
|
@@ -128,9 +130,9 @@ def pandas_cramers_compute( |
128 | 130 | return correlation_matrix |
129 | 131 |
|
130 | 132 |
|
131 | | -@PhiK.compute.register(Settings, pd.DataFrame, dict) |
| 133 | +@PhiK.compute.register(Settings, pd.DataFrame, dict[str, VarDescription]) |
132 | 134 | def pandas_phik_compute( |
133 | | - config: Settings, df: pd.DataFrame, summary: dict |
| 135 | + config: Settings, df: pd.DataFrame, summary: dict[str, VarDescription] |
134 | 136 | ) -> Optional[pd.DataFrame]: |
135 | 137 | df_cols_dict = {i: list(df.columns).index(i) for i in df.columns} |
136 | 138 |
|
@@ -164,9 +166,9 @@ def pandas_phik_compute( |
164 | 166 | return correlation |
165 | 167 |
|
166 | 168 |
|
167 | | -@Auto.compute.register(Settings, pd.DataFrame, dict) |
| 169 | +@Auto.compute.register(Settings, pd.DataFrame, dict[str, VarDescription]) |
168 | 170 | def pandas_auto_compute( |
169 | | - config: Settings, df: pd.DataFrame, summary: dict |
| 171 | + config: Settings, df: pd.DataFrame, summary: dict[str, VarDescription] |
170 | 172 | ) -> Optional[pd.DataFrame]: |
171 | 173 | threshold = config.categorical_maximum_correlation_distinct |
172 | 174 | numerical_columns = [ |
@@ -195,7 +197,6 @@ def pandas_auto_compute( |
195 | 197 | columns=columns_tested, |
196 | 198 | ) |
197 | 199 | for col_1_name, col_2_name in itertools.combinations(columns_tested, 2): |
198 | | - |
199 | 200 | method = ( |
200 | 201 | _pairwise_spearman |
201 | 202 | if any(elem in categorical_columns for elem in [col_1_name, col_2_name]) |
|
0 commit comments