|
1 | 1 | """Correlations between variables.""" |
| 2 | + |
2 | 3 | import itertools |
3 | 4 | import warnings |
4 | 5 | from typing import Callable, Optional |
|
20 | 21 | DiscretizationType, |
21 | 22 | Discretizer, |
22 | 23 | ) |
| 24 | +from ydata_profiling.model.var_description.default import VarDescription |
23 | 25 |
|
24 | 26 |
|
25 | 27 | @Spearman.compute.register(Settings, pd.DataFrame, dict) |
@@ -84,9 +86,9 @@ def _pairwise_cramers(col_1: pd.Series, col_2: pd.Series) -> float: |
84 | 86 | return _cramers_corrected_stat(pd.crosstab(col_1, col_2), correction=True) |
85 | 87 |
|
86 | 88 |
|
87 | | -@Cramers.compute.register(Settings, pd.DataFrame, dict) |
| 89 | +@Cramers.compute.register(Settings, pd.DataFrame, dict[str, VarDescription]) |
88 | 90 | def pandas_cramers_compute( |
89 | | - config: Settings, df: pd.DataFrame, summary: dict |
| 91 | + config: Settings, df: pd.DataFrame, summary: dict[str, VarDescription] |
90 | 92 | ) -> Optional[pd.DataFrame]: |
91 | 93 | threshold = config.categorical_maximum_correlation_distinct |
92 | 94 |
|
@@ -125,9 +127,9 @@ def pandas_cramers_compute( |
125 | 127 | return correlation_matrix |
126 | 128 |
|
127 | 129 |
|
128 | | -@PhiK.compute.register(Settings, pd.DataFrame, dict) |
| 130 | +@PhiK.compute.register(Settings, pd.DataFrame, dict[str, VarDescription]) |
129 | 131 | def pandas_phik_compute( |
130 | | - config: Settings, df: pd.DataFrame, summary: dict |
| 132 | + config: Settings, df: pd.DataFrame, summary: dict[str, VarDescription] |
131 | 133 | ) -> Optional[pd.DataFrame]: |
132 | 134 | df_cols_dict = {i: list(df.columns).index(i) for i in df.columns} |
133 | 135 |
|
@@ -161,9 +163,9 @@ def pandas_phik_compute( |
161 | 163 | return correlation |
162 | 164 |
|
163 | 165 |
|
164 | | -@Auto.compute.register(Settings, pd.DataFrame, dict) |
| 166 | +@Auto.compute.register(Settings, pd.DataFrame, dict[str, VarDescription]) |
165 | 167 | def pandas_auto_compute( |
166 | | - config: Settings, df: pd.DataFrame, summary: dict |
| 168 | + config: Settings, df: pd.DataFrame, summary: dict[str, VarDescription] |
167 | 169 | ) -> Optional[pd.DataFrame]: |
168 | 170 | threshold = config.categorical_maximum_correlation_distinct |
169 | 171 | numerical_columns = [ |
@@ -192,7 +194,6 @@ def pandas_auto_compute( |
192 | 194 | columns=columns_tested, |
193 | 195 | ) |
194 | 196 | for col_1_name, col_2_name in itertools.combinations(columns_tested, 2): |
195 | | - |
196 | 197 | method = ( |
197 | 198 | _pairwise_spearman |
198 | 199 | if col_1_name and col_2_name not in categorical_columns |
|
0 commit comments