Skip to content

Commit 93ae583

Browse files
committed
replace hashable decorator with description class
1 parent 0f50680 commit 93ae583

9 files changed

Lines changed: 93 additions & 61 deletions

src/ydata_profiling/model/pandas/describe_boolean_pandas.py

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,18 +4,14 @@
44

55
from ydata_profiling.config import Settings
66
from ydata_profiling.model.pandas.imbalance_pandas import column_imbalance_score
7-
from ydata_profiling.model.summary_algorithms import (
8-
describe_boolean_1d,
9-
series_hashable,
10-
)
11-
from ydata_profiling.model.var_description.default import VarDescription
7+
from ydata_profiling.model.summary_algorithms import describe_boolean_1d
8+
from ydata_profiling.model.var_description.default import VarDescriptionHashable
129

1310

1411
@describe_boolean_1d.register
15-
@series_hashable
1612
def pandas_describe_boolean_1d(
17-
config: Settings, series: pd.Series, summary: VarDescription
18-
) -> Tuple[Settings, pd.Series, VarDescription]:
13+
config: Settings, series: pd.Series, summary: VarDescriptionHashable
14+
) -> Tuple[Settings, pd.Series, VarDescriptionHashable]:
1915
"""Describe a boolean series.
2016
2117
Args:

src/ydata_profiling/model/pandas/describe_categorical_pandas.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,8 @@
1414
describe_categorical_1d,
1515
histogram_compute,
1616
series_handle_nulls,
17-
series_hashable,
1817
)
19-
from ydata_profiling.model.var_description.default import VarDescription
18+
from ydata_profiling.model.var_description.default import VarDescriptionHashable
2019

2120

2221
def get_character_counts_vc(vc: pd.Series) -> pd.Series:
@@ -208,11 +207,10 @@ def length_summary_vc(vc: pd.Series) -> dict:
208207

209208

210209
@describe_categorical_1d.register
211-
@series_hashable
212210
@series_handle_nulls
213211
def pandas_describe_categorical_1d(
214-
config: Settings, series: pd.Series, summary: VarDescription
215-
) -> Tuple[Settings, pd.Series, VarDescription]:
212+
config: Settings, series: pd.Series, summary: VarDescriptionHashable
213+
) -> Tuple[Settings, pd.Series, VarDescriptionHashable]:
216214
"""Describe a categorical series.
217215
218216
Args:

src/ydata_profiling/model/pandas/describe_date_pandas.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,17 +9,15 @@
99
describe_date_1d,
1010
histogram_compute,
1111
series_handle_nulls,
12-
series_hashable,
1312
)
14-
from ydata_profiling.model.var_description.default import VarDescription
13+
from ydata_profiling.model.var_description.default import VarDescriptionHashable
1514

1615

1716
@describe_date_1d.register
18-
@series_hashable
1917
@series_handle_nulls
2018
def pandas_describe_date_1d(
21-
config: Settings, series: pd.Series, summary: VarDescription
22-
) -> Tuple[Settings, pd.Series, VarDescription]:
19+
config: Settings, series: pd.Series, summary: VarDescriptionHashable
20+
) -> Tuple[Settings, pd.Series, VarDescriptionHashable]:
2321
"""Describe a date series.
2422
2523
Args:

src/ydata_profiling/model/pandas/describe_numeric_pandas.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import numpy as np
44
import pandas as pd
55

6-
from ydata_profiling.model.var_description.default import VarDescription
6+
from ydata_profiling.model.var_description.default import VarDescriptionHashable
77
from ydata_profiling.utils.compat import pandas_version_info
88

99
if pandas_version_info() >= (1, 5):
@@ -17,7 +17,6 @@
1717
describe_numeric_1d,
1818
histogram_compute,
1919
series_handle_nulls,
20-
series_hashable,
2120
)
2221

2322

@@ -66,11 +65,10 @@ def numeric_stats_numpy(
6665

6766

6867
@describe_numeric_1d.register
69-
@series_hashable
7068
@series_handle_nulls
7169
def pandas_describe_numeric_1d(
72-
config: Settings, series: pd.Series, summary: VarDescription
73-
) -> Tuple[Settings, pd.Series, VarDescription]:
70+
config: Settings, series: pd.Series, summary: VarDescriptionHashable
71+
) -> Tuple[Settings, pd.Series, VarDescriptionHashable]:
7472
"""Describe a numeric series.
7573
7674
Args:

src/ydata_profiling/model/pandas/describe_supported_pandas.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
from ydata_profiling.config import Settings
66
from ydata_profiling.model.pandas.var_description.default_pandas import (
7-
VarDescriptionPandas,
7+
get_default_pandas_description,
88
)
99
from ydata_profiling.model.summary_algorithms import describe_supported
1010
from ydata_profiling.model.var_description.default import VarDescription
@@ -25,6 +25,6 @@ def pandas_describe_supported(
2525
A dict containing calculated series description values.
2626
"""
2727

28-
series_description = VarDescriptionPandas(config, series, description)
28+
series_description = get_default_pandas_description(config, series, description)
2929

3030
return config, series, series_description

src/ydata_profiling/model/pandas/describe_text_pandas.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,19 +12,17 @@
1212
describe_text_1d,
1313
histogram_compute,
1414
series_handle_nulls,
15-
series_hashable,
1615
)
17-
from ydata_profiling.model.var_description.default import VarDescription
16+
from ydata_profiling.model.var_description.default import VarDescriptionHashable
1817

1918

2019
@describe_text_1d.register
21-
@series_hashable
2220
@series_handle_nulls
2321
def pandas_describe_text_1d(
2422
config: Settings,
2523
series: pd.Series,
26-
summary: VarDescription,
27-
) -> Tuple[Settings, pd.Series, VarDescription]:
24+
summary: VarDescriptionHashable,
25+
) -> Tuple[Settings, pd.Series, VarDescriptionHashable]:
2826
"""Describe a string series.
2927
3028
Args:

src/ydata_profiling/model/pandas/describe_timeseries_pandas.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,8 @@
1111
describe_numeric_1d,
1212
describe_timeseries_1d,
1313
series_handle_nulls,
14-
series_hashable,
1514
)
16-
from ydata_profiling.model.var_description.default import VarDescription
15+
from ydata_profiling.model.var_description.default import VarDescriptionHashable
1716

1817

1918
def stationarity_test(config: Settings, series: pd.Series) -> Tuple[bool, float]:
@@ -191,11 +190,10 @@ def compute_gap_stats(series: pd.Series) -> pd.Series:
191190

192191

193192
@describe_timeseries_1d.register
194-
@series_hashable
195193
@series_handle_nulls
196194
def pandas_describe_timeseries_1d(
197-
config: Settings, series: pd.Series, summary: VarDescription
198-
) -> Tuple[Settings, pd.Series, VarDescription]:
195+
config: Settings, series: pd.Series, summary: VarDescriptionHashable
196+
) -> Tuple[Settings, pd.Series, VarDescriptionHashable]:
199197
"""Describe a timeseries.
200198
201199
Args:
src/ydata_profiling/model/pandas/var_description/default_pandas.py

Lines changed: 55 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,77 @@
1+
from __future__ import annotations
2+
13
from dataclasses import dataclass
24

35
import pandas as pd
46

57
from ydata_profiling.config import Settings
68
from ydata_profiling.model.pandas.var_description.counts_pandas import VarCountsPandas
7-
from ydata_profiling.model.var_description.default import VarDescription
9+
from ydata_profiling.model.var_description.default import (
10+
VarDescription,
11+
VarDescriptionHashable,
12+
)
813

914

1015
@dataclass
1116
class VarDescriptionPandas(VarDescription):
12-
"""Default description for pandas columns"""
17+
"""Default description for pandas columns."""
18+
19+
@classmethod
20+
def from_var_counts(
21+
cls, var_counts: VarCountsPandas, init_dict: dict
22+
) -> VarDescriptionPandas:
23+
"""Get a default description from a VarCountsPandas object."""
24+
return VarDescriptionPandas(
25+
n=var_counts.n,
26+
count=var_counts.count,
27+
n_missing=var_counts.n_missing,
28+
p_missing=var_counts.p_missing,
29+
hashable=var_counts.hashable,
30+
memory_size=var_counts.memory_size,
31+
ordering=var_counts.ordering,
32+
value_counts_index_sorted=var_counts.value_counts_index_sorted,
33+
value_counts_without_nan=var_counts.value_counts_without_nan,
34+
var_specific=init_dict,
35+
)
1336

14-
def __init__(self, config: Settings, series: pd.Series, init_dict: dict):
15-
_var_counts = VarCountsPandas(config, series)
1637

17-
_count = _var_counts.count
18-
value_counts = _var_counts.value_counts_without_nan
38+
@dataclass
39+
class VarDescriptionPandasHashable(VarDescriptionHashable):
40+
"""Default description for pandas columns that are hashable (common types)."""
41+
42+
@classmethod
43+
def from_var_counts(
44+
cls, var_counts: VarCountsPandas, init_dict: dict
45+
) -> VarDescriptionPandasHashable:
46+
"""Get a default description for a hashable column from a VarCountsPandas object."""
47+
_count = var_counts.count
48+
value_counts = var_counts.value_counts_without_nan
1949
distinct_count = len(value_counts)
2050
unique_count = value_counts.where(value_counts == 1).count()
2151

22-
super().__init__(
23-
n=_var_counts.n,
24-
count=_var_counts.count,
25-
n_missing=_var_counts.n_missing,
26-
p_missing=_var_counts.p_missing,
27-
hashable=_var_counts.hashable,
28-
memory_size=_var_counts.memory_size,
29-
ordering=_var_counts.ordering,
30-
value_counts_index_sorted=_var_counts.value_counts_index_sorted,
31-
value_counts_without_nan=_var_counts.value_counts_without_nan,
52+
return VarDescriptionPandasHashable(
53+
n=var_counts.n,
54+
count=var_counts.count,
55+
n_missing=var_counts.n_missing,
56+
p_missing=var_counts.p_missing,
57+
hashable=var_counts.hashable,
58+
memory_size=var_counts.memory_size,
59+
ordering=var_counts.ordering,
60+
value_counts_index_sorted=var_counts.value_counts_index_sorted,
61+
value_counts_without_nan=var_counts.value_counts_without_nan,
3262
n_distinct=distinct_count,
3363
p_distinct=distinct_count / _count if _count > 0 else 0,
3464
is_unique=unique_count == _count and _count > 0,
3565
n_unique=unique_count,
3666
p_unique=unique_count / _count if _count > 0 else 0,
3767
var_specific=init_dict,
3868
)
69+
70+
71+
def get_default_pandas_description(
72+
config: Settings, series: pd.Series, init_dict: dict
73+
) -> VarDescriptionPandas | VarDescriptionPandasHashable:
74+
_var_counts = VarCountsPandas(config, series)
75+
if _var_counts.hashable:
76+
return VarDescriptionPandasHashable.from_var_counts(_var_counts, init_dict)
77+
return VarDescriptionPandas.from_var_counts(_var_counts, init_dict)

src/ydata_profiling/model/var_description/default.py

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,16 +12,6 @@ class VarDescription(VarCounts):
1212
"""Default description for one data column.
1313
Extends VarCounts class with variable-specific values (var_specific) and a dict-like interface."""
1414

15-
n_distinct: int | list
16-
"""Number of distinct values"""
17-
p_distinct: float | list
18-
"""Proportion of distinct values"""
19-
is_unique: bool | list
20-
"""Whether the variable values are unique"""
21-
n_unique: int | list
22-
"""Number of unique values"""
23-
p_unique: float | list
24-
"""Proportion of unique values"""
2515
var_specific: dict
2616

2717
def __getitem__(self, item: str):
@@ -47,3 +37,20 @@ def get(self, key: str, default: Any = None) -> Any:
4737
def __iter__(self) -> Iterator:
4838
"""To support old dict like interface."""
4939
return self.var_specific.__iter__()
40+
41+
42+
@dataclass
43+
class VarDescriptionHashable(VarDescription):
44+
"""Default description for one data column that is hashable (common types).
45+
Extends VarDescription class with information about distinct and unique values."""
46+
47+
n_distinct: int | list | None
48+
"""Number of distinct values"""
49+
p_distinct: float | list | None
50+
"""Proportion of distinct values"""
51+
is_unique: bool | list | None
52+
"""Whether the variable values are unique"""
53+
n_unique: int | list | None
54+
"""Number of unique values"""
55+
p_unique: float | list | None
56+
"""Proportion of unique values"""

0 commit comments

Comments
 (0)