11from __future__ import annotations
22
3- from dataclasses import dataclass
4-
53import pandas as pd
64
75from ydata_profiling .config import Settings
8- from ydata_profiling .model .pandas .var_description .counts_pandas import VarCountsPandas
6+ from ydata_profiling .model .pandas .var_description .counts_pandas import get_counts_pandas
97from ydata_profiling .model .var_description .default import (
108 VarDescription ,
119 VarDescriptionHashable ,
1210)
1311
1412
@dataclass
class VarDescriptionPandas(VarDescription):
    """Generic description of a pandas column, filled in from its counts object."""

    @classmethod
    def from_var_counts(
        cls, var_counts: VarCountsPandas, init_dict: dict
    ) -> VarDescriptionPandas:
        """Build a description by copying the count attributes off ``var_counts``.

        Args:
            var_counts: Object exposing the attributes named in ``copied_fields``.
            init_dict: Passed through untouched as ``var_specific``.

        Returns:
            A ``VarDescriptionPandas`` holding the copied attributes.
        """
        # Attributes are read in this order and forwarded under the same names.
        copied_fields = (
            "n",
            "count",
            "n_missing",
            "p_missing",
            "hashable",
            "memory_size",
            "ordering",
            "value_counts_index_sorted",
            "value_counts_without_nan",
        )
        copied = {name: getattr(var_counts, name) for name in copied_fields}
        return VarDescriptionPandas(**copied, var_specific=init_dict)
36-
37-
38- @dataclass
39- class VarDescriptionPandasHashable (VarDescriptionHashable ):
40- """Default description for pandas columns that are hashable (common types)."""
# Added function: computes the counts for `series` via get_counts_pandas(config, series).
# When the counts report the column as hashable it returns a VarDescriptionHashable
# carrying the distinct/unique statistics computed below; otherwise it delegates to
# VarDescription.from_var_counts(var_counts, init_dict).
13+ def get_default_pandas_description (
14+ config : Settings , series : pd .Series , init_dict : dict
15+ ) -> VarDescription | VarDescriptionHashable :
16+ var_counts = get_counts_pandas (config , series )
4117
42- @classmethod
43- def from_var_counts (
44- cls , var_counts : VarCountsPandas , init_dict : dict
45- ) -> VarDescriptionPandasHashable :
46- """Get a default description for a hashable column from a VarCountsPandas object."""
47- _count = var_counts .count
18+ if var_counts .hashable :
19+ count = var_counts .count
4820 value_counts = var_counts .value_counts_without_nan
# One entry per distinct value, so the length is the distinct count.
4921 distinct_count = len (value_counts )
# Number of values seen exactly once. Presumably value_counts is a pandas Series, so
# where() masks every count other than 1 and count() ignores the masked entries — TODO confirm.
5022 unique_count = value_counts .where (value_counts == 1 ).count ()
5123
52- return VarDescriptionPandasHashable (
24+ return VarDescriptionHashable (
5325 n = var_counts .n ,
5426 count = var_counts .count ,
5527 n_missing = var_counts .n_missing ,
@@ -60,19 +32,11 @@ def from_var_counts(
6032 value_counts_index_sorted = var_counts .value_counts_index_sorted ,
6133 value_counts_without_nan = var_counts .value_counts_without_nan ,
6234 n_distinct = distinct_count ,
63- p_distinct = distinct_count / _count if _count > 0 else 0 ,
64- is_unique = unique_count == _count and _count > 0 ,
# The ratios fall back to 0 when count is 0, which avoids dividing by zero;
# is_unique is likewise False for a column with no counted values.
35+ p_distinct = distinct_count / count if count > 0 else 0 ,
36+ is_unique = unique_count == count and count > 0 ,
6537 n_unique = unique_count ,
66- p_unique = unique_count / _count if _count > 0 else 0 ,
38+ p_unique = unique_count / count if count > 0 else 0 ,
# NOTE(review): value_counts is passed as None here, whereas the removed code passed
# var_counts.value_counts — confirm no consumer of the description still reads it.
39+ value_counts = None ,
6740 var_specific = init_dict ,
68- value_counts = var_counts .value_counts ,
6941 )
70-
71-
72- def get_default_pandas_description (
73- config : Settings , series : pd .Series , init_dict : dict
74- ) -> VarDescriptionPandas | VarDescriptionPandasHashable :
75- _var_counts = VarCountsPandas (config , series )
76- if _var_counts .hashable :
77- return VarDescriptionPandasHashable .from_var_counts (_var_counts , init_dict )
78- return VarDescriptionPandas .from_var_counts (_var_counts , init_dict )
# Non-hashable columns: only the generic description is produced.
42+ return VarDescription .from_var_counts (var_counts , init_dict )
0 commit comments