Skip to content

Commit 80a1024

Browse files
ricardodcpereira authored and aquemy committed
fix: remove the duplicated cardinality threshold under categorical and text settings
1 parent f4886a2 commit 80a1024

2 files changed

Lines changed: 5 additions & 6 deletions

File tree

src/ydata_profiling/config.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,17 +50,16 @@ class TextVars(BaseModel):
5050
words: bool = True
5151
characters: bool = True
5252
redact: bool = False
53-
# if text has more than threshold categories, its not category
54-
categorical_threshold: int = 50
55-
# if text has more than threshold % distinct values, its not category
56-
percentage_cat_threshold: float = 0.5
5753

5854

5955
class CatVars(BaseModel):
6056
length: bool = True
6157
characters: bool = True
6258
words: bool = True
59+
# if var has more than threshold categories, it's a text var
6360
cardinality_threshold: int = 50
61+
# if var has more than threshold % distinct values, it's a text var
62+
percentage_cat_threshold: float = 0.5
6463
imbalance_threshold: float = 0.5
6564
n_obs: int = 5
6665
# Set to zero to disable

src/ydata_profiling/model/typeset_relations.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,8 @@ def string_is_category(series: pd.Series, state: dict, k: Settings) -> bool:
7373
- (distinct values / count of all values) is less than threshold
7474
- is not bool"""
7575
n_unique = series.nunique()
76-
unique_threshold = k.vars.text.percentage_cat_threshold
77-
threshold = k.vars.text.categorical_threshold
76+
unique_threshold = k.vars.cat.percentage_cat_threshold
77+
threshold = k.vars.cat.cardinality_threshold
7878
return (
7979
1 <= n_unique <= threshold
8080
and (

0 commit comments

Comments
 (0)