Skip to content

Commit fb4f998

Browse files
committed
resolve some problems with tests
1 parent 93ae583 commit fb4f998

8 files changed

Lines changed: 97 additions & 73 deletions

File tree

src/ydata_profiling/model/alerts.py

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,10 @@
88

99
from ydata_profiling.config import Settings
1010
from ydata_profiling.model.correlations import perform_check_correlation
11-
from ydata_profiling.model.var_description.default import VarDescription
11+
from ydata_profiling.model.var_description.default import (
12+
VarDescription,
13+
VarDescriptionHashable,
14+
)
1215

1316

1417
def fmt_percent(value: float, edge_cases: bool = True) -> str:
@@ -163,7 +166,7 @@ def _get_description(self) -> str:
163166
class ConstantAlert(Alert):
164167
def __init__(
165168
self,
166-
values: VarDescription,
169+
values: VarDescriptionHashable,
167170
column_name: Optional[str] = None,
168171
is_empty: bool = False,
169172
):
@@ -221,7 +224,7 @@ def _get_description(self) -> str:
221224
class HighCardinalityAlert(Alert):
222225
def __init__(
223226
self,
224-
values: VarDescription,
227+
values: VarDescriptionHashable,
225228
column_name: Optional[str] = None,
226229
is_empty: bool = False,
227230
):
@@ -429,7 +432,7 @@ def _get_description(self) -> str:
429432
class UniqueAlert(Alert):
430433
def __init__(
431434
self,
432-
values: VarDescription,
435+
values: VarDescriptionHashable,
433436
column_name: Optional[str] = None,
434437
is_empty: bool = False,
435438
):
@@ -532,7 +535,7 @@ def check_table_alerts(table: dict) -> List[Alert]:
532535
return alerts
533536

534537

535-
def numeric_alerts(config: Settings, summary: VarDescription) -> List[Alert]:
538+
def numeric_alerts(config: Settings, summary: VarDescriptionHashable) -> List[Alert]:
536539
alerts: List[Alert] = []
537540

538541
# Skewness
@@ -556,7 +559,7 @@ def numeric_alerts(config: Settings, summary: VarDescription) -> List[Alert]:
556559
return alerts
557560

558561

559-
def timeseries_alerts(config: Settings, summary: VarDescription) -> List[Alert]:
562+
def timeseries_alerts(config: Settings, summary: VarDescriptionHashable) -> List[Alert]:
560563
alerts: List[Alert] = numeric_alerts(config, summary)
561564

562565
if not summary["stationary"]:
@@ -568,7 +571,9 @@ def timeseries_alerts(config: Settings, summary: VarDescription) -> List[Alert]:
568571
return alerts
569572

570573

571-
def categorical_alerts(config: Settings, summary: VarDescription) -> List[Alert]:
574+
def categorical_alerts(
575+
config: Settings, summary: VarDescriptionHashable
576+
) -> List[Alert]:
572577
alerts: List[Alert] = []
573578

574579
# High cardinality
@@ -597,7 +602,7 @@ def categorical_alerts(config: Settings, summary: VarDescription) -> List[Alert]
597602
return alerts
598603

599604

600-
def boolean_alerts(config: Settings, summary: VarDescription) -> List[Alert]:
605+
def boolean_alerts(config: Settings, summary: VarDescriptionHashable) -> List[Alert]:
601606
alerts: List[Alert] = []
602607

603608
if (
@@ -618,7 +623,7 @@ def generic_alerts(summary: VarDescription) -> List[Alert]:
618623
return alerts
619624

620625

621-
def supported_alerts(summary: VarDescription) -> List[Alert]:
626+
def supported_alerts(summary: VarDescriptionHashable) -> List[Alert]:
622627
alerts: List[Alert] = []
623628

624629
if summary.n_distinct == summary.n:
@@ -637,7 +642,7 @@ def unsupported_alerts(summary: VarDescription) -> List[Alert]:
637642

638643

639644
def check_variable_alerts(
640-
config: Settings, col: str, description: VarDescription
645+
config: Settings, col: str, description: VarDescription | VarDescriptionHashable
641646
) -> List[Alert]:
642647
"""Checks individual variables for alerts.
643648
@@ -654,7 +659,7 @@ def check_variable_alerts(
654659

655660
if description["type"] == "Unsupported":
656661
alerts += unsupported_alerts(description)
657-
else:
662+
elif isinstance(description, VarDescriptionHashable):
658663
alerts += supported_alerts(description)
659664

660665
if description["type"] == "Categorical":
@@ -665,6 +670,8 @@ def check_variable_alerts(
665670
alerts += timeseries_alerts(config, description)
666671
if description["type"] == "Boolean":
667672
alerts += boolean_alerts(config, description)
673+
else:
674+
raise ValueError("description should be 'VarDescriptionHashable'")
668675

669676
for idx in range(len(alerts)):
670677
alerts[idx].column_name = col

src/ydata_profiling/model/description.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,10 @@
22
from datetime import datetime, timedelta
33
from typing import Any, Dict, List, Optional, Union
44

5-
from ydata_profiling.model.var_description.default import VarDescription
5+
from ydata_profiling.model.var_description.default import (
6+
VarDescription,
7+
VarDescriptionHashable,
8+
)
69

710

811
@dataclass
@@ -98,7 +101,7 @@ class BaseDescription:
98101
analysis: BaseAnalysis
99102
time_index_analysis: Optional[TimeIndexAnalysis]
100103
table: Any
101-
variables: Dict[str, VarDescription]
104+
variables: Dict[str, Union[VarDescriptionHashable, VarDescription]]
102105
scatter: Any
103106
correlations: Dict[str, Any]
104107
missing: Dict[str, Any]
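(file path not captured in this extract — this hunk modifies the module that defines the `VarCounts` dataclass)

Lines changed: 8 additions & 8 deletions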
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,24 @@
11
from dataclasses import dataclass
2-
from typing import Any
2+
from typing import Any, Union
33

44

55
@dataclass
66
class VarCounts:
77
"""Data about counts in variable column."""
88

9-
n: int
9+
n: Union[int, list]
1010
"""Count of rows in the series."""
11-
count: int
11+
count: Union[int, list]
1212
"""Count of not missing rows in the series."""
13-
n_missing: int
13+
n_missing: Union[int, list]
1414
"""Count of missing rows in the series."""
15-
p_missing: float
15+
p_missing: Union[float, list]
1616
"""Proportion of missing rows in the series."""
1717

18-
hashable: bool
18+
hashable: Union[bool, list]
1919
value_counts_without_nan: Any
2020
"""Counts of values in the series without NaN. Values as index, counts as values."""
2121
value_counts_index_sorted: Any
2222
"""Sorted counts of values in the series without NaN. Sorted by counts."""
23-
ordering: bool
24-
memory_size: int
23+
ordering: Union[bool, list]
24+
memory_size: Union[int, list]

tests/unit/test_comparison.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,5 +66,6 @@ def test_generate_comparison():
6666

6767
p1 = ProfileReport(df1, title="p1")
6868
p2 = ProfileReport(df2, title="p1")
69-
html = p1.compare(p2).to_html()
69+
_compare = p1.compare(p2)
70+
html = _compare.to_html()
7071
assert len(html) > 0

tests/unit/test_describe.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,13 +52,13 @@ def test_describe_unique(data, expected, summarizer, typeset):
5252
desc_1d = describe_1d(config, data, summarizer, typeset)
5353
if expected["is_unique"] is not None:
5454
assert (
55-
desc_1d["p_unique"] == expected["p_unique"]
55+
desc_1d.p_unique == expected["p_unique"]
5656
), "Describe 1D p_unique incorrect"
5757
assert (
58-
desc_1d["p_distinct"] == expected["p_distinct"]
58+
desc_1d.p_distinct == expected["p_distinct"]
5959
), "Describe 1D p_distinct incorrect"
6060
assert (
61-
desc_1d["is_unique"] == expected["is_unique"]
61+
desc_1d.is_unique == expected["is_unique"]
6262
), "Describe 1D should return unique"
6363

6464

@@ -562,6 +562,13 @@ def test_describe_df(column, describe_data, expected_results, summarizer):
562562
for k, v in expected_results[column].items():
563563
if v == check_is_NaN:
564564
test_condition = k not in results.variables[column]
565+
# values from common description
566+
elif k in asdict(results.variables[column]):
567+
if isinstance(v, float):
568+
assert pytest.approx(v) == getattr(results.variables[column], k)
569+
else:
570+
assert v == getattr(results.variables[column], k)
571+
continue
565572
elif isinstance(v, float):
566573
test_condition = pytest.approx(v) == results.variables[column][k]
567574
else:

tests/unit/test_ge_integration_expectations.py

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from unittest.mock import Mock, patch
1+
from unittest.mock import MagicMock, Mock, patch
22

33
import pytest
44

@@ -20,14 +20,20 @@ def batch():
2020

2121

2222
def test_generic_expectations(batch):
23-
generic_expectations("column", {"n_missing": 0, "p_unique": 1.0}, batch)
23+
default_desc = MagicMock()
24+
default_desc.n_missing = 0
25+
default_desc.p_unique = 1.0
26+
generic_expectations("column", default_desc, batch)
2427
batch.expect_column_to_exist.assert_called_once()
2528
batch.expect_column_values_to_not_be_null.assert_called_once()
2629
batch.expect_column_values_to_be_unique.assert_called_once()
2730

2831

2932
def test_generic_expectations_min(batch):
30-
generic_expectations("column", {"n_missing": 1, "p_unique": 0.5}, batch)
33+
default_desc = MagicMock()
34+
default_desc.n_missing = 1
35+
default_desc.p_unique = 0.5
36+
generic_expectations("column", default_desc, batch)
3137
batch.expect_column_to_exist.assert_called_once()
3238
batch.expect_column_values_to_not_be_null.assert_not_called()
3339
batch.expect_column_values_to_be_unique.assert_not_called()
@@ -93,22 +99,21 @@ def test_numeric_expectations_min(batch):
9399

94100

95101
def test_categorical_expectations(batch):
96-
categorical_expectations(
97-
"column",
98-
{
99-
"n_distinct": 1,
100-
"p_distinct": 0.1,
101-
"value_counts_without_nan": {"val1": 1, "val2": 2},
102-
},
103-
batch,
104-
)
102+
default_desc = MagicMock()
103+
default_desc.n_distinct = 1
104+
default_desc.p_distinct = 0.1
105+
default_desc.value_counts_without_nan = {"val1": 1, "val2": 2}
106+
categorical_expectations("column", default_desc, batch)
105107
batch.expect_column_values_to_be_in_set.assert_called_once_with(
106108
"column", {"val1", "val2"}
107109
)
108110

109111

110112
def test_categorical_expectations_min(batch):
111-
categorical_expectations("column", {"n_distinct": 15, "p_distinct": 1.0}, batch)
113+
default_desc = MagicMock()
114+
default_desc.n_distinct = 15
115+
default_desc.p_distinct = 1.0
116+
categorical_expectations("column", default_desc, batch)
112117
batch.expect_column_values_to_be_in_set.assert_not_called()
113118

114119

tests/unit/test_pandas/test_correlations.py

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from unittest.mock import MagicMock
2+
13
import numpy as np
24
import pandas as pd
35
import pytest
@@ -41,16 +43,24 @@ def test_config():
4143

4244
@pytest.fixture
4345
def test_summary():
44-
summary = {
45-
"float_1": {"type": "Numeric", "n_distinct": 10},
46-
"float_2": {"type": "Numeric", "n_distinct": 10},
47-
"integer_1": {"type": "Numeric", "n_distinct": 10},
48-
"integer_2": {"type": "Numeric", "n_distinct": 10},
49-
"string_1": {"type": "Categorical", "n_distinct": 10},
50-
"string_2": {"type": "Categorical", "n_distinct": 10},
51-
}
46+
# mock numeric description
47+
num_desc = MagicMock()
48+
num_desc.n_distinct = 10
49+
num_desc.__getitem__.return_value = "Numeric"
5250

53-
return summary
51+
# mock categorical description
52+
cat_desc = MagicMock()
53+
cat_desc.n_distinct = 10
54+
cat_desc.__getitem__.return_value = "Categorical"
55+
56+
return {
57+
"float_1": num_desc,
58+
"float_2": num_desc,
59+
"integer_1": num_desc,
60+
"integer_2": num_desc,
61+
"string_1": cat_desc,
62+
"string_2": cat_desc,
63+
}
5464

5565

5666
def test_auto_compute_all(test_config, test_dataframe, test_summary):

tests/unit/test_summary_algos.py

Lines changed: 17 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -2,24 +2,20 @@
22
import pandas as pd
33
import pytest
44

5-
from ydata_profiling.model.summary_algorithms import (
6-
describe_counts,
7-
describe_generic,
8-
describe_supported,
9-
)
5+
from ydata_profiling.model.summary_algorithms import describe_supported
106

117

128
def test_count_summary_sorted(config):
139
s = pd.Series([1] + [2] * 1000)
14-
_, sn, r = describe_counts(config, s, {})
15-
assert r["value_counts_without_nan"].index[0] == 2
16-
assert r["value_counts_without_nan"].index[1] == 1
10+
_, sn, r = describe_supported(config, s, {})
11+
assert r.value_counts_without_nan.index[0] == 2
12+
assert r.value_counts_without_nan.index[1] == 1
1713

1814

1915
def test_count_summary_nat(config):
2016
s = pd.to_datetime(pd.Series([1, 2] + [np.nan, pd.NaT]))
21-
_, sn, r = describe_counts(config, s, {})
22-
assert len(r["value_counts_without_nan"].index) == 2
17+
_, sn, r = describe_supported(config, s, {})
18+
assert len(r.value_counts_without_nan.index) == 2
2319

2420

2521
def test_count_summary_category(config):
@@ -29,8 +25,8 @@ def test_count_summary_category(config):
2925
categories=["Poor", "Neutral", "Excellent"],
3026
)
3127
)
32-
_, sn, r = describe_counts(config, s, {})
33-
assert len(r["value_counts_without_nan"].index) == 2
28+
_, sn, r = describe_supported(config, s, {})
29+
assert len(r.value_counts_without_nan.index) == 2
3430

3531

3632
@pytest.fixture(scope="class")
@@ -39,17 +35,12 @@ def empty_data() -> pd.DataFrame:
3935

4036

4137
def test_summary_supported_empty_df(config, empty_data):
42-
_, series, summary = describe_counts(config, empty_data["A"], {})
43-
assert summary["n_missing"] == 0
44-
assert "p_missing" not in summary
45-
46-
_, series, summary = describe_generic(config, series, summary)
47-
assert summary["n_missing"] == 0
48-
assert summary["p_missing"] == 0
49-
assert summary["count"] == 0
50-
51-
_, _, summary = describe_supported(config, series, summary)
52-
assert summary["n_distinct"] == 0
53-
assert summary["p_distinct"] == 0
54-
assert summary["n_unique"] == 0
55-
assert not summary["is_unique"]
38+
_, _, summary = describe_supported(config, empty_data["A"], {})
39+
assert summary.n_missing == 0
40+
assert summary.n_missing == 0
41+
assert summary.p_missing == 0
42+
assert summary.count == 0
43+
assert summary.n_distinct == 0
44+
assert summary.p_distinct == 0
45+
assert summary.n_unique == 0
46+
assert not summary.is_unique

0 commit comments

Comments
 (0)