Skip to content

Commit b0dc709

Browse files
committed
feat: update tests
1 parent b3e7120 commit b0dc709

5 files changed

Lines changed: 77 additions & 38 deletions

File tree

tests/unit/test_comparison.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -66,7 +66,8 @@ def test_generate_comparison():
6666

6767
p1 = ProfileReport(df1, title="p1")
6868
p2 = ProfileReport(df2, title="p1")
69-
html = p1.compare(p2).to_html()
69+
_compare = p1.compare(p2)
70+
html = _compare.to_html()
7071
assert len(html) > 0
7172

7273

tests/unit/test_describe.py

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
from ydata_profiling.model.describe import describe
1010
from ydata_profiling.model.summary import describe_1d
1111
from ydata_profiling.model.typeset import ProfilingTypeSet
12+
from ydata_profiling.model.var_description.default import VarDescription
1213

1314
check_is_NaN = "ydata_profiling.check_is_NaN"
1415

@@ -49,7 +50,7 @@ def test_describe_unique(data, expected, summarizer, typeset):
4950
config = Settings()
5051
config.vars.num.low_categorical_threshold = 0
5152

52-
desc_1d = describe_1d(config, data, summarizer, typeset)
53+
desc_1d: VarDescription = describe_1d(config, data, summarizer, typeset)
5354
if expected["is_unique"] is not None:
5455
assert (
5556
desc_1d["p_unique"] == expected["p_unique"]
@@ -562,6 +563,13 @@ def test_describe_df(column, describe_data, expected_results, summarizer):
562563
for k, v in expected_results[column].items():
563564
if v == check_is_NaN:
564565
test_condition = k not in results.variables[column]
566+
# values from common description
567+
elif k in asdict(results.variables[column]):
568+
if isinstance(v, float):
569+
assert pytest.approx(v) == getattr(results.variables[column], k)
570+
else:
571+
assert v == getattr(results.variables[column], k)
572+
continue
565573
elif isinstance(v, float):
566574
test_condition = pytest.approx(v) == results.variables[column][k]
567575
else:

tests/unit/test_ge_integration_expectations.py

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from unittest.mock import Mock, patch
1+
from unittest.mock import MagicMock, Mock, patch
22

33
import pytest
44

@@ -20,14 +20,22 @@ def batch():
2020

2121

2222
def test_generic_expectations(batch):
23-
generic_expectations("column", {"n_missing": 0, "p_unique": 1.0}, batch)
23+
default_desc = MagicMock()
24+
default_desc.n_missing = 0
25+
d = {"p_unique": 1.0}
26+
default_desc.__getitem__.side_effect = d.__getitem__
27+
generic_expectations("column", default_desc, batch)
2428
batch.expect_column_to_exist.assert_called_once()
2529
batch.expect_column_values_to_not_be_null.assert_called_once()
2630
batch.expect_column_values_to_be_unique.assert_called_once()
2731

2832

2933
def test_generic_expectations_min(batch):
30-
generic_expectations("column", {"n_missing": 1, "p_unique": 0.5}, batch)
34+
default_desc = MagicMock()
35+
default_desc.n_missing = 1
36+
d = {"p_unique": 0.5}
37+
default_desc.__getitem__.side_effect = d.__getitem__
38+
generic_expectations("column", default_desc, batch)
3139
batch.expect_column_to_exist.assert_called_once()
3240
batch.expect_column_values_to_not_be_null.assert_not_called()
3341
batch.expect_column_values_to_be_unique.assert_not_called()
@@ -93,22 +101,21 @@ def test_numeric_expectations_min(batch):
93101

94102

95103
def test_categorical_expectations(batch):
96-
categorical_expectations(
97-
"column",
98-
{
99-
"n_distinct": 1,
100-
"p_distinct": 0.1,
101-
"value_counts_without_nan": {"val1": 1, "val2": 2},
102-
},
103-
batch,
104-
)
104+
default_desc = MagicMock()
105+
d = {"n_distinct": 1, "p_unique": 0.1}
106+
default_desc.__getitem__.side_effect = d.__getitem__
107+
default_desc.value_counts_without_nan = {"val1": 1, "val2": 2}
108+
categorical_expectations("column", default_desc, batch)
105109
batch.expect_column_values_to_be_in_set.assert_called_once_with(
106110
"column", {"val1", "val2"}
107111
)
108112

109113

110114
def test_categorical_expectations_min(batch):
111-
categorical_expectations("column", {"n_distinct": 15, "p_distinct": 1.0}, batch)
115+
default_desc = MagicMock()
116+
d = {"n_distinct": 15, "p_distinct": 1.0}
117+
default_desc.__getitem__.side_effect = d.__getitem__
118+
categorical_expectations("column", default_desc, batch)
112119
batch.expect_column_values_to_be_in_set.assert_not_called()
113120

114121

tests/unit/test_summarizer.py

Lines changed: 29 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2,13 +2,14 @@
22

33
import pandas as pd
44

5+
from ydata_profiling.config import Settings
56
from ydata_profiling.model.summarizer import PandasProfilingSummarizer, format_summary
67
from ydata_profiling.model.typeset import ProfilingTypeSet
78

89
base_path = os.path.abspath(os.path.dirname(__file__))
910

1011

11-
def test_summarizer(config):
12+
def test_summarizer_base_types(config: Settings):
1213
pps = PandasProfilingSummarizer(typeset=ProfilingTypeSet(config))
1314

1415
_ = format_summary(pps.summarize(config, pd.Series([1, 2, 3, 4, 5]), "Unsupported"))
@@ -23,9 +24,23 @@ def test_summarizer(config):
2324
_ = format_summary(
2425
pps.summarize(config, pd.Series(["abc", "abc", "abba"]), "Categorical")
2526
)
27+
28+
_ = format_summary(
29+
pps.summarize(config, pd.Series([True, False, True, False, False]), "Boolean")
30+
)
31+
32+
33+
def test_summarizer_url(config: Settings):
34+
config.vars.url.active = True
35+
pps = PandasProfilingSummarizer(typeset=ProfilingTypeSet(config))
2636
_ = format_summary(
2737
pps.summarize(config, pd.Series(["https://www.example.com"]), "URL")
2838
)
39+
40+
41+
def test_summarizer_path(config: Settings):
42+
config.vars.path.active = True
43+
pps = PandasProfilingSummarizer(typeset=ProfilingTypeSet(config))
2944
_ = format_summary(
3045
pps.summarize(
3146
config,
@@ -40,6 +55,12 @@ def test_summarizer(config):
4055
"Path",
4156
)
4257
)
58+
59+
60+
def test_summarizer_file(config: Settings):
61+
config.vars.path.active = True
62+
config.vars.file.active = True
63+
pps = PandasProfilingSummarizer(typeset=ProfilingTypeSet(config))
4364
_ = format_summary(
4465
pps.summarize(
4566
config,
@@ -53,6 +74,13 @@ def test_summarizer(config):
5374
"File",
5475
)
5576
)
77+
78+
79+
def test_summarizer_image(config: Settings):
80+
config.vars.path.active = True
81+
config.vars.file.active = True
82+
config.vars.image.active = True
83+
pps = PandasProfilingSummarizer(typeset=ProfilingTypeSet(config))
5684
_ = format_summary(
5785
pps.summarize(
5886
config,
@@ -62,6 +90,3 @@ def test_summarizer(config):
6290
"Image",
6391
)
6492
)
65-
_ = format_summary(
66-
pps.summarize(config, pd.Series([True, False, True, False, False]), "Boolean")
67-
)

tests/unit/test_summary_algos.py

Lines changed: 17 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -4,35 +4,37 @@
44

55
from ydata_profiling.config import Settings
66
from ydata_profiling.model.summary_algorithms import (
7-
describe_counts,
8-
describe_generic,
97
describe_supported,
108
histogram_compute,
119
)
10+
from ydata_profiling.model.var_description.default import VarDescription
1211

1312

1413
def test_count_summary_sorted(config):
1514
s = pd.Series([1] + [2] * 1000)
16-
_, sn, r = describe_counts(config, s, {})
17-
assert r["value_counts_without_nan"].index[0] == 2
18-
assert r["value_counts_without_nan"].index[1] == 1
15+
r: VarDescription
16+
_, sn, r = describe_supported(config, s, {})
17+
assert r.value_counts_without_nan.index[0] == 2
18+
assert r.value_counts_without_nan.index[1] == 1
1919

2020

2121
def test_count_summary_nat(config):
22+
r: VarDescription
2223
s = pd.to_datetime(pd.Series([1, 2] + [np.nan, pd.NaT]))
23-
_, sn, r = describe_counts(config, s, {})
24-
assert len(r["value_counts_without_nan"].index) == 2
24+
_, sn, r = describe_supported(config, s, {})
25+
assert len(r.value_counts_without_nan.index) == 2
2526

2627

2728
def test_count_summary_category(config):
29+
r: VarDescription
2830
s = pd.Series(
2931
pd.Categorical(
3032
["Poor", "Neutral"] + [np.nan] * 100,
3133
categories=["Poor", "Neutral", "Excellent"],
3234
)
3335
)
34-
_, sn, r = describe_counts(config, s, {})
35-
assert len(r["value_counts_without_nan"].index) == 2
36+
_, sn, r = describe_supported(config, s, {})
37+
assert len(r.value_counts_without_nan.index) == 2
3638

3739

3840
@pytest.fixture(scope="class")
@@ -41,16 +43,12 @@ def empty_data() -> pd.DataFrame:
4143

4244

4345
def test_summary_supported_empty_df(config, empty_data):
44-
_, series, summary = describe_counts(config, empty_data["A"], {})
45-
assert summary["n_missing"] == 0
46-
assert "p_missing" not in summary
47-
48-
_, series, summary = describe_generic(config, series, summary)
49-
assert summary["n_missing"] == 0
50-
assert summary["p_missing"] == 0
51-
assert summary["count"] == 0
52-
53-
_, _, summary = describe_supported(config, series, summary)
46+
summary: VarDescription
47+
_, _, summary = describe_supported(config, empty_data["A"], {})
48+
assert summary.n_missing == 0
49+
assert summary.n_missing == 0
50+
assert summary.p_missing == 0
51+
assert summary.count == 0
5452
assert summary["n_distinct"] == 0
5553
assert summary["p_distinct"] == 0
5654
assert summary["n_unique"] == 0

0 commit comments

Comments
 (0)