test: update unit tests for VarDescription

vorel99 · vorel99 · commit b0dc70938663 · 2024-09-23T14:31:09.000+02:00
diff --git a/tests/unit/test_comparison.py b/tests/unit/test_comparison.py
@@ -66,7 +66,8 @@ def test_generate_comparison():
 
     p1 = ProfileReport(df1, title="p1")
     p2 = ProfileReport(df2, title="p1")
-    html = p1.compare(p2).to_html()
+    _compare = p1.compare(p2)
+    html = _compare.to_html()
     assert len(html) > 0
 
 
diff --git a/tests/unit/test_describe.py b/tests/unit/test_describe.py
@@ -9,6 +9,7 @@
 from ydata_profiling.model.describe import describe
 from ydata_profiling.model.summary import describe_1d
 from ydata_profiling.model.typeset import ProfilingTypeSet
+from ydata_profiling.model.var_description.default import VarDescription
 
 check_is_NaN = "ydata_profiling.check_is_NaN"
 
@@ -49,7 +50,7 @@ def test_describe_unique(data, expected, summarizer, typeset):
     config = Settings()
     config.vars.num.low_categorical_threshold = 0
 
-    desc_1d = describe_1d(config, data, summarizer, typeset)
+    desc_1d: VarDescription = describe_1d(config, data, summarizer, typeset)
     if expected["is_unique"] is not None:
         assert (
             desc_1d["p_unique"] == expected["p_unique"]
@@ -562,6 +563,13 @@ def test_describe_df(column, describe_data, expected_results, summarizer):
     for k, v in expected_results[column].items():
         if v == check_is_NaN:
             test_condition = k not in results.variables[column]
+        # values from common description
+        elif k in asdict(results.variables[column]):
+            if isinstance(v, float):
+                assert pytest.approx(v) == getattr(results.variables[column], k)
+            else:
+                assert v == getattr(results.variables[column], k)
+            continue
         elif isinstance(v, float):
             test_condition = pytest.approx(v) == results.variables[column][k]
         else:
diff --git a/tests/unit/test_ge_integration_expectations.py b/tests/unit/test_ge_integration_expectations.py
@@ -1,4 +1,4 @@
-from unittest.mock import Mock, patch
+from unittest.mock import MagicMock, Mock, patch
 
 import pytest
 
@@ -20,14 +20,22 @@ def batch():
 
 
 def test_generic_expectations(batch):
-    generic_expectations("column", {"n_missing": 0, "p_unique": 1.0}, batch)
+    default_desc = MagicMock()
+    default_desc.n_missing = 0
+    d = {"p_unique": 1.0}
+    default_desc.__getitem__.side_effect = d.__getitem__
+    generic_expectations("column", default_desc, batch)
     batch.expect_column_to_exist.assert_called_once()
     batch.expect_column_values_to_not_be_null.assert_called_once()
     batch.expect_column_values_to_be_unique.assert_called_once()
 
 
 def test_generic_expectations_min(batch):
-    generic_expectations("column", {"n_missing": 1, "p_unique": 0.5}, batch)
+    default_desc = MagicMock()
+    default_desc.n_missing = 1
+    d = {"p_unique": 0.5}
+    default_desc.__getitem__.side_effect = d.__getitem__
+    generic_expectations("column", default_desc, batch)
     batch.expect_column_to_exist.assert_called_once()
     batch.expect_column_values_to_not_be_null.assert_not_called()
     batch.expect_column_values_to_be_unique.assert_not_called()
@@ -93,22 +101,21 @@ def test_numeric_expectations_min(batch):
 
 
 def test_categorical_expectations(batch):
-    categorical_expectations(
-        "column",
-        {
-            "n_distinct": 1,
-            "p_distinct": 0.1,
-            "value_counts_without_nan": {"val1": 1, "val2": 2},
-        },
-        batch,
-    )
+    default_desc = MagicMock()
+    d = {"n_distinct": 1, "p_distinct": 0.1}
+    default_desc.__getitem__.side_effect = d.__getitem__
+    default_desc.value_counts_without_nan = {"val1": 1, "val2": 2}
+    categorical_expectations("column", default_desc, batch)
     batch.expect_column_values_to_be_in_set.assert_called_once_with(
         "column", {"val1", "val2"}
     )
 
 
 def test_categorical_expectations_min(batch):
-    categorical_expectations("column", {"n_distinct": 15, "p_distinct": 1.0}, batch)
+    default_desc = MagicMock()
+    d = {"n_distinct": 15, "p_distinct": 1.0}
+    default_desc.__getitem__.side_effect = d.__getitem__
+    categorical_expectations("column", default_desc, batch)
     batch.expect_column_values_to_be_in_set.assert_not_called()
 
 
diff --git a/tests/unit/test_summarizer.py b/tests/unit/test_summarizer.py
@@ -2,13 +2,14 @@
 
 import pandas as pd
 
+from ydata_profiling.config import Settings
 from ydata_profiling.model.summarizer import PandasProfilingSummarizer, format_summary
 from ydata_profiling.model.typeset import ProfilingTypeSet
 
 base_path = os.path.abspath(os.path.dirname(__file__))
 
 
-def test_summarizer(config):
+def test_summarizer_base_types(config: Settings):
     pps = PandasProfilingSummarizer(typeset=ProfilingTypeSet(config))
 
     _ = format_summary(pps.summarize(config, pd.Series([1, 2, 3, 4, 5]), "Unsupported"))
@@ -23,9 +24,23 @@ def test_summarizer(config):
     _ = format_summary(
         pps.summarize(config, pd.Series(["abc", "abc", "abba"]), "Categorical")
     )
+
+    _ = format_summary(
+        pps.summarize(config, pd.Series([True, False, True, False, False]), "Boolean")
+    )
+
+
+def test_summarizer_url(config: Settings):
+    config.vars.url.active = True
+    pps = PandasProfilingSummarizer(typeset=ProfilingTypeSet(config))
     _ = format_summary(
         pps.summarize(config, pd.Series(["https://www.example.com"]), "URL")
     )
+
+
+def test_summarizer_path(config: Settings):
+    config.vars.path.active = True
+    pps = PandasProfilingSummarizer(typeset=ProfilingTypeSet(config))
     _ = format_summary(
         pps.summarize(
             config,
@@ -40,6 +55,12 @@ def test_summarizer(config):
             "Path",
         )
     )
+
+
+def test_summarizer_file(config: Settings):
+    config.vars.path.active = True
+    config.vars.file.active = True
+    pps = PandasProfilingSummarizer(typeset=ProfilingTypeSet(config))
     _ = format_summary(
         pps.summarize(
             config,
@@ -53,6 +74,13 @@ def test_summarizer(config):
             "File",
         )
     )
+
+
+def test_summarizer_image(config: Settings):
+    config.vars.path.active = True
+    config.vars.file.active = True
+    config.vars.image.active = True
+    pps = PandasProfilingSummarizer(typeset=ProfilingTypeSet(config))
     _ = format_summary(
         pps.summarize(
             config,
@@ -62,6 +90,3 @@ def test_summarizer(config):
             "Image",
         )
     )
-    _ = format_summary(
-        pps.summarize(config, pd.Series([True, False, True, False, False]), "Boolean")
-    )
diff --git a/tests/unit/test_summary_algos.py b/tests/unit/test_summary_algos.py
@@ -4,35 +4,37 @@
 
 from ydata_profiling.config import Settings
 from ydata_profiling.model.summary_algorithms import (
-    describe_counts,
-    describe_generic,
     describe_supported,
     histogram_compute,
 )
+from ydata_profiling.model.var_description.default import VarDescription
 
 
 def test_count_summary_sorted(config):
     s = pd.Series([1] + [2] * 1000)
-    _, sn, r = describe_counts(config, s, {})
-    assert r["value_counts_without_nan"].index[0] == 2
-    assert r["value_counts_without_nan"].index[1] == 1
+    r: VarDescription
+    _, sn, r = describe_supported(config, s, {})
+    assert r.value_counts_without_nan.index[0] == 2
+    assert r.value_counts_without_nan.index[1] == 1
 
 
 def test_count_summary_nat(config):
+    r: VarDescription
     s = pd.to_datetime(pd.Series([1, 2] + [np.nan, pd.NaT]))
-    _, sn, r = describe_counts(config, s, {})
-    assert len(r["value_counts_without_nan"].index) == 2
+    _, sn, r = describe_supported(config, s, {})
+    assert len(r.value_counts_without_nan.index) == 2
 
 
 def test_count_summary_category(config):
+    r: VarDescription
     s = pd.Series(
         pd.Categorical(
             ["Poor", "Neutral"] + [np.nan] * 100,
             categories=["Poor", "Neutral", "Excellent"],
         )
     )
-    _, sn, r = describe_counts(config, s, {})
-    assert len(r["value_counts_without_nan"].index) == 2
+    _, sn, r = describe_supported(config, s, {})
+    assert len(r.value_counts_without_nan.index) == 2
 
 
 @pytest.fixture(scope="class")
@@ -41,16 +43,12 @@ def empty_data() -> pd.DataFrame:
 
 
 def test_summary_supported_empty_df(config, empty_data):
-    _, series, summary = describe_counts(config, empty_data["A"], {})
-    assert summary["n_missing"] == 0
-    assert "p_missing" not in summary
-
-    _, series, summary = describe_generic(config, series, summary)
-    assert summary["n_missing"] == 0
-    assert summary["p_missing"] == 0
-    assert summary["count"] == 0
-
-    _, _, summary = describe_supported(config, series, summary)
+    summary: VarDescription
+    _, _, summary = describe_supported(config, empty_data["A"], {})
+    assert summary.n_missing == 0
+    assert summary.n_missing == 0
+    assert summary.p_missing == 0
+    assert summary.count == 0
     assert summary["n_distinct"] == 0
     assert summary["p_distinct"] == 0
     assert summary["n_unique"] == 0