From c39e0b3407a4cf6dccc7c28314733b3191388ab7 Mon Sep 17 00:00:00 2001
From: Pkcha <pkcha@PkchadeMacBook-Air.local>
Date: Sun, 12 Apr 2026 15:59:52 +0800
Subject: [PATCH 01/11] refactor: fold Config helpers into Settings, move get_render_map to report.structure, add Dockerfile

---
 Dockerfile                                    |  21 +++
 src/ydata_profiling/config.py                 | 142 ++++++++----------
 src/ydata_profiling/model/handler.py          |  20 +--
 src/ydata_profiling/model/summarizer.py       |  31 ++--
 src/ydata_profiling/profile_report.py         |   6 +-
 .../report/structure/__init__.py              |  22 +++
 .../report/structure/report.py                |   2 +-
 7 files changed, 130 insertions(+), 114 deletions(-)
 create mode 100644 Dockerfile

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 000000000..7bb15bf5d
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,21 @@
+FROM python:3.10-slim
+
+WORKDIR /app
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY . .
+
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir "setuptools>=72.0.0,<80.0.0" wheel && \
+    pip install --no-cache-dir . && \
+    pip install --no-cache-dir "setuptools>=72.0.0,<80.0.0" && \
+    pip install --no-cache-dir jupyter
+
+EXPOSE 8888
+
+CMD ["jupyter", "notebook", "--ip=0.0.0.0", "--port=8888", "--no-browser", "--allow-root"]
+
+
diff --git a/src/ydata_profiling/config.py b/src/ydata_profiling/config.py
index 09dbecdde..2bb934ed1 100644
--- a/src/ydata_profiling/config.py
+++ b/src/ydata_profiling/config.py
@@ -6,24 +6,7 @@
 import yaml
 from pydantic.v1 import BaseModel, BaseSettings, Field, PrivateAttr
 
-
-def _merge_dictionaries(dict1: dict, dict2: dict) -> dict:
-    """
-    Recursive merge dictionaries.
-
-    :param dict1: Base dictionary to merge.
-    :param dict2: Dictionary to merge on top of base dictionary.
-    :return: Merged dictionary
-    """
-    for key, val in dict1.items():
-        if isinstance(val, dict):
-            dict2_node = dict2.setdefault(key, {})
-            _merge_dictionaries(val, dict2_node)
-        else:
-            if key not in dict2:
-                dict2[key] = val
-
-    return dict2
+from ydata_profiling.utils.common import update
 
 
 class Dataset(BaseModel):
@@ -355,60 +338,7 @@ class Config:
     html: Html = Html()
     notebook: Notebook = Notebook()
 
-    def update(self, updates: dict) -> "Settings":
-        update = _merge_dictionaries(self.dict(), updates)
-        return self.parse_obj(self.copy(update=update))
-
-    @staticmethod
-    def from_file(config_file: Union[Path, str]) -> "Settings":
-        """Create a Settings object from a yaml file.
-
-        Args:
-            config_file: yaml file path
-        Returns:
-            Settings
-        """
-        with open(config_file) as f:
-            data = yaml.safe_load(f)
-
-        return Settings.parse_obj(data)
-
-
-class SparkSettings(Settings):
-    """
-    Setting class with the standard report configuration for Spark DataFrames
-    All the supported analysis are set to true
-    """
-
-    vars: Univariate = Univariate()
-
-    vars.num.low_categorical_threshold = 0
-
-    infer_dtypes: bool = False
-
-    correlations: Dict[str, Correlation] = {
-        "spearman": Correlation(key="spearman", calculate=True),
-        "pearson": Correlation(key="pearson", calculate=True),
-    }
-
-    correlation_table: bool = True
-
-    interactions: Interactions = Interactions()
-    interactions.continuous = False
-
-    missing_diagrams: Dict[str, bool] = {
-        "bar": False,
-        "matrix": False,
-        "dendrogram": False,
-        "heatmap": False,
-    }
-    samples: Samples = Samples()
-    samples.tail = 0
-    samples.random = 0
-
-
-class Config:
-    arg_groups: Dict[str, Any] = {
+    _arg_groups: Dict[str, Any] = {
         "sensitive": {
             "samples": None,
             "duplicates": None,
@@ -475,8 +405,8 @@ class Config:
 
     @staticmethod
     def get_arg_groups(key: str) -> dict:
-        kwargs = Config.arg_groups[key]
-        shorthand_args, _ = Config.shorthands(kwargs, split=False)
+        kwargs = Settings._arg_groups[key]
+        shorthand_args, _ = Settings.shorthands(kwargs, split=False)
         return shorthand_args
 
     @staticmethod
@@ -485,8 +415,8 @@ def shorthands(kwargs: dict, split: bool = True) -> Tuple[dict, dict]:
         if not split:
             shorthand_args = kwargs
         for key, value in list(kwargs.items()):
-            if value is None and key in Config._shorthands:
-                shorthand_args[key] = Config._shorthands[key]
+            if value is None and key in Settings._shorthands:
+                shorthand_args[key] = Settings._shorthands[key]
                 if split:
                     del kwargs[key]
 
@@ -494,3 +424,63 @@ def shorthands(kwargs: dict, split: bool = True) -> Tuple[dict, dict]:
             return shorthand_args, kwargs
         else:
             return shorthand_args, {}
+
+    def update(self, updates: dict) -> "Settings":
+        merged = update(self.dict().copy(), updates)
+        return self.parse_obj(self.copy(update=merged))
+
+    @staticmethod
+    def from_file(config_file: Union[Path, str]) -> "Settings":
+        """Create a Settings object from a yaml file.
+
+        Args:
+            config_file: yaml file path
+        Returns:
+            Settings
+        """
+        with open(config_file) as f:
+            data = yaml.safe_load(f)
+
+        return Settings.parse_obj(data)
+
+
+class SparkSettings(Settings):
+    """
+    Setting class with the standard report configuration for Spark DataFrames
+    All the supported analysis are set to true
+    """
+
+    vars: Univariate = Univariate()
+
+    vars.num.low_categorical_threshold = 0
+
+    infer_dtypes: bool = False
+
+    correlations: Dict[str, Correlation] = {
+        "spearman": Correlation(key="spearman", calculate=True),
+        "pearson": Correlation(key="pearson", calculate=True),
+    }
+
+    correlation_table: bool = True
+
+    interactions: Interactions = Interactions()
+    interactions.continuous = False
+
+    missing_diagrams: Dict[str, bool] = {
+        "bar": False,
+        "matrix": False,
+        "dendrogram": False,
+        "heatmap": False,
+    }
+    samples: Samples = Samples()
+    samples.tail = 0
+    samples.random = 0
+
+
+class Config(Settings):
+    """
+    Deprecated: Use Settings instead.
+    Backward compatibility alias for Settings class.
+    """
+
+    pass
diff --git a/src/ydata_profiling/model/handler.py b/src/ydata_profiling/model/handler.py
index 992c1840c..e983ce2a1 100644
--- a/src/ydata_profiling/model/handler.py
+++ b/src/ydata_profiling/model/handler.py
@@ -60,22 +60,6 @@ def handle(self, dtype: str, *args, **kwargs) -> dict:
         return summary
 
 
-def get_render_map() -> Dict[str, Callable]:
-    import ydata_profiling.report.structure.variables as render_algorithms
+from ydata_profiling.report.structure import get_render_map
 
-    render_map = {
-        "Boolean": render_algorithms.render_boolean,
-        "Numeric": render_algorithms.render_real,
-        "Complex": render_algorithms.render_complex,
-        "Text": render_algorithms.render_text,
-        "DateTime": render_algorithms.render_date,
-        "Categorical": render_algorithms.render_categorical,
-        "URL": render_algorithms.render_url,
-        "Path": render_algorithms.render_path,
-        "File": render_algorithms.render_file,
-        "Image": render_algorithms.render_image,
-        "Unsupported": render_algorithms.render_generic,
-        "TimeSeries": render_algorithms.render_timeseries,
-    }
-
-    return render_map
+__all__ = ["compose", "Handler", "get_render_map"]
diff --git a/src/ydata_profiling/model/summarizer.py b/src/ydata_profiling/model/summarizer.py
index d733a7d36..a57ed1c97 100644
--- a/src/ydata_profiling/model/summarizer.py
+++ b/src/ydata_profiling/model/summarizer.py
@@ -50,9 +50,8 @@ def summarize(
         return self.handle(str(dtype), config, series, {"type": str(dtype)})
 
 
-# Revisit this with the correct support for Spark as well.
 class ProfilingSummarizer(BaseSummarizer):
-    """A summarizer for Pandas DataFrames."""
+    """A summarizer supporting both Pandas and Spark DataFrames."""
 
     def __init__(self, typeset: VisionsTypeset, use_spark: bool = False):
         self.use_spark = use_spark and is_pyspark_installed()
@@ -65,7 +64,15 @@ def summary_map(self) -> Dict[str, List[Callable]]:
         return self._summary_map
 
     def _create_summary_map(self) -> Dict[str, List[Callable]]:
-        """Creates the summary map for Pandas summarization."""
+        """Creates the summary map based on the backend."""
+        common_map = {
+            "URL": [describe_url_1d],
+            "Path": [describe_path_1d],
+            "File": [describe_file_1d],
+            "Image": [describe_image_1d],
+            "TimeSeries": [describe_timeseries_1d],
+        }
+
         if self.use_spark:
             from ydata_profiling.model.spark import (
                 describe_boolean_1d_spark,
@@ -78,7 +85,7 @@ def _create_summary_map(self) -> Dict[str, List[Callable]]:
                 describe_text_1d_spark,
             )
 
-            summary_map = {
+            base_map = {
                 "Unsupported": [
                     describe_counts_spark,
                     describe_generic_spark,
@@ -89,14 +96,9 @@ def _create_summary_map(self) -> Dict[str, List[Callable]]:
                 "Text": [describe_text_1d_spark],
                 "Categorical": [describe_categorical_1d_spark],
                 "Boolean": [describe_boolean_1d_spark],
-                "URL": [describe_url_1d],
-                "Path": [describe_path_1d],
-                "File": [describe_file_1d],
-                "Image": [describe_image_1d],
-                "TimeSeries": [describe_timeseries_1d],
             }
         else:
-            summary_map = {
+            base_map = {
                 "Unsupported": [
                     pandas_describe_counts,
                     pandas_describe_generic,
@@ -107,13 +109,10 @@ def _create_summary_map(self) -> Dict[str, List[Callable]]:
                 "Text": [pandas_describe_text_1d],
                 "Categorical": [pandas_describe_categorical_1d],
                 "Boolean": [pandas_describe_boolean_1d],
-                "URL": [pandas_describe_url_1d],
-                "Path": [pandas_describe_path_1d],
-                "File": [pandas_describe_file_1d],
-                "Image": [pandas_describe_image_1d],
-                "TimeSeries": [pandas_describe_timeseries_1d],
             }
-        return summary_map
+
+        base_map.update(common_map)
+        return base_map
 
 
 def format_summary(summary: Union[BaseDescription, dict]) -> dict:
diff --git a/src/ydata_profiling/profile_report.py b/src/ydata_profiling/profile_report.py
index a7d6d9134..916b4681e 100644
--- a/src/ydata_profiling/profile_report.py
+++ b/src/ydata_profiling/profile_report.py
@@ -25,7 +25,7 @@
 from typeguard import typechecked
 from visions import VisionsTypeset
 
-from ydata_profiling.config import Config, Settings, SparkSettings
+from ydata_profiling.config import Settings, SparkSettings
 from ydata_profiling.expectations_report import ExpectationsReport
 from ydata_profiling.model import BaseDescription
 from ydata_profiling.model.alerts import AlertType
@@ -132,11 +132,11 @@ def __init__(
             cfg = Settings()
             for condition, key in groups:
                 if condition:
-                    cfg = cfg.update(Config.get_arg_groups(key))
+                    cfg = cfg.update(Settings.get_arg_groups(key))
             report_config = report_config.update(cfg.dict(exclude_defaults=True))
 
         if len(kwargs) > 0:
-            shorthands, kwargs = Config.shorthands(kwargs)
+            shorthands, kwargs = Settings.shorthands(kwargs)
             report_config = report_config.update(
                 Settings().update(shorthands).dict(exclude_defaults=True)
             )
diff --git a/src/ydata_profiling/report/structure/__init__.py b/src/ydata_profiling/report/structure/__init__.py
index 8324d248d..a2efd029a 100644
--- a/src/ydata_profiling/report/structure/__init__.py
+++ b/src/ydata_profiling/report/structure/__init__.py
@@ -1 +1,23 @@
 """Data structure for the report"""
+from typing import Callable, Dict
+
+
+def get_render_map() -> Dict[str, Callable]:
+    import ydata_profiling.report.structure.variables as render_algorithms
+
+    render_map = {
+        "Boolean": render_algorithms.render_boolean,
+        "Numeric": render_algorithms.render_real,
+        "Complex": render_algorithms.render_complex,
+        "Text": render_algorithms.render_text,
+        "DateTime": render_algorithms.render_date,
+        "Categorical": render_algorithms.render_categorical,
+        "URL": render_algorithms.render_url,
+        "Path": render_algorithms.render_path,
+        "File": render_algorithms.render_file,
+        "Image": render_algorithms.render_image,
+        "Unsupported": render_algorithms.render_generic,
+        "TimeSeries": render_algorithms.render_timeseries,
+    }
+
+    return render_map
diff --git a/src/ydata_profiling/report/structure/report.py b/src/ydata_profiling/report/structure/report.py
index 482b410b2..b64a41aae 100644
--- a/src/ydata_profiling/report/structure/report.py
+++ b/src/ydata_profiling/report/structure/report.py
@@ -7,7 +7,7 @@
 from ydata_profiling.config import Settings
 from ydata_profiling.model import BaseDescription
 from ydata_profiling.model.alerts import AlertType
-from ydata_profiling.model.handler import get_render_map
+from ydata_profiling.report.structure import get_render_map
 from ydata_profiling.report.presentation.core import (
     HTML,
     Collapse,

From 27a314be64b586f58de6a2956d456e2a3d03da1f Mon Sep 17 00:00:00 2001
From: Pkcha <pkcha@PkchadeMacBook-Air.local>
Date: Sun, 12 Apr 2026 17:14:23 +0800
Subject: [PATCH 02/11] revert: restore config, summarizer and profile_report; drop get_render_map re-export from model.handler

---
 src/ydata_profiling/config.py           | 142 +++++++++++++-----------
 src/ydata_profiling/model/handler.py    |   2 -
 src/ydata_profiling/model/summarizer.py |  31 +++---
 src/ydata_profiling/profile_report.py   |   6 +-
 4 files changed, 95 insertions(+), 86 deletions(-)

diff --git a/src/ydata_profiling/config.py b/src/ydata_profiling/config.py
index 2bb934ed1..09dbecdde 100644
--- a/src/ydata_profiling/config.py
+++ b/src/ydata_profiling/config.py
@@ -6,7 +6,24 @@
 import yaml
 from pydantic.v1 import BaseModel, BaseSettings, Field, PrivateAttr
 
-from ydata_profiling.utils.common import update
+
+def _merge_dictionaries(dict1: dict, dict2: dict) -> dict:
+    """
+    Recursive merge dictionaries.
+
+    :param dict1: Base dictionary to merge.
+    :param dict2: Dictionary to merge on top of base dictionary.
+    :return: Merged dictionary
+    """
+    for key, val in dict1.items():
+        if isinstance(val, dict):
+            dict2_node = dict2.setdefault(key, {})
+            _merge_dictionaries(val, dict2_node)
+        else:
+            if key not in dict2:
+                dict2[key] = val
+
+    return dict2
 
 
 class Dataset(BaseModel):
@@ -338,7 +355,60 @@ class Config:
     html: Html = Html()
     notebook: Notebook = Notebook()
 
-    _arg_groups: Dict[str, Any] = {
+    def update(self, updates: dict) -> "Settings":
+        update = _merge_dictionaries(self.dict(), updates)
+        return self.parse_obj(self.copy(update=update))
+
+    @staticmethod
+    def from_file(config_file: Union[Path, str]) -> "Settings":
+        """Create a Settings object from a yaml file.
+
+        Args:
+            config_file: yaml file path
+        Returns:
+            Settings
+        """
+        with open(config_file) as f:
+            data = yaml.safe_load(f)
+
+        return Settings.parse_obj(data)
+
+
+class SparkSettings(Settings):
+    """
+    Setting class with the standard report configuration for Spark DataFrames
+    All the supported analysis are set to true
+    """
+
+    vars: Univariate = Univariate()
+
+    vars.num.low_categorical_threshold = 0
+
+    infer_dtypes: bool = False
+
+    correlations: Dict[str, Correlation] = {
+        "spearman": Correlation(key="spearman", calculate=True),
+        "pearson": Correlation(key="pearson", calculate=True),
+    }
+
+    correlation_table: bool = True
+
+    interactions: Interactions = Interactions()
+    interactions.continuous = False
+
+    missing_diagrams: Dict[str, bool] = {
+        "bar": False,
+        "matrix": False,
+        "dendrogram": False,
+        "heatmap": False,
+    }
+    samples: Samples = Samples()
+    samples.tail = 0
+    samples.random = 0
+
+
+class Config:
+    arg_groups: Dict[str, Any] = {
         "sensitive": {
             "samples": None,
             "duplicates": None,
@@ -405,8 +475,8 @@ class Config:
 
     @staticmethod
     def get_arg_groups(key: str) -> dict:
-        kwargs = Settings._arg_groups[key]
-        shorthand_args, _ = Settings.shorthands(kwargs, split=False)
+        kwargs = Config.arg_groups[key]
+        shorthand_args, _ = Config.shorthands(kwargs, split=False)
         return shorthand_args
 
     @staticmethod
@@ -415,8 +485,8 @@ def shorthands(kwargs: dict, split: bool = True) -> Tuple[dict, dict]:
         if not split:
             shorthand_args = kwargs
         for key, value in list(kwargs.items()):
-            if value is None and key in Settings._shorthands:
-                shorthand_args[key] = Settings._shorthands[key]
+            if value is None and key in Config._shorthands:
+                shorthand_args[key] = Config._shorthands[key]
                 if split:
                     del kwargs[key]
 
@@ -424,63 +494,3 @@ def shorthands(kwargs: dict, split: bool = True) -> Tuple[dict, dict]:
             return shorthand_args, kwargs
         else:
             return shorthand_args, {}
-
-    def update(self, updates: dict) -> "Settings":
-        merged = update(self.dict().copy(), updates)
-        return self.parse_obj(self.copy(update=merged))
-
-    @staticmethod
-    def from_file(config_file: Union[Path, str]) -> "Settings":
-        """Create a Settings object from a yaml file.
-
-        Args:
-            config_file: yaml file path
-        Returns:
-            Settings
-        """
-        with open(config_file) as f:
-            data = yaml.safe_load(f)
-
-        return Settings.parse_obj(data)
-
-
-class SparkSettings(Settings):
-    """
-    Setting class with the standard report configuration for Spark DataFrames
-    All the supported analysis are set to true
-    """
-
-    vars: Univariate = Univariate()
-
-    vars.num.low_categorical_threshold = 0
-
-    infer_dtypes: bool = False
-
-    correlations: Dict[str, Correlation] = {
-        "spearman": Correlation(key="spearman", calculate=True),
-        "pearson": Correlation(key="pearson", calculate=True),
-    }
-
-    correlation_table: bool = True
-
-    interactions: Interactions = Interactions()
-    interactions.continuous = False
-
-    missing_diagrams: Dict[str, bool] = {
-        "bar": False,
-        "matrix": False,
-        "dendrogram": False,
-        "heatmap": False,
-    }
-    samples: Samples = Samples()
-    samples.tail = 0
-    samples.random = 0
-
-
-class Config(Settings):
-    """
-    Deprecated: Use Settings instead.
-    Backward compatibility alias for Settings class.
-    """
-
-    pass
diff --git a/src/ydata_profiling/model/handler.py b/src/ydata_profiling/model/handler.py
index e983ce2a1..bcca12a1c 100644
--- a/src/ydata_profiling/model/handler.py
+++ b/src/ydata_profiling/model/handler.py
@@ -60,6 +60,4 @@ def handle(self, dtype: str, *args, **kwargs) -> dict:
         return summary
 
 
-from ydata_profiling.report.structure import get_render_map
 
-__all__ = ["compose", "Handler", "get_render_map"]
diff --git a/src/ydata_profiling/model/summarizer.py b/src/ydata_profiling/model/summarizer.py
index a57ed1c97..d733a7d36 100644
--- a/src/ydata_profiling/model/summarizer.py
+++ b/src/ydata_profiling/model/summarizer.py
@@ -50,8 +50,9 @@ def summarize(
         return self.handle(str(dtype), config, series, {"type": str(dtype)})
 
 
+# Revisit this with the correct support for Spark as well.
 class ProfilingSummarizer(BaseSummarizer):
-    """A summarizer supporting both Pandas and Spark DataFrames."""
+    """A summarizer for Pandas DataFrames."""
 
     def __init__(self, typeset: VisionsTypeset, use_spark: bool = False):
         self.use_spark = use_spark and is_pyspark_installed()
@@ -64,15 +65,7 @@ def summary_map(self) -> Dict[str, List[Callable]]:
         return self._summary_map
 
     def _create_summary_map(self) -> Dict[str, List[Callable]]:
-        """Creates the summary map based on the backend."""
-        common_map = {
-            "URL": [describe_url_1d],
-            "Path": [describe_path_1d],
-            "File": [describe_file_1d],
-            "Image": [describe_image_1d],
-            "TimeSeries": [describe_timeseries_1d],
-        }
-
+        """Creates the summary map for Pandas summarization."""
         if self.use_spark:
             from ydata_profiling.model.spark import (
                 describe_boolean_1d_spark,
@@ -85,7 +78,7 @@ def _create_summary_map(self) -> Dict[str, List[Callable]]:
                 describe_text_1d_spark,
             )
 
-            base_map = {
+            summary_map = {
                 "Unsupported": [
                     describe_counts_spark,
                     describe_generic_spark,
@@ -96,9 +89,14 @@ def _create_summary_map(self) -> Dict[str, List[Callable]]:
                 "Text": [describe_text_1d_spark],
                 "Categorical": [describe_categorical_1d_spark],
                 "Boolean": [describe_boolean_1d_spark],
+                "URL": [describe_url_1d],
+                "Path": [describe_path_1d],
+                "File": [describe_file_1d],
+                "Image": [describe_image_1d],
+                "TimeSeries": [describe_timeseries_1d],
             }
         else:
-            base_map = {
+            summary_map = {
                 "Unsupported": [
                     pandas_describe_counts,
                     pandas_describe_generic,
@@ -109,10 +107,13 @@ def _create_summary_map(self) -> Dict[str, List[Callable]]:
                 "Text": [pandas_describe_text_1d],
                 "Categorical": [pandas_describe_categorical_1d],
                 "Boolean": [pandas_describe_boolean_1d],
+                "URL": [pandas_describe_url_1d],
+                "Path": [pandas_describe_path_1d],
+                "File": [pandas_describe_file_1d],
+                "Image": [pandas_describe_image_1d],
+                "TimeSeries": [pandas_describe_timeseries_1d],
             }
-
-        base_map.update(common_map)
-        return base_map
+        return summary_map
 
 
 def format_summary(summary: Union[BaseDescription, dict]) -> dict:
diff --git a/src/ydata_profiling/profile_report.py b/src/ydata_profiling/profile_report.py
index 916b4681e..a7d6d9134 100644
--- a/src/ydata_profiling/profile_report.py
+++ b/src/ydata_profiling/profile_report.py
@@ -25,7 +25,7 @@
 from typeguard import typechecked
 from visions import VisionsTypeset
 
-from ydata_profiling.config import Settings, SparkSettings
+from ydata_profiling.config import Config, Settings, SparkSettings
 from ydata_profiling.expectations_report import ExpectationsReport
 from ydata_profiling.model import BaseDescription
 from ydata_profiling.model.alerts import AlertType
@@ -132,11 +132,11 @@ def __init__(
             cfg = Settings()
             for condition, key in groups:
                 if condition:
-                    cfg = cfg.update(Settings.get_arg_groups(key))
+                    cfg = cfg.update(Config.get_arg_groups(key))
             report_config = report_config.update(cfg.dict(exclude_defaults=True))
 
         if len(kwargs) > 0:
-            shorthands, kwargs = Settings.shorthands(kwargs)
+            shorthands, kwargs = Config.shorthands(kwargs)
             report_config = report_config.update(
                 Settings().update(shorthands).dict(exclude_defaults=True)
             )

From 8d8f6b71b5f46178749d0b100ba9bc8cefbfb261 Mon Sep 17 00:00:00 2001
From: Pkcha <pkcha@PkchadeMacBook-Air.local>
Date: Sun, 12 Apr 2026 17:39:22 +0800
Subject: [PATCH 03/11] style: whitespace-only rewrite of model/handler.py; document get_render_map and utils.backend

---
 src/ydata_profiling/model/handler.py          | 123 +++++++++---------
 .../report/structure/__init__.py              |   8 ++
 src/ydata_profiling/utils/backend.py          |   2 +-
 3 files changed, 69 insertions(+), 64 deletions(-)

diff --git a/src/ydata_profiling/model/handler.py b/src/ydata_profiling/model/handler.py
index bcca12a1c..aa36a811c 100644
--- a/src/ydata_profiling/model/handler.py
+++ b/src/ydata_profiling/model/handler.py
@@ -1,63 +1,60 @@
-"""
-    Auxiliary handler methods for data summary extraction
-"""
-from typing import Any, Callable, Dict, List, Sequence
-
-import networkx as nx
-from visions import VisionsTypeset
-
-
-def compose(functions: Sequence[Callable]) -> Callable:
-    """
-    Compose a sequence of functions.
-
-    :param functions: sequence of functions
-    :return: combined function applying all functions in order.
-    """
-
-    def composed_function(*args) -> List[Any]:
-        result = args  # Start with the input arguments
-        for func in functions:
-            result = func(*result) if isinstance(result, tuple) else func(result)
-        return result  # type: ignore
-
-    return composed_function  # type: ignore
-
-
-class Handler:
-    """A generic handler
-
-    Allows any custom mapping between data types and functions
-    """
-
-    def __init__(
-        self,
-        mapping: Dict[str, List[Callable]],
-        typeset: VisionsTypeset,
-        *args,
-        **kwargs
-    ):
-        self.mapping = mapping
-        self.typeset = typeset
-        self._complete_dag()
-
-    def _complete_dag(self) -> None:
-        for from_type, to_type in nx.topological_sort(
-            nx.line_graph(self.typeset.base_graph)
-        ):
-            self.mapping[str(to_type)] = (
-                self.mapping[str(from_type)] + self.mapping[str(to_type)]
-            )
-
-    def handle(self, dtype: str, *args, **kwargs) -> dict:
-        """
-        Returns:
-            object: a tuple containing the config, the dataset series and the summary extracted
-        """
-        funcs = self.mapping.get(dtype, [])
-        op = compose(funcs)
-        summary = op(*args)[-1]
-        return summary
-
-
-
+"""
+    Auxiliary handler methods for data summary extraction
+"""
+from typing import Any, Callable, Dict, List, Sequence
+
+import networkx as nx
+from visions import VisionsTypeset
+
+
+def compose(functions: Sequence[Callable]) -> Callable:
+    """
+    Compose a sequence of functions.
+
+    :param functions: sequence of functions
+    :return: combined function applying all functions in order.
+    """
+
+    def composed_function(*args) -> List[Any]:
+        result = args  # Start with the input arguments
+        for func in functions:
+            result = func(*result) if isinstance(result, tuple) else func(result)
+        return result  # type: ignore
+
+    return composed_function  # type: ignore
+
+
+class Handler:
+    """A generic handler
+
+    Allows any custom mapping between data types and functions
+    """
+
+    def __init__(
+        self,
+        mapping: Dict[str, List[Callable]],
+        typeset: VisionsTypeset,
+        *args,
+        **kwargs
+    ):
+        self.mapping = mapping
+        self.typeset = typeset
+        self._complete_dag()
+
+    def _complete_dag(self) -> None:
+        for from_type, to_type in nx.topological_sort(
+            nx.line_graph(self.typeset.base_graph)
+        ):
+            self.mapping[str(to_type)] = (
+                self.mapping[str(from_type)] + self.mapping[str(to_type)]
+            )
+
+    def handle(self, dtype: str, *args, **kwargs) -> dict:
+        """
+        Returns:
+            object: a tuple containing the config, the dataset series and the summary extracted
+        """
+        funcs = self.mapping.get(dtype, [])
+        op = compose(funcs)
+        summary = op(*args)[-1]
+        return summary
diff --git a/src/ydata_profiling/report/structure/__init__.py b/src/ydata_profiling/report/structure/__init__.py
index a2efd029a..7ba9c10c9 100644
--- a/src/ydata_profiling/report/structure/__init__.py
+++ b/src/ydata_profiling/report/structure/__init__.py
@@ -3,6 +3,14 @@
 
 
 def get_render_map() -> Dict[str, Callable]:
+    """Get the mapping of variable types to their render functions.
+
+    This function was moved from model.handler to report.structure to eliminate
+    the reverse dependency from model layer to report layer.
+
+    Returns:
+        Dictionary mapping type names to render functions.
+    """
     import ydata_profiling.report.structure.variables as render_algorithms
 
     render_map = {
diff --git a/src/ydata_profiling/utils/backend.py b/src/ydata_profiling/utils/backend.py
index e99d91c11..dd12f9fd3 100644
--- a/src/ydata_profiling/utils/backend.py
+++ b/src/ydata_profiling/utils/backend.py
@@ -1,5 +1,5 @@
 """
-    File with a function to check the backend being used
+Backend detection utilities for pandas and spark.
 """
 import importlib
 

From 307cba98bfab9196a8de5355022f3919539e4520 Mon Sep 17 00:00:00 2001
From: Pkcha <pkcha@PkchadeMacBook-Air.local>
Date: Sun, 12 Apr 2026 18:09:37 +0800
Subject: [PATCH 04/11] feat: initial release

---
 src/ydata_profiling/model/handler.py          | 123 +++++++++---------
 src/ydata_profiling/model/summarizer.py       |   5 +-
 .../report/structure/__init__.py              |  30 -----
 .../report/structure/report.py                |   2 +-
 .../report/structure/variables/__init__.py    |  23 ++++
 src/ydata_profiling/utils/backend.py          |   2 +-
 6 files changed, 90 insertions(+), 95 deletions(-)

diff --git a/src/ydata_profiling/model/handler.py b/src/ydata_profiling/model/handler.py
index aa36a811c..bcca12a1c 100644
--- a/src/ydata_profiling/model/handler.py
+++ b/src/ydata_profiling/model/handler.py
@@ -1,60 +1,63 @@
-"""
-    Auxiliary handler methods for data summary extraction
-"""
-from typing import Any, Callable, Dict, List, Sequence
-
-import networkx as nx
-from visions import VisionsTypeset
-
-
-def compose(functions: Sequence[Callable]) -> Callable:
-    """
-    Compose a sequence of functions.
-
-    :param functions: sequence of functions
-    :return: combined function applying all functions in order.
-    """
-
-    def composed_function(*args) -> List[Any]:
-        result = args  # Start with the input arguments
-        for func in functions:
-            result = func(*result) if isinstance(result, tuple) else func(result)
-        return result  # type: ignore
-
-    return composed_function  # type: ignore
-
-
-class Handler:
-    """A generic handler
-
-    Allows any custom mapping between data types and functions
-    """
-
-    def __init__(
-        self,
-        mapping: Dict[str, List[Callable]],
-        typeset: VisionsTypeset,
-        *args,
-        **kwargs
-    ):
-        self.mapping = mapping
-        self.typeset = typeset
-        self._complete_dag()
-
-    def _complete_dag(self) -> None:
-        for from_type, to_type in nx.topological_sort(
-            nx.line_graph(self.typeset.base_graph)
-        ):
-            self.mapping[str(to_type)] = (
-                self.mapping[str(from_type)] + self.mapping[str(to_type)]
-            )
-
-    def handle(self, dtype: str, *args, **kwargs) -> dict:
-        """
-        Returns:
-            object: a tuple containing the config, the dataset series and the summary extracted
-        """
-        funcs = self.mapping.get(dtype, [])
-        op = compose(funcs)
-        summary = op(*args)[-1]
-        return summary
+"""
+    Auxiliary handler methods for data summary extraction
+"""
+from typing import Any, Callable, Dict, List, Sequence
+
+import networkx as nx
+from visions import VisionsTypeset
+
+
+def compose(functions: Sequence[Callable]) -> Callable:
+    """
+    Compose a sequence of functions.
+
+    :param functions: sequence of functions
+    :return: combined function applying all functions in order.
+    """
+
+    def composed_function(*args) -> List[Any]:
+        result = args  # Start with the input arguments
+        for func in functions:
+            result = func(*result) if isinstance(result, tuple) else func(result)
+        return result  # type: ignore
+
+    return composed_function  # type: ignore
+
+
+class Handler:
+    """A generic handler
+
+    Allows any custom mapping between data types and functions
+    """
+
+    def __init__(
+        self,
+        mapping: Dict[str, List[Callable]],
+        typeset: VisionsTypeset,
+        *args,
+        **kwargs
+    ):
+        self.mapping = mapping
+        self.typeset = typeset
+        self._complete_dag()
+
+    def _complete_dag(self) -> None:
+        for from_type, to_type in nx.topological_sort(
+            nx.line_graph(self.typeset.base_graph)
+        ):
+            self.mapping[str(to_type)] = (
+                self.mapping[str(from_type)] + self.mapping[str(to_type)]
+            )
+
+    def handle(self, dtype: str, *args, **kwargs) -> dict:
+        """
+        Returns:
+            object: a tuple containing the config, the dataset series and the summary extracted
+        """
+        funcs = self.mapping.get(dtype, [])
+        op = compose(funcs)
+        summary = op(*args)[-1]
+        return summary
+
+
+
diff --git a/src/ydata_profiling/model/summarizer.py b/src/ydata_profiling/model/summarizer.py
index d733a7d36..54d839915 100644
--- a/src/ydata_profiling/model/summarizer.py
+++ b/src/ydata_profiling/model/summarizer.py
@@ -27,7 +27,7 @@
 from ydata_profiling.model.pandas.describe_supported_pandas import (
     pandas_describe_supported,
 )
-from ydata_profiling.model.summary_algorithms import (  # Check what is this method used for
+from ydata_profiling.model.summary_algorithms import (
     describe_file_1d,
     describe_image_1d,
     describe_path_1d,
@@ -50,9 +50,8 @@ def summarize(
         return self.handle(str(dtype), config, series, {"type": str(dtype)})
 
 
-# Revisit this with the correct support for Spark as well.
 class ProfilingSummarizer(BaseSummarizer):
-    """A summarizer for Pandas DataFrames."""
+    """A summarizer supporting both Pandas and Spark DataFrames."""
 
     def __init__(self, typeset: VisionsTypeset, use_spark: bool = False):
         self.use_spark = use_spark and is_pyspark_installed()
diff --git a/src/ydata_profiling/report/structure/__init__.py b/src/ydata_profiling/report/structure/__init__.py
index 7ba9c10c9..8324d248d 100644
--- a/src/ydata_profiling/report/structure/__init__.py
+++ b/src/ydata_profiling/report/structure/__init__.py
@@ -1,31 +1 @@
 """Data structure for the report"""
-from typing import Callable, Dict
-
-
-def get_render_map() -> Dict[str, Callable]:
-    """Get the mapping of variable types to their render functions.
-
-    This function was moved from model.handler to report.structure to eliminate
-    the reverse dependency from model layer to report layer.
-
-    Returns:
-        Dictionary mapping type names to render functions.
-    """
-    import ydata_profiling.report.structure.variables as render_algorithms
-
-    render_map = {
-        "Boolean": render_algorithms.render_boolean,
-        "Numeric": render_algorithms.render_real,
-        "Complex": render_algorithms.render_complex,
-        "Text": render_algorithms.render_text,
-        "DateTime": render_algorithms.render_date,
-        "Categorical": render_algorithms.render_categorical,
-        "URL": render_algorithms.render_url,
-        "Path": render_algorithms.render_path,
-        "File": render_algorithms.render_file,
-        "Image": render_algorithms.render_image,
-        "Unsupported": render_algorithms.render_generic,
-        "TimeSeries": render_algorithms.render_timeseries,
-    }
-
-    return render_map
diff --git a/src/ydata_profiling/report/structure/report.py b/src/ydata_profiling/report/structure/report.py
index b64a41aae..0f027f23f 100644
--- a/src/ydata_profiling/report/structure/report.py
+++ b/src/ydata_profiling/report/structure/report.py
@@ -7,7 +7,7 @@
 from ydata_profiling.config import Settings
 from ydata_profiling.model import BaseDescription
 from ydata_profiling.model.alerts import AlertType
-from ydata_profiling.report.structure import get_render_map
+from ydata_profiling.report.structure.variables import get_render_map
 from ydata_profiling.report.presentation.core import (
     HTML,
     Collapse,
diff --git a/src/ydata_profiling/report/structure/variables/__init__.py b/src/ydata_profiling/report/structure/variables/__init__.py
index 64f1d6d54..a8aa301b5 100644
--- a/src/ydata_profiling/report/structure/variables/__init__.py
+++ b/src/ydata_profiling/report/structure/variables/__init__.py
@@ -1,3 +1,5 @@
+from typing import Callable, Dict
+
 from ydata_profiling.report.structure.variables.render_boolean import render_boolean
 from ydata_profiling.report.structure.variables.render_categorical import (
     render_categorical,
@@ -17,6 +19,26 @@
 )
 from ydata_profiling.report.structure.variables.render_url import render_url
 
+
+def get_render_map() -> Dict[str, Callable]:
+    render_map = {
+        "Boolean": render_boolean,
+        "Numeric": render_real,
+        "Complex": render_complex,
+        "Text": render_text,
+        "DateTime": render_date,
+        "Categorical": render_categorical,
+        "URL": render_url,
+        "Path": render_path,
+        "File": render_file,
+        "Image": render_image,
+        "Unsupported": render_generic,
+        "TimeSeries": render_timeseries,
+    }
+
+    return render_map
+
+
 __all__ = [
     "render_boolean",
     "render_categorical",
@@ -32,4 +54,5 @@
     "render_text",
     "render_timeseries",
     "render_url",
+    "get_render_map",
 ]
diff --git a/src/ydata_profiling/utils/backend.py b/src/ydata_profiling/utils/backend.py
index dd12f9fd3..e99d91c11 100644
--- a/src/ydata_profiling/utils/backend.py
+++ b/src/ydata_profiling/utils/backend.py
@@ -1,5 +1,5 @@
 """
-Backend detection utilities for pandas and spark.
+    File with a function to check the backend being used
 """
 import importlib
 

From 1e2fa10eaf7a951acea663fa270784244ac18404 Mon Sep 17 00:00:00 2001
From: Pkcha <pkcha@PkchadeMacBook-Air.local>
Date: Sun, 12 Apr 2026 19:27:37 +0800
Subject: [PATCH 05/11] feat: initial release

---
 src/ydata_profiling/model/handler.py          | 18 +++++++++++++++
 src/ydata_profiling/model/summarizer.py       |  5 ++--
 .../report/structure/report.py                |  2 +-
 .../report/structure/variables/__init__.py    | 23 -------------------
 4 files changed, 22 insertions(+), 26 deletions(-)

diff --git a/src/ydata_profiling/model/handler.py b/src/ydata_profiling/model/handler.py
index bcca12a1c..992c1840c 100644
--- a/src/ydata_profiling/model/handler.py
+++ b/src/ydata_profiling/model/handler.py
@@ -60,4 +60,22 @@ def handle(self, dtype: str, *args, **kwargs) -> dict:
         return summary
 
 
+def get_render_map() -> Dict[str, Callable]:
+    import ydata_profiling.report.structure.variables as render_algorithms
 
+    render_map = {
+        "Boolean": render_algorithms.render_boolean,
+        "Numeric": render_algorithms.render_real,
+        "Complex": render_algorithms.render_complex,
+        "Text": render_algorithms.render_text,
+        "DateTime": render_algorithms.render_date,
+        "Categorical": render_algorithms.render_categorical,
+        "URL": render_algorithms.render_url,
+        "Path": render_algorithms.render_path,
+        "File": render_algorithms.render_file,
+        "Image": render_algorithms.render_image,
+        "Unsupported": render_algorithms.render_generic,
+        "TimeSeries": render_algorithms.render_timeseries,
+    }
+
+    return render_map
diff --git a/src/ydata_profiling/model/summarizer.py b/src/ydata_profiling/model/summarizer.py
index 54d839915..d733a7d36 100644
--- a/src/ydata_profiling/model/summarizer.py
+++ b/src/ydata_profiling/model/summarizer.py
@@ -27,7 +27,7 @@
 from ydata_profiling.model.pandas.describe_supported_pandas import (
     pandas_describe_supported,
 )
-from ydata_profiling.model.summary_algorithms import (
+from ydata_profiling.model.summary_algorithms import (  # Check what is this method used for
     describe_file_1d,
     describe_image_1d,
     describe_path_1d,
@@ -50,8 +50,9 @@ def summarize(
         return self.handle(str(dtype), config, series, {"type": str(dtype)})
 
 
+# Revisit this with the correct support for Spark as well.
 class ProfilingSummarizer(BaseSummarizer):
-    """A summarizer supporting both Pandas and Spark DataFrames."""
+    """A summarizer for Pandas DataFrames."""
 
     def __init__(self, typeset: VisionsTypeset, use_spark: bool = False):
         self.use_spark = use_spark and is_pyspark_installed()
diff --git a/src/ydata_profiling/report/structure/report.py b/src/ydata_profiling/report/structure/report.py
index 0f027f23f..482b410b2 100644
--- a/src/ydata_profiling/report/structure/report.py
+++ b/src/ydata_profiling/report/structure/report.py
@@ -7,7 +7,7 @@
 from ydata_profiling.config import Settings
 from ydata_profiling.model import BaseDescription
 from ydata_profiling.model.alerts import AlertType
-from ydata_profiling.report.structure.variables import get_render_map
+from ydata_profiling.model.handler import get_render_map
 from ydata_profiling.report.presentation.core import (
     HTML,
     Collapse,
diff --git a/src/ydata_profiling/report/structure/variables/__init__.py b/src/ydata_profiling/report/structure/variables/__init__.py
index a8aa301b5..64f1d6d54 100644
--- a/src/ydata_profiling/report/structure/variables/__init__.py
+++ b/src/ydata_profiling/report/structure/variables/__init__.py
@@ -1,5 +1,3 @@
-from typing import Callable, Dict
-
 from ydata_profiling.report.structure.variables.render_boolean import render_boolean
 from ydata_profiling.report.structure.variables.render_categorical import (
     render_categorical,
@@ -19,26 +17,6 @@
 )
 from ydata_profiling.report.structure.variables.render_url import render_url
 
-
-def get_render_map() -> Dict[str, Callable]:
-    render_map = {
-        "Boolean": render_boolean,
-        "Numeric": render_real,
-        "Complex": render_complex,
-        "Text": render_text,
-        "DateTime": render_date,
-        "Categorical": render_categorical,
-        "URL": render_url,
-        "Path": render_path,
-        "File": render_file,
-        "Image": render_image,
-        "Unsupported": render_generic,
-        "TimeSeries": render_timeseries,
-    }
-
-    return render_map
-
-
 __all__ = [
     "render_boolean",
     "render_categorical",
@@ -54,5 +32,4 @@ def get_render_map() -> Dict[str, Callable]:
     "render_text",
     "render_timeseries",
     "render_url",
-    "get_render_map",
 ]

From 3f158155243784086e17dd0d59e60dc086dc0844 Mon Sep 17 00:00:00 2001
From: Pkcha <pkcha@PkchadeMacBook-Air.local>
Date: Sun, 12 Apr 2026 20:03:28 +0800
Subject: [PATCH 06/11] feat: initial release

---
 src/ydata_profiling/model/alerts.py            | 18 +++++++++---------
 .../model/summary_algorithms.py                | 15 ---------------
 .../structure/variables/render_common.py       |  1 -
 3 files changed, 9 insertions(+), 25 deletions(-)

diff --git a/src/ydata_profiling/model/alerts.py b/src/ydata_profiling/model/alerts.py
index 1b16d27a0..611b5de85 100644
--- a/src/ydata_profiling/model/alerts.py
+++ b/src/ydata_profiling/model/alerts.py
@@ -12,8 +12,8 @@
 from ydata_profiling.utils.styles import get_alert_styles
 
 
-def fmt_percent(value: float, edge_cases: bool = True) -> str:
-    """Format a ratio as a percentage.
+def _fmt_percent(value: float, edge_cases: bool = True) -> str:
+    """Format a ratio as a percentage (internal copy to avoid circular imports).
 
     Args:
         edge_cases: Check for edge cases?
@@ -209,7 +209,7 @@ def __init__(
 
     def _get_description(self) -> str:
         if self.values is not None:
-            return f"Dataset has {self.values['n_duplicates']} ({fmt_percent(self.values['p_duplicates'])}) duplicate rows"
+            return f"Dataset has {self.values['n_duplicates']} ({_fmt_percent(self.values['p_duplicates'])}) duplicate rows"
         else:
             return "Dataset has no duplicated rows"
 
@@ -231,7 +231,7 @@ def __init__(
 
     def _get_description(self) -> str:
         if self.values is not None:
-            return f"Dataset has {self.values['n_near_dups']} ({fmt_percent(self.values['p_near_dups'])}) near duplicate rows"
+            return f"Dataset has {self.values['n_near_dups']} ({_fmt_percent(self.values['p_near_dups'])}) near duplicate rows"
         else:
             return "Dataset has no near duplicated rows"
 
@@ -272,7 +272,7 @@ def __init__(
 
     def _get_description(self) -> str:
         if self.values is not None:
-            return f"[{self.column_name}] has {self.values['n_distinct']:} ({fmt_percent(self.values['p_distinct'])}) distinct values"
+            return f"[{self.column_name}] has {self.values['n_distinct']:} ({_fmt_percent(self.values['p_distinct'])}) distinct values"
         else:
             return f"[{self.column_name}] has a high cardinality"
 
@@ -294,7 +294,7 @@ def __init__(
 
     def _get_description(self) -> str:
         if self.values is not None:
-            return f"[{self.column_name}] has {self.values['n_fuzzy_vals']} fuzzy values: {fmt_percent(self.values['p_fuzzy_vals'])} per category"
+            return f"[{self.column_name}] has {self.values['n_fuzzy_vals']} fuzzy values: {_fmt_percent(self.values['p_fuzzy_vals'])} per category"
         else:
             return f"[{self.column_name}] no dirty categories values."
 
@@ -365,7 +365,7 @@ def __init__(
 
     def _get_description(self) -> str:
         if self.values is not None:
-            return f"[{self.column_name}] has {self.values['n_infinite']} ({fmt_percent(self.values['p_infinite'])}) infinite values"
+            return f"[{self.column_name}] has {self.values['n_infinite']} ({_fmt_percent(self.values['p_infinite'])}) infinite values"
         else:
             return f"[{self.column_name}] has infinite values"
 
@@ -387,7 +387,7 @@ def __init__(
 
     def _get_description(self) -> str:
         if self.values is not None:
-            return f"[{self.column_name}] {self.values['n_missing']} ({fmt_percent(self.values['p_missing'])}) missing values"
+            return f"[{self.column_name}] {self.values['n_missing']} ({_fmt_percent(self.values['p_missing'])}) missing values"
         else:
             return f"[{self.column_name}] has missing values"
 
@@ -541,7 +541,7 @@ def __init__(
 
     def _get_description(self) -> str:
         if self.values is not None:
-            return f"[{self.column_name}] has {self.values['n_zeros']} ({fmt_percent(self.values['p_zeros'])}) zeros"
+            return f"[{self.column_name}] has {self.values['n_zeros']} ({_fmt_percent(self.values['p_zeros'])}) zeros"
         else:
             return f"[{self.column_name}] has predominantly zeros"
 
diff --git a/src/ydata_profiling/model/summary_algorithms.py b/src/ydata_profiling/model/summary_algorithms.py
index 9c3e5ef38..49569605b 100644
--- a/src/ydata_profiling/model/summary_algorithms.py
+++ b/src/ydata_profiling/model/summary_algorithms.py
@@ -11,21 +11,6 @@
 T = TypeVar("T")
 
 
-def func_nullable_series_contains(fn: Callable) -> Callable:
-    @functools.wraps(fn)
-    def inner(
-        config: Settings, series: pd.Series, state: dict, *args, **kwargs
-    ) -> bool:
-        if series.hasnans:
-            series = series.dropna()
-            if series.empty:
-                return False
-
-        return fn(config, series, state, *args, **kwargs)
-
-    return inner
-
-
 def safe_histogram(
     values: np.ndarray,
     bins: Union[int, str, np.ndarray] = "auto",
diff --git a/src/ydata_profiling/report/structure/variables/render_common.py b/src/ydata_profiling/report/structure/variables/render_common.py
index aef8de357..e90935640 100644
--- a/src/ydata_profiling/report/structure/variables/render_common.py
+++ b/src/ydata_profiling/report/structure/variables/render_common.py
@@ -10,7 +10,6 @@ def render_common(config: Settings, summary: dict) -> dict:
     n_freq_table_max = config.n_freq_table_max
 
     template_variables = {
-        # TODO: with nan
         "freq_table_rows": freq_table(
             freqtable=summary["value_counts_without_nan"],
             n=summary["n"],

From d83e1a17d23c4d1bdee001b326fdaac1a4707548 Mon Sep 17 00:00:00 2001
From: Pkcha <pkcha@PkchadeMacBook-Air.local>
Date: Sun, 12 Apr 2026 20:42:38 +0800
Subject: [PATCH 07/11] feat: initial release

---
 src/ydata_profiling/model/alerts.py           | 18 ++++-----
 src/ydata_profiling/model/correlations.py     | 28 +++----------
 src/ydata_profiling/model/missing.py          | 25 +++---------
 .../model/pandas/table_pandas.py              | 28 ++-----------
 .../model/spark/table_spark.py                | 39 ++-----------------
 .../model/spark/timeseries_index_spark.py     |  2 +-
 src/ydata_profiling/model/summarizer.py       |  3 +-
 .../model/summary_algorithms.py               | 15 +++++++
 src/ydata_profiling/model/table.py            | 37 ++++++++++++++++++
 .../structure/variables/render_common.py      |  1 +
 src/ydata_profiling/utils/backend.py          | 34 +++++++++++++++-
 11 files changed, 115 insertions(+), 115 deletions(-)

diff --git a/src/ydata_profiling/model/alerts.py b/src/ydata_profiling/model/alerts.py
index 611b5de85..1b16d27a0 100644
--- a/src/ydata_profiling/model/alerts.py
+++ b/src/ydata_profiling/model/alerts.py
@@ -12,8 +12,8 @@
 from ydata_profiling.utils.styles import get_alert_styles
 
 
-def _fmt_percent(value: float, edge_cases: bool = True) -> str:
-    """Format a ratio as a percentage (internal copy to avoid circular imports).
+def fmt_percent(value: float, edge_cases: bool = True) -> str:
+    """Format a ratio as a percentage.
 
     Args:
         edge_cases: Check for edge cases?
@@ -209,7 +209,7 @@ def __init__(
 
     def _get_description(self) -> str:
         if self.values is not None:
-            return f"Dataset has {self.values['n_duplicates']} ({_fmt_percent(self.values['p_duplicates'])}) duplicate rows"
+            return f"Dataset has {self.values['n_duplicates']} ({fmt_percent(self.values['p_duplicates'])}) duplicate rows"
         else:
             return "Dataset has no duplicated rows"
 
@@ -231,7 +231,7 @@ def __init__(
 
     def _get_description(self) -> str:
         if self.values is not None:
-            return f"Dataset has {self.values['n_near_dups']} ({_fmt_percent(self.values['p_near_dups'])}) near duplicate rows"
+            return f"Dataset has {self.values['n_near_dups']} ({fmt_percent(self.values['p_near_dups'])}) near duplicate rows"
         else:
             return "Dataset has no near duplicated rows"
 
@@ -272,7 +272,7 @@ def __init__(
 
     def _get_description(self) -> str:
         if self.values is not None:
-            return f"[{self.column_name}] has {self.values['n_distinct']:} ({_fmt_percent(self.values['p_distinct'])}) distinct values"
+            return f"[{self.column_name}] has {self.values['n_distinct']:} ({fmt_percent(self.values['p_distinct'])}) distinct values"
         else:
             return f"[{self.column_name}] has a high cardinality"
 
@@ -294,7 +294,7 @@ def __init__(
 
     def _get_description(self) -> str:
         if self.values is not None:
-            return f"[{self.column_name}] has {self.values['n_fuzzy_vals']} fuzzy values: {_fmt_percent(self.values['p_fuzzy_vals'])} per category"
+            return f"[{self.column_name}] has {self.values['n_fuzzy_vals']} fuzzy values: {fmt_percent(self.values['p_fuzzy_vals'])} per category"
         else:
             return f"[{self.column_name}] no dirty categories values."
 
@@ -365,7 +365,7 @@ def __init__(
 
     def _get_description(self) -> str:
         if self.values is not None:
-            return f"[{self.column_name}] has {self.values['n_infinite']} ({_fmt_percent(self.values['p_infinite'])}) infinite values"
+            return f"[{self.column_name}] has {self.values['n_infinite']} ({fmt_percent(self.values['p_infinite'])}) infinite values"
         else:
             return f"[{self.column_name}] has infinite values"
 
@@ -387,7 +387,7 @@ def __init__(
 
     def _get_description(self) -> str:
         if self.values is not None:
-            return f"[{self.column_name}] {self.values['n_missing']} ({_fmt_percent(self.values['p_missing'])}) missing values"
+            return f"[{self.column_name}] {self.values['n_missing']} ({fmt_percent(self.values['p_missing'])}) missing values"
         else:
             return f"[{self.column_name}] has missing values"
 
@@ -541,7 +541,7 @@ def __init__(
 
     def _get_description(self) -> str:
         if self.values is not None:
-            return f"[{self.column_name}] has {self.values['n_zeros']} ({_fmt_percent(self.values['p_zeros'])}) zeros"
+            return f"[{self.column_name}] has {self.values['n_zeros']} ({fmt_percent(self.values['p_zeros'])}) zeros"
         else:
             return f"[{self.column_name}] has predominantly zeros"
 
diff --git a/src/ydata_profiling/model/correlations.py b/src/ydata_profiling/model/correlations.py
index 2bbaa1112..25e2e13c4 100644
--- a/src/ydata_profiling/model/correlations.py
+++ b/src/ydata_profiling/model/correlations.py
@@ -3,12 +3,13 @@
 """Correlations between variables."""
 
 import warnings
-from typing import Dict, List, Optional, Sized, no_type_check
+from typing import Dict, List, Optional, Sized
 
 import numpy as np
 import pandas as pd
 
 from ydata_profiling.config import Settings
+from ydata_profiling.utils.backend import BaseBackend
 
 try:
     from pandas.core.base import DataError
@@ -16,30 +17,11 @@
     from pandas.errors import DataError
 
 
-class CorrelationBackend:
+class CorrelationBackend(BaseBackend):
     """Helper class to select and cache the appropriate correlation backend (Pandas or Spark)."""
 
-    @no_type_check
-    def __init__(self, df: Sized):
-        """Determine backend once and store it for all correlation computations."""
-        if isinstance(df, pd.DataFrame):
-            from ydata_profiling.model.pandas import (
-                correlations_pandas as correlation_backend,  # type: ignore
-            )
-        else:
-            from ydata_profiling.model.spark import (
-                correlations_spark as correlation_backend,  # type: ignore
-            )
-
-        self.backend = correlation_backend
-
-    def get_method(self, method_name: str):  # noqa: ANN201
-        """Retrieve the appropriate correlation method class from the backend."""
-        if hasattr(self.backend, method_name):
-            return getattr(self.backend, method_name)
-        raise AttributeError(
-            f"Correlation method '{method_name}' is not available in the backend."
-        )
+    _pandas_module = "ydata_profiling.model.pandas.correlations_pandas"
+    _spark_module = "ydata_profiling.model.spark.correlations_spark"
 
 
 class Correlation:
diff --git a/src/ydata_profiling/model/missing.py b/src/ydata_profiling/model/missing.py
index 46ec2dee3..aa14cc425 100644
--- a/src/ydata_profiling/model/missing.py
+++ b/src/ydata_profiling/model/missing.py
@@ -1,32 +1,17 @@
-import importlib
 import warnings
-from typing import Any, Callable, Dict, Optional, Sized
+from typing import Any, Dict, Optional, Sized
 
 import pandas as pd
 
 from ydata_profiling.config import Settings
+from ydata_profiling.utils.backend import BaseBackend
 
 
-class MissingDataBackend:
+class MissingDataBackend(BaseBackend):
     """Helper class to select and cache the appropriate missing-data backend (Pandas or Spark)."""
 
-    def __init__(self, df: Sized):
-        """Determine backend once and store it for all missing-data computations."""
-        if isinstance(df, pd.DataFrame):
-            self.backend_module = "ydata_profiling.model.pandas.missing_pandas"
-        else:
-            self.backend_module = "ydata_profiling.model.spark.missing_spark"
-
-        self.module = importlib.import_module(self.backend_module)
-
-    def get_method(self, method_name: str) -> Callable:
-        """Retrieve the appropriate missing-data function from the backend module."""
-        try:
-            return getattr(self.module, method_name)
-        except AttributeError as ex:
-            raise AttributeError(
-                f"Missing-data function '{method_name}' is not available in {self.backend_module}."
-            ) from ex
+    _pandas_module = "ydata_profiling.model.pandas.missing_pandas"
+    _spark_module = "ydata_profiling.model.spark.missing_spark"
 
 
 class MissingData:
diff --git a/src/ydata_profiling/model/pandas/table_pandas.py b/src/ydata_profiling/model/pandas/table_pandas.py
index a919ee33b..28c79f849 100644
--- a/src/ydata_profiling/model/pandas/table_pandas.py
+++ b/src/ydata_profiling/model/pandas/table_pandas.py
@@ -1,9 +1,7 @@
-from collections import Counter
-
 import pandas as pd
 
 from ydata_profiling.config import Settings
-from ydata_profiling.model.table import get_table_stats
+from ydata_profiling.model.table import compute_common_table_stats, get_table_stats
 
 
 @get_table_stats.register
@@ -21,36 +19,18 @@ def pandas_get_table_stats(
         A dictionary that contains the table statistics.
     """
     n = len(df) if not df.empty else 0
+    n_var = len(df.columns)
 
     memory_size = df.memory_usage(deep=config.memory_deep).sum()
     record_size = float(memory_size) / n if n > 0 else 0
 
     table_stats = {
         "n": n,
-        "n_var": len(df.columns),
+        "n_var": n_var,
         "memory_size": memory_size,
         "record_size": record_size,
-        "n_cells_missing": 0,
-        "n_vars_with_missing": 0,
-        "n_vars_all_missing": 0,
     }
 
-    for series_summary in variable_stats.values():
-        if "n_missing" in series_summary and series_summary["n_missing"] > 0:
-            table_stats["n_vars_with_missing"] += 1
-            table_stats["n_cells_missing"] += series_summary["n_missing"]
-            if series_summary["n_missing"] == n:
-                table_stats["n_vars_all_missing"] += 1
-
-    table_stats["p_cells_missing"] = (
-        table_stats["n_cells_missing"] / (table_stats["n"] * table_stats["n_var"])
-        if table_stats["n"] > 0 and table_stats["n_var"] > 0
-        else 0
-    )
-
-    # Variable type counts
-    table_stats.update(
-        {"types": dict(Counter([v["type"] for v in variable_stats.values()]))}
-    )
+    table_stats.update(compute_common_table_stats(n, n_var, variable_stats))
 
     return table_stats
diff --git a/src/ydata_profiling/model/spark/table_spark.py b/src/ydata_profiling/model/spark/table_spark.py
index 33e862e61..2a2985059 100644
--- a/src/ydata_profiling/model/spark/table_spark.py
+++ b/src/ydata_profiling/model/spark/table_spark.py
@@ -1,9 +1,7 @@
-from collections import Counter
-
 from pyspark.sql import DataFrame
 
 from ydata_profiling.config import Settings
-from ydata_profiling.model.table import get_table_stats
+from ydata_profiling.model.table import compute_common_table_stats, get_table_stats
 
 
 @get_table_stats.register
@@ -21,38 +19,9 @@ def get_table_stats_spark(
         A dictionary that contains the table statistics.
     """
     n = df.count()
+    n_var = len(df.columns)
 
-    result = {"n": n, "n_var": len(df.columns)}
-
-    table_stats = {
-        "n_cells_missing": 0,
-        "n_vars_with_missing": 0,
-        "n_vars_all_missing": 0,
-    }
-
-    for series_summary in variable_stats.values():
-        if "n_missing" in series_summary and series_summary["n_missing"] > 0:
-            table_stats["n_vars_with_missing"] += 1
-            table_stats["n_cells_missing"] += series_summary["n_missing"]
-            if series_summary["n_missing"] == n:
-                table_stats["n_vars_all_missing"] += 1
-
-    # without this check we'll get a div by zero error
-    if result["n"] * result["n_var"] > 0:
-        table_stats["p_cells_missing"] = (
-            table_stats["n_cells_missing"] / (result["n"] * result["n_var"])
-            if result["n"] > 0
-            else 0
-        )
-    else:
-        table_stats["p_cells_missing"] = 0
-
-    result["p_cells_missing"] = table_stats["p_cells_missing"]
-    result["n_cells_missing"] = table_stats["n_cells_missing"]
-    result["n_vars_all_missing"] = table_stats["n_vars_all_missing"]
-    result["n_vars_with_missing"] = table_stats["n_vars_with_missing"]
-
-    # Variable type counts
-    result["types"] = dict(Counter([v["type"] for v in variable_stats.values()]))
+    result = {"n": n, "n_var": n_var}
+    result.update(compute_common_table_stats(n, n_var, variable_stats))
 
     return result
diff --git a/src/ydata_profiling/model/spark/timeseries_index_spark.py b/src/ydata_profiling/model/spark/timeseries_index_spark.py
index e8145d76c..a31f25ccf 100644
--- a/src/ydata_profiling/model/spark/timeseries_index_spark.py
+++ b/src/ydata_profiling/model/spark/timeseries_index_spark.py
@@ -4,7 +4,7 @@
 from ydata_profiling.config import Settings
 
 
-def spark_get_time_index_description_spark(
+def get_time_index_description_spark(
     config: Settings,
     df: DataFrame,
     table_stats: dict,
diff --git a/src/ydata_profiling/model/summarizer.py b/src/ydata_profiling/model/summarizer.py
index d733a7d36..41b8d6f88 100644
--- a/src/ydata_profiling/model/summarizer.py
+++ b/src/ydata_profiling/model/summarizer.py
@@ -50,9 +50,8 @@ def summarize(
         return self.handle(str(dtype), config, series, {"type": str(dtype)})
 
 
-# Revisit this with the correct support for Spark as well.
 class ProfilingSummarizer(BaseSummarizer):
-    """A summarizer for Pandas DataFrames."""
+    """A summarizer supporting both Pandas and Spark DataFrames."""
 
     def __init__(self, typeset: VisionsTypeset, use_spark: bool = False):
         self.use_spark = use_spark and is_pyspark_installed()
diff --git a/src/ydata_profiling/model/summary_algorithms.py b/src/ydata_profiling/model/summary_algorithms.py
index 49569605b..9c3e5ef38 100644
--- a/src/ydata_profiling/model/summary_algorithms.py
+++ b/src/ydata_profiling/model/summary_algorithms.py
@@ -11,6 +11,21 @@
 T = TypeVar("T")
 
 
+def func_nullable_series_contains(fn: Callable) -> Callable:
+    @functools.wraps(fn)
+    def inner(
+        config: Settings, series: pd.Series, state: dict, *args, **kwargs
+    ) -> bool:
+        if series.hasnans:
+            series = series.dropna()
+            if series.empty:
+                return False
+
+        return fn(config, series, state, *args, **kwargs)
+
+    return inner
+
+
 def safe_histogram(
     values: np.ndarray,
     bins: Union[int, str, np.ndarray] = "auto",
diff --git a/src/ydata_profiling/model/table.py b/src/ydata_profiling/model/table.py
index e5eb6fdc2..6f5c7305d 100644
--- a/src/ydata_profiling/model/table.py
+++ b/src/ydata_profiling/model/table.py
@@ -1,3 +1,4 @@
+from collections import Counter
 from typing import Any
 
 from multimethod import multimethod
@@ -5,6 +6,42 @@
 from ydata_profiling.config import Settings
 
 
+def compute_common_table_stats(
+    n: int, n_var: int, variable_stats: dict
+) -> dict:
+    """Compute common table statistics shared by Pandas and Spark backends.
+
+    Args:
+        n: Number of rows in the DataFrame
+        n_var: Number of columns (variables)
+        variable_stats: Previously calculated statistic on the DataFrame series
+
+    Returns:
+        A dictionary with common table statistics: missing values counts, percentages, and type counts
+    """
+    table_stats = {
+        "n_cells_missing": 0,
+        "n_vars_with_missing": 0,
+        "n_vars_all_missing": 0,
+    }
+
+    for series_summary in variable_stats.values():
+        if "n_missing" in series_summary and series_summary["n_missing"] > 0:
+            table_stats["n_vars_with_missing"] += 1
+            table_stats["n_cells_missing"] += series_summary["n_missing"]
+            if series_summary["n_missing"] == n:
+                table_stats["n_vars_all_missing"] += 1
+
+    total_cells = n * n_var
+    table_stats["p_cells_missing"] = (
+        table_stats["n_cells_missing"] / total_cells if total_cells > 0 else 0
+    )
+
+    table_stats["types"] = dict(Counter([v["type"] for v in variable_stats.values()]))
+
+    return table_stats
+
+
 @multimethod
 def get_table_stats(config: Settings, df: Any, variable_stats: dict) -> dict:
     raise NotImplementedError()
diff --git a/src/ydata_profiling/report/structure/variables/render_common.py b/src/ydata_profiling/report/structure/variables/render_common.py
index e90935640..aef8de357 100644
--- a/src/ydata_profiling/report/structure/variables/render_common.py
+++ b/src/ydata_profiling/report/structure/variables/render_common.py
@@ -10,6 +10,7 @@ def render_common(config: Settings, summary: dict) -> dict:
     n_freq_table_max = config.n_freq_table_max
 
     template_variables = {
+        # TODO: also handle NaN values here (this table is built from value_counts_without_nan)
         "freq_table_rows": freq_table(
             freqtable=summary["value_counts_without_nan"],
             n=summary["n"],
diff --git a/src/ydata_profiling/utils/backend.py b/src/ydata_profiling/utils/backend.py
index e99d91c11..1cee2aea8 100644
--- a/src/ydata_profiling/utils/backend.py
+++ b/src/ydata_profiling/utils/backend.py
@@ -1,9 +1,41 @@
 """
-    File with a function to check the backend being used
+    File with backend utilities and helper functions to check the backend being used
 """
 import importlib
+from typing import Callable, Optional, Sized, Union
+
+import pandas as pd
 
 
 def is_pyspark_installed() -> bool:
     """Check if PySpark is installed without importing it."""
     return importlib.util.find_spec("pyspark") is not None
+
+
+class BaseBackend:
+    """Base helper class to select and cache the appropriate backend (Pandas or Spark)."""
+
+    _pandas_module: Optional[str] = None
+    _spark_module: Optional[str] = None
+
+    def __init__(self, df: Union[pd.DataFrame, Sized]):
+        """Determine backend once and store it for all computations."""
+        if isinstance(df, pd.DataFrame):
+            module_path = self._pandas_module
+        else:
+            module_path = self._spark_module
+
+        if module_path is None:
+            raise ValueError("Backend module path not configured")
+
+        self.module = importlib.import_module(module_path)
+        self.module_path = module_path
+
+    def get_method(self, method_name: str) -> Callable:
+        """Retrieve the appropriate function from the backend module."""
+        try:
+            return getattr(self.module, method_name)
+        except AttributeError as ex:
+            raise AttributeError(
+                f"Function '{method_name}' is not available in {self.module_path}."
+            ) from ex

From a1892a275016c5f6bc9eba06e1fdd88bbbbf5379 Mon Sep 17 00:00:00 2001
From: Pkcha <pkcha@PkchadeMacBook-Air.local>
Date: Sun, 12 Apr 2026 21:10:03 +0800
Subject: [PATCH 08/11] refactor: inline backend selection and table stats, register Spark describe functions

---
 src/ydata_profiling/model/correlations.py     | 28 +++++++++++---
 src/ydata_profiling/model/missing.py          | 25 ++++++++++---
 .../model/pandas/table_pandas.py              | 27 ++++++++++++--
 .../model/spark/describe_boolean_spark.py     |  2 +
 .../model/spark/describe_date_spark.py        |  2 +
 .../model/spark/describe_generic_spark.py     |  2 +
 .../model/spark/describe_numeric_spark.py     | 12 +++---
 .../model/spark/describe_text_spark.py        |  2 +
 .../model/spark/table_spark.py                | 37 +++++++++++++++++--
 .../model/spark/timeseries_index_spark.py     |  2 +
 src/ydata_profiling/model/summarizer.py       |  3 +-
 src/ydata_profiling/model/table.py            | 37 -------------------
 src/ydata_profiling/utils/backend.py          | 34 +----------------
 13 files changed, 117 insertions(+), 96 deletions(-)

diff --git a/src/ydata_profiling/model/correlations.py b/src/ydata_profiling/model/correlations.py
index 25e2e13c4..2bbaa1112 100644
--- a/src/ydata_profiling/model/correlations.py
+++ b/src/ydata_profiling/model/correlations.py
@@ -3,13 +3,12 @@
 """Correlations between variables."""
 
 import warnings
-from typing import Dict, List, Optional, Sized
+from typing import Dict, List, Optional, Sized, no_type_check
 
 import numpy as np
 import pandas as pd
 
 from ydata_profiling.config import Settings
-from ydata_profiling.utils.backend import BaseBackend
 
 try:
     from pandas.core.base import DataError
@@ -17,11 +16,30 @@
     from pandas.errors import DataError
 
 
-class CorrelationBackend(BaseBackend):
+class CorrelationBackend:
     """Helper class to select and cache the appropriate correlation backend (Pandas or Spark)."""
 
-    _pandas_module = "ydata_profiling.model.pandas.correlations_pandas"
-    _spark_module = "ydata_profiling.model.spark.correlations_spark"
+    @no_type_check
+    def __init__(self, df: Sized):
+        """Determine backend once and store it for all correlation computations."""
+        if isinstance(df, pd.DataFrame):
+            from ydata_profiling.model.pandas import (
+                correlations_pandas as correlation_backend,  # type: ignore
+            )
+        else:
+            from ydata_profiling.model.spark import (
+                correlations_spark as correlation_backend,  # type: ignore
+            )
+
+        self.backend = correlation_backend
+
+    def get_method(self, method_name: str):  # noqa: ANN201
+        """Retrieve the appropriate correlation method class from the backend."""
+        if hasattr(self.backend, method_name):
+            return getattr(self.backend, method_name)
+        raise AttributeError(
+            f"Correlation method '{method_name}' is not available in the backend."
+        )
 
 
 class Correlation:
diff --git a/src/ydata_profiling/model/missing.py b/src/ydata_profiling/model/missing.py
index aa14cc425..46ec2dee3 100644
--- a/src/ydata_profiling/model/missing.py
+++ b/src/ydata_profiling/model/missing.py
@@ -1,17 +1,32 @@
+import importlib
 import warnings
-from typing import Any, Dict, Optional, Sized
+from typing import Any, Callable, Dict, Optional, Sized
 
 import pandas as pd
 
 from ydata_profiling.config import Settings
-from ydata_profiling.utils.backend import BaseBackend
 
 
-class MissingDataBackend(BaseBackend):
+class MissingDataBackend:
     """Helper class to select and cache the appropriate missing-data backend (Pandas or Spark)."""
 
-    _pandas_module = "ydata_profiling.model.pandas.missing_pandas"
-    _spark_module = "ydata_profiling.model.spark.missing_spark"
+    def __init__(self, df: Sized):
+        """Determine backend once and store it for all missing-data computations."""
+        if isinstance(df, pd.DataFrame):
+            self.backend_module = "ydata_profiling.model.pandas.missing_pandas"
+        else:
+            self.backend_module = "ydata_profiling.model.spark.missing_spark"
+
+        self.module = importlib.import_module(self.backend_module)
+
+    def get_method(self, method_name: str) -> Callable:
+        """Retrieve the appropriate missing-data function from the backend module."""
+        try:
+            return getattr(self.module, method_name)
+        except AttributeError as ex:
+            raise AttributeError(
+                f"Missing-data function '{method_name}' is not available in {self.backend_module}."
+            ) from ex
 
 
 class MissingData:
diff --git a/src/ydata_profiling/model/pandas/table_pandas.py b/src/ydata_profiling/model/pandas/table_pandas.py
index 28c79f849..546b369ef 100644
--- a/src/ydata_profiling/model/pandas/table_pandas.py
+++ b/src/ydata_profiling/model/pandas/table_pandas.py
@@ -1,7 +1,9 @@
+from collections import Counter
+
 import pandas as pd
 
 from ydata_profiling.config import Settings
-from ydata_profiling.model.table import compute_common_table_stats, get_table_stats
+from ydata_profiling.model.table import get_table_stats
 
 
 @get_table_stats.register
@@ -19,18 +21,35 @@ def pandas_get_table_stats(
         A dictionary that contains the table statistics.
     """
     n = len(df) if not df.empty else 0
-    n_var = len(df.columns)
 
     memory_size = df.memory_usage(deep=config.memory_deep).sum()
     record_size = float(memory_size) / n if n > 0 else 0
 
     table_stats = {
         "n": n,
-        "n_var": n_var,
+        "n_var": len(df.columns),
         "memory_size": memory_size,
         "record_size": record_size,
+        "n_cells_missing": 0,
+        "n_vars_with_missing": 0,
+        "n_vars_all_missing": 0,
     }
 
-    table_stats.update(compute_common_table_stats(n, n_var, variable_stats))
+    for series_summary in variable_stats.values():
+        if "n_missing" in series_summary and series_summary["n_missing"] > 0:
+            table_stats["n_vars_with_missing"] += 1
+            table_stats["n_cells_missing"] += series_summary["n_missing"]
+            if series_summary["n_missing"] == n:
+                table_stats["n_vars_all_missing"] += 1
+
+    table_stats["p_cells_missing"] = (
+        table_stats["n_cells_missing"] / (table_stats["n"] * table_stats["n_var"])
+        if table_stats["n"] > 0 and table_stats["n_var"] > 0
+        else 0
+    )
+
+    table_stats.update(
+        {"types": dict(Counter([v["type"] for v in variable_stats.values()]))}
+    )
 
     return table_stats
diff --git a/src/ydata_profiling/model/spark/describe_boolean_spark.py b/src/ydata_profiling/model/spark/describe_boolean_spark.py
index 148dbce6c..ab5cf20fb 100644
--- a/src/ydata_profiling/model/spark/describe_boolean_spark.py
+++ b/src/ydata_profiling/model/spark/describe_boolean_spark.py
@@ -3,8 +3,10 @@
 from pyspark.sql import DataFrame
 
 from ydata_profiling.config import Settings
+from ydata_profiling.model.summary_algorithms import describe_boolean_1d
 
 
+@describe_boolean_1d.register
 def describe_boolean_1d_spark(
     config: Settings, df: DataFrame, summary: dict
 ) -> Tuple[Settings, DataFrame, dict]:
diff --git a/src/ydata_profiling/model/spark/describe_date_spark.py b/src/ydata_profiling/model/spark/describe_date_spark.py
index c44d36650..a5e11a0f1 100644
--- a/src/ydata_profiling/model/spark/describe_date_spark.py
+++ b/src/ydata_profiling/model/spark/describe_date_spark.py
@@ -5,6 +5,7 @@
 from pyspark.sql import DataFrame
 
 from ydata_profiling.config import Settings
+from ydata_profiling.model.summary_algorithms import describe_date_1d
 
 
 def date_stats_spark(df: DataFrame, summary: dict) -> dict:
@@ -18,6 +19,7 @@ def date_stats_spark(df: DataFrame, summary: dict) -> dict:
     return df.agg(*expr).first().asDict()
 
 
+@describe_date_1d.register
 def describe_date_1d_spark(
     config: Settings, df: DataFrame, summary: dict
 ) -> Tuple[Settings, DataFrame, dict]:
diff --git a/src/ydata_profiling/model/spark/describe_generic_spark.py b/src/ydata_profiling/model/spark/describe_generic_spark.py
index 1171881cd..ee2356c0a 100644
--- a/src/ydata_profiling/model/spark/describe_generic_spark.py
+++ b/src/ydata_profiling/model/spark/describe_generic_spark.py
@@ -3,8 +3,10 @@
 from pyspark.sql import DataFrame
 
 from ydata_profiling.config import Settings
+from ydata_profiling.model.summary_algorithms import describe_generic
 
 
+@describe_generic.register
 def describe_generic_spark(
     config: Settings, df: DataFrame, summary: dict
 ) -> Tuple[Settings, DataFrame, dict]:
diff --git a/src/ydata_profiling/model/spark/describe_numeric_spark.py b/src/ydata_profiling/model/spark/describe_numeric_spark.py
index 8c299577e..395b1461b 100644
--- a/src/ydata_profiling/model/spark/describe_numeric_spark.py
+++ b/src/ydata_profiling/model/spark/describe_numeric_spark.py
@@ -5,13 +5,15 @@
 from pyspark.sql import DataFrame
 
 from ydata_profiling.config import Settings
-from ydata_profiling.model.summary_algorithms import histogram_compute
+from ydata_profiling.model.summary_algorithms import (
+    describe_numeric_1d,
+    histogram_compute,
+)
 
 
 def numeric_stats_spark(df: DataFrame, summary: dict) -> dict:
     column = df.columns[0]
 
-    # Removing null types from numeric summary stats to match Pandas defaults which skip na's (skipna=False)
     finite_filter = (
         F.col(column).isNotNull()
         & ~F.isnan(F.col(column))
@@ -32,6 +34,7 @@ def numeric_stats_spark(df: DataFrame, summary: dict) -> dict:
     return non_null_df.agg(*expr).first().asDict()
 
 
+@describe_numeric_1d.register
 def describe_numeric_1d_spark(
     config: Settings, df: DataFrame, summary: dict
 ) -> Tuple[Settings, DataFrame, dict]:
@@ -90,7 +93,6 @@ def describe_numeric_1d_spark(
     quantile_threshold = 0.05
 
     if summary.get("n") == summary.get("n_missing"):
-        # This means the entire column is null/nan, so summary values need to be hard-coded:
         summary.update({f"{percentile:.0%}": np.nan for percentile in quantiles})
 
         summary["mad"] = np.nan
@@ -135,10 +137,6 @@ def describe_numeric_1d_spark(
     # ... https://stackoverflow.com/questions/60221841/how-to-detect-monotonic-decrease-in-pyspark
     summary["monotonic"] = 0
 
-    # this function only displays the top N (see config) values for a histogram.
-    # This might be confusing if there are a lot of values of equal magnitude, but we cannot bring all the values to
-    # display in pandas display
-    # the alternative is to do this in spark natively, but it is not trivial
     infinity_values = [np.inf, -np.inf]
 
     infinity_index = summary["value_counts_without_nan"].index.isin(infinity_values)
diff --git a/src/ydata_profiling/model/spark/describe_text_spark.py b/src/ydata_profiling/model/spark/describe_text_spark.py
index 6d7804cf5..b5e27f615 100644
--- a/src/ydata_profiling/model/spark/describe_text_spark.py
+++ b/src/ydata_profiling/model/spark/describe_text_spark.py
@@ -3,8 +3,10 @@
 from pyspark.sql import DataFrame
 
 from ydata_profiling.config import Settings
+from ydata_profiling.model.summary_algorithms import describe_text_1d
 
 
+@describe_text_1d.register
 def describe_text_1d_spark(
     config: Settings, df: DataFrame, summary: dict
 ) -> Tuple[Settings, DataFrame, dict]:
diff --git a/src/ydata_profiling/model/spark/table_spark.py b/src/ydata_profiling/model/spark/table_spark.py
index 2a2985059..17ac03323 100644
--- a/src/ydata_profiling/model/spark/table_spark.py
+++ b/src/ydata_profiling/model/spark/table_spark.py
@@ -1,7 +1,9 @@
+from collections import Counter
+
 from pyspark.sql import DataFrame
 
 from ydata_profiling.config import Settings
-from ydata_profiling.model.table import compute_common_table_stats, get_table_stats
+from ydata_profiling.model.table import get_table_stats
 
 
 @get_table_stats.register
@@ -19,9 +21,36 @@ def get_table_stats_spark(
         A dictionary that contains the table statistics.
     """
     n = df.count()
-    n_var = len(df.columns)
 
-    result = {"n": n, "n_var": n_var}
-    result.update(compute_common_table_stats(n, n_var, variable_stats))
+    result = {"n": n, "n_var": len(df.columns)}
+
+    table_stats = {
+        "n_cells_missing": 0,
+        "n_vars_with_missing": 0,
+        "n_vars_all_missing": 0,
+    }
+
+    for series_summary in variable_stats.values():
+        if "n_missing" in series_summary and series_summary["n_missing"] > 0:
+            table_stats["n_vars_with_missing"] += 1
+            table_stats["n_cells_missing"] += series_summary["n_missing"]
+            if series_summary["n_missing"] == n:
+                table_stats["n_vars_all_missing"] += 1
+
+    if result["n"] * result["n_var"] > 0:
+        table_stats["p_cells_missing"] = (
+            table_stats["n_cells_missing"] / (result["n"] * result["n_var"])
+            if result["n"] > 0
+            else 0
+        )
+    else:
+        table_stats["p_cells_missing"] = 0
+
+    result["p_cells_missing"] = table_stats["p_cells_missing"]
+    result["n_cells_missing"] = table_stats["n_cells_missing"]
+    result["n_vars_all_missing"] = table_stats["n_vars_all_missing"]
+    result["n_vars_with_missing"] = table_stats["n_vars_with_missing"]
+
+    result["types"] = dict(Counter([v["type"] for v in variable_stats.values()]))
 
     return result
diff --git a/src/ydata_profiling/model/spark/timeseries_index_spark.py b/src/ydata_profiling/model/spark/timeseries_index_spark.py
index a31f25ccf..c16204ac3 100644
--- a/src/ydata_profiling/model/spark/timeseries_index_spark.py
+++ b/src/ydata_profiling/model/spark/timeseries_index_spark.py
@@ -2,8 +2,10 @@
 from pyspark.sql import DataFrame
 
 from ydata_profiling.config import Settings
+from ydata_profiling.model.timeseries_index import get_time_index_description
 
 
+@get_time_index_description.register
 def get_time_index_description_spark(
     config: Settings,
     df: DataFrame,
diff --git a/src/ydata_profiling/model/summarizer.py b/src/ydata_profiling/model/summarizer.py
index 41b8d6f88..d733a7d36 100644
--- a/src/ydata_profiling/model/summarizer.py
+++ b/src/ydata_profiling/model/summarizer.py
@@ -50,8 +50,9 @@ def summarize(
         return self.handle(str(dtype), config, series, {"type": str(dtype)})
 
 
+# Revisit this with the correct support for Spark as well.
 class ProfilingSummarizer(BaseSummarizer):
-    """A summarizer supporting both Pandas and Spark DataFrames."""
+    """A summarizer for Pandas DataFrames."""
 
     def __init__(self, typeset: VisionsTypeset, use_spark: bool = False):
         self.use_spark = use_spark and is_pyspark_installed()
diff --git a/src/ydata_profiling/model/table.py b/src/ydata_profiling/model/table.py
index 6f5c7305d..e5eb6fdc2 100644
--- a/src/ydata_profiling/model/table.py
+++ b/src/ydata_profiling/model/table.py
@@ -1,4 +1,3 @@
-from collections import Counter
 from typing import Any
 
 from multimethod import multimethod
@@ -6,42 +5,6 @@
 from ydata_profiling.config import Settings
 
 
-def compute_common_table_stats(
-    n: int, n_var: int, variable_stats: dict
-) -> dict:
-    """Compute common table statistics shared by Pandas and Spark backends.
-
-    Args:
-        n: Number of rows in the DataFrame
-        n_var: Number of columns (variables)
-        variable_stats: Previously calculated statistic on the DataFrame series
-
-    Returns:
-        A dictionary with common table statistics: missing values counts, percentages, and type counts
-    """
-    table_stats = {
-        "n_cells_missing": 0,
-        "n_vars_with_missing": 0,
-        "n_vars_all_missing": 0,
-    }
-
-    for series_summary in variable_stats.values():
-        if "n_missing" in series_summary and series_summary["n_missing"] > 0:
-            table_stats["n_vars_with_missing"] += 1
-            table_stats["n_cells_missing"] += series_summary["n_missing"]
-            if series_summary["n_missing"] == n:
-                table_stats["n_vars_all_missing"] += 1
-
-    total_cells = n * n_var
-    table_stats["p_cells_missing"] = (
-        table_stats["n_cells_missing"] / total_cells if total_cells > 0 else 0
-    )
-
-    table_stats["types"] = dict(Counter([v["type"] for v in variable_stats.values()]))
-
-    return table_stats
-
-
 @multimethod
 def get_table_stats(config: Settings, df: Any, variable_stats: dict) -> dict:
     raise NotImplementedError()
diff --git a/src/ydata_profiling/utils/backend.py b/src/ydata_profiling/utils/backend.py
index 1cee2aea8..e99d91c11 100644
--- a/src/ydata_profiling/utils/backend.py
+++ b/src/ydata_profiling/utils/backend.py
@@ -1,41 +1,9 @@
 """
-    File with backend utilities and helper functions to check the backend being used
+    File with a function to check the backend being used
 """
 import importlib
-from typing import Callable, Optional, Sized, Union
-
-import pandas as pd
 
 
 def is_pyspark_installed() -> bool:
     """Check if PySpark is installed without importing it."""
     return importlib.util.find_spec("pyspark") is not None
-
-
-class BaseBackend:
-    """Base helper class to select and cache the appropriate backend (Pandas or Spark)."""
-
-    _pandas_module: Optional[str] = None
-    _spark_module: Optional[str] = None
-
-    def __init__(self, df: Union[pd.DataFrame, Sized]):
-        """Determine backend once and store it for all computations."""
-        if isinstance(df, pd.DataFrame):
-            module_path = self._pandas_module
-        else:
-            module_path = self._spark_module
-
-        if module_path is None:
-            raise ValueError("Backend module path not configured")
-
-        self.module = importlib.import_module(module_path)
-        self.module_path = module_path
-
-    def get_method(self, method_name: str) -> Callable:
-        """Retrieve the appropriate function from the backend module."""
-        try:
-            return getattr(self.module, method_name)
-        except AttributeError as ex:
-            raise AttributeError(
-                f"Function '{method_name}' is not available in {self.module_path}."
-            ) from ex

From 754677bed6f889c50091ea51e50d833fe47693c4 Mon Sep 17 00:00:00 2001
From: Pkcha <pkcha@PkchadeMacBook-Air.local>
Date: Sun, 12 Apr 2026 22:44:21 +0800
Subject: [PATCH 09/11] feat: initial release

---
 src/ydata_profiling/model/handler.py          | 178 ++++++++++--------
 .../pandas/describe_categorical_pandas.py     |   1 -
 .../model/pandas/table_pandas.py              |   1 +
 .../model/spark/describe_boolean_spark.py     |   2 -
 .../model/spark/describe_date_spark.py        |   2 -
 .../model/spark/describe_generic_spark.py     |   2 -
 .../model/spark/describe_numeric_spark.py     |  12 +-
 .../model/spark/describe_text_spark.py        |   2 -
 .../model/spark/missing_spark.py              |   1 -
 .../model/spark/table_spark.py                |   2 +
 .../model/spark/timeseries_index_spark.py     |   4 +-
 .../report/presentation/core/collapse.py      |   2 +-
 .../report/presentation/core/container.py     |   2 +-
 .../report/presentation/core/dropdown.py      |   2 +-
 .../report/presentation/core/renderable.py    |   6 +-
 .../report/presentation/core/root.py          |   4 +-
 .../report/presentation/core/variable.py      |   6 +-
 .../presentation/flavours/flavour_html.py     |   7 +-
 .../presentation/flavours/flavour_widget.py   |   7 +-
 .../report/presentation/flavours/flavours.py  |  20 +-
 20 files changed, 142 insertions(+), 121 deletions(-)

diff --git a/src/ydata_profiling/model/handler.py b/src/ydata_profiling/model/handler.py
index 992c1840c..13722e1cb 100644
--- a/src/ydata_profiling/model/handler.py
+++ b/src/ydata_profiling/model/handler.py
@@ -1,81 +1,97 @@
-"""
-    Auxiliary handler methods for data summary extraction
-"""
-from typing import Any, Callable, Dict, List, Sequence
-
-import networkx as nx
-from visions import VisionsTypeset
-
-
-def compose(functions: Sequence[Callable]) -> Callable:
-    """
-    Compose a sequence of functions.
-
-    :param functions: sequence of functions
-    :return: combined function applying all functions in order.
-    """
-
-    def composed_function(*args) -> List[Any]:
-        result = args  # Start with the input arguments
-        for func in functions:
-            result = func(*result) if isinstance(result, tuple) else func(result)
-        return result  # type: ignore
-
-    return composed_function  # type: ignore
-
-
-class Handler:
-    """A generic handler
-
-    Allows any custom mapping between data types and functions
-    """
-
-    def __init__(
-        self,
-        mapping: Dict[str, List[Callable]],
-        typeset: VisionsTypeset,
-        *args,
-        **kwargs
-    ):
-        self.mapping = mapping
-        self.typeset = typeset
-        self._complete_dag()
-
-    def _complete_dag(self) -> None:
-        for from_type, to_type in nx.topological_sort(
-            nx.line_graph(self.typeset.base_graph)
-        ):
-            self.mapping[str(to_type)] = (
-                self.mapping[str(from_type)] + self.mapping[str(to_type)]
-            )
-
-    def handle(self, dtype: str, *args, **kwargs) -> dict:
-        """
-        Returns:
-            object: a tuple containing the config, the dataset series and the summary extracted
-        """
-        funcs = self.mapping.get(dtype, [])
-        op = compose(funcs)
-        summary = op(*args)[-1]
-        return summary
-
-
-def get_render_map() -> Dict[str, Callable]:
-    import ydata_profiling.report.structure.variables as render_algorithms
-
-    render_map = {
-        "Boolean": render_algorithms.render_boolean,
-        "Numeric": render_algorithms.render_real,
-        "Complex": render_algorithms.render_complex,
-        "Text": render_algorithms.render_text,
-        "DateTime": render_algorithms.render_date,
-        "Categorical": render_algorithms.render_categorical,
-        "URL": render_algorithms.render_url,
-        "Path": render_algorithms.render_path,
-        "File": render_algorithms.render_file,
-        "Image": render_algorithms.render_image,
-        "Unsupported": render_algorithms.render_generic,
-        "TimeSeries": render_algorithms.render_timeseries,
-    }
-
-    return render_map
+"""
+    Auxiliary handler methods for data summary extraction
+"""
+from typing import Any, Callable, Dict, List, Sequence, Tuple, TypeVar, cast
+
+import networkx as nx
+from visions import VisionsTypeset
+
+T = TypeVar("T")
+SummaryFunction = Callable[..., Tuple[Any, ...]]
+
+
+def compose(functions: Sequence[SummaryFunction]) -> SummaryFunction:
+    """
+    Compose a sequence of functions.
+
+    :param functions: sequence of functions
+    :return: combined function applying all functions in order.
+    """
+
+    def composed_function(*args: Any) -> Tuple[Any, ...]:
+        result: Tuple[Any, ...] = args
+        for func in functions:
+            step_result = func(*result)
+            if not isinstance(step_result, tuple):
+                result = (step_result,)
+            else:
+                result = step_result
+        return result
+
+    return composed_function
+
+
+class Handler:
+    """A generic handler
+
+    Allows any custom mapping between data types and functions
+    """
+
+    def __init__(
+        self,
+        mapping: Dict[str, List[SummaryFunction]],
+        typeset: VisionsTypeset,
+        *args: Any,
+        **kwargs: Any,
+    ) -> None:
+        self.mapping: Dict[str, List[SummaryFunction]] = mapping
+        self.typeset = typeset
+        self._complete_dag()
+
+    def _complete_dag(self) -> None:
+        for from_type, to_type in nx.topological_sort(
+            nx.line_graph(self.typeset.base_graph)
+        ):
+            from_type_str = str(from_type)
+            to_type_str = str(to_type)
+            
+            if from_type_str not in self.mapping:
+                continue
+                
+            if to_type_str in self.mapping:
+                self.mapping[to_type_str] = (
+                    self.mapping[from_type_str] + self.mapping[to_type_str]
+                )
+            else:
+                self.mapping[to_type_str] = self.mapping[from_type_str].copy()
+
+    def handle(self, dtype: str, *args: Any, **kwargs: Any) -> Dict[str, Any]:
+        """
+        Returns:
+            dict: the extracted summary, i.e. the last element of the tuple returned by the composed functions
+        """
+        funcs = self.mapping.get(dtype, [])
+        op = compose(funcs)
+        result = op(*args)
+        return cast(Dict[str, Any], result[-1])
+
+
+def get_render_map() -> Dict[str, Callable]:
+    import ydata_profiling.report.structure.variables as render_algorithms
+
+    render_map = {
+        "Boolean": render_algorithms.render_boolean,
+        "Numeric": render_algorithms.render_real,
+        "Complex": render_algorithms.render_complex,
+        "Text": render_algorithms.render_text,
+        "DateTime": render_algorithms.render_date,
+        "Categorical": render_algorithms.render_categorical,
+        "URL": render_algorithms.render_url,
+        "Path": render_algorithms.render_path,
+        "File": render_algorithms.render_file,
+        "Image": render_algorithms.render_image,
+        "Unsupported": render_algorithms.render_generic,
+        "TimeSeries": render_algorithms.render_timeseries,
+    }
+
+    return render_map
diff --git a/src/ydata_profiling/model/pandas/describe_categorical_pandas.py b/src/ydata_profiling/model/pandas/describe_categorical_pandas.py
index a53f16d91..568aa7a9c 100644
--- a/src/ydata_profiling/model/pandas/describe_categorical_pandas.py
+++ b/src/ydata_profiling/model/pandas/describe_categorical_pandas.py
@@ -27,7 +27,6 @@ def get_character_counts_vc(vc: pd.Series) -> pd.Series:
     if len(counts) > 0:
         counts = counts.groupby(level=0, sort=False).sum()
         counts = counts.sort_values(ascending=False)
-        # FIXME: correct in split, below should be zero: print(counts.loc[''])
         counts = counts[counts.index.str.len() > 0]
     return counts
 
diff --git a/src/ydata_profiling/model/pandas/table_pandas.py b/src/ydata_profiling/model/pandas/table_pandas.py
index 546b369ef..a919ee33b 100644
--- a/src/ydata_profiling/model/pandas/table_pandas.py
+++ b/src/ydata_profiling/model/pandas/table_pandas.py
@@ -48,6 +48,7 @@ def pandas_get_table_stats(
         else 0
     )
 
+    # Variable type counts
     table_stats.update(
         {"types": dict(Counter([v["type"] for v in variable_stats.values()]))}
     )
diff --git a/src/ydata_profiling/model/spark/describe_boolean_spark.py b/src/ydata_profiling/model/spark/describe_boolean_spark.py
index ab5cf20fb..148dbce6c 100644
--- a/src/ydata_profiling/model/spark/describe_boolean_spark.py
+++ b/src/ydata_profiling/model/spark/describe_boolean_spark.py
@@ -3,10 +3,8 @@
 from pyspark.sql import DataFrame
 
 from ydata_profiling.config import Settings
-from ydata_profiling.model.summary_algorithms import describe_boolean_1d
 
 
-@describe_boolean_1d.register
 def describe_boolean_1d_spark(
     config: Settings, df: DataFrame, summary: dict
 ) -> Tuple[Settings, DataFrame, dict]:
diff --git a/src/ydata_profiling/model/spark/describe_date_spark.py b/src/ydata_profiling/model/spark/describe_date_spark.py
index a5e11a0f1..c44d36650 100644
--- a/src/ydata_profiling/model/spark/describe_date_spark.py
+++ b/src/ydata_profiling/model/spark/describe_date_spark.py
@@ -5,7 +5,6 @@
 from pyspark.sql import DataFrame
 
 from ydata_profiling.config import Settings
-from ydata_profiling.model.summary_algorithms import describe_date_1d
 
 
 def date_stats_spark(df: DataFrame, summary: dict) -> dict:
@@ -19,7 +18,6 @@ def date_stats_spark(df: DataFrame, summary: dict) -> dict:
     return df.agg(*expr).first().asDict()
 
 
-@describe_date_1d.register
 def describe_date_1d_spark(
     config: Settings, df: DataFrame, summary: dict
 ) -> Tuple[Settings, DataFrame, dict]:
diff --git a/src/ydata_profiling/model/spark/describe_generic_spark.py b/src/ydata_profiling/model/spark/describe_generic_spark.py
index ee2356c0a..1171881cd 100644
--- a/src/ydata_profiling/model/spark/describe_generic_spark.py
+++ b/src/ydata_profiling/model/spark/describe_generic_spark.py
@@ -3,10 +3,8 @@
 from pyspark.sql import DataFrame
 
 from ydata_profiling.config import Settings
-from ydata_profiling.model.summary_algorithms import describe_generic
 
 
-@describe_generic.register
 def describe_generic_spark(
     config: Settings, df: DataFrame, summary: dict
 ) -> Tuple[Settings, DataFrame, dict]:
diff --git a/src/ydata_profiling/model/spark/describe_numeric_spark.py b/src/ydata_profiling/model/spark/describe_numeric_spark.py
index 395b1461b..8c299577e 100644
--- a/src/ydata_profiling/model/spark/describe_numeric_spark.py
+++ b/src/ydata_profiling/model/spark/describe_numeric_spark.py
@@ -5,15 +5,13 @@
 from pyspark.sql import DataFrame
 
 from ydata_profiling.config import Settings
-from ydata_profiling.model.summary_algorithms import (
-    describe_numeric_1d,
-    histogram_compute,
-)
+from ydata_profiling.model.summary_algorithms import histogram_compute
 
 
 def numeric_stats_spark(df: DataFrame, summary: dict) -> dict:
     column = df.columns[0]
 
+    # Removing null types from numeric summary stats to match Pandas defaults which skip na's (skipna=True)
     finite_filter = (
         F.col(column).isNotNull()
         & ~F.isnan(F.col(column))
@@ -34,7 +32,6 @@ def numeric_stats_spark(df: DataFrame, summary: dict) -> dict:
     return non_null_df.agg(*expr).first().asDict()
 
 
-@describe_numeric_1d.register
 def describe_numeric_1d_spark(
     config: Settings, df: DataFrame, summary: dict
 ) -> Tuple[Settings, DataFrame, dict]:
@@ -93,6 +90,7 @@ def describe_numeric_1d_spark(
     quantile_threshold = 0.05
 
     if summary.get("n") == summary.get("n_missing"):
+        # This means the entire column is null/nan, so summary values need to be hard-coded:
         summary.update({f"{percentile:.0%}": np.nan for percentile in quantiles})
 
         summary["mad"] = np.nan
@@ -137,6 +135,10 @@ def describe_numeric_1d_spark(
     # ... https://stackoverflow.com/questions/60221841/how-to-detect-monotonic-decrease-in-pyspark
     summary["monotonic"] = 0
 
+    # This function only displays the top N (see config) values for a histogram.
+    # This might be confusing if there are many values of equal magnitude, but we
+    # cannot bring all the values into pandas for display.
+    # The alternative is to do this natively in Spark, but that is not trivial.
     infinity_values = [np.inf, -np.inf]
 
     infinity_index = summary["value_counts_without_nan"].index.isin(infinity_values)
diff --git a/src/ydata_profiling/model/spark/describe_text_spark.py b/src/ydata_profiling/model/spark/describe_text_spark.py
index b5e27f615..6d7804cf5 100644
--- a/src/ydata_profiling/model/spark/describe_text_spark.py
+++ b/src/ydata_profiling/model/spark/describe_text_spark.py
@@ -3,10 +3,8 @@
 from pyspark.sql import DataFrame
 
 from ydata_profiling.config import Settings
-from ydata_profiling.model.summary_algorithms import describe_text_1d
 
 
-@describe_text_1d.register
 def describe_text_1d_spark(
     config: Settings, df: DataFrame, summary: dict
 ) -> Tuple[Settings, DataFrame, dict]:
diff --git a/src/ydata_profiling/model/spark/missing_spark.py b/src/ydata_profiling/model/spark/missing_spark.py
index deacf1b89..02529dceb 100644
--- a/src/ydata_profiling/model/spark/missing_spark.py
+++ b/src/ydata_profiling/model/spark/missing_spark.py
@@ -56,7 +56,6 @@ def __len__(self) -> Optional[int]:
 def missing_bar(config: Settings, df: DataFrame) -> str:
     import pyspark.sql.functions as F
 
-    # FIXME: move to univariate
     data_nan_counts = (
         df.agg(
             *[F.count(F.when(F.isnull(c) | F.isnan(c), c)).alias(c) for c in df.columns]
diff --git a/src/ydata_profiling/model/spark/table_spark.py b/src/ydata_profiling/model/spark/table_spark.py
index 17ac03323..33e862e61 100644
--- a/src/ydata_profiling/model/spark/table_spark.py
+++ b/src/ydata_profiling/model/spark/table_spark.py
@@ -37,6 +37,7 @@ def get_table_stats_spark(
             if series_summary["n_missing"] == n:
                 table_stats["n_vars_all_missing"] += 1
 
+    # without this check we'll get a div by zero error
     if result["n"] * result["n_var"] > 0:
         table_stats["p_cells_missing"] = (
             table_stats["n_cells_missing"] / (result["n"] * result["n_var"])
@@ -51,6 +52,7 @@ def get_table_stats_spark(
     result["n_vars_all_missing"] = table_stats["n_vars_all_missing"]
     result["n_vars_with_missing"] = table_stats["n_vars_with_missing"]
 
+    # Variable type counts
     result["types"] = dict(Counter([v["type"] for v in variable_stats.values()]))
 
     return result
diff --git a/src/ydata_profiling/model/spark/timeseries_index_spark.py b/src/ydata_profiling/model/spark/timeseries_index_spark.py
index c16204ac3..e8145d76c 100644
--- a/src/ydata_profiling/model/spark/timeseries_index_spark.py
+++ b/src/ydata_profiling/model/spark/timeseries_index_spark.py
@@ -2,11 +2,9 @@
 from pyspark.sql import DataFrame
 
 from ydata_profiling.config import Settings
-from ydata_profiling.model.timeseries_index import get_time_index_description
 
 
-@get_time_index_description.register
-def get_time_index_description_spark(
+def spark_get_time_index_description_spark(
     config: Settings,
     df: DataFrame,
     table_stats: dict,
diff --git a/src/ydata_profiling/report/presentation/core/collapse.py b/src/ydata_profiling/report/presentation/core/collapse.py
index a7dba34f1..9bc393602 100644
--- a/src/ydata_profiling/report/presentation/core/collapse.py
+++ b/src/ydata_profiling/report/presentation/core/collapse.py
@@ -6,7 +6,7 @@
 
 
 class Collapse(ItemRenderer):
-    def __init__(self, button: ToggleButton, item: Renderable, **kwargs):
+    def __init__(self, button: ToggleButton, item: Renderable, **kwargs: Any):
         super().__init__("collapse", {"button": button, "item": item}, **kwargs)
 
     def __repr__(self) -> str:
diff --git a/src/ydata_profiling/report/presentation/core/container.py b/src/ydata_profiling/report/presentation/core/container.py
index c82f06266..d4ed121ca 100644
--- a/src/ydata_profiling/report/presentation/core/container.py
+++ b/src/ydata_profiling/report/presentation/core/container.py
@@ -13,7 +13,7 @@ def __init__(
         anchor_id: Optional[str] = None,
         classes: Optional[str] = None,
         oss: Optional[bool] = None,
-        **kwargs,
+        **kwargs: Any,
     ):
         args = {"items": items, "nested": nested}
         args.update(**kwargs)
diff --git a/src/ydata_profiling/report/presentation/core/dropdown.py b/src/ydata_profiling/report/presentation/core/dropdown.py
index c1c2f274e..4c9dfb3a9 100644
--- a/src/ydata_profiling/report/presentation/core/dropdown.py
+++ b/src/ydata_profiling/report/presentation/core/dropdown.py
@@ -15,7 +15,7 @@ def __init__(
         anchor_id: str,
         classes: list,
         is_row: bool,
-        **kwargs
+        **kwargs: Any,
     ):
         super().__init__(
             "dropdown",
diff --git a/src/ydata_profiling/report/presentation/core/renderable.py b/src/ydata_profiling/report/presentation/core/renderable.py
index 3f7f09f6c..028151532 100644
--- a/src/ydata_profiling/report/presentation/core/renderable.py
+++ b/src/ydata_profiling/report/presentation/core/renderable.py
@@ -1,5 +1,5 @@
 from abc import ABC, abstractmethod
-from typing import Any, Dict, Optional
+from typing import Any, Callable, Dict, Optional
 
 
 class Renderable(ABC):
@@ -34,9 +34,9 @@ def classes(self) -> str:
     def render(self) -> Any:
         pass
 
-    def __str__(self):
+    def __str__(self) -> str:
         return self.__class__.__name__
 
     @classmethod
-    def convert_to_class(cls, obj: "Renderable", flavour_func) -> None:  # noqa: ANN001
+    def convert_to_class(cls, obj: "Renderable", flavour_func: Callable) -> None:
         obj.__class__ = cls
diff --git a/src/ydata_profiling/report/presentation/core/root.py b/src/ydata_profiling/report/presentation/core/root.py
index 0c3f1e3c9..6e96e7f14 100644
--- a/src/ydata_profiling/report/presentation/core/root.py
+++ b/src/ydata_profiling/report/presentation/core/root.py
@@ -11,7 +11,7 @@ class Root(ItemRenderer):
     """
 
     def __init__(
-        self, name: str, body: Renderable, footer: Renderable, style: Style, **kwargs
+        self, name: str, body: Renderable, footer: Renderable, style: Style, **kwargs: Any
     ):
         super().__init__(
             "report",
@@ -23,7 +23,7 @@ def __init__(
     def __repr__(self) -> str:
         return "Root"
 
-    def render(self, **kwargs) -> Any:
+    def render(self, **kwargs: Any) -> Any:
         raise NotImplementedError()
 
     @classmethod
diff --git a/src/ydata_profiling/report/presentation/core/variable.py b/src/ydata_profiling/report/presentation/core/variable.py
index cdf063202..34bd110a8 100644
--- a/src/ydata_profiling/report/presentation/core/variable.py
+++ b/src/ydata_profiling/report/presentation/core/variable.py
@@ -10,13 +10,13 @@ def __init__(
         top: Renderable,
         bottom: Optional[Renderable] = None,
         ignore: bool = False,
-        **kwargs,
+        **kwargs: Any,
     ):
         super().__init__(
             "variable", {"top": top, "bottom": bottom, "ignore": ignore}, **kwargs
         )
 
-    def __str__(self):
+    def __str__(self) -> str:
         top_text = str(self.content["top"]).replace("\n", "\n\t")
         bottom_text = str(self.content["bottom"]).replace("\n", "\n\t")
 
@@ -25,7 +25,7 @@ def __str__(self):
         text += f"- bottom: {bottom_text}"
         return text
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return "Variable"
 
     def render(self) -> Any:
diff --git a/src/ydata_profiling/report/presentation/flavours/flavour_html.py b/src/ydata_profiling/report/presentation/flavours/flavour_html.py
index b342ff32f..7ad2b9c1d 100644
--- a/src/ydata_profiling/report/presentation/flavours/flavour_html.py
+++ b/src/ydata_profiling/report/presentation/flavours/flavour_html.py
@@ -41,7 +41,10 @@
     HTMLVariableInfo,
 )
 
-html_mapping = {
+from typing import cast
+from ydata_profiling.report.presentation.flavours.flavours import _FlavourMapping
+
+html_mapping = cast(_FlavourMapping, {
     Container: HTMLContainer,
     Variable: HTMLVariable,
     VariableInfo: HTMLVariableInfo,
@@ -59,6 +62,6 @@
     Collapse: HTMLCollapse,
     CorrelationTable: HTMLCorrelationTable,
     Scores: HTMLScores,
-}
+})
 
 register_flavour("html", html_mapping)
diff --git a/src/ydata_profiling/report/presentation/flavours/flavour_widget.py b/src/ydata_profiling/report/presentation/flavours/flavour_widget.py
index b95d724f1..29ff1ad2c 100644
--- a/src/ydata_profiling/report/presentation/flavours/flavour_widget.py
+++ b/src/ydata_profiling/report/presentation/flavours/flavour_widget.py
@@ -39,7 +39,10 @@
     WidgetVariableInfo,
 )
 
-widget_mapping = {
+from typing import cast
+from ydata_profiling.report.presentation.flavours.flavours import _FlavourMapping
+
+widget_mapping = cast(_FlavourMapping, {
     Container: WidgetContainer,
     Variable: WidgetVariable,
     VariableInfo: WidgetVariableInfo,
@@ -56,6 +59,6 @@
     ToggleButton: WidgetToggleButton,
     Collapse: WidgetCollapse,
     CorrelationTable: WidgetCorrelationTable,
-}
+})
 
 register_flavour("widget", widget_mapping)
diff --git a/src/ydata_profiling/report/presentation/flavours/flavours.py b/src/ydata_profiling/report/presentation/flavours/flavours.py
index 10a5fa522..e31aa1e3c 100644
--- a/src/ydata_profiling/report/presentation/flavours/flavours.py
+++ b/src/ydata_profiling/report/presentation/flavours/flavours.py
@@ -1,26 +1,32 @@
 """
     Flavours registry information
 """
+from typing import Callable, Dict, Type
+
 from ydata_profiling.report.presentation.core import Root
 from ydata_profiling.report.presentation.core.renderable import Renderable
 
-_FLAVOUR_REGISTRY: dict = {}
+_FlavourMapping = Dict[Type[Renderable], Type[Renderable]]
+_FLAVOUR_REGISTRY: Dict[str, _FlavourMapping] = {}
 
 
-def register_flavour(name: str, mapping: dict) -> None:
+def register_flavour(name: str, mapping: _FlavourMapping) -> None:
     _FLAVOUR_REGISTRY[name] = mapping
 
 
-def get_flavour_mapping(name: str) -> dict:
+def get_flavour_mapping(name: str) -> _FlavourMapping:
     if name not in _FLAVOUR_REGISTRY:
         raise ValueError(f"Flavour '{name}' is not registered.")
     return _FLAVOUR_REGISTRY[name]
 
 
+_FlavourFunc = Callable[[Renderable], Renderable]
+
+
 def apply_renderable_mapping(
-    mapping: dict,
+    mapping: _FlavourMapping,
     structure: Renderable,
-    flavour_func,  # noqa: ANN001
+    flavour_func: _FlavourFunc,
 ) -> None:
     mapping[type(structure)].convert_to_class(structure, flavour_func)
 
@@ -29,7 +35,7 @@ def HTMLReport(structure: Root) -> Root:
     from ydata_profiling.report.presentation.flavours import flavour_html  # noqa: F401
 
     mapping = get_flavour_mapping("html")
-    apply_renderable_mapping(mapping, structure, flavour_func=HTMLReport)
+    apply_renderable_mapping(mapping, structure, flavour_func=HTMLReport)  # type: ignore
     return structure
 
 
@@ -39,5 +45,5 @@ def WidgetReport(structure: Root) -> Root:
     )
 
     mapping = get_flavour_mapping("widget")
-    apply_renderable_mapping(mapping, structure, flavour_func=WidgetReport)
+    apply_renderable_mapping(mapping, structure, flavour_func=WidgetReport)  # type: ignore
     return structure

From 307270e5d995c4a7c15e58a3c803b589086f0ff6 Mon Sep 17 00:00:00 2001
From: Pkcha <pkcha@PkchadeMacBook-Air.local>
Date: Sun, 12 Apr 2026 23:15:01 +0800
Subject: [PATCH 10/11] feat: initial release

---
 src/ydata_profiling/model/handler.py          | 67 ++++++++++---------
 .../pandas/describe_categorical_pandas.py     |  1 +
 .../model/spark/missing_spark.py              |  2 -
 .../report/presentation/core/collapse.py      |  2 +-
 .../report/presentation/core/container.py     |  2 +-
 .../report/presentation/core/dropdown.py      |  2 +-
 .../report/presentation/core/renderable.py    |  4 +-
 .../report/presentation/core/root.py          |  4 +-
 .../report/presentation/core/variable.py      |  6 +-
 .../presentation/flavours/flavour_html.py     |  7 +-
 .../presentation/flavours/flavour_widget.py   |  7 +-
 .../report/presentation/flavours/flavours.py  | 20 +++---
 12 files changed, 57 insertions(+), 67 deletions(-)

diff --git a/src/ydata_profiling/model/handler.py b/src/ydata_profiling/model/handler.py
index 13722e1cb..4ea43192a 100644
--- a/src/ydata_profiling/model/handler.py
+++ b/src/ydata_profiling/model/handler.py
@@ -1,32 +1,33 @@
 """
     Auxiliary handler methods for data summary extraction
 """
-from typing import Any, Callable, Dict, List, Sequence, Tuple, TypeVar, cast
+from typing import Any, Callable, Dict, List, Sequence, Tuple, Union
 
 import networkx as nx
 from visions import VisionsTypeset
 
-T = TypeVar("T")
-SummaryFunction = Callable[..., Tuple[Any, ...]]
 
-
-def compose(functions: Sequence[SummaryFunction]) -> SummaryFunction:
+def compose(functions: Sequence[Callable]) -> Callable:
     """
     Compose a sequence of functions.
 
-    :param functions: sequence of functions
-    :return: combined function applying all functions in order.
+    Each function in the sequence receives the result of the previous function.
+    Functions are expected to accept and return tuples for proper chaining.
+
+    :param functions: sequence of functions that accept and return tuples
+    :return: combined function applying all functions in order
     """
 
     def composed_function(*args: Any) -> Tuple[Any, ...]:
-        result: Tuple[Any, ...] = args
+        result: Union[Tuple[Any, ...], Any] = args
         for func in functions:
-            step_result = func(*result)
-            if not isinstance(step_result, tuple):
-                result = (step_result,)
+            if isinstance(result, tuple):
+                result = func(*result)
             else:
-                result = step_result
-        return result
+                result = func(result)
+        if isinstance(result, tuple):
+            return result
+        return (result,)
 
     return composed_function
 
@@ -34,17 +35,18 @@ def composed_function(*args: Any) -> Tuple[Any, ...]:
 class Handler:
     """A generic handler
 
-    Allows any custom mapping between data types and functions
+    Allows any custom mapping between data types and functions.
+    Functions are composed based on the type hierarchy defined in the typeset.
     """
 
     def __init__(
         self,
-        mapping: Dict[str, List[SummaryFunction]],
+        mapping: Dict[str, List[Callable]],
         typeset: VisionsTypeset,
         *args: Any,
-        **kwargs: Any,
-    ) -> None:
-        self.mapping: Dict[str, List[SummaryFunction]] = mapping
+        **kwargs: Any
+    ):
+        self.mapping = mapping
         self.typeset = typeset
         self._complete_dag()
 
@@ -52,28 +54,27 @@ def _complete_dag(self) -> None:
         for from_type, to_type in nx.topological_sort(
             nx.line_graph(self.typeset.base_graph)
         ):
-            from_type_str = str(from_type)
-            to_type_str = str(to_type)
-            
-            if from_type_str not in self.mapping:
-                continue
-                
-            if to_type_str in self.mapping:
-                self.mapping[to_type_str] = (
-                    self.mapping[from_type_str] + self.mapping[to_type_str]
-                )
-            else:
-                self.mapping[to_type_str] = self.mapping[from_type_str].copy()
+            from_key = str(from_type)
+            to_key = str(to_type)
+            self.mapping[to_key] = self.mapping.get(from_key, []) + self.mapping.get(
+                to_key, []
+            )
 
     def handle(self, dtype: str, *args: Any, **kwargs: Any) -> Dict[str, Any]:
         """
-        Returns:
-            object: a tuple containing the config, the dataset series and the summary extracted
+        Execute the handler chain for the given data type.
+
+        :param dtype: the data type to handle
+        :param args: arguments to pass to the handler functions
+        :param kwargs: keyword arguments (currently unused but reserved for extensibility)
+        :return: a dictionary containing the summary extracted from the data
         """
         funcs = self.mapping.get(dtype, [])
         op = compose(funcs)
         result = op(*args)
-        return cast(Dict[str, Any], result[-1])
+        if result:
+            return result[-1] if isinstance(result[-1], dict) else {}
+        return {}
 
 
 def get_render_map() -> Dict[str, Callable]:
diff --git a/src/ydata_profiling/model/pandas/describe_categorical_pandas.py b/src/ydata_profiling/model/pandas/describe_categorical_pandas.py
index 568aa7a9c..a53f16d91 100644
--- a/src/ydata_profiling/model/pandas/describe_categorical_pandas.py
+++ b/src/ydata_profiling/model/pandas/describe_categorical_pandas.py
@@ -27,6 +27,7 @@ def get_character_counts_vc(vc: pd.Series) -> pd.Series:
     if len(counts) > 0:
         counts = counts.groupby(level=0, sort=False).sum()
         counts = counts.sort_values(ascending=False)
+        # FIXME: correct in split, below should be zero: print(counts.loc[''])
         counts = counts[counts.index.str.len() > 0]
     return counts
 
diff --git a/src/ydata_profiling/model/spark/missing_spark.py b/src/ydata_profiling/model/spark/missing_spark.py
index 02529dceb..5ad367e6e 100644
--- a/src/ydata_profiling/model/spark/missing_spark.py
+++ b/src/ydata_profiling/model/spark/missing_spark.py
@@ -82,11 +82,9 @@ def missing_matrix(config: Settings, df: DataFrame) -> str:
 def missing_heatmap(config: Settings, df: DataFrame) -> str:
     df = MissingnoBarSparkPatch(df, columns=df.columns, original_df_size=df.count())
 
-    # Remove completely filled or completely empty variables.
     columns = [i for i, n in enumerate(np.var(df.isnull(), axis="rows")) if n > 0]
     df = df.iloc[:, columns]
 
-    # Create and mask the correlation matrix. Construct the base heatmap.
     corr_mat = df.isnull().corr()
     mask = np.zeros_like(corr_mat)
     mask[np.triu_indices_from(mask)] = True
diff --git a/src/ydata_profiling/report/presentation/core/collapse.py b/src/ydata_profiling/report/presentation/core/collapse.py
index 9bc393602..a7dba34f1 100644
--- a/src/ydata_profiling/report/presentation/core/collapse.py
+++ b/src/ydata_profiling/report/presentation/core/collapse.py
@@ -6,7 +6,7 @@
 
 
 class Collapse(ItemRenderer):
-    def __init__(self, button: ToggleButton, item: Renderable, **kwargs: Any):
+    def __init__(self, button: ToggleButton, item: Renderable, **kwargs):
         super().__init__("collapse", {"button": button, "item": item}, **kwargs)
 
     def __repr__(self) -> str:
diff --git a/src/ydata_profiling/report/presentation/core/container.py b/src/ydata_profiling/report/presentation/core/container.py
index d4ed121ca..c82f06266 100644
--- a/src/ydata_profiling/report/presentation/core/container.py
+++ b/src/ydata_profiling/report/presentation/core/container.py
@@ -13,7 +13,7 @@ def __init__(
         anchor_id: Optional[str] = None,
         classes: Optional[str] = None,
         oss: Optional[bool] = None,
-        **kwargs: Any,
+        **kwargs,
     ):
         args = {"items": items, "nested": nested}
         args.update(**kwargs)
diff --git a/src/ydata_profiling/report/presentation/core/dropdown.py b/src/ydata_profiling/report/presentation/core/dropdown.py
index 4c9dfb3a9..c1c2f274e 100644
--- a/src/ydata_profiling/report/presentation/core/dropdown.py
+++ b/src/ydata_profiling/report/presentation/core/dropdown.py
@@ -15,7 +15,7 @@ def __init__(
         anchor_id: str,
         classes: list,
         is_row: bool,
-        **kwargs: Any,
+        **kwargs
     ):
         super().__init__(
             "dropdown",
diff --git a/src/ydata_profiling/report/presentation/core/renderable.py b/src/ydata_profiling/report/presentation/core/renderable.py
index 028151532..84265c1c6 100644
--- a/src/ydata_profiling/report/presentation/core/renderable.py
+++ b/src/ydata_profiling/report/presentation/core/renderable.py
@@ -1,5 +1,5 @@
 from abc import ABC, abstractmethod
-from typing import Any, Callable, Dict, Optional
+from typing import Any, Dict, Optional
 
 
 class Renderable(ABC):
@@ -38,5 +38,5 @@ def __str__(self) -> str:
         return self.__class__.__name__
 
     @classmethod
-    def convert_to_class(cls, obj: "Renderable", flavour_func: Callable) -> None:
+    def convert_to_class(cls, obj: "Renderable", flavour_func) -> None:
         obj.__class__ = cls
diff --git a/src/ydata_profiling/report/presentation/core/root.py b/src/ydata_profiling/report/presentation/core/root.py
index 6e96e7f14..0c3f1e3c9 100644
--- a/src/ydata_profiling/report/presentation/core/root.py
+++ b/src/ydata_profiling/report/presentation/core/root.py
@@ -11,7 +11,7 @@ class Root(ItemRenderer):
     """
 
     def __init__(
-        self, name: str, body: Renderable, footer: Renderable, style: Style, **kwargs: Any
+        self, name: str, body: Renderable, footer: Renderable, style: Style, **kwargs
     ):
         super().__init__(
             "report",
@@ -23,7 +23,7 @@ def __init__(
     def __repr__(self) -> str:
         return "Root"
 
-    def render(self, **kwargs: Any) -> Any:
+    def render(self, **kwargs) -> Any:
         raise NotImplementedError()
 
     @classmethod
diff --git a/src/ydata_profiling/report/presentation/core/variable.py b/src/ydata_profiling/report/presentation/core/variable.py
index 34bd110a8..cdf063202 100644
--- a/src/ydata_profiling/report/presentation/core/variable.py
+++ b/src/ydata_profiling/report/presentation/core/variable.py
@@ -10,13 +10,13 @@ def __init__(
         top: Renderable,
         bottom: Optional[Renderable] = None,
         ignore: bool = False,
-        **kwargs: Any,
+        **kwargs,
     ):
         super().__init__(
             "variable", {"top": top, "bottom": bottom, "ignore": ignore}, **kwargs
         )
 
-    def __str__(self) -> str:
+    def __str__(self):
         top_text = str(self.content["top"]).replace("\n", "\n\t")
         bottom_text = str(self.content["bottom"]).replace("\n", "\n\t")
 
@@ -25,7 +25,7 @@ def __str__(self) -> str:
         text += f"- bottom: {bottom_text}"
         return text
 
-    def __repr__(self) -> str:
+    def __repr__(self):
         return "Variable"
 
     def render(self) -> Any:
diff --git a/src/ydata_profiling/report/presentation/flavours/flavour_html.py b/src/ydata_profiling/report/presentation/flavours/flavour_html.py
index 7ad2b9c1d..b342ff32f 100644
--- a/src/ydata_profiling/report/presentation/flavours/flavour_html.py
+++ b/src/ydata_profiling/report/presentation/flavours/flavour_html.py
@@ -41,10 +41,7 @@
     HTMLVariableInfo,
 )
 
-from typing import cast
-from ydata_profiling.report.presentation.flavours.flavours import _FlavourMapping
-
-html_mapping = cast(_FlavourMapping, {
+html_mapping = {
     Container: HTMLContainer,
     Variable: HTMLVariable,
     VariableInfo: HTMLVariableInfo,
@@ -62,6 +59,6 @@
     Collapse: HTMLCollapse,
     CorrelationTable: HTMLCorrelationTable,
     Scores: HTMLScores,
-})
+}
 
 register_flavour("html", html_mapping)
diff --git a/src/ydata_profiling/report/presentation/flavours/flavour_widget.py b/src/ydata_profiling/report/presentation/flavours/flavour_widget.py
index 29ff1ad2c..b95d724f1 100644
--- a/src/ydata_profiling/report/presentation/flavours/flavour_widget.py
+++ b/src/ydata_profiling/report/presentation/flavours/flavour_widget.py
@@ -39,10 +39,7 @@
     WidgetVariableInfo,
 )
 
-from typing import cast
-from ydata_profiling.report.presentation.flavours.flavours import _FlavourMapping
-
-widget_mapping = cast(_FlavourMapping, {
+widget_mapping = {
     Container: WidgetContainer,
     Variable: WidgetVariable,
     VariableInfo: WidgetVariableInfo,
@@ -59,6 +56,6 @@
     ToggleButton: WidgetToggleButton,
     Collapse: WidgetCollapse,
     CorrelationTable: WidgetCorrelationTable,
-})
+}
 
 register_flavour("widget", widget_mapping)
diff --git a/src/ydata_profiling/report/presentation/flavours/flavours.py b/src/ydata_profiling/report/presentation/flavours/flavours.py
index e31aa1e3c..5b7551d99 100644
--- a/src/ydata_profiling/report/presentation/flavours/flavours.py
+++ b/src/ydata_profiling/report/presentation/flavours/flavours.py
@@ -1,32 +1,28 @@
 """
     Flavours registry information
 """
-from typing import Callable, Dict, Type
+from typing import Callable
 
 from ydata_profiling.report.presentation.core import Root
 from ydata_profiling.report.presentation.core.renderable import Renderable
 
-_FlavourMapping = Dict[Type[Renderable], Type[Renderable]]
-_FLAVOUR_REGISTRY: Dict[str, _FlavourMapping] = {}
+_FLAVOUR_REGISTRY: dict = {}
 
 
-def register_flavour(name: str, mapping: _FlavourMapping) -> None:
+def register_flavour(name: str, mapping: dict) -> None:
     _FLAVOUR_REGISTRY[name] = mapping
 
 
-def get_flavour_mapping(name: str) -> _FlavourMapping:
+def get_flavour_mapping(name: str) -> dict:
     if name not in _FLAVOUR_REGISTRY:
         raise ValueError(f"Flavour '{name}' is not registered.")
     return _FLAVOUR_REGISTRY[name]
 
 
-_FlavourFunc = Callable[[Renderable], Renderable]
-
-
 def apply_renderable_mapping(
-    mapping: _FlavourMapping,
+    mapping: dict,
     structure: Renderable,
-    flavour_func: _FlavourFunc,
+    flavour_func: Callable[[Renderable], None],
 ) -> None:
     mapping[type(structure)].convert_to_class(structure, flavour_func)
 
@@ -35,7 +31,7 @@ def HTMLReport(structure: Root) -> Root:
     from ydata_profiling.report.presentation.flavours import flavour_html  # noqa: F401
 
     mapping = get_flavour_mapping("html")
-    apply_renderable_mapping(mapping, structure, flavour_func=HTMLReport)  # type: ignore
+    apply_renderable_mapping(mapping, structure, flavour_func=HTMLReport)
     return structure
 
 
@@ -45,5 +41,5 @@ def WidgetReport(structure: Root) -> Root:
     )
 
     mapping = get_flavour_mapping("widget")
-    apply_renderable_mapping(mapping, structure, flavour_func=WidgetReport)  # type: ignore
+    apply_renderable_mapping(mapping, structure, flavour_func=WidgetReport)
     return structure

From ae0223356748b7299c3c8c24048337647fa7a658 Mon Sep 17 00:00:00 2001
From: Pkcha <pkcha@PkchadeMacBook-Air.local>
Date: Sun, 12 Apr 2026 23:34:50 +0800
Subject: [PATCH 11/11] feat: initial release

---
 src/ydata_profiling/model/handler.py          | 62 ++++++++----------
 .../model/spark/missing_spark.py              | 64 +++++++++++--------
 .../report/presentation/core/renderable.py    |  9 ++-
 .../report/presentation/flavours/flavours.py  | 27 ++++++--
 .../presentation/flavours/html/table.py       |  2 +-
 .../presentation/flavours/html/templates.py   |  1 +
 .../presentation/frequency_table_utils.py     |  9 +--
 7 files changed, 100 insertions(+), 74 deletions(-)

diff --git a/src/ydata_profiling/model/handler.py b/src/ydata_profiling/model/handler.py
index 4ea43192a..e9ba6a39a 100644
--- a/src/ydata_profiling/model/handler.py
+++ b/src/ydata_profiling/model/handler.py
@@ -1,33 +1,31 @@
 """
     Auxiliary handler methods for data summary extraction
 """
-from typing import Any, Callable, Dict, List, Sequence, Tuple, Union
+from typing import Any, Callable, Dict, List, Sequence, Tuple
 
 import networkx as nx
 from visions import VisionsTypeset
 
 
-def compose(functions: Sequence[Callable]) -> Callable:
+def compose(functions: Sequence[Callable[..., Any]]) -> Callable[..., Tuple[Any, ...]]:
     """
     Compose a sequence of functions.
 
-    Each function in the sequence receives the result of the previous function.
-    Functions are expected to accept and return tuples for proper chaining.
-
-    :param functions: sequence of functions that accept and return tuples
-    :return: combined function applying all functions in order
+    Each function in the sequence should accept the arguments passed to the composed
+    function and return either a single value or a tuple of values.
+    
+    :param functions: sequence of functions
+    :return: combined function applying all functions in order.
     """
 
     def composed_function(*args: Any) -> Tuple[Any, ...]:
-        result: Union[Tuple[Any, ...], Any] = args
+        result: Tuple[Any, ...] = args
         for func in functions:
-            if isinstance(result, tuple):
-                result = func(*result)
-            else:
-                result = func(result)
-        if isinstance(result, tuple):
-            return result
-        return (result,)
+            result = func(*result)
+            # Ensure result is always a tuple for consistent unpacking
+            if not isinstance(result, tuple):
+                result = (result,)
+        return result
 
     return composed_function
 
@@ -35,13 +33,12 @@ def composed_function(*args: Any) -> Tuple[Any, ...]:
 class Handler:
     """A generic handler
 
-    Allows any custom mapping between data types and functions.
-    Functions are composed based on the type hierarchy defined in the typeset.
+    Allows any custom mapping between data types and functions.
     """
 
     def __init__(
         self,
-        mapping: Dict[str, List[Callable]],
+        mapping: Dict[str, List[Callable[..., Any]]],
         typeset: VisionsTypeset,
         *args: Any,
         **kwargs: Any
@@ -54,33 +51,28 @@ def _complete_dag(self) -> None:
         for from_type, to_type in nx.topological_sort(
             nx.line_graph(self.typeset.base_graph)
         ):
-            from_key = str(from_type)
-            to_key = str(to_type)
-            self.mapping[to_key] = self.mapping.get(from_key, []) + self.mapping.get(
-                to_key, []
+            self.mapping[str(to_type)] = (
+                self.mapping[str(from_type)] + self.mapping[str(to_type)]
             )
 
-    def handle(self, dtype: str, *args: Any, **kwargs: Any) -> Dict[str, Any]:
+    def handle(self, dtype: str, *args: Any, **kwargs: Any) -> Any:
         """
-        Execute the handler chain for the given data type.
-
-        :param dtype: the data type to handle
-        :param args: arguments to pass to the handler functions
-        :param kwargs: keyword arguments (currently unused but reserved for extensibility)
-        :return: a dictionary containing the summary extracted from the data
+        Execute the handler chain for the given dtype.
+
+        :param dtype: The data type to handle
+        :param args: Positional arguments to pass to the handler chain (kwargs are not forwarded)
+        :return: The last element of the result tuple from the handler chain
         """
         funcs = self.mapping.get(dtype, [])
         op = compose(funcs)
-        result = op(*args)
-        if result:
-            return result[-1] if isinstance(result[-1], dict) else {}
-        return {}
+        summary = op(*args)[-1]
+        return summary
 
 
-def get_render_map() -> Dict[str, Callable]:
+def get_render_map() -> Dict[str, Callable[..., Any]]:
     import ydata_profiling.report.structure.variables as render_algorithms
 
-    render_map = {
+    render_map: Dict[str, Callable[..., Any]] = {
         "Boolean": render_algorithms.render_boolean,
         "Numeric": render_algorithms.render_real,
         "Complex": render_algorithms.render_complex,
diff --git a/src/ydata_profiling/model/spark/missing_spark.py b/src/ydata_profiling/model/spark/missing_spark.py
index 5ad367e6e..384670232 100644
--- a/src/ydata_profiling/model/spark/missing_spark.py
+++ b/src/ydata_profiling/model/spark/missing_spark.py
@@ -13,47 +13,47 @@
 
 class MissingnoBarSparkPatch:
     """
-    Technical Debt :
-    This is a monkey patching object that allows usage of the library missingno as is for spark dataframes.
-    This is because missingno library's bar function always applies a isnull().sum() on dataframes in the visualisation
-    function, instead of allowing just values counts as an entry point. Thus, in order to calculate the
-    missing values dataframe in spark, we compute it first, then wrap it in this MissingnoBarSparkPatch object which
-    will be unwrapped by missingno and return the pre-computed value counts.
-    The best fix to this currently terrible patch is to submit a PR to missingno to separate preprocessing function
-    (compute value counts from df) and visualisation functions such that we can call the visualisation directly.
-    Unfortunately, the missingno library people have not really responded to our issues on gitlab.
-    See https://github.com/ResidentMario/missingno/issues/119.
-    We could also fork the missingno library and implement some of the code in our database, but that feels
-    like bad practice as well.
+    Adapter class to enable missingno library compatibility with Spark DataFrames.
+    
+    The missingno library's visualization functions internally call isnull().sum() 
+    on dataframes. For Spark DataFrames, we pre-compute the null counts and wrap
+    them in this adapter to provide the expected interface.
+    
+    Note: This is a workaround for missingno's lack of separation between
+    data preprocessing and visualization. See:
+    https://github.com/ResidentMario/missingno/issues/119
     """
 
     def __init__(
-        self, df: DataFrame, columns: List[str] = None, original_df_size: int = None
+        self, 
+        df: DataFrame, 
+        columns: Optional[List[str]] = None, 
+        original_df_size: Optional[int] = None
     ):
         self.df = df
         self.columns = columns
         self.original_df_size = original_df_size
 
-    def isnull(self) -> Any:
-        """
-        This patches the .isnull().sum() function called by missingno library
-        """
-        return self  # return self to patch .sum() function
+    def isnull(self) -> "MissingnoBarSparkPatch":
+        """Returns self to enable chained .isnull().sum() calls."""
+        return self
 
     def sum(self) -> DataFrame:
-        """
-        This patches the .sum() function called by missingno library
-        """
-        return self.df  # return unwrapped dataframe
+        """Returns the pre-computed null counts dataframe."""
+        return self.df
 
     def __len__(self) -> Optional[int]:
-        """
-        This patches the len(df) function called by missingno library
-        """
+        """Returns the original dataframe size."""
         return self.original_df_size
 
 
 def missing_bar(config: Settings, df: DataFrame) -> str:
+    """Generate a missing values bar chart for Spark DataFrame.
+    
+    :param config: Report settings
+    :param df: Spark DataFrame
+    :return: HTML string of the bar chart
+    """
     import pyspark.sql.functions as F
 
     data_nan_counts = (
@@ -70,6 +70,12 @@ def missing_bar(config: Settings, df: DataFrame) -> str:
 
 
 def missing_matrix(config: Settings, df: DataFrame) -> str:
+    """Generate a missing values matrix visualization for Spark DataFrame.
+    
+    :param config: Report settings
+    :param df: Spark DataFrame
+    :return: HTML string of the matrix visualization
+    """
     df = MissingnoBarSparkPatch(df, columns=df.columns, original_df_size=df.count())
     return plot_missing_matrix(
         config,
@@ -80,11 +86,19 @@ def missing_matrix(config: Settings, df: DataFrame) -> str:
 
 
 def missing_heatmap(config: Settings, df: DataFrame) -> str:
+    """Generate a missing values heatmap for Spark DataFrame.
+    
+    :param config: Report settings
+    :param df: Spark DataFrame
+    :return: HTML string of the heatmap
+    """
     df = MissingnoBarSparkPatch(df, columns=df.columns, original_df_size=df.count())
 
+    # Remove completely filled or completely empty variables.
     columns = [i for i, n in enumerate(np.var(df.isnull(), axis="rows")) if n > 0]
     df = df.iloc[:, columns]
 
+    # Create and mask the correlation matrix. Construct the base heatmap.
     corr_mat = df.isnull().corr()
     mask = np.zeros_like(corr_mat)
     mask[np.triu_indices_from(mask)] = True
diff --git a/src/ydata_profiling/report/presentation/core/renderable.py b/src/ydata_profiling/report/presentation/core/renderable.py
index 84265c1c6..1040c9656 100644
--- a/src/ydata_profiling/report/presentation/core/renderable.py
+++ b/src/ydata_profiling/report/presentation/core/renderable.py
@@ -1,5 +1,5 @@
 from abc import ABC, abstractmethod
-from typing import Any, Dict, Optional
+from typing import Any, Callable, Dict, Optional
 
 
 class Renderable(ABC):
@@ -38,5 +38,10 @@ def __str__(self) -> str:
         return self.__class__.__name__
 
     @classmethod
-    def convert_to_class(cls, obj: "Renderable", flavour_func) -> None:
+    def convert_to_class(cls, obj: "Renderable", flavour_func: Callable[["Renderable"], None]) -> None:
+        """Convert the object's class to this class.
+
+        :param obj: The renderable object to convert
+        :param flavour_func: Flavour function for nested items; not called by this implementation
+        """
         obj.__class__ = cls
diff --git a/src/ydata_profiling/report/presentation/flavours/flavours.py b/src/ydata_profiling/report/presentation/flavours/flavours.py
index 5b7551d99..547a7a758 100644
--- a/src/ydata_profiling/report/presentation/flavours/flavours.py
+++ b/src/ydata_profiling/report/presentation/flavours/flavours.py
@@ -1,29 +1,46 @@
 """
     Flavours registry information
 """
-from typing import Callable
+from typing import Callable, Dict, Type
 
 from ydata_profiling.report.presentation.core import Root
 from ydata_profiling.report.presentation.core.renderable import Renderable
 
-_FLAVOUR_REGISTRY: dict = {}
+_FLAVOUR_REGISTRY: Dict[str, Dict[Type[Renderable], Type[Renderable]]] = {}
 
 
-def register_flavour(name: str, mapping: dict) -> None:
+def register_flavour(name: str, mapping: Dict[Type[Renderable], Type[Renderable]]) -> None:
+    """Register a flavour mapping.
+    
+    :param name: The flavour name
+    :param mapping: Dictionary mapping core renderable types to flavour-specific types
+    """
     _FLAVOUR_REGISTRY[name] = mapping
 
 
-def get_flavour_mapping(name: str) -> dict:
+def get_flavour_mapping(name: str) -> Dict[Type[Renderable], Type[Renderable]]:
+    """Get a registered flavour mapping.
+    
+    :param name: The flavour name
+    :return: The flavour mapping dictionary
+    :raises ValueError: If the flavour is not registered
+    """
     if name not in _FLAVOUR_REGISTRY:
         raise ValueError(f"Flavour '{name}' is not registered.")
     return _FLAVOUR_REGISTRY[name]
 
 
 def apply_renderable_mapping(
-    mapping: dict,
+    mapping: Dict[Type[Renderable], Type[Renderable]],
     structure: Renderable,
     flavour_func: Callable[[Renderable], None],
 ) -> None:
+    """Apply flavour mapping to a renderable structure.
+    
+    :param mapping: The flavour mapping dictionary
+    :param structure: The renderable structure to transform
+    :param flavour_func: The flavour application function for recursive calls
+    """
     mapping[type(structure)].convert_to_class(structure, flavour_func)
 
 
diff --git a/src/ydata_profiling/report/presentation/flavours/html/table.py b/src/ydata_profiling/report/presentation/flavours/html/table.py
index c5d71412b..59aa0eccf 100644
--- a/src/ydata_profiling/report/presentation/flavours/html/table.py
+++ b/src/ydata_profiling/report/presentation/flavours/html/table.py
@@ -1,4 +1,4 @@
-from ydata_profiling.report.presentation.core.table import Table
+from ydata_profiling.report.presentation.core import Table
 from ydata_profiling.report.presentation.flavours.html import templates
 
 
diff --git a/src/ydata_profiling/report/presentation/flavours/html/templates.py b/src/ydata_profiling/report/presentation/flavours/html/templates.py
index 85e24a46a..30fcecda7 100644
--- a/src/ydata_profiling/report/presentation/flavours/html/templates.py
+++ b/src/ydata_profiling/report/presentation/flavours/html/templates.py
@@ -1,6 +1,7 @@
 """Contains all templates used for generating the HTML profile report"""
 import shutil
 from pathlib import Path
+from typing import Any
 
 import jinja2
 
diff --git a/src/ydata_profiling/report/presentation/frequency_table_utils.py b/src/ydata_profiling/report/presentation/frequency_table_utils.py
index f194bc514..6517cf621 100644
--- a/src/ydata_profiling/report/presentation/frequency_table_utils.py
+++ b/src/ydata_profiling/report/presentation/frequency_table_utils.py
@@ -7,8 +7,6 @@
 def _frequency_table(
     freqtable: pd.Series, n: int, max_number_to_print: int
 ) -> List[Dict[str, Any]]:
-    # TODO: replace '' by '(Empty)' ?
-
     if max_number_to_print > n:
         max_number_to_print = n
 
@@ -26,7 +24,6 @@ def _frequency_table(
 
     max_freq = max(freqtable.values[0], freq_other, freq_missing)
 
-    # TODO: Correctly sort missing and other
     # No values
     if max_freq == 0:
         return []
@@ -77,7 +74,7 @@ def freq_table(
     freqtable: Union[pd.Series, List[pd.Series]],
     n: Union[int, List[int]],
     max_number_to_print: int,
-) -> Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]]:
+) -> List[List[Dict[str, Any]]]:
     """Render the rows for a frequency table (value, count).
 
     Args:
@@ -94,7 +91,7 @@ def freq_table(
             _frequency_table(v, n2, max_number_to_print) for v, n2 in zip(freqtable, n)
         ]
     else:
-        return [_frequency_table(freqtable, n, max_number_to_print)]  # type: ignore
+        return [_frequency_table(freqtable, n, max_number_to_print)]
 
 
 def _extreme_obs_table(
@@ -138,4 +135,4 @@ def extreme_obs_table(
             _extreme_obs_table(v, number_to_print, n1) for v, n1 in zip(freqtable, n)
         ]
 
-    return [_extreme_obs_table(freqtable, number_to_print, n)]  # type: ignore
+    return [_extreme_obs_table(freqtable, number_to_print, n)]