From c39e0b3407a4cf6dccc7c28314733b3191388ab7 Mon Sep 17 00:00:00 2001
From: Pkcha <pkcha@PkchadeMacBook-Air.local>
Date: Sun, 12 Apr 2026 15:59:52 +0800
Subject: [PATCH 1/5] refactor: fold Config helpers into Settings, relocate get_render_map, add Dockerfile

---
 Dockerfile                                    |  21 +++
 src/ydata_profiling/config.py                 | 142 ++++++++----------
 src/ydata_profiling/model/handler.py          |  20 +--
 src/ydata_profiling/model/summarizer.py       |  31 ++--
 src/ydata_profiling/profile_report.py         |   6 +-
 .../report/structure/__init__.py              |  22 +++
 .../report/structure/report.py                |   2 +-
 7 files changed, 130 insertions(+), 114 deletions(-)
 create mode 100644 Dockerfile

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 000000000..7bb15bf5d
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,21 @@
+FROM python:3.10-slim
+
+WORKDIR /app
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY . .
+
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir "setuptools>=72.0.0,<80.0.0" wheel && \
+    pip install --no-cache-dir . && \
+    pip install --no-cache-dir "setuptools>=72.0.0,<80.0.0" && \
+    pip install --no-cache-dir jupyter
+
+EXPOSE 8888
+
+CMD ["jupyter", "notebook", "--ip=0.0.0.0", "--port=8888", "--no-browser", "--allow-root"]
+
+
diff --git a/src/ydata_profiling/config.py b/src/ydata_profiling/config.py
index 09dbecdde..2bb934ed1 100644
--- a/src/ydata_profiling/config.py
+++ b/src/ydata_profiling/config.py
@@ -6,24 +6,7 @@
 import yaml
 from pydantic.v1 import BaseModel, BaseSettings, Field, PrivateAttr
 
-
-def _merge_dictionaries(dict1: dict, dict2: dict) -> dict:
-    """
-    Recursive merge dictionaries.
-
-    :param dict1: Base dictionary to merge.
-    :param dict2: Dictionary to merge on top of base dictionary.
-    :return: Merged dictionary
-    """
-    for key, val in dict1.items():
-        if isinstance(val, dict):
-            dict2_node = dict2.setdefault(key, {})
-            _merge_dictionaries(val, dict2_node)
-        else:
-            if key not in dict2:
-                dict2[key] = val
-
-    return dict2
+from ydata_profiling.utils.common import update
 
 
 class Dataset(BaseModel):
@@ -355,60 +338,7 @@ class Config:
     html: Html = Html()
     notebook: Notebook = Notebook()
 
-    def update(self, updates: dict) -> "Settings":
-        update = _merge_dictionaries(self.dict(), updates)
-        return self.parse_obj(self.copy(update=update))
-
-    @staticmethod
-    def from_file(config_file: Union[Path, str]) -> "Settings":
-        """Create a Settings object from a yaml file.
-
-        Args:
-            config_file: yaml file path
-        Returns:
-            Settings
-        """
-        with open(config_file) as f:
-            data = yaml.safe_load(f)
-
-        return Settings.parse_obj(data)
-
-
-class SparkSettings(Settings):
-    """
-    Setting class with the standard report configuration for Spark DataFrames
-    All the supported analysis are set to true
-    """
-
-    vars: Univariate = Univariate()
-
-    vars.num.low_categorical_threshold = 0
-
-    infer_dtypes: bool = False
-
-    correlations: Dict[str, Correlation] = {
-        "spearman": Correlation(key="spearman", calculate=True),
-        "pearson": Correlation(key="pearson", calculate=True),
-    }
-
-    correlation_table: bool = True
-
-    interactions: Interactions = Interactions()
-    interactions.continuous = False
-
-    missing_diagrams: Dict[str, bool] = {
-        "bar": False,
-        "matrix": False,
-        "dendrogram": False,
-        "heatmap": False,
-    }
-    samples: Samples = Samples()
-    samples.tail = 0
-    samples.random = 0
-
-
-class Config:
-    arg_groups: Dict[str, Any] = {
+    _arg_groups: Dict[str, Any] = {
         "sensitive": {
             "samples": None,
             "duplicates": None,
@@ -475,8 +405,8 @@ class Config:
 
     @staticmethod
     def get_arg_groups(key: str) -> dict:
-        kwargs = Config.arg_groups[key]
-        shorthand_args, _ = Config.shorthands(kwargs, split=False)
+        kwargs = Settings._arg_groups[key]
+        shorthand_args, _ = Settings.shorthands(kwargs, split=False)
         return shorthand_args
 
     @staticmethod
@@ -485,8 +415,8 @@ def shorthands(kwargs: dict, split: bool = True) -> Tuple[dict, dict]:
         if not split:
             shorthand_args = kwargs
         for key, value in list(kwargs.items()):
-            if value is None and key in Config._shorthands:
-                shorthand_args[key] = Config._shorthands[key]
+            if value is None and key in Settings._shorthands:
+                shorthand_args[key] = Settings._shorthands[key]
                 if split:
                     del kwargs[key]
 
@@ -494,3 +424,63 @@ def shorthands(kwargs: dict, split: bool = True) -> Tuple[dict, dict]:
             return shorthand_args, kwargs
         else:
             return shorthand_args, {}
+
+    def update(self, updates: dict) -> "Settings":
+        merged = update(self.dict().copy(), updates)
+        return self.parse_obj(self.copy(update=merged))
+
+    @staticmethod
+    def from_file(config_file: Union[Path, str]) -> "Settings":
+        """Create a Settings object from a yaml file.
+
+        Args:
+            config_file: yaml file path
+        Returns:
+            Settings
+        """
+        with open(config_file) as f:
+            data = yaml.safe_load(f)
+
+        return Settings.parse_obj(data)
+
+
+class SparkSettings(Settings):
+    """
+    Setting class with the standard report configuration for Spark DataFrames
+    All the supported analysis are set to true
+    """
+
+    vars: Univariate = Univariate()
+
+    vars.num.low_categorical_threshold = 0
+
+    infer_dtypes: bool = False
+
+    correlations: Dict[str, Correlation] = {
+        "spearman": Correlation(key="spearman", calculate=True),
+        "pearson": Correlation(key="pearson", calculate=True),
+    }
+
+    correlation_table: bool = True
+
+    interactions: Interactions = Interactions()
+    interactions.continuous = False
+
+    missing_diagrams: Dict[str, bool] = {
+        "bar": False,
+        "matrix": False,
+        "dendrogram": False,
+        "heatmap": False,
+    }
+    samples: Samples = Samples()
+    samples.tail = 0
+    samples.random = 0
+
+
+class Config(Settings):
+    """
+    Deprecated: Use Settings instead.
+    Backward compatibility alias for Settings class.
+    """
+
+    pass
diff --git a/src/ydata_profiling/model/handler.py b/src/ydata_profiling/model/handler.py
index 992c1840c..e983ce2a1 100644
--- a/src/ydata_profiling/model/handler.py
+++ b/src/ydata_profiling/model/handler.py
@@ -60,22 +60,6 @@ def handle(self, dtype: str, *args, **kwargs) -> dict:
         return summary
 
 
-def get_render_map() -> Dict[str, Callable]:
-    import ydata_profiling.report.structure.variables as render_algorithms
+from ydata_profiling.report.structure import get_render_map
 
-    render_map = {
-        "Boolean": render_algorithms.render_boolean,
-        "Numeric": render_algorithms.render_real,
-        "Complex": render_algorithms.render_complex,
-        "Text": render_algorithms.render_text,
-        "DateTime": render_algorithms.render_date,
-        "Categorical": render_algorithms.render_categorical,
-        "URL": render_algorithms.render_url,
-        "Path": render_algorithms.render_path,
-        "File": render_algorithms.render_file,
-        "Image": render_algorithms.render_image,
-        "Unsupported": render_algorithms.render_generic,
-        "TimeSeries": render_algorithms.render_timeseries,
-    }
-
-    return render_map
+__all__ = ["compose", "Handler", "get_render_map"]
diff --git a/src/ydata_profiling/model/summarizer.py b/src/ydata_profiling/model/summarizer.py
index d733a7d36..a57ed1c97 100644
--- a/src/ydata_profiling/model/summarizer.py
+++ b/src/ydata_profiling/model/summarizer.py
@@ -50,9 +50,8 @@ def summarize(
         return self.handle(str(dtype), config, series, {"type": str(dtype)})
 
 
-# Revisit this with the correct support for Spark as well.
 class ProfilingSummarizer(BaseSummarizer):
-    """A summarizer for Pandas DataFrames."""
+    """A summarizer supporting both Pandas and Spark DataFrames."""
 
     def __init__(self, typeset: VisionsTypeset, use_spark: bool = False):
         self.use_spark = use_spark and is_pyspark_installed()
@@ -65,7 +64,15 @@ def summary_map(self) -> Dict[str, List[Callable]]:
         return self._summary_map
 
     def _create_summary_map(self) -> Dict[str, List[Callable]]:
-        """Creates the summary map for Pandas summarization."""
+        """Creates the summary map based on the backend."""
+        common_map = {
+            "URL": [describe_url_1d],
+            "Path": [describe_path_1d],
+            "File": [describe_file_1d],
+            "Image": [describe_image_1d],
+            "TimeSeries": [describe_timeseries_1d],
+        }
+
         if self.use_spark:
             from ydata_profiling.model.spark import (
                 describe_boolean_1d_spark,
@@ -78,7 +85,7 @@ def _create_summary_map(self) -> Dict[str, List[Callable]]:
                 describe_text_1d_spark,
             )
 
-            summary_map = {
+            base_map = {
                 "Unsupported": [
                     describe_counts_spark,
                     describe_generic_spark,
@@ -89,14 +96,9 @@ def _create_summary_map(self) -> Dict[str, List[Callable]]:
                 "Text": [describe_text_1d_spark],
                 "Categorical": [describe_categorical_1d_spark],
                 "Boolean": [describe_boolean_1d_spark],
-                "URL": [describe_url_1d],
-                "Path": [describe_path_1d],
-                "File": [describe_file_1d],
-                "Image": [describe_image_1d],
-                "TimeSeries": [describe_timeseries_1d],
             }
         else:
-            summary_map = {
+            base_map = {
                 "Unsupported": [
                     pandas_describe_counts,
                     pandas_describe_generic,
@@ -107,13 +109,10 @@ def _create_summary_map(self) -> Dict[str, List[Callable]]:
                 "Text": [pandas_describe_text_1d],
                 "Categorical": [pandas_describe_categorical_1d],
                 "Boolean": [pandas_describe_boolean_1d],
-                "URL": [pandas_describe_url_1d],
-                "Path": [pandas_describe_path_1d],
-                "File": [pandas_describe_file_1d],
-                "Image": [pandas_describe_image_1d],
-                "TimeSeries": [pandas_describe_timeseries_1d],
             }
-        return summary_map
+
+        base_map.update(common_map)
+        return base_map
 
 
 def format_summary(summary: Union[BaseDescription, dict]) -> dict:
diff --git a/src/ydata_profiling/profile_report.py b/src/ydata_profiling/profile_report.py
index a7d6d9134..916b4681e 100644
--- a/src/ydata_profiling/profile_report.py
+++ b/src/ydata_profiling/profile_report.py
@@ -25,7 +25,7 @@
 from typeguard import typechecked
 from visions import VisionsTypeset
 
-from ydata_profiling.config import Config, Settings, SparkSettings
+from ydata_profiling.config import Settings, SparkSettings
 from ydata_profiling.expectations_report import ExpectationsReport
 from ydata_profiling.model import BaseDescription
 from ydata_profiling.model.alerts import AlertType
@@ -132,11 +132,11 @@ def __init__(
             cfg = Settings()
             for condition, key in groups:
                 if condition:
-                    cfg = cfg.update(Config.get_arg_groups(key))
+                    cfg = cfg.update(Settings.get_arg_groups(key))
             report_config = report_config.update(cfg.dict(exclude_defaults=True))
 
         if len(kwargs) > 0:
-            shorthands, kwargs = Config.shorthands(kwargs)
+            shorthands, kwargs = Settings.shorthands(kwargs)
             report_config = report_config.update(
                 Settings().update(shorthands).dict(exclude_defaults=True)
             )
diff --git a/src/ydata_profiling/report/structure/__init__.py b/src/ydata_profiling/report/structure/__init__.py
index 8324d248d..a2efd029a 100644
--- a/src/ydata_profiling/report/structure/__init__.py
+++ b/src/ydata_profiling/report/structure/__init__.py
@@ -1 +1,23 @@
 """Data structure for the report"""
+from typing import Callable, Dict
+
+
+def get_render_map() -> Dict[str, Callable]:
+    import ydata_profiling.report.structure.variables as render_algorithms
+
+    render_map = {
+        "Boolean": render_algorithms.render_boolean,
+        "Numeric": render_algorithms.render_real,
+        "Complex": render_algorithms.render_complex,
+        "Text": render_algorithms.render_text,
+        "DateTime": render_algorithms.render_date,
+        "Categorical": render_algorithms.render_categorical,
+        "URL": render_algorithms.render_url,
+        "Path": render_algorithms.render_path,
+        "File": render_algorithms.render_file,
+        "Image": render_algorithms.render_image,
+        "Unsupported": render_algorithms.render_generic,
+        "TimeSeries": render_algorithms.render_timeseries,
+    }
+
+    return render_map
diff --git a/src/ydata_profiling/report/structure/report.py b/src/ydata_profiling/report/structure/report.py
index 482b410b2..b64a41aae 100644
--- a/src/ydata_profiling/report/structure/report.py
+++ b/src/ydata_profiling/report/structure/report.py
@@ -7,7 +7,7 @@
 from ydata_profiling.config import Settings
 from ydata_profiling.model import BaseDescription
 from ydata_profiling.model.alerts import AlertType
-from ydata_profiling.model.handler import get_render_map
+from ydata_profiling.report.structure import get_render_map
 from ydata_profiling.report.presentation.core import (
     HTML,
     Collapse,

From 27a314be64b586f58de6a2956d456e2a3d03da1f Mon Sep 17 00:00:00 2001
From: Pkcha <pkcha@PkchadeMacBook-Air.local>
Date: Sun, 12 Apr 2026 17:14:23 +0800
Subject: [PATCH 2/5] revert: restore config.py, summarizer.py and profile_report.py to their pre-series state

---
 src/ydata_profiling/config.py           | 142 +++++++++++++-----------
 src/ydata_profiling/model/handler.py    |   2 -
 src/ydata_profiling/model/summarizer.py |  31 +++---
 src/ydata_profiling/profile_report.py   |   6 +-
 4 files changed, 95 insertions(+), 86 deletions(-)

diff --git a/src/ydata_profiling/config.py b/src/ydata_profiling/config.py
index 2bb934ed1..09dbecdde 100644
--- a/src/ydata_profiling/config.py
+++ b/src/ydata_profiling/config.py
@@ -6,7 +6,24 @@
 import yaml
 from pydantic.v1 import BaseModel, BaseSettings, Field, PrivateAttr
 
-from ydata_profiling.utils.common import update
+
+def _merge_dictionaries(dict1: dict, dict2: dict) -> dict:
+    """
+    Recursive merge dictionaries.
+
+    :param dict1: Base dictionary to merge.
+    :param dict2: Dictionary to merge on top of base dictionary.
+    :return: Merged dictionary
+    """
+    for key, val in dict1.items():
+        if isinstance(val, dict):
+            dict2_node = dict2.setdefault(key, {})
+            _merge_dictionaries(val, dict2_node)
+        else:
+            if key not in dict2:
+                dict2[key] = val
+
+    return dict2
 
 
 class Dataset(BaseModel):
@@ -338,7 +355,60 @@ class Config:
     html: Html = Html()
     notebook: Notebook = Notebook()
 
-    _arg_groups: Dict[str, Any] = {
+    def update(self, updates: dict) -> "Settings":
+        update = _merge_dictionaries(self.dict(), updates)
+        return self.parse_obj(self.copy(update=update))
+
+    @staticmethod
+    def from_file(config_file: Union[Path, str]) -> "Settings":
+        """Create a Settings object from a yaml file.
+
+        Args:
+            config_file: yaml file path
+        Returns:
+            Settings
+        """
+        with open(config_file) as f:
+            data = yaml.safe_load(f)
+
+        return Settings.parse_obj(data)
+
+
+class SparkSettings(Settings):
+    """
+    Setting class with the standard report configuration for Spark DataFrames
+    All the supported analysis are set to true
+    """
+
+    vars: Univariate = Univariate()
+
+    vars.num.low_categorical_threshold = 0
+
+    infer_dtypes: bool = False
+
+    correlations: Dict[str, Correlation] = {
+        "spearman": Correlation(key="spearman", calculate=True),
+        "pearson": Correlation(key="pearson", calculate=True),
+    }
+
+    correlation_table: bool = True
+
+    interactions: Interactions = Interactions()
+    interactions.continuous = False
+
+    missing_diagrams: Dict[str, bool] = {
+        "bar": False,
+        "matrix": False,
+        "dendrogram": False,
+        "heatmap": False,
+    }
+    samples: Samples = Samples()
+    samples.tail = 0
+    samples.random = 0
+
+
+class Config:
+    arg_groups: Dict[str, Any] = {
         "sensitive": {
             "samples": None,
             "duplicates": None,
@@ -405,8 +475,8 @@ class Config:
 
     @staticmethod
     def get_arg_groups(key: str) -> dict:
-        kwargs = Settings._arg_groups[key]
-        shorthand_args, _ = Settings.shorthands(kwargs, split=False)
+        kwargs = Config.arg_groups[key]
+        shorthand_args, _ = Config.shorthands(kwargs, split=False)
         return shorthand_args
 
     @staticmethod
@@ -415,8 +485,8 @@ def shorthands(kwargs: dict, split: bool = True) -> Tuple[dict, dict]:
         if not split:
             shorthand_args = kwargs
         for key, value in list(kwargs.items()):
-            if value is None and key in Settings._shorthands:
-                shorthand_args[key] = Settings._shorthands[key]
+            if value is None and key in Config._shorthands:
+                shorthand_args[key] = Config._shorthands[key]
                 if split:
                     del kwargs[key]
 
@@ -424,63 +494,3 @@ def shorthands(kwargs: dict, split: bool = True) -> Tuple[dict, dict]:
             return shorthand_args, kwargs
         else:
             return shorthand_args, {}
-
-    def update(self, updates: dict) -> "Settings":
-        merged = update(self.dict().copy(), updates)
-        return self.parse_obj(self.copy(update=merged))
-
-    @staticmethod
-    def from_file(config_file: Union[Path, str]) -> "Settings":
-        """Create a Settings object from a yaml file.
-
-        Args:
-            config_file: yaml file path
-        Returns:
-            Settings
-        """
-        with open(config_file) as f:
-            data = yaml.safe_load(f)
-
-        return Settings.parse_obj(data)
-
-
-class SparkSettings(Settings):
-    """
-    Setting class with the standard report configuration for Spark DataFrames
-    All the supported analysis are set to true
-    """
-
-    vars: Univariate = Univariate()
-
-    vars.num.low_categorical_threshold = 0
-
-    infer_dtypes: bool = False
-
-    correlations: Dict[str, Correlation] = {
-        "spearman": Correlation(key="spearman", calculate=True),
-        "pearson": Correlation(key="pearson", calculate=True),
-    }
-
-    correlation_table: bool = True
-
-    interactions: Interactions = Interactions()
-    interactions.continuous = False
-
-    missing_diagrams: Dict[str, bool] = {
-        "bar": False,
-        "matrix": False,
-        "dendrogram": False,
-        "heatmap": False,
-    }
-    samples: Samples = Samples()
-    samples.tail = 0
-    samples.random = 0
-
-
-class Config(Settings):
-    """
-    Deprecated: Use Settings instead.
-    Backward compatibility alias for Settings class.
-    """
-
-    pass
diff --git a/src/ydata_profiling/model/handler.py b/src/ydata_profiling/model/handler.py
index e983ce2a1..bcca12a1c 100644
--- a/src/ydata_profiling/model/handler.py
+++ b/src/ydata_profiling/model/handler.py
@@ -60,6 +60,4 @@ def handle(self, dtype: str, *args, **kwargs) -> dict:
         return summary
 
 
-from ydata_profiling.report.structure import get_render_map
 
-__all__ = ["compose", "Handler", "get_render_map"]
diff --git a/src/ydata_profiling/model/summarizer.py b/src/ydata_profiling/model/summarizer.py
index a57ed1c97..d733a7d36 100644
--- a/src/ydata_profiling/model/summarizer.py
+++ b/src/ydata_profiling/model/summarizer.py
@@ -50,8 +50,9 @@ def summarize(
         return self.handle(str(dtype), config, series, {"type": str(dtype)})
 
 
+# Revisit this with the correct support for Spark as well.
 class ProfilingSummarizer(BaseSummarizer):
-    """A summarizer supporting both Pandas and Spark DataFrames."""
+    """A summarizer for Pandas DataFrames."""
 
     def __init__(self, typeset: VisionsTypeset, use_spark: bool = False):
         self.use_spark = use_spark and is_pyspark_installed()
@@ -64,15 +65,7 @@ def summary_map(self) -> Dict[str, List[Callable]]:
         return self._summary_map
 
     def _create_summary_map(self) -> Dict[str, List[Callable]]:
-        """Creates the summary map based on the backend."""
-        common_map = {
-            "URL": [describe_url_1d],
-            "Path": [describe_path_1d],
-            "File": [describe_file_1d],
-            "Image": [describe_image_1d],
-            "TimeSeries": [describe_timeseries_1d],
-        }
-
+        """Creates the summary map for Pandas summarization."""
         if self.use_spark:
             from ydata_profiling.model.spark import (
                 describe_boolean_1d_spark,
@@ -85,7 +78,7 @@ def _create_summary_map(self) -> Dict[str, List[Callable]]:
                 describe_text_1d_spark,
             )
 
-            base_map = {
+            summary_map = {
                 "Unsupported": [
                     describe_counts_spark,
                     describe_generic_spark,
@@ -96,9 +89,14 @@ def _create_summary_map(self) -> Dict[str, List[Callable]]:
                 "Text": [describe_text_1d_spark],
                 "Categorical": [describe_categorical_1d_spark],
                 "Boolean": [describe_boolean_1d_spark],
+                "URL": [describe_url_1d],
+                "Path": [describe_path_1d],
+                "File": [describe_file_1d],
+                "Image": [describe_image_1d],
+                "TimeSeries": [describe_timeseries_1d],
             }
         else:
-            base_map = {
+            summary_map = {
                 "Unsupported": [
                     pandas_describe_counts,
                     pandas_describe_generic,
@@ -109,10 +107,13 @@ def _create_summary_map(self) -> Dict[str, List[Callable]]:
                 "Text": [pandas_describe_text_1d],
                 "Categorical": [pandas_describe_categorical_1d],
                 "Boolean": [pandas_describe_boolean_1d],
+                "URL": [pandas_describe_url_1d],
+                "Path": [pandas_describe_path_1d],
+                "File": [pandas_describe_file_1d],
+                "Image": [pandas_describe_image_1d],
+                "TimeSeries": [pandas_describe_timeseries_1d],
             }
-
-        base_map.update(common_map)
-        return base_map
+        return summary_map
 
 
 def format_summary(summary: Union[BaseDescription, dict]) -> dict:
diff --git a/src/ydata_profiling/profile_report.py b/src/ydata_profiling/profile_report.py
index 916b4681e..a7d6d9134 100644
--- a/src/ydata_profiling/profile_report.py
+++ b/src/ydata_profiling/profile_report.py
@@ -25,7 +25,7 @@
 from typeguard import typechecked
 from visions import VisionsTypeset
 
-from ydata_profiling.config import Settings, SparkSettings
+from ydata_profiling.config import Config, Settings, SparkSettings
 from ydata_profiling.expectations_report import ExpectationsReport
 from ydata_profiling.model import BaseDescription
 from ydata_profiling.model.alerts import AlertType
@@ -132,11 +132,11 @@ def __init__(
             cfg = Settings()
             for condition, key in groups:
                 if condition:
-                    cfg = cfg.update(Settings.get_arg_groups(key))
+                    cfg = cfg.update(Config.get_arg_groups(key))
             report_config = report_config.update(cfg.dict(exclude_defaults=True))
 
         if len(kwargs) > 0:
-            shorthands, kwargs = Settings.shorthands(kwargs)
+            shorthands, kwargs = Config.shorthands(kwargs)
             report_config = report_config.update(
                 Settings().update(shorthands).dict(exclude_defaults=True)
             )

From 8d8f6b71b5f46178749d0b100ba9bc8cefbfb261 Mon Sep 17 00:00:00 2001
From: Pkcha <pkcha@PkchadeMacBook-Air.local>
Date: Sun, 12 Apr 2026 17:39:22 +0800
Subject: [PATCH 3/5] docs: document get_render_map, reword backend.py docstring, whitespace-only rewrite of handler.py

---
 src/ydata_profiling/model/handler.py          | 123 +++++++++---------
 .../report/structure/__init__.py              |   8 ++
 src/ydata_profiling/utils/backend.py          |   2 +-
 3 files changed, 69 insertions(+), 64 deletions(-)

diff --git a/src/ydata_profiling/model/handler.py b/src/ydata_profiling/model/handler.py
index bcca12a1c..aa36a811c 100644
--- a/src/ydata_profiling/model/handler.py
+++ b/src/ydata_profiling/model/handler.py
@@ -1,63 +1,60 @@
-"""
-    Auxiliary handler methods for data summary extraction
-"""
-from typing import Any, Callable, Dict, List, Sequence
-
-import networkx as nx
-from visions import VisionsTypeset
-
-
-def compose(functions: Sequence[Callable]) -> Callable:
-    """
-    Compose a sequence of functions.
-
-    :param functions: sequence of functions
-    :return: combined function applying all functions in order.
-    """
-
-    def composed_function(*args) -> List[Any]:
-        result = args  # Start with the input arguments
-        for func in functions:
-            result = func(*result) if isinstance(result, tuple) else func(result)
-        return result  # type: ignore
-
-    return composed_function  # type: ignore
-
-
-class Handler:
-    """A generic handler
-
-    Allows any custom mapping between data types and functions
-    """
-
-    def __init__(
-        self,
-        mapping: Dict[str, List[Callable]],
-        typeset: VisionsTypeset,
-        *args,
-        **kwargs
-    ):
-        self.mapping = mapping
-        self.typeset = typeset
-        self._complete_dag()
-
-    def _complete_dag(self) -> None:
-        for from_type, to_type in nx.topological_sort(
-            nx.line_graph(self.typeset.base_graph)
-        ):
-            self.mapping[str(to_type)] = (
-                self.mapping[str(from_type)] + self.mapping[str(to_type)]
-            )
-
-    def handle(self, dtype: str, *args, **kwargs) -> dict:
-        """
-        Returns:
-            object: a tuple containing the config, the dataset series and the summary extracted
-        """
-        funcs = self.mapping.get(dtype, [])
-        op = compose(funcs)
-        summary = op(*args)[-1]
-        return summary
-
-
-
+"""
+    Auxiliary handler methods for data summary extraction
+"""
+from typing import Any, Callable, Dict, List, Sequence
+
+import networkx as nx
+from visions import VisionsTypeset
+
+
+def compose(functions: Sequence[Callable]) -> Callable:
+    """
+    Compose a sequence of functions.
+
+    :param functions: sequence of functions
+    :return: combined function applying all functions in order.
+    """
+
+    def composed_function(*args) -> List[Any]:
+        result = args  # Start with the input arguments
+        for func in functions:
+            result = func(*result) if isinstance(result, tuple) else func(result)
+        return result  # type: ignore
+
+    return composed_function  # type: ignore
+
+
+class Handler:
+    """A generic handler
+
+    Allows any custom mapping between data types and functions
+    """
+
+    def __init__(
+        self,
+        mapping: Dict[str, List[Callable]],
+        typeset: VisionsTypeset,
+        *args,
+        **kwargs
+    ):
+        self.mapping = mapping
+        self.typeset = typeset
+        self._complete_dag()
+
+    def _complete_dag(self) -> None:
+        for from_type, to_type in nx.topological_sort(
+            nx.line_graph(self.typeset.base_graph)
+        ):
+            self.mapping[str(to_type)] = (
+                self.mapping[str(from_type)] + self.mapping[str(to_type)]
+            )
+
+    def handle(self, dtype: str, *args, **kwargs) -> dict:
+        """
+        Returns:
+            object: a tuple containing the config, the dataset series and the summary extracted
+        """
+        funcs = self.mapping.get(dtype, [])
+        op = compose(funcs)
+        summary = op(*args)[-1]
+        return summary
diff --git a/src/ydata_profiling/report/structure/__init__.py b/src/ydata_profiling/report/structure/__init__.py
index a2efd029a..7ba9c10c9 100644
--- a/src/ydata_profiling/report/structure/__init__.py
+++ b/src/ydata_profiling/report/structure/__init__.py
@@ -3,6 +3,14 @@
 
 
 def get_render_map() -> Dict[str, Callable]:
+    """Get the mapping of variable types to their render functions.
+
+    This function was moved from model.handler to report.structure to eliminate
+    the reverse dependency from model layer to report layer.
+
+    Returns:
+        Dictionary mapping type names to render functions.
+    """
     import ydata_profiling.report.structure.variables as render_algorithms
 
     render_map = {
diff --git a/src/ydata_profiling/utils/backend.py b/src/ydata_profiling/utils/backend.py
index e99d91c11..dd12f9fd3 100644
--- a/src/ydata_profiling/utils/backend.py
+++ b/src/ydata_profiling/utils/backend.py
@@ -1,5 +1,5 @@
 """
-    File with a function to check the backend being used
+Backend detection utilities for pandas and spark.
 """
 import importlib
 

From 307cba98bfab9196a8de5355022f3919539e4520 Mon Sep 17 00:00:00 2001
From: Pkcha <pkcha@PkchadeMacBook-Air.local>
Date: Sun, 12 Apr 2026 18:09:37 +0800
Subject: [PATCH 4/5] feat: initial release

---
 src/ydata_profiling/model/handler.py          | 123 +++++++++---------
 src/ydata_profiling/model/summarizer.py       |   5 +-
 .../report/structure/__init__.py              |  30 -----
 .../report/structure/report.py                |   2 +-
 .../report/structure/variables/__init__.py    |  23 ++++
 src/ydata_profiling/utils/backend.py          |   2 +-
 6 files changed, 90 insertions(+), 95 deletions(-)

diff --git a/src/ydata_profiling/model/handler.py b/src/ydata_profiling/model/handler.py
index aa36a811c..bcca12a1c 100644
--- a/src/ydata_profiling/model/handler.py
+++ b/src/ydata_profiling/model/handler.py
@@ -1,60 +1,63 @@
-"""
-    Auxiliary handler methods for data summary extraction
-"""
-from typing import Any, Callable, Dict, List, Sequence
-
-import networkx as nx
-from visions import VisionsTypeset
-
-
-def compose(functions: Sequence[Callable]) -> Callable:
-    """
-    Compose a sequence of functions.
-
-    :param functions: sequence of functions
-    :return: combined function applying all functions in order.
-    """
-
-    def composed_function(*args) -> List[Any]:
-        result = args  # Start with the input arguments
-        for func in functions:
-            result = func(*result) if isinstance(result, tuple) else func(result)
-        return result  # type: ignore
-
-    return composed_function  # type: ignore
-
-
-class Handler:
-    """A generic handler
-
-    Allows any custom mapping between data types and functions
-    """
-
-    def __init__(
-        self,
-        mapping: Dict[str, List[Callable]],
-        typeset: VisionsTypeset,
-        *args,
-        **kwargs
-    ):
-        self.mapping = mapping
-        self.typeset = typeset
-        self._complete_dag()
-
-    def _complete_dag(self) -> None:
-        for from_type, to_type in nx.topological_sort(
-            nx.line_graph(self.typeset.base_graph)
-        ):
-            self.mapping[str(to_type)] = (
-                self.mapping[str(from_type)] + self.mapping[str(to_type)]
-            )
-
-    def handle(self, dtype: str, *args, **kwargs) -> dict:
-        """
-        Returns:
-            object: a tuple containing the config, the dataset series and the summary extracted
-        """
-        funcs = self.mapping.get(dtype, [])
-        op = compose(funcs)
-        summary = op(*args)[-1]
-        return summary
+"""
+    Auxiliary handler methods for data summary extraction
+"""
+from typing import Any, Callable, Dict, List, Sequence
+
+import networkx as nx
+from visions import VisionsTypeset
+
+
+def compose(functions: Sequence[Callable]) -> Callable:
+    """
+    Compose a sequence of functions into one left-to-right pipeline.
+
+    :param functions: functions applied in order; with none, the arguments come back as a tuple.
+    :return: combined function feeding each result to the next (a tuple result is unpacked).
+    """
+
+    def composed_function(*args) -> List[Any]:
+        result = args  # Start with the input arguments
+        for func in functions:
+            result = func(*result) if isinstance(result, tuple) else func(result)
+        return result  # type: ignore
+
+    return composed_function  # type: ignore
+
+
+class Handler:
+    """A generic handler
+
+    Maps data type names to lists of functions, extended along the edges of the typeset graph
+    """
+
+    def __init__(
+        self,
+        mapping: Dict[str, List[Callable]],
+        typeset: VisionsTypeset,
+        *args,
+        **kwargs
+    ):
+        self.mapping = mapping  # kept by reference; _complete_dag() updates this dict in place
+        self.typeset = typeset
+        self._complete_dag()
+
+    def _complete_dag(self) -> None:
+        for from_type, to_type in nx.topological_sort(
+            nx.line_graph(self.typeset.base_graph)
+        ):
+            self.mapping[str(to_type)] = (  # from_type functions are placed before to_type's own
+                self.mapping[str(from_type)] + self.mapping[str(to_type)]
+            )
+
+    def handle(self, dtype: str, *args, **kwargs) -> dict:
+        """
+        Returns:
+            dict: the summary, i.e. the last element of what the functions mapped to dtype return
+        """
+        funcs = self.mapping.get(dtype, [])  # a dtype without an entry gets no functions
+        op = compose(funcs)
+        summary = op(*args)[-1]  # keep only the last element of the composed result
+        return summary
+
+
+
diff --git a/src/ydata_profiling/model/summarizer.py b/src/ydata_profiling/model/summarizer.py
index d733a7d36..54d839915 100644
--- a/src/ydata_profiling/model/summarizer.py
+++ b/src/ydata_profiling/model/summarizer.py
@@ -27,7 +27,7 @@
 from ydata_profiling.model.pandas.describe_supported_pandas import (
     pandas_describe_supported,
 )
-from ydata_profiling.model.summary_algorithms import (  # Check what is this method used for
+from ydata_profiling.model.summary_algorithms import (
     describe_file_1d,
     describe_image_1d,
     describe_path_1d,
@@ -50,9 +50,8 @@ def summarize(
         return self.handle(str(dtype), config, series, {"type": str(dtype)})
 
 
-# Revisit this with the correct support for Spark as well.
 class ProfilingSummarizer(BaseSummarizer):
-    """A summarizer for Pandas DataFrames."""
+    """A summarizer supporting both Pandas and Spark DataFrames."""
 
     def __init__(self, typeset: VisionsTypeset, use_spark: bool = False):
         self.use_spark = use_spark and is_pyspark_installed()
diff --git a/src/ydata_profiling/report/structure/__init__.py b/src/ydata_profiling/report/structure/__init__.py
index 7ba9c10c9..8324d248d 100644
--- a/src/ydata_profiling/report/structure/__init__.py
+++ b/src/ydata_profiling/report/structure/__init__.py
@@ -1,31 +1 @@
 """Data structure for the report"""
-from typing import Callable, Dict
-
-
-def get_render_map() -> Dict[str, Callable]:
-    """Get the mapping of variable types to their render functions.
-
-    This function was moved from model.handler to report.structure to eliminate
-    the reverse dependency from model layer to report layer.
-
-    Returns:
-        Dictionary mapping type names to render functions.
-    """
-    import ydata_profiling.report.structure.variables as render_algorithms
-
-    render_map = {
-        "Boolean": render_algorithms.render_boolean,
-        "Numeric": render_algorithms.render_real,
-        "Complex": render_algorithms.render_complex,
-        "Text": render_algorithms.render_text,
-        "DateTime": render_algorithms.render_date,
-        "Categorical": render_algorithms.render_categorical,
-        "URL": render_algorithms.render_url,
-        "Path": render_algorithms.render_path,
-        "File": render_algorithms.render_file,
-        "Image": render_algorithms.render_image,
-        "Unsupported": render_algorithms.render_generic,
-        "TimeSeries": render_algorithms.render_timeseries,
-    }
-
-    return render_map
diff --git a/src/ydata_profiling/report/structure/report.py b/src/ydata_profiling/report/structure/report.py
index b64a41aae..0f027f23f 100644
--- a/src/ydata_profiling/report/structure/report.py
+++ b/src/ydata_profiling/report/structure/report.py
@@ -7,7 +7,7 @@
 from ydata_profiling.config import Settings
 from ydata_profiling.model import BaseDescription
 from ydata_profiling.model.alerts import AlertType
-from ydata_profiling.report.structure import get_render_map
+from ydata_profiling.report.structure.variables import get_render_map
 from ydata_profiling.report.presentation.core import (
     HTML,
     Collapse,
diff --git a/src/ydata_profiling/report/structure/variables/__init__.py b/src/ydata_profiling/report/structure/variables/__init__.py
index 64f1d6d54..a8aa301b5 100644
--- a/src/ydata_profiling/report/structure/variables/__init__.py
+++ b/src/ydata_profiling/report/structure/variables/__init__.py
@@ -1,3 +1,5 @@
+from typing import Callable, Dict
+
 from ydata_profiling.report.structure.variables.render_boolean import render_boolean
 from ydata_profiling.report.structure.variables.render_categorical import (
     render_categorical,
@@ -17,6 +19,26 @@
 )
 from ydata_profiling.report.structure.variables.render_url import render_url
 
+
+def get_render_map() -> Dict[str, Callable]:
+    render_map = {
+        "Boolean": render_boolean,
+        "Numeric": render_real,
+        "Complex": render_complex,
+        "Text": render_text,
+        "DateTime": render_date,
+        "Categorical": render_categorical,
+        "URL": render_url,
+        "Path": render_path,
+        "File": render_file,
+        "Image": render_image,
+        "Unsupported": render_generic,
+        "TimeSeries": render_timeseries,
+    }
+
+    return render_map
+
+
 __all__ = [
     "render_boolean",
     "render_categorical",
@@ -32,4 +54,5 @@
     "render_text",
     "render_timeseries",
     "render_url",
+    "get_render_map",
 ]
diff --git a/src/ydata_profiling/utils/backend.py b/src/ydata_profiling/utils/backend.py
index dd12f9fd3..e99d91c11 100644
--- a/src/ydata_profiling/utils/backend.py
+++ b/src/ydata_profiling/utils/backend.py
@@ -1,5 +1,5 @@
 """
-Backend detection utilities for pandas and spark.
+    File with a function to check the backend being used
 """
 import importlib
 

From 1e2fa10eaf7a951acea663fa270784244ac18404 Mon Sep 17 00:00:00 2001
From: Pkcha <pkcha@PkchadeMacBook-Air.local>
Date: Sun, 12 Apr 2026 19:27:37 +0800
Subject: [PATCH 5/5] revert: move get_render_map back into model.handler

---
 src/ydata_profiling/model/handler.py          | 18 +++++++++++++++
 src/ydata_profiling/model/summarizer.py       |  5 ++--
 .../report/structure/report.py                |  2 +-
 .../report/structure/variables/__init__.py    | 23 -------------------
 4 files changed, 22 insertions(+), 26 deletions(-)

diff --git a/src/ydata_profiling/model/handler.py b/src/ydata_profiling/model/handler.py
index bcca12a1c..992c1840c 100644
--- a/src/ydata_profiling/model/handler.py
+++ b/src/ydata_profiling/model/handler.py
@@ -60,4 +60,22 @@ def handle(self, dtype: str, *args, **kwargs) -> dict:
         return summary
 
 
+def get_render_map() -> Dict[str, Callable]:
+    """Return the render function registered for each variable type name."""
+    # Imported inside the function, as before; presumably avoids a circular import (confirm).
+    import ydata_profiling.report.structure.variables as renderers
 
+    return {
+        "Boolean": renderers.render_boolean,
+        "Numeric": renderers.render_real,
+        "Complex": renderers.render_complex,
+        "Text": renderers.render_text,
+        "DateTime": renderers.render_date,
+        "Categorical": renderers.render_categorical,
+        "URL": renderers.render_url,
+        "Path": renderers.render_path,
+        "File": renderers.render_file,
+        "Image": renderers.render_image,
+        "Unsupported": renderers.render_generic,
+        "TimeSeries": renderers.render_timeseries,
+    }
diff --git a/src/ydata_profiling/model/summarizer.py b/src/ydata_profiling/model/summarizer.py
index 54d839915..d733a7d36 100644
--- a/src/ydata_profiling/model/summarizer.py
+++ b/src/ydata_profiling/model/summarizer.py
@@ -27,7 +27,7 @@
 from ydata_profiling.model.pandas.describe_supported_pandas import (
     pandas_describe_supported,
 )
-from ydata_profiling.model.summary_algorithms import (
+from ydata_profiling.model.summary_algorithms import (  # TODO: check what these imports are used for
     describe_file_1d,
     describe_image_1d,
     describe_path_1d,
@@ -50,8 +50,9 @@ def summarize(
         return self.handle(str(dtype), config, series, {"type": str(dtype)})
 
 
+# Revisit this with the correct support for Spark as well.
 class ProfilingSummarizer(BaseSummarizer):
-    """A summarizer supporting both Pandas and Spark DataFrames."""
+    """A summarizer for Pandas DataFrames."""
 
     def __init__(self, typeset: VisionsTypeset, use_spark: bool = False):
         self.use_spark = use_spark and is_pyspark_installed()
diff --git a/src/ydata_profiling/report/structure/report.py b/src/ydata_profiling/report/structure/report.py
index 0f027f23f..482b410b2 100644
--- a/src/ydata_profiling/report/structure/report.py
+++ b/src/ydata_profiling/report/structure/report.py
@@ -7,7 +7,7 @@
 from ydata_profiling.config import Settings
 from ydata_profiling.model import BaseDescription
 from ydata_profiling.model.alerts import AlertType
-from ydata_profiling.report.structure.variables import get_render_map
+from ydata_profiling.model.handler import get_render_map
 from ydata_profiling.report.presentation.core import (
     HTML,
     Collapse,
diff --git a/src/ydata_profiling/report/structure/variables/__init__.py b/src/ydata_profiling/report/structure/variables/__init__.py
index a8aa301b5..64f1d6d54 100644
--- a/src/ydata_profiling/report/structure/variables/__init__.py
+++ b/src/ydata_profiling/report/structure/variables/__init__.py
@@ -1,5 +1,3 @@
-from typing import Callable, Dict
-
 from ydata_profiling.report.structure.variables.render_boolean import render_boolean
 from ydata_profiling.report.structure.variables.render_categorical import (
     render_categorical,
@@ -19,26 +17,6 @@
 )
 from ydata_profiling.report.structure.variables.render_url import render_url
 
-
-def get_render_map() -> Dict[str, Callable]:
-    render_map = {
-        "Boolean": render_boolean,
-        "Numeric": render_real,
-        "Complex": render_complex,
-        "Text": render_text,
-        "DateTime": render_date,
-        "Categorical": render_categorical,
-        "URL": render_url,
-        "Path": render_path,
-        "File": render_file,
-        "Image": render_image,
-        "Unsupported": render_generic,
-        "TimeSeries": render_timeseries,
-    }
-
-    return render_map
-
-
 __all__ = [
     "render_boolean",
     "render_categorical",
@@ -54,5 +32,4 @@ def get_render_map() -> Dict[str, Callable]:
     "render_text",
     "render_timeseries",
     "render_url",
-    "get_render_map",
 ]