diff --git a/src/ydata_profiling/model/alerts.py b/src/ydata_profiling/model/alerts.py
index 1b16d27a0..6c0bfa193 100644
--- a/src/ydata_profiling/model/alerts.py
+++ b/src/ydata_profiling/model/alerts.py
@@ -9,27 +9,10 @@
from ydata_profiling.config import Settings
from ydata_profiling.model.correlations import perform_check_correlation
+from ydata_profiling.utils.formatters import fmt_percent
from ydata_profiling.utils.styles import get_alert_styles
-def fmt_percent(value: float, edge_cases: bool = True) -> str:
- """Format a ratio as a percentage.
-
- Args:
- edge_cases: Check for edge cases?
- value: The ratio.
-
- Returns:
- The percentage with 1 point precision.
- """
- if edge_cases and round(value, 3) == 0 and value > 0:
- return "< 0.1%"
- if edge_cases and round(value, 3) == 1 and value < 1:
- return "> 99.9%"
-
- return f"{value*100:2.1f}%"
-
-
@unique
class AlertType(Enum):
"""Alert types"""
diff --git a/src/ydata_profiling/model/handler.py b/src/ydata_profiling/model/handler.py
index 992c1840c..d4e07418d 100644
--- a/src/ydata_profiling/model/handler.py
+++ b/src/ydata_profiling/model/handler.py
@@ -58,24 +58,3 @@ def handle(self, dtype: str, *args, **kwargs) -> dict:
op = compose(funcs)
summary = op(*args)[-1]
return summary
-
-
-def get_render_map() -> Dict[str, Callable]:
- import ydata_profiling.report.structure.variables as render_algorithms
-
- render_map = {
- "Boolean": render_algorithms.render_boolean,
- "Numeric": render_algorithms.render_real,
- "Complex": render_algorithms.render_complex,
- "Text": render_algorithms.render_text,
- "DateTime": render_algorithms.render_date,
- "Categorical": render_algorithms.render_categorical,
- "URL": render_algorithms.render_url,
- "Path": render_algorithms.render_path,
- "File": render_algorithms.render_file,
- "Image": render_algorithms.render_image,
- "Unsupported": render_algorithms.render_generic,
- "TimeSeries": render_algorithms.render_timeseries,
- }
-
- return render_map
diff --git a/src/ydata_profiling/model/summarizer.py b/src/ydata_profiling/model/summarizer.py
index d733a7d36..67bd3e7bf 100644
--- a/src/ydata_profiling/model/summarizer.py
+++ b/src/ydata_profiling/model/summarizer.py
@@ -1,207 +1,237 @@
-# mypy: ignore-errors
-
-from dataclasses import asdict
-from typing import Any, Callable, Dict, List, Type, Union
-
-import numpy as np
-import pandas as pd
-from visions import VisionsBaseType, VisionsTypeset
-
-from ydata_profiling.config import Settings
-from ydata_profiling.model import BaseDescription
-from ydata_profiling.model.handler import Handler
-from ydata_profiling.model.pandas import (
- pandas_describe_boolean_1d,
- pandas_describe_categorical_1d,
- pandas_describe_counts,
- pandas_describe_date_1d,
- pandas_describe_file_1d,
- pandas_describe_generic,
- pandas_describe_image_1d,
- pandas_describe_numeric_1d,
- pandas_describe_path_1d,
- pandas_describe_text_1d,
- pandas_describe_timeseries_1d,
- pandas_describe_url_1d,
-)
-from ydata_profiling.model.pandas.describe_supported_pandas import (
- pandas_describe_supported,
-)
-from ydata_profiling.model.summary_algorithms import ( # Check what is this method used for
- describe_file_1d,
- describe_image_1d,
- describe_path_1d,
- describe_timeseries_1d,
- describe_url_1d,
-)
-from ydata_profiling.utils.backend import is_pyspark_installed
-
-
-class BaseSummarizer(Handler):
- """A base summarizer
-
- Can be used to define custom summarizations
- """
-
- def summarize(
- self, config: Settings, series: pd.Series, dtype: Type[VisionsBaseType]
- ) -> dict:
- """Generates the summary for a given series"""
- return self.handle(str(dtype), config, series, {"type": str(dtype)})
-
-
-# Revisit this with the correct support for Spark as well.
-class ProfilingSummarizer(BaseSummarizer):
- """A summarizer for Pandas DataFrames."""
-
- def __init__(self, typeset: VisionsTypeset, use_spark: bool = False):
- self.use_spark = use_spark and is_pyspark_installed()
- self._summary_map = self._create_summary_map()
- super().__init__(self._summary_map, typeset)
-
- @property
- def summary_map(self) -> Dict[str, List[Callable]]:
- """Allows users to modify the summary map after initialization."""
- return self._summary_map
-
- def _create_summary_map(self) -> Dict[str, List[Callable]]:
- """Creates the summary map for Pandas summarization."""
- if self.use_spark:
- from ydata_profiling.model.spark import (
- describe_boolean_1d_spark,
- describe_categorical_1d_spark,
- describe_counts_spark,
- describe_date_1d_spark,
- describe_generic_spark,
- describe_numeric_1d_spark,
- describe_supported_spark,
- describe_text_1d_spark,
- )
-
- summary_map = {
- "Unsupported": [
- describe_counts_spark,
- describe_generic_spark,
- describe_supported_spark,
- ],
- "Numeric": [describe_numeric_1d_spark],
- "DateTime": [describe_date_1d_spark],
- "Text": [describe_text_1d_spark],
- "Categorical": [describe_categorical_1d_spark],
- "Boolean": [describe_boolean_1d_spark],
- "URL": [describe_url_1d],
- "Path": [describe_path_1d],
- "File": [describe_file_1d],
- "Image": [describe_image_1d],
- "TimeSeries": [describe_timeseries_1d],
- }
- else:
- summary_map = {
- "Unsupported": [
- pandas_describe_counts,
- pandas_describe_generic,
- pandas_describe_supported,
- ],
- "Numeric": [pandas_describe_numeric_1d],
- "DateTime": [pandas_describe_date_1d],
- "Text": [pandas_describe_text_1d],
- "Categorical": [pandas_describe_categorical_1d],
- "Boolean": [pandas_describe_boolean_1d],
- "URL": [pandas_describe_url_1d],
- "Path": [pandas_describe_path_1d],
- "File": [pandas_describe_file_1d],
- "Image": [pandas_describe_image_1d],
- "TimeSeries": [pandas_describe_timeseries_1d],
- }
- return summary_map
-
-
-def format_summary(summary: Union[BaseDescription, dict]) -> dict:
- """Prepare summary for export to json file.
-
- Args:
- summary (Union[BaseDescription, dict]): summary to export
-
- Returns:
- dict: summary as dict
- """
-
- def fmt(v: Any) -> Any:
- if isinstance(v, dict):
- return {k: fmt(va) for k, va in v.items()}
- else:
- if isinstance(v, pd.Series):
- return fmt(v.to_dict())
- elif (
- isinstance(v, tuple)
- and len(v) == 2
- and all(isinstance(x, np.ndarray) for x in v)
- ):
- return {"counts": v[0].tolist(), "bin_edges": v[1].tolist()}
- else:
- return v
-
- if isinstance(summary, BaseDescription):
- summary = asdict(summary)
-
- summary = {k: fmt(v) for k, v in summary.items()}
- return summary
-
-
-def _redact_column(column: Dict[str, Any]) -> Dict[str, Any]:
- def redact_key(data: Dict[str, Any]) -> Dict[str, Any]:
- return {f"REDACTED_{i}": v for i, (_, v) in enumerate(data.items())}
-
- def redact_value(data: Dict[str, Any]) -> Dict[str, Any]:
- return {k: f"REDACTED_{i}" for i, (k, _) in enumerate(data.items())}
-
- keys_to_redact = [
- "block_alias_char_counts",
- "block_alias_values",
- "category_alias_char_counts",
- "category_alias_values",
- "character_counts",
- "script_char_counts",
- "value_counts_index_sorted",
- "value_counts_without_nan",
- "word_counts",
- ]
-
- values_to_redact = ["first_rows"]
-
- for field in keys_to_redact:
- if field not in column:
- continue
- is_dict = (isinstance(v, dict) for v in column[field].values())
- if any(is_dict):
- column[field] = {k: redact_key(v) for k, v in column[field].items()}
- else:
- column[field] = redact_key(column[field])
-
- for field in values_to_redact:
- if field not in column:
- continue
- is_dict = (isinstance(v, dict) for v in column[field].values())
- if any(is_dict):
- column[field] = {k: redact_value(v) for k, v in column[field].items()}
- else:
- column[field] = redact_value(column[field])
-
- return column
-
-
-def redact_summary(summary: dict, config: Settings) -> dict:
- """Redact summary to export to json file.
-
- Args:
- summary (dict): summary to redact
-
- Returns:
- dict: redacted summary
- """
- for _, col in summary["variables"].items():
- if (config.vars.cat.redact and col["type"] == "Categorical") or (
- config.vars.text.redact and col["type"] == "Text"
- ):
- col = _redact_column(col)
- return summary
+# mypy: ignore-errors
+
+from dataclasses import asdict
+from typing import Any, Callable, Dict, List, Type, Union
+
+import numpy as np
+import pandas as pd
+from visions import VisionsBaseType, VisionsTypeset
+
+from ydata_profiling.config import Settings
+from ydata_profiling.model import BaseDescription
+from ydata_profiling.model.handler import Handler
+from ydata_profiling.utils.backend import is_pyspark_installed
+
+
+class BaseSummarizer(Handler):
+ """Base class for data summarization.
+
+ Provides a flexible framework to define custom summarization strategies
+ for different data types and dataframe backends.
+ """
+
+ def summarize(
+ self, config: Settings, series: pd.Series, dtype: Type[VisionsBaseType]
+ ) -> dict:
+ """Generates the summary statistics for a given series.
+
+ Args:
+ config: Report configuration settings
+ series: Data series to summarize
+ dtype: Detected data type from visions typeset
+
+ Returns:
+ Dictionary containing summary statistics
+ """
+ return self.handle(str(dtype), config, series, {"type": str(dtype)})
+
+
+def _create_pandas_summary_map() -> Dict[str, List[Callable]]:
+ """Create summary function mapping for Pandas backend."""
+ from ydata_profiling.model.pandas import (
+ pandas_describe_boolean_1d,
+ pandas_describe_categorical_1d,
+ pandas_describe_counts,
+ pandas_describe_date_1d,
+ pandas_describe_file_1d,
+ pandas_describe_generic,
+ pandas_describe_image_1d,
+ pandas_describe_numeric_1d,
+ pandas_describe_path_1d,
+ pandas_describe_text_1d,
+ pandas_describe_timeseries_1d,
+ pandas_describe_url_1d,
+ )
+ from ydata_profiling.model.pandas.describe_supported_pandas import (
+ pandas_describe_supported,
+ )
+
+ return {
+ "Unsupported": [
+ pandas_describe_counts,
+ pandas_describe_generic,
+ pandas_describe_supported,
+ ],
+ "Numeric": [pandas_describe_numeric_1d],
+ "DateTime": [pandas_describe_date_1d],
+ "Text": [pandas_describe_text_1d],
+ "Categorical": [pandas_describe_categorical_1d],
+ "Boolean": [pandas_describe_boolean_1d],
+ "URL": [pandas_describe_url_1d],
+ "Path": [pandas_describe_path_1d],
+ "File": [pandas_describe_file_1d],
+ "Image": [pandas_describe_image_1d],
+ "TimeSeries": [pandas_describe_timeseries_1d],
+ }
+
+
+def _create_spark_summary_map() -> Dict[str, List[Callable]]:
+ """Create summary function mapping for Spark backend."""
+ from ydata_profiling.model.spark import (
+ describe_boolean_1d_spark,
+ describe_categorical_1d_spark,
+ describe_counts_spark,
+ describe_date_1d_spark,
+ describe_generic_spark,
+ describe_numeric_1d_spark,
+ describe_supported_spark,
+ describe_text_1d_spark,
+ )
+ from ydata_profiling.model.summary_algorithms import (
+ describe_file_1d,
+ describe_image_1d,
+ describe_path_1d,
+ describe_timeseries_1d,
+ describe_url_1d,
+ )
+
+ return {
+ "Unsupported": [
+ describe_counts_spark,
+ describe_generic_spark,
+ describe_supported_spark,
+ ],
+ "Numeric": [describe_numeric_1d_spark],
+ "DateTime": [describe_date_1d_spark],
+ "Text": [describe_text_1d_spark],
+ "Categorical": [describe_categorical_1d_spark],
+ "Boolean": [describe_boolean_1d_spark],
+ "URL": [describe_url_1d],
+ "Path": [describe_path_1d],
+ "File": [describe_file_1d],
+ "Image": [describe_image_1d],
+ "TimeSeries": [describe_timeseries_1d],
+ }
+
+
+def _create_summary_map_factory(use_spark: bool) -> Dict[str, List[Callable]]:
+ """Factory function to create appropriate summary map based on backend.
+
+ Args:
+ use_spark: If True, create Spark-compatible summary map
+
+ Returns:
+ Mapping from data types to summary functions
+ """
+ if use_spark:
+ return _create_spark_summary_map()
+ return _create_pandas_summary_map()
+
+
+class ProfilingSummarizer(BaseSummarizer):
+ """Standard summarizer for data profiling.
+
+ Supports both Pandas and Spark backends, providing comprehensive
+ statistical summaries for all standard data types.
+ """
+
+ def __init__(self, typeset: VisionsTypeset, use_spark: bool = False):
+ self.use_spark = use_spark and is_pyspark_installed()
+ self._summary_map = _create_summary_map_factory(self.use_spark)
+ super().__init__(self._summary_map, typeset)
+
+ @property
+ def summary_map(self) -> Dict[str, List[Callable]]:
+ """Allows users to modify the summary map after initialization."""
+ return self._summary_map
+
+
+def format_summary(summary: Union[BaseDescription, dict]) -> dict:
+ """Prepare summary for export to json file.
+
+ Args:
+ summary (Union[BaseDescription, dict]): summary to export
+
+ Returns:
+ dict: summary as dict
+ """
+
+ def fmt(v: Any) -> Any:
+ if isinstance(v, dict):
+ return {k: fmt(va) for k, va in v.items()}
+ else:
+ if isinstance(v, pd.Series):
+ return fmt(v.to_dict())
+ elif (
+ isinstance(v, tuple)
+ and len(v) == 2
+ and all(isinstance(x, np.ndarray) for x in v)
+ ):
+ return {"counts": v[0].tolist(), "bin_edges": v[1].tolist()}
+ else:
+ return v
+
+ if isinstance(summary, BaseDescription):
+ summary = asdict(summary)
+
+ summary = {k: fmt(v) for k, v in summary.items()}
+ return summary
+
+
+def _redact_column(column: Dict[str, Any]) -> Dict[str, Any]:
+ def redact_key(data: Dict[str, Any]) -> Dict[str, Any]:
+ return {f"REDACTED_{i}": v for i, (_, v) in enumerate(data.items())}
+
+ def redact_value(data: Dict[str, Any]) -> Dict[str, Any]:
+ return {k: f"REDACTED_{i}" for i, (k, _) in enumerate(data.items())}
+
+ keys_to_redact = [
+ "block_alias_char_counts",
+ "block_alias_values",
+ "category_alias_char_counts",
+ "category_alias_values",
+ "character_counts",
+ "script_char_counts",
+ "value_counts_index_sorted",
+ "value_counts_without_nan",
+ "word_counts",
+ ]
+
+ values_to_redact = ["first_rows"]
+
+ for field in keys_to_redact:
+ if field not in column:
+ continue
+ is_dict = (isinstance(v, dict) for v in column[field].values())
+ if any(is_dict):
+ column[field] = {k: redact_key(v) for k, v in column[field].items()}
+ else:
+ column[field] = redact_key(column[field])
+
+ for field in values_to_redact:
+ if field not in column:
+ continue
+ is_dict = (isinstance(v, dict) for v in column[field].values())
+ if any(is_dict):
+ column[field] = {k: redact_value(v) for k, v in column[field].items()}
+ else:
+ column[field] = redact_value(column[field])
+
+ return column
+
+
+def redact_summary(summary: dict, config: Settings) -> dict:
+ """Redact summary to export to json file.
+
+ Args:
+ summary (dict): summary to redact
+
+ Returns:
+ dict: redacted summary
+ """
+ for _, col in summary["variables"].items():
+ if (config.vars.cat.redact and col["type"] == "Categorical") or (
+ config.vars.text.redact and col["type"] == "Text"
+ ):
+ col = _redact_column(col)
+ return summary
diff --git a/src/ydata_profiling/model/summary_algorithms.py b/src/ydata_profiling/model/summary_algorithms.py
index 9c3e5ef38..09a1fa374 100644
--- a/src/ydata_profiling/model/summary_algorithms.py
+++ b/src/ydata_profiling/model/summary_algorithms.py
@@ -74,13 +74,11 @@ def histogram_compute(
hist_config = config.plot.histogram
- # Compute data range
finite = finite_values[np.isfinite(finite_values)]
vmin = float(np.min(finite))
vmax = float(np.max(finite))
data_range = vmax - vmin
- # Choose of Bins based on observed data values
if data_range == 0:
eps = 0.5 if vmin == 0 else abs(vmin) * 0.1
bins = np.array([vmin - eps, vmin + eps])
@@ -113,16 +111,13 @@ def chi_square(
values: Optional[np.ndarray] = None,
histogram: Optional[np.ndarray] = None,
) -> dict:
- # Case 1: histogram not passed → we compute it
if histogram is None:
if values is None:
return {"statistic": 0, "pvalue": 0}
- # Try NumPy "auto" binning (may fail under NumPy 2)
try:
bins = np.histogram_bin_edges(values, bins="auto")
except ValueError:
- # Fallback: basic 1-bin histogram covering the min→max range
finite = values[np.isfinite(values)]
if finite.size == 0:
return {"statistic": 0, "pvalue": 0}
@@ -136,7 +131,6 @@ def chi_square(
histogram, _ = np.histogram(values, bins=bins)
- # Case 2: histogram exists but is empty
if histogram.size == 0 or histogram.sum() == 0:
return {"statistic": 0, "pvalue": 0}
diff --git a/src/ydata_profiling/report/formatters.py b/src/ydata_profiling/report/formatters.py
index 199ea854d..06fab6ac3 100644
--- a/src/ydata_profiling/report/formatters.py
+++ b/src/ydata_profiling/report/formatters.py
@@ -9,24 +9,13 @@
import pandas as pd
from markupsafe import escape
-
-def list_args(func: Callable) -> Callable:
- """Extend the function to allow taking a list as the first argument, and apply the function on each of the elements.
-
- Args:
- func: the function to extend
-
- Returns:
- The extended function
- """
-
- def inner(arg: Any, *args: Any, **kwargs: Any) -> Any:
- if isinstance(arg, list):
- return [func(v, *args, **kwargs) for v in arg]
-
- return func(arg, *args, **kwargs)
-
- return inner
+from ydata_profiling.utils.formatters import (
+ fmt_array,
+ fmt_number,
+ fmt_numeric,
+ fmt_percent,
+ list_args,
+)
@list_args
@@ -75,25 +64,6 @@ def fmt_bytesize(num: float, suffix: str = "B") -> str:
return f"{num:.1f} Yi{suffix}"
-@list_args
-def fmt_percent(value: float, edge_cases: bool = True) -> str:
- """Format a ratio as a percentage.
-
- Args:
- edge_cases: Check for edge cases?
- value: The ratio.
-
- Returns:
- The percentage with 1 point precision.
- """
- if edge_cases and round(value, 3) == 0 and value > 0:
- return "< 0.1%"
- if edge_cases and round(value, 3) == 1 and value < 1:
- return "> 99.9%"
-
- return f"{value*100:2.1f}%"
-
-
@list_args
def fmt_timespan(num_seconds: Any, detailed: bool = False, max_units: int = 3) -> str:
# From the `humanfriendly` module (without additional dependency)
@@ -234,61 +204,6 @@ def fmt_timespan_timedelta(
return fmt_numeric(delta, precision)
-@list_args
-def fmt_numeric(value: float, precision: int = 10) -> str:
- """Format any numeric value.
-
- Args:
- value: The numeric value to format.
- precision: The numeric precision
-
- Returns:
- The numeric value with the given precision.
- """
- if value is None:
- fmtted = "N/A"
- else:
- fmtted = f"{{:.{precision}g}}".format(value)
- for v in ["e+", "e-"]:
- if v in fmtted:
- sign = "-" if v in "e-" else ""
- fmtted = fmtted.replace(v, " × 10<sup>") + "</sup>"
- fmtted = fmtted.replace("<sup>0", "<sup>")
- fmtted = fmtted.replace("<sup>", f"<sup>{sign}")
-
- return fmtted
-
-
-@list_args
-def fmt_number(value: int) -> str:
- """Format any numeric value.
-
- Args:
- value: The numeric value to format.
-
- Returns:
- The numeric value with the given precision.
- """
- return f"{value:n}"
-
-
-@list_args
-def fmt_array(value: np.ndarray, threshold: Any = np.nan) -> str:
- """Format numpy arrays.
-
- Args:
- value: Array to format.
- threshold: Threshold at which to show ellipsis
-
- Returns:
- The string representation of the numpy array.
- """
- with np.printoptions(threshold=3, edgeitems=threshold):
- return_value = str(value)
-
- return return_value
-
-
@list_args
def fmt(value: Any) -> str:
"""Format any value.
diff --git a/src/ydata_profiling/report/structure/report.py b/src/ydata_profiling/report/structure/report.py
index 482b410b2..64bec9fd8 100644
--- a/src/ydata_profiling/report/structure/report.py
+++ b/src/ydata_profiling/report/structure/report.py
@@ -1,5 +1,5 @@
import os
-from typing import List, Sequence
+from typing import Callable, Dict, List, Sequence
import pandas as pd
from tqdm.auto import tqdm
@@ -7,7 +7,6 @@
from ydata_profiling.config import Settings
from ydata_profiling.model import BaseDescription
from ydata_profiling.model.alerts import AlertType
-from ydata_profiling.model.handler import get_render_map
from ydata_profiling.report.presentation.core import (
HTML,
Collapse,
@@ -24,6 +23,30 @@
from ydata_profiling.utils.dataframe import slugify
+def get_render_map() -> Dict[str, Callable]:
+ """Create mapping from data types to rendering functions.
+
+ Returns:
+ Dictionary mapping data type names to their respective render functions
+ """
+ import ydata_profiling.report.structure.variables as render_algorithms
+
+ return {
+ "Boolean": render_algorithms.render_boolean,
+ "Numeric": render_algorithms.render_real,
+ "Complex": render_algorithms.render_complex,
+ "Text": render_algorithms.render_text,
+ "DateTime": render_algorithms.render_date,
+ "Categorical": render_algorithms.render_categorical,
+ "URL": render_algorithms.render_url,
+ "Path": render_algorithms.render_path,
+ "File": render_algorithms.render_file,
+ "Image": render_algorithms.render_image,
+ "Unsupported": render_algorithms.render_generic,
+ "TimeSeries": render_algorithms.render_timeseries,
+ }
+
+
def get_missing_items(config: Settings, summary: BaseDescription) -> list:
"""Return the missing diagrams
diff --git a/src/ydata_profiling/utils/formatters.py b/src/ydata_profiling/utils/formatters.py
new file mode 100644
index 000000000..1349701f4
--- /dev/null
+++ b/src/ydata_profiling/utils/formatters.py
@@ -0,0 +1,98 @@
+"""Basic formatting utility functions."""
+from typing import Any, Callable
+
+import numpy as np
+import pandas as pd
+
+
+def list_args(func: Callable) -> Callable:
+ """Extend the function to allow taking a list as the first argument, and apply the function on each of the elements.
+
+ Args:
+ func: the function to extend
+
+ Returns:
+ The extended function
+ """
+
+ def inner(arg: Any, *args: Any, **kwargs: Any) -> Any:
+ if isinstance(arg, list):
+ return [func(v, *args, **kwargs) for v in arg]
+
+ return func(arg, *args, **kwargs)
+
+ return inner
+
+
+@list_args
+def fmt_percent(value: float, edge_cases: bool = True) -> str:
+ """Format a ratio as a percentage.
+
+ Args:
+ edge_cases: Check for edge cases?
+ value: The ratio.
+
+ Returns:
+ The percentage with 1 point precision.
+ """
+ if edge_cases and round(value, 3) == 0 and value > 0:
+ return "< 0.1%"
+ if edge_cases and round(value, 3) == 1 and value < 1:
+ return "> 99.9%"
+
+ return f"{value*100:2.1f}%"
+
+
+@list_args
+def fmt_numeric(value: float, precision: int = 10) -> str:
+ """Format any numeric value.
+
+ Args:
+ value: The numeric value to format.
+ precision: The numeric precision
+
+ Returns:
+ The numeric value with the given precision.
+ """
+ if value is None:
+ fmtted = "N/A"
+ else:
+ fmtted = f"{{:.{precision}g}}".format(value)
+ for v in ["e+", "e-"]:
+ if v in fmtted:
+ sign = "-" if v in "e-" else ""
+ fmtted = fmtted.replace(v, " × 10<sup>") + "</sup>"
+ fmtted = fmtted.replace("<sup>0", "<sup>")
+ fmtted = fmtted.replace("<sup>", f"<sup>{sign}")
+
+ return fmtted
+
+
+@list_args
+def fmt_number(value: int) -> str:
+ """Format any numeric value.
+
+ Args:
+ value: The numeric value to format.
+
+ Returns:
+ The numeric value with the given precision.
+ """
+ return f"{value:n}"
+
+
+@list_args
+def fmt_array(value: np.ndarray, threshold: Any = np.nan) -> str:
+ """Format numpy arrays.
+
+ Args:
+ value: Array to format.
+ threshold: Threshold at which to show ellipsis
+
+ Returns:
+ The string representation of the numpy array.
+ """
+ with np.printoptions(threshold=3, edgeitems=threshold):
+ return_value = str(value)
+
+ return return_value
diff --git a/venv/Dockerfile b/venv/Dockerfile
new file mode 100644
index 000000000..9e0a68801
--- /dev/null
+++ b/venv/Dockerfile
@@ -0,0 +1,19 @@
+FROM python:3.10-slim
+
+WORKDIR /app
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+ build-essential \
+ && rm -rf /var/lib/apt/lists/*
+
+COPY . .
+
+RUN pip install --no-cache-dir --upgrade pip && \
+ pip install --no-cache-dir "setuptools>=72.0.0,<80.0.0" wheel && \
+ pip install --no-cache-dir . && \
+ pip install --no-cache-dir "setuptools>=72.0.0,<80.0.0" && \
+ pip install --no-cache-dir jupyter
+
+EXPOSE 8888
+
+CMD ["jupyter", "notebook", "--ip=0.0.0.0", "--port=8888", "--no-browser", "--allow-root"]