diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 000000000..7bb15bf5d
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,21 @@
+FROM python:3.10-slim
+
+WORKDIR /app
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY . .
+
+# NOTE(review): setuptools is pinned again after installing the package,
+# presumably in case that install replaced it -- confirm it is still needed.
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir "setuptools>=72.0.0,<80.0.0" wheel && \
+    pip install --no-cache-dir . && \
+    pip install --no-cache-dir "setuptools>=72.0.0,<80.0.0" && \
+    pip install --no-cache-dir jupyter
+
+EXPOSE 8888
+
+CMD ["jupyter", "notebook", "--ip=0.0.0.0", "--port=8888", "--no-browser", "--allow-root"]
diff --git a/src/ydata_profiling/model/handler.py b/src/ydata_profiling/model/handler.py
index 992c1840c..aa36a811c 100644
--- a/src/ydata_profiling/model/handler.py
+++ b/src/ydata_profiling/model/handler.py
@@ -1,81 +1,75 @@
-"""
-    Auxiliary handler methods for data summary extraction
-"""
-from typing import Any, Callable, Dict, List, Sequence
-
-import networkx as nx
-from visions import VisionsTypeset
-
-
-def compose(functions: Sequence[Callable]) -> Callable:
-    """
-    Compose a sequence of functions.
-
-    :param functions: sequence of functions
-    :return: combined function applying all functions in order.
-    """
-
-    def composed_function(*args) -> List[Any]:
-        result = args  # Start with the input arguments
-        for func in functions:
-            result = func(*result) if isinstance(result, tuple) else func(result)
-        return result  # type: ignore
-
-    return composed_function  # type: ignore
-
-
-class Handler:
-    """A generic handler
-
-    Allows any custom mapping between data types and functions
-    """
-
-    def __init__(
-        self,
-        mapping: Dict[str, List[Callable]],
-        typeset: VisionsTypeset,
-        *args,
-        **kwargs
-    ):
-        self.mapping = mapping
-        self.typeset = typeset
-        self._complete_dag()
-
-    def _complete_dag(self) -> None:
-        for from_type, to_type in nx.topological_sort(
-            nx.line_graph(self.typeset.base_graph)
-        ):
-            self.mapping[str(to_type)] = (
-                self.mapping[str(from_type)] + self.mapping[str(to_type)]
-            )
-
-    def handle(self, dtype: str, *args, **kwargs) -> dict:
-        """
-        Returns:
-            object: a tuple containing the config, the dataset series and the summary extracted
-        """
-        funcs = self.mapping.get(dtype, [])
-        op = compose(funcs)
-        summary = op(*args)[-1]
-        return summary
-
-
-def get_render_map() -> Dict[str, Callable]:
-    import ydata_profiling.report.structure.variables as render_algorithms
-
-    render_map = {
-        "Boolean": render_algorithms.render_boolean,
-        "Numeric": render_algorithms.render_real,
-        "Complex": render_algorithms.render_complex,
-        "Text": render_algorithms.render_text,
-        "DateTime": render_algorithms.render_date,
-        "Categorical": render_algorithms.render_categorical,
-        "URL": render_algorithms.render_url,
-        "Path": render_algorithms.render_path,
-        "File": render_algorithms.render_file,
-        "Image": render_algorithms.render_image,
-        "Unsupported": render_algorithms.render_generic,
-        "TimeSeries": render_algorithms.render_timeseries,
-    }
-
-    return render_map
+"""
+    Auxiliary handler methods for data summary extraction
+"""
+from typing import Any, Callable, Dict, List, Sequence
+
+import networkx as nx
+from visions import VisionsTypeset
+
+
+def compose(functions: Sequence[Callable]) -> Callable:
+    """
+    Compose a sequence of functions.
+
+    :param functions: sequence of functions
+    :return: combined function applying all functions in order.
+    """
+
+    def composed_function(*args) -> List[Any]:
+        result = args  # Start with the input arguments
+        for func in functions:
+            # A tuple result is unpacked into the next call's arguments;
+            # any other result is passed on as a single argument.
+            result = func(*result) if isinstance(result, tuple) else func(result)
+        return result  # type: ignore
+
+    return composed_function  # type: ignore
+
+
+class Handler:
+    """A generic handler
+
+    Allows any custom mapping between data types and functions
+    """
+
+    def __init__(
+        self,
+        mapping: Dict[str, List[Callable]],
+        typeset: VisionsTypeset,
+        *args,
+        **kwargs
+    ):
+        """
+        :param mapping: functions to apply, keyed by data type name; this
+            dict is updated in place while completing the type graph.
+        :param typeset: typeset whose ``base_graph`` links the data types.
+        """
+        self.mapping = mapping
+        self.typeset = typeset
+        self._complete_dag()
+
+    def _complete_dag(self) -> None:
+        """
+        For every edge of the typeset graph, prepend the functions of the
+        source type to those of the target type.
+        """
+        for from_type, to_type in nx.topological_sort(
+            nx.line_graph(self.typeset.base_graph)
+        ):
+            self.mapping[str(to_type)] = (
+                self.mapping[str(from_type)] + self.mapping[str(to_type)]
+            )
+
+    def handle(self, dtype: str, *args, **kwargs) -> dict:
+        """
+        Apply the functions mapped to a data type, in order.
+
+        :param dtype: name of the data type whose functions are applied;
+            a name missing from the mapping applies no functions.
+        :param args: arguments passed to the first function.
+        :return: the last element of the composed result, i.e. the summary.
+        """
+        funcs = self.mapping.get(dtype, [])
+        op = compose(funcs)
+        summary = op(*args)[-1]
+        return summary
diff --git a/src/ydata_profiling/report/structure/__init__.py b/src/ydata_profiling/report/structure/__init__.py
index 8324d248d..7ba9c10c9 100644
--- a/src/ydata_profiling/report/structure/__init__.py
+++ b/src/ydata_profiling/report/structure/__init__.py
@@ -1 +1,31 @@
 """Data structure for the report"""
+from typing import Callable, Dict
+
+
+def get_render_map() -> Dict[str, Callable]:
+    """Get the mapping of variable types to their render functions.
+
+    This function was moved from model.handler to report.structure to eliminate
+    the reverse dependency from model layer to report layer.
+
+    Returns:
+        Dictionary mapping type names to render functions.
+    """
+    import ydata_profiling.report.structure.variables as render_algorithms
+
+    render_map = {
+        "Boolean": render_algorithms.render_boolean,
+        "Numeric": render_algorithms.render_real,
+        "Complex": render_algorithms.render_complex,
+        "Text": render_algorithms.render_text,
+        "DateTime": render_algorithms.render_date,
+        "Categorical": render_algorithms.render_categorical,
+        "URL": render_algorithms.render_url,
+        "Path": render_algorithms.render_path,
+        "File": render_algorithms.render_file,
+        "Image": render_algorithms.render_image,
+        "Unsupported": render_algorithms.render_generic,
+        "TimeSeries": render_algorithms.render_timeseries,
+    }
+
+    return render_map
diff --git a/src/ydata_profiling/report/structure/report.py b/src/ydata_profiling/report/structure/report.py
index 482b410b2..b64a41aae 100644
--- a/src/ydata_profiling/report/structure/report.py
+++ b/src/ydata_profiling/report/structure/report.py
@@ -7,7 +7,7 @@
 from ydata_profiling.config import Settings
 from ydata_profiling.model import BaseDescription
 from ydata_profiling.model.alerts import AlertType
-from ydata_profiling.model.handler import get_render_map
+from ydata_profiling.report.structure import get_render_map
 from ydata_profiling.report.presentation.core import (
     HTML,
     Collapse,
diff --git a/src/ydata_profiling/utils/backend.py b/src/ydata_profiling/utils/backend.py
index e99d91c11..dd12f9fd3 100644
--- a/src/ydata_profiling/utils/backend.py
+++ b/src/ydata_profiling/utils/backend.py
@@ -1,5 +1,5 @@
 """
-    File with a function to check the backend being used
+Backend detection utilities for pandas and spark.
 """
 import importlib