Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Image that installs this project from source and serves Jupyter Notebook on port 8888.
FROM python:3.10-slim

# All following instructions (COPY, RUN, CMD) are executed relative to /app.
WORKDIR /app

# Install build-essential without recommended extras, then remove the apt
# package lists in the same layer so they do not end up in the image.
# NOTE(review): presumably needed to compile dependencies that ship no wheel - confirm.
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
&& rm -rf /var/lib/apt/lists/*

# Copy the entire build context into /app.
COPY . .

# Upgrade pip, install setuptools (>=72.0.0,<80.0.0) and wheel, install the
# project found in /app, install the same setuptools range again, then add Jupyter.
# NOTE(review): the second setuptools install presumably re-applies the pin in
# case installing the project changed the setuptools version - confirm it is needed.
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir "setuptools>=72.0.0,<80.0.0" wheel && \
pip install --no-cache-dir . && \
pip install --no-cache-dir "setuptools>=72.0.0,<80.0.0" && \
pip install --no-cache-dir jupyter

# Document the port used by the notebook server started in CMD.
EXPOSE 8888

# Start Jupyter Notebook bound to all interfaces on port 8888, without opening
# a browser, and allow it to run as the root user.
CMD ["jupyter", "notebook", "--ip=0.0.0.0", "--port=8888", "--no-browser", "--allow-root"]


141 changes: 60 additions & 81 deletions src/ydata_profiling/model/handler.py
Original file line number Diff line number Diff line change
@@ -1,81 +1,60 @@
"""
Auxiliary handler methods for data summary extraction
"""
from typing import Any, Callable, Dict, List, Sequence

import networkx as nx
from visions import VisionsTypeset


def compose(functions: Sequence[Callable]) -> Callable:
    """
    Chain a sequence of functions into a single callable.

    The returned callable feeds its positional arguments to the first
    function and passes each result on to the next one. A tuple result is
    unpacked into positional arguments for the next function; any other
    result is passed as a single argument. With an empty sequence the
    callable returns its arguments unchanged, as a tuple.

    :param functions: sequence of functions, applied in order
    :return: callable applying all functions in order
    """

    def composed_function(*args) -> List[Any]:
        current = args  # the initial "result" is the tuple of inputs
        for step in functions:
            if isinstance(current, tuple):
                # Tuples are spread over the next function's parameters.
                current = step(*current)
            else:
                current = step(current)
        return current  # type: ignore

    return composed_function  # type: ignore


class Handler:
    """Generic dispatcher from data type names to lists of functions.

    The mapping passed in is stored as-is and completed in place on
    construction (see ``_complete_dag``).
    """

    def __init__(
        self,
        mapping: Dict[str, List[Callable]],
        typeset: VisionsTypeset,
        *args,
        **kwargs
    ):
        """
        :param mapping: type name -> list of functions; modified in place
        :param typeset: typeset whose ``base_graph`` relates the types
        :param args: accepted but not used here
        :param kwargs: accepted but not used here
        """
        self.mapping = mapping
        self.typeset = typeset
        self._complete_dag()

    def _complete_dag(self) -> None:
        """Prepend each source type's functions to its target type's list.

        The edges of ``typeset.base_graph`` are visited in a topological
        order of the graph's line graph. Both endpoints of every edge must
        already be keys of ``self.mapping`` (looked up by ``str(type)``),
        otherwise a ``KeyError`` is raised.
        """
        edge_graph = nx.line_graph(self.typeset.base_graph)
        for source, target in nx.topological_sort(edge_graph):
            source_key, target_key = str(source), str(target)
            self.mapping[target_key] = (
                self.mapping[source_key] + self.mapping[target_key]
            )

    def handle(self, dtype: str, *args, **kwargs) -> dict:
        """Run the functions registered for ``dtype`` on ``args``.

        The functions are chained with ``compose``; an unknown ``dtype``
        uses an empty chain. ``kwargs`` are accepted but not forwarded.

        Returns:
            The last element of the chain's result.
        """
        pipeline = compose(self.mapping.get(dtype, []))
        return pipeline(*args)[-1]


def get_render_map() -> Dict[str, Callable]:
    """Return the mapping from variable type name to its render function.

    The render functions are imported inside the function body, so the
    import only happens when the map is requested.
    """
    import ydata_profiling.report.structure.variables as renderers

    return {
        "Boolean": renderers.render_boolean,
        "Numeric": renderers.render_real,
        "Complex": renderers.render_complex,
        "Text": renderers.render_text,
        "DateTime": renderers.render_date,
        "Categorical": renderers.render_categorical,
        "URL": renderers.render_url,
        "Path": renderers.render_path,
        "File": renderers.render_file,
        "Image": renderers.render_image,
        "Unsupported": renderers.render_generic,
        "TimeSeries": renderers.render_timeseries,
    }
"""
Auxiliary handler methods for data summary extraction
"""
from typing import Any, Callable, Dict, List, Sequence

import networkx as nx
from visions import VisionsTypeset


def compose(functions: Sequence[Callable]) -> Callable:
    """
    Build one callable that applies the given functions in order.

    The callable's positional arguments go to the first function. Each
    result becomes the input of the next function: tuples are unpacked
    into positional arguments, anything else is passed as one argument.
    If ``functions`` is empty, the callable returns its arguments as a
    tuple.

    :param functions: sequence of functions
    :return: combined function applying all functions in order
    """

    def composed_function(*args) -> List[Any]:
        value = args  # seed the chain with the raw positional arguments
        for fn in functions:
            if not isinstance(value, tuple):
                value = fn(value)
            else:
                # A tuple is treated as an argument list for the next call.
                value = fn(*value)
        return value  # type: ignore

    return composed_function  # type: ignore


class Handler:
    """A generic handler mapping data type names to function lists.

    Construction stores the given mapping without copying it and then
    extends it in place via ``_complete_dag``.
    """

    def __init__(
        self,
        mapping: Dict[str, List[Callable]],
        typeset: VisionsTypeset,
        *args,
        **kwargs
    ):
        """
        :param mapping: type name -> list of functions; updated in place
        :param typeset: typeset providing the ``base_graph`` of types
        :param args: extra positional arguments, ignored by this class
        :param kwargs: extra keyword arguments, ignored by this class
        """
        self.mapping = mapping
        self.typeset = typeset
        self._complete_dag()

    def _complete_dag(self) -> None:
        """Extend every edge target's functions with its source's functions.

        Iterates over the edges of ``typeset.base_graph`` in a topological
        order of its line graph and, for each ``(source, target)`` edge,
        puts the source's list in front of the target's list. Types are
        looked up by ``str(type)``; a missing key raises ``KeyError``.
        """
        ordered_edges = nx.topological_sort(
            nx.line_graph(self.typeset.base_graph)
        )
        for parent, child in ordered_edges:
            inherited = self.mapping[str(parent)]
            own = self.mapping[str(child)]
            self.mapping[str(child)] = inherited + own

    def handle(self, dtype: str, *args, **kwargs) -> dict:
        """Apply the function chain registered for ``dtype`` to ``args``.

        An unregistered ``dtype`` yields an empty chain. ``kwargs`` are
        accepted but not passed to the chain.

        Returns:
            The last element of the value produced by the chain.
        """
        chain = compose(self.mapping.get(dtype, []))
        outcome = chain(*args)
        return outcome[-1]
30 changes: 30 additions & 0 deletions src/ydata_profiling/report/structure/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,31 @@
"""Data structure for the report"""
from typing import Callable, Dict


def get_render_map() -> Dict[str, Callable]:
    """Return the render function registered for each variable type.

    Moved here from ``model.handler`` so that the model layer no longer
    depends on the report layer. The render functions are imported inside
    the function body rather than at module import time.

    Returns:
        Dictionary mapping type names to render functions.
    """
    import ydata_profiling.report.structure.variables as renderers

    return {
        "Boolean": renderers.render_boolean,
        "Numeric": renderers.render_real,
        "Complex": renderers.render_complex,
        "Text": renderers.render_text,
        "DateTime": renderers.render_date,
        "Categorical": renderers.render_categorical,
        "URL": renderers.render_url,
        "Path": renderers.render_path,
        "File": renderers.render_file,
        "Image": renderers.render_image,
        "Unsupported": renderers.render_generic,
        "TimeSeries": renderers.render_timeseries,
    }
2 changes: 1 addition & 1 deletion src/ydata_profiling/report/structure/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from ydata_profiling.config import Settings
from ydata_profiling.model import BaseDescription
from ydata_profiling.model.alerts import AlertType
from ydata_profiling.model.handler import get_render_map
from ydata_profiling.report.structure import get_render_map
from ydata_profiling.report.presentation.core import (
HTML,
Collapse,
Expand Down
2 changes: 1 addition & 1 deletion src/ydata_profiling/utils/backend.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
File with a function to check the backend being used
Backend detection utilities for pandas and spark.
"""
import importlib

Expand Down