8 changes: 4 additions & 4 deletions .github/workflows/pull-request.yml
@@ -40,7 +40,7 @@ jobs:
- name: Set up Python 3.11
uses: actions/setup-python@v5
with:
python-version: "3.13"
python-version: '3.11'

- uses: actions/cache@v4
name: Cache pip dependencies
@@ -52,7 +52,7 @@ jobs:

- name: Install pip dependencies
run: |
python -m pip install --upgrade pip setuptools
python -m pip install --upgrade pip
python -m pip install ".[dev,test]"

- name: Install pre-commit hooks
@@ -93,7 +93,7 @@ jobs:
- name: Setup Python 3.11
uses: actions/setup-python@v5
with:
python-version: "3.13"
python-version: '3.11'

- name: Cache pip dependencies
id: cache
@@ -105,7 +105,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install ".[dev,test,docs]"
python -m pip install ".[docs]"

- name: Install the package
run: make install
5 changes: 2 additions & 3 deletions .github/workflows/release.yml
@@ -80,7 +80,6 @@ jobs:
name: built-artifacts
path: dist/

- uses: pypa/gh-action-pypi-publish@v1.12.4
- uses: pypa/gh-action-pypi-publish@release/v1
with:
user: __token__
password: ${{ secrets.PYPI_API_TOKEN }}
packages-dir: dist/
4 changes: 1 addition & 3 deletions MANIFEST.in
@@ -4,8 +4,6 @@ include requirements*.txt
# Include license, Readme, etc.
include LICENSE
include *.md
include mypy.ini
include src/ydata_profiling/py.typed

# Templates and static resources
recursive-include src/ydata_profiling/report/presentation/flavours/html/templates *.html *.js *.css
@@ -20,7 +18,7 @@ recursive-include venv *.yml
exclude .pre-commit-config.yaml
exclude commitlint.config.js
exclude .releaserc.json
include Makefile make.bat
exclude Makefile make.bat
exclude docs examples tests docsrc .devcontainer
recursive-exclude docs *
recursive-exclude docsrc *
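With include mypy.ini and include src/ydata_profiling/py.typed gone from MANIFEST.in (and Makefile/make.bat now excluded rather than included), the py.typed marker is left to [tool.setuptools.package-data] in pyproject.toml below. Whether the marker and the excluded files actually end up in a built sdist can be checked by listing the archive; a minimal sketch, with the dist/ path and archive name as assumptions:

    import glob
    import tarfile

    # Most recently built sdist; adjust the glob to the actual version/location.
    sdist_path = sorted(glob.glob("dist/ydata_profiling-*.tar.gz"))[-1]

    with tarfile.open(sdist_path, "r:gz") as sdist:
        names = sdist.getnames()

    print("py.typed shipped:", any(n.endswith("ydata_profiling/py.typed") for n in names))
    print("Makefile shipped:", any(n.endswith("/Makefile") for n in names))  # expected: False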
81 changes: 50 additions & 31 deletions pyproject.toml
@@ -1,18 +1,25 @@
[build-system]
requires = ["setuptools"]
build-backend = "setuptools.build_meta"
requires = [
"setuptools>=72.0.0,<80.0.0",
"setuptools-scm>=8.0.0,<9.0.0",
"wheel>=0.38.4,<1.0.0"
]

[packaging]
package_name = "ydata-profiling"

[project]
name = "ydata-profiling"
requires-python = ">=3.7,<3.13"
authors = [
{name = "YData Labs Inc", email = "opensource@ydata.ai"},
{name = "YData Labs Inc", email = "opensource@ydata.ai"}
]
description="Generate profile report for pandas DataFrame"
description = "Generate profile report for pandas DataFrame"
keywords = ["pandas", "data-science", "data-analysis", "python", "jupyter", "ipython"]
readme = "README.md"
requires-python=">=3.7, <3.13"
keywords=["pandas", "data-science", "data-analysis", "python", "jupyter", "ipython"]
license = {text = "MIT"}
classifiers=[
license = {file = "LICENSE.md"}
classifiers = [
"Development Status :: 5 - Production/Stable",
"Topic :: Software Development :: Build Tools",
"License :: OSI Approved :: MIT License",
@@ -63,10 +70,11 @@ dependencies = [
"numba>=0.56.0, <1",
]

dynamic = ["version"]
dynamic = [
"version",
]

[project.optional-dependencies]
# dependencies for development and testing
dev = [
"black>=20.8b1",
"isort>=5.0.7",
@@ -80,6 +88,22 @@ dev = [
"sphinx-multiversion>=0.2.3",
"autodoc_pydantic",
]

docs = [
"mkdocs>=1.6.0,<1.7.0",
"mkdocs-material>=9.0.12,<10.0.0",
"mkdocs-material-extensions>=1.1.1,<2.0.0",
"mkdocs-table-reader-plugin<=2.2.0",
"mike>=2.1.1,<2.2.0",
"mkdocstrings[python]>=0.20.0,<1.0.0",
"mkdocs-badges",
]

notebook = [
"jupyter>=1.0.0",
"ipywidgets>=7.5.1",
]

# this provides the recommended pyspark and pyarrow versions for spark to work on pandas-profiling
# note that if you are using pyspark 2.3 or 2.4 and pyarrow >= 0.15, you might need to
# set ARROW_PRE_0_15_IPC_FORMAT=1 in your conf/spark-env.sh for toPandas functions to work properly
@@ -90,6 +114,7 @@ spark = [
"numpy>=1.16.0,<1.24",
"visions[type_image_path]>=0.7.5, <0.7.7",
]

test = [
"pytest",
"coverage>=6.5, <8",
@@ -100,35 +125,29 @@ test = [
"twine>=3.1.1",
"kaggle",
]
notebook = [
"jupyter>=1.0.0",
"ipywidgets>=7.5.1",
]
docs = [
"mkdocs>=1.6.0,<1.7.0",
"mkdocs-material>=9.0.12,<10.0.0",
"mkdocs-material-extensions>=1.1.1,<2.0.0",
"mkdocs-table-reader-plugin<=2.2.0",
"mike>=2.1.1,<2.2.0",
"mkdocstrings[python]>=0.20.0,<1.0.0",
"mkdocs-badges",
]

unicode= [
"tangled-up-in-unicode==0.2.0",
]

[tool.setuptools.packages.find]
where = ["src"]
[project.urls]
Homepage = "https://ydata.ai"
Repository = "https://github.com/ydataai/ydata-profiling"

[tool.setuptools.package-data]
ydata_profiling = ["py.typed"]
[project.scripts]
ydata_profiling = "ydata_profiling.controller.console:main"
pandas_profiling = "ydata_profiling.controller.console:main"

# setuptools relative

[tool.setuptools]
include-package-data = true

[project.scripts]
ydata_profiling = "ydata_profiling.controller.console:main"
pandas_profiling = "ydata_profiling.controller.console:main"
[tool.setuptools.package-data]
ydata_profiling = ["py.typed"]

[project.urls]
homepage = "https://github.com/ydataai/ydata-profiling"
[tool.distutils.bdist_wheel]
universal = true

[tool.setuptools.package-dir]
"" = "src"
5 changes: 0 additions & 5 deletions setup.py
@@ -4,8 +4,6 @@

# Read the contents of README file
source_root = Path(".")
with (source_root / "README.md").open(encoding="utf-8") as f:
long_description = f.read()

try:
version = (source_root / "VERSION").read_text().rstrip("\n")
@@ -17,7 +15,4 @@

setup(
version=version,
long_description=long_description,
long_description_content_type="text/markdown",
options={"bdist_wheel": {"universal": True}},
)
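With the long_description handling and the bdist_wheel option removed (README and wheel settings now live in pyproject.toml), setup.py shrinks to a version shim. Roughly what remains, reconstructed from the unchanged context lines (the exception branch is elided in the diff, so the fallback below is an assumption):

    from pathlib import Path

    from setuptools import setup

    source_root = Path(".")

    try:
        version = (source_root / "VERSION").read_text().rstrip("\n")
    except FileNotFoundError:
        # Fallback value is illustrative only; the real branch is not shown here.
        version = "0.0.0"

    setup(
        version=version,
    )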
9 changes: 6 additions & 3 deletions src/ydata_profiling/model/correlations.py
@@ -1,4 +1,7 @@
# mypy: ignore-errors

"""Correlations between variables."""

import warnings
from typing import Dict, List, Optional, Sized

@@ -20,16 +23,16 @@ def __init__(self, df: Sized):
"""Determine backend once and store it for all correlation computations."""
if isinstance(df, pd.DataFrame):
from ydata_profiling.model.pandas import (
correlations_pandas as correlation_backend, #type: ignore
correlations_pandas as correlation_backend, # type: ignore
)
else:
from ydata_profiling.model.spark import (
correlations_spark as correlation_backend, # type: ignore
correlations_spark as correlation_backend, # type: ignore
)

self.backend = correlation_backend

def get_method(self, method_name: str):
def get_method(self, method_name: str): # noqa: ANN201
"""Retrieve the appropriate correlation method class from the backend."""
if hasattr(self.backend, method_name):
return getattr(self.backend, method_name)
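The constructor shown above picks the correlation backend module once, based on whether the input is a pandas DataFrame, and get_method then resolves individual correlation implementations by attribute name. The dispatch pattern in isolation, with stand-in backend objects rather than the real correlations_pandas / correlations_spark modules (the class name and the error branch are assumptions, since the diff elides them):

    from types import SimpleNamespace
    from typing import Any

    import pandas as pd

    # Stand-in backends; the library imports correlations_pandas / correlations_spark.
    pandas_backend = SimpleNamespace(spearman=lambda df: df.corr(method="spearman"))
    spark_backend = SimpleNamespace(spearman=lambda df: NotImplemented)


    class Correlations:
        def __init__(self, df: Any) -> None:
            # Determine the backend once and reuse it for every correlation computation.
            self.backend = pandas_backend if isinstance(df, pd.DataFrame) else spark_backend

        def get_method(self, method_name: str) -> Any:
            if hasattr(self.backend, method_name):
                return getattr(self.backend, method_name)
            raise ValueError(f"Unsupported correlation method: {method_name}")


    df = pd.DataFrame({"a": [1, 2, 3], "b": [3, 2, 1]})
    print(Correlations(df).get_method("spearman")(df))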
7 changes: 4 additions & 3 deletions src/ydata_profiling/model/dataframe.py
@@ -3,17 +3,18 @@

import pandas as pd

from ydata_profiling.config import Settings
from ydata_profiling.model.pandas.dataframe_pandas import pandas_preprocess

spec = importlib.util.find_spec("pyspark")
if spec is None:
from typing import TypeVar

sparkDataFrame = TypeVar("sparkDataFrame")
else:
from pyspark.sql import DataFrame as sparkDataFrame # type: ignore
from ydata_profiling.model.spark.dataframe_spark import spark_preprocess

from ydata_profiling.config import Settings
from ydata_profiling.model.pandas.dataframe_pandas import pandas_preprocess
from ydata_profiling.model.spark.dataframe_spark import spark_preprocess


def preprocess(config: Settings, df: Any) -> Any:
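The reshuffled imports make pyspark strictly optional: importlib.util.find_spec decides at import time whether the real pyspark DataFrame type and spark_preprocess are loaded, and a TypeVar placeholder keeps the sparkDataFrame name defined otherwise. The guard pattern on its own (a minimal sketch of the technique, not the full module):

    import importlib.util
    from typing import TypeVar

    spec = importlib.util.find_spec("pyspark")
    if spec is None:
        # pyspark is not installed: bind a placeholder so annotations that
        # mention sparkDataFrame still resolve without importing Spark.
        sparkDataFrame = TypeVar("sparkDataFrame")
    else:
        from pyspark.sql import DataFrame as sparkDataFrame  # type: ignore

    print("pyspark available:", spec is not None)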
2 changes: 1 addition & 1 deletion src/ydata_profiling/model/describe.py
@@ -29,7 +29,7 @@

def describe(
config: Settings,
df: Union[pd.DataFrame, "pyspark.sql.DataFrame"], # type: ignore
df: Union[pd.DataFrame, "pyspark.sql.DataFrame"], # type: ignore[name-defined] # noqa: F821
summarizer: BaseSummarizer,
typeset: VisionsTypeset,
sample: Optional[dict] = None,
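The quoted "pyspark.sql.DataFrame" is a forward reference that only type checkers resolve, which is why the line carries the narrowed # type: ignore[name-defined] plus # noqa: F821 when pyspark may never be imported at runtime. For comparison only (this is not what the PR does), the same intent is often written with a TYPE_CHECKING guard:

    from typing import TYPE_CHECKING, Union

    import pandas as pd

    if TYPE_CHECKING:
        # Evaluated by static type checkers only, never at runtime.
        from pyspark.sql import DataFrame as SparkDataFrame


    def describe(df: Union[pd.DataFrame, "SparkDataFrame"]) -> dict:
        # Illustrative stub; the real describe() also takes config, summarizer, typeset, ...
        return {"type": type(df).__name__}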
6 changes: 3 additions & 3 deletions src/ydata_profiling/model/handler.py
@@ -15,13 +15,13 @@ def compose(functions: Sequence[Callable]) -> Callable:
:return: combined function applying all functions in order.
"""

def composed_function(*args):
def composed_function(*args) -> List[Any]:
result = args # Start with the input arguments
for func in functions:
result = func(*result) if isinstance(result, tuple) else func(result)
return result
return result # type: ignore

return composed_function
return composed_function # type: ignore


class Handler:
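compose feeds each function's output into the next, unpacking tuples so a step can pass along multiple values; the added return annotation and # type: ignore comments only quiet mypy about that loosely-typed chain. A stand-alone usage sketch of the same composition behaviour, with toy steps:

    from typing import Callable, Sequence


    def compose(functions: Sequence[Callable]) -> Callable:
        def composed_function(*args):
            result = args  # Start with the input arguments
            for func in functions:
                result = func(*result) if isinstance(result, tuple) else func(result)
            return result

        return composed_function


    def split(values: list) -> tuple:
        return values[:2], values[2:]


    def head_sum(left: list, right: list) -> int:
        return sum(left)


    pipeline = compose([split, head_sum])
    print(pipeline([1, 2, 3, 4]))  # -> 3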
2 changes: 0 additions & 2 deletions src/ydata_profiling/model/pandas/dataframe_pandas.py
@@ -1,5 +1,3 @@
import warnings

import pandas as pd

from ydata_profiling.config import Settings
8 changes: 5 additions & 3 deletions src/ydata_profiling/model/pandas/summary_pandas.py
@@ -1,7 +1,7 @@
"""Compute statistical description of datasets."""
import multiprocessing
from concurrent.futures import ThreadPoolExecutor
from typing import Tuple
from typing import Any, Tuple

import numpy as np
import pandas as pd
@@ -12,6 +12,8 @@
from ydata_profiling.model.typeset import ProfilingTypeSet
from ydata_profiling.utils.dataframe import sort_column_names

BaseSummarizer: Any = "BaseSummarizer" # type: ignore


def _is_cast_type_defined(typeset: VisionsTypeset, series: str) -> bool:
return isinstance(typeset, ProfilingTypeSet) and series in typeset.type_schema
@@ -20,7 +22,7 @@ def _is_cast_type_defined(typeset: VisionsTypeset, series: str) -> bool:
def pandas_describe_1d(
config: Settings,
series: pd.Series,
summarizer: "BaseSummarizer", # type:ignore
summarizer: BaseSummarizer,
typeset: VisionsTypeset,
) -> dict:
"""Describe a series (infer the variable type, then calculate type-specific values).
@@ -67,7 +69,7 @@ def pandas_describe_1d(
def pandas_get_series_descriptions(
config: Settings,
df: pd.DataFrame,
summarizer: "BaseSummarizer", # type:ignore
summarizer: BaseSummarizer,
typeset: VisionsTypeset,
pbar: tqdm,
) -> dict:
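Instead of quoting "BaseSummarizer" in each signature, the module now defines BaseSummarizer: Any = "BaseSummarizer" once at the top: the annotations stay readable, nothing heavy is imported at module load time, and mypy simply sees Any. The trick in isolation (the names below are made up for illustration):

    from typing import Any

    # Placeholder alias: avoids importing the real class (e.g. to break an import
    # cycle) while keeping the parameter annotation readable; type checkers see Any.
    BaseSummarizer: Any = "BaseSummarizer"


    def describe_1d(series: list, summarizer: BaseSummarizer) -> dict:
        return {"n": len(series), "summarizer": type(summarizer).__name__}


    print(describe_1d([1, 2, 3], summarizer=object()))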
2 changes: 1 addition & 1 deletion src/ydata_profiling/model/spark/__init__.py
@@ -29,7 +29,7 @@
for name in dir(module)
if not name.startswith("_")
}
) # type: ignore
) # type: ignore

# Explicitly list all available functions
__all__ = [
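The fragment above is the tail of a dict comprehension that gathers a backend module's public names; combined with the explicit __all__ that follows, this re-exports those names from the package. The call wrapping the comprehension is not visible in the hunk, so the globals().update() below is an assumption, sketched against the standard json module purely for illustration:

    import importlib

    module = importlib.import_module("json")  # stand-in for the real spark backend module

    # Copy every public name from the source module into this namespace.
    globals().update(
        {
            name: getattr(module, name)
            for name in dir(module)
            if not name.startswith("_")
        }
    )

    # Explicitly list the re-exported names that form the public surface.
    __all__ = ["dumps", "loads"]

    print(dumps({"ok": True}))  # noqa: F821 - injected by globals().update above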