8 changes: 4 additions & 4 deletions .github/workflows/pull-request.yml
@@ -40,7 +40,7 @@ jobs:
- name: Set up Python 3.11
uses: actions/setup-python@v5
with:
python-version: "3.13"
python-version: '3.11'

- uses: actions/cache@v4
name: Cache pip dependencies
@@ -52,7 +52,7 @@ jobs:

- name: Install pip dependencies
run: |
python -m pip install --upgrade pip setuptools
python -m pip install --upgrade pip
python -m pip install ".[dev,test]"

- name: Install pre-commit hooks
@@ -93,7 +93,7 @@ jobs:
- name: Setup Python 3.11
uses: actions/setup-python@v5
with:
python-version: "3.13"
python-version: '3.11'

- name: Cache pip dependencies
id: cache
@@ -105,7 +105,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install ".[dev,test,docs]"
python -m pip install ".[docs]"

- name: Install the package
run: make install
5 changes: 2 additions & 3 deletions .github/workflows/release.yml
@@ -80,7 +80,6 @@ jobs:
name: built-artifacts
path: dist/

- uses: pypa/gh-action-pypi-publish@v1.12.4
- uses: pypa/gh-action-pypi-publish@release/v1
with:
user: __token__
password: ${{ secrets.PYPI_API_TOKEN }}
packages-dir: dist/
4 changes: 1 addition & 3 deletions MANIFEST.in
@@ -4,8 +4,6 @@ include requirements*.txt
# Include license, Readme, etc.
include LICENSE
include *.md
include mypy.ini
include src/ydata_profiling/py.typed

# Templates and static resources
recursive-include src/ydata_profiling/report/presentation/flavours/html/templates *.html *.js *.css
@@ -20,7 +18,7 @@ recursive-include venv *.yml
exclude .pre-commit-config.yaml
exclude commitlint.config.js
exclude .releaserc.json
include Makefile make.bat
exclude Makefile make.bat
exclude docs examples tests docsrc .devcontainer
recursive-exclude docs *
recursive-exclude docsrc *
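With include mypy.ini and include src/ydata_profiling/py.typed gone from MANIFEST.in (and Makefile/make.bat now excluded rather than included), the py.typed marker is left to [tool.setuptools.package-data] in pyproject.toml below. Whether the marker and the excluded files actually end up in a built sdist can be checked by listing the archive; a minimal sketch, with the dist/ path and archive name as assumptions:

    import glob
    import tarfile

    # Most recently built sdist; adjust the glob to the actual version/location.
    sdist_path = sorted(glob.glob("dist/ydata_profiling-*.tar.gz"))[-1]

    with tarfile.open(sdist_path, "r:gz") as sdist:
        names = sdist.getnames()

    print("py.typed shipped:", any(n.endswith("ydata_profiling/py.typed") for n in names))
    print("Makefile shipped:", any(n.endswith("/Makefile") for n in names))  # expected: False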
81 changes: 50 additions & 31 deletions pyproject.toml
@@ -1,18 +1,25 @@
[build-system]
requires = ["setuptools"]
build-backend = "setuptools.build_meta"
requires = [
"setuptools>=72.0.0,<80.0.0",
"setuptools-scm>=8.0.0,<9.0.0",
"wheel>=0.38.4,<1.0.0"
]

[packaging]
package_name = "ydata-profiling"

[project]
name = "ydata-profiling"
requires-python = ">=3.7,<3.13"
authors = [
{name = "YData Labs Inc", email = "opensource@ydata.ai"},
{name = "YData Labs Inc", email = "opensource@ydata.ai"}
]
description="Generate profile report for pandas DataFrame"
description = "Generate profile report for pandas DataFrame"
keywords = ["pandas", "data-science", "data-analysis", "python", "jupyter", "ipython"]
readme = "README.md"
requires-python=">=3.7, <3.13"
keywords=["pandas", "data-science", "data-analysis", "python", "jupyter", "ipython"]
license = {text = "MIT"}
classifiers=[
license = {file = "LICENSE.md"}
classifiers = [
"Development Status :: 5 - Production/Stable",
"Topic :: Software Development :: Build Tools",
"License :: OSI Approved :: MIT License",
@@ -63,10 +70,11 @@ dependencies = [
"numba>=0.56.0, <1",
]

dynamic = ["version"]
dynamic = [
"version",
]

[project.optional-dependencies]
# dependencies for development and testing
dev = [
"black>=20.8b1",
"isort>=5.0.7",
@@ -80,6 +88,22 @@ dev = [
"sphinx-multiversion>=0.2.3",
"autodoc_pydantic",
]

docs = [
"mkdocs>=1.6.0,<1.7.0",
"mkdocs-material>=9.0.12,<10.0.0",
"mkdocs-material-extensions>=1.1.1,<2.0.0",
"mkdocs-table-reader-plugin<=2.2.0",
"mike>=2.1.1,<2.2.0",
"mkdocstrings[python]>=0.20.0,<1.0.0",
"mkdocs-badges",
]

notebook = [
"jupyter>=1.0.0",
"ipywidgets>=7.5.1",
]

# this provides the recommended pyspark and pyarrow versions for spark to work on pandas-profiling
# note that if you are using pyspark 2.3 or 2.4 and pyarrow >= 0.15, you might need to
# set ARROW_PRE_0_15_IPC_FORMAT=1 in your conf/spark-env.sh for toPandas functions to work properly
@@ -90,6 +114,7 @@ spark = [
"numpy>=1.16.0,<1.24",
"visions[type_image_path]>=0.7.5, <0.7.7",
]

test = [
"pytest",
"coverage>=6.5, <8",
@@ -100,35 +125,29 @@ test = [
"twine>=3.1.1",
"kaggle",
]
notebook = [
"jupyter>=1.0.0",
"ipywidgets>=7.5.1",
]
docs = [
"mkdocs>=1.6.0,<1.7.0",
"mkdocs-material>=9.0.12,<10.0.0",
"mkdocs-material-extensions>=1.1.1,<2.0.0",
"mkdocs-table-reader-plugin<=2.2.0",
"mike>=2.1.1,<2.2.0",
"mkdocstrings[python]>=0.20.0,<1.0.0",
"mkdocs-badges",
]

unicode= [
"tangled-up-in-unicode==0.2.0",
]

[tool.setuptools.packages.find]
where = ["src"]
[project.urls]
Homepage = "https://ydata.ai"
Repository = "https://github.com/ydataai/ydata-profiling"

[tool.setuptools.package-data]
ydata_profiling = ["py.typed"]
[project.scripts]
ydata_profiling = "ydata_profiling.controller.console:main"
pandas_profiling = "ydata_profiling.controller.console:main"

# setuptools relative

[tool.setuptools]
include-package-data = true

[project.scripts]
ydata_profiling = "ydata_profiling.controller.console:main"
pandas_profiling = "ydata_profiling.controller.console:main"
[tool.setuptools.package-data]
ydata_profiling = ["py.typed"]

[project.urls]
homepage = "https://github.com/ydataai/ydata-profiling"
[tool.distutils.bdist_wheel]
universal = true

[tool.setuptools.package-dir]
"" = "src"
5 changes: 0 additions & 5 deletions setup.py
@@ -4,8 +4,6 @@

# Read the contents of README file
source_root = Path(".")
with (source_root / "README.md").open(encoding="utf-8") as f:
long_description = f.read()

try:
version = (source_root / "VERSION").read_text().rstrip("\n")
@@ -17,7 +15,4 @@

setup(
version=version,
long_description=long_description,
long_description_content_type="text/markdown",
options={"bdist_wheel": {"universal": True}},
)
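With the long_description handling and the bdist_wheel option removed (README and wheel settings now live in pyproject.toml), setup.py shrinks to a version shim. Roughly what remains, reconstructed from the unchanged context lines (the exception branch is elided in the diff, so the fallback below is an assumption):

    from pathlib import Path

    from setuptools import setup

    source_root = Path(".")

    try:
        version = (source_root / "VERSION").read_text().rstrip("\n")
    except FileNotFoundError:
        # Fallback value is illustrative only; the real branch is not shown here.
        version = "0.0.0"

    setup(
        version=version,
    )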
9 changes: 6 additions & 3 deletions src/ydata_profiling/model/correlations.py
@@ -1,4 +1,7 @@
# mypy: ignore-errors

"""Correlations between variables."""

import warnings
from typing import Dict, List, Optional, Sized

@@ -20,16 +23,16 @@ def __init__(self, df: Sized):
"""Determine backend once and store it for all correlation computations."""
if isinstance(df, pd.DataFrame):
from ydata_profiling.model.pandas import (
correlations_pandas as correlation_backend, #type: ignore
correlations_pandas as correlation_backend, # type: ignore
)
else:
from ydata_profiling.model.spark import (
correlations_spark as correlation_backend, # type: ignore
correlations_spark as correlation_backend, # type: ignore
)

self.backend = correlation_backend

def get_method(self, method_name: str):
def get_method(self, method_name: str): # noqa: ANN201
"""Retrieve the appropriate correlation method class from the backend."""
if hasattr(self.backend, method_name):
return getattr(self.backend, method_name)
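The constructor shown above picks the correlation backend module once, based on whether the input is a pandas DataFrame, and get_method then resolves individual correlation implementations by attribute name. The dispatch pattern in isolation, with stand-in backend objects rather than the real correlations_pandas / correlations_spark modules (the class name and the error branch are assumptions, since the diff elides them):

    from types import SimpleNamespace
    from typing import Any

    import pandas as pd

    # Stand-in backends; the library imports correlations_pandas / correlations_spark.
    pandas_backend = SimpleNamespace(spearman=lambda df: df.corr(method="spearman"))
    spark_backend = SimpleNamespace(spearman=lambda df: NotImplemented)


    class Correlations:
        def __init__(self, df: Any) -> None:
            # Determine the backend once and reuse it for every correlation computation.
            self.backend = pandas_backend if isinstance(df, pd.DataFrame) else spark_backend

        def get_method(self, method_name: str) -> Any:
            if hasattr(self.backend, method_name):
                return getattr(self.backend, method_name)
            raise ValueError(f"Unsupported correlation method: {method_name}")


    df = pd.DataFrame({"a": [1, 2, 3], "b": [3, 2, 1]})
    print(Correlations(df).get_method("spearman")(df))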
7 changes: 4 additions & 3 deletions src/ydata_profiling/model/dataframe.py
@@ -3,17 +3,18 @@

import pandas as pd

from ydata_profiling.config import Settings
from ydata_profiling.model.pandas.dataframe_pandas import pandas_preprocess

spec = importlib.util.find_spec("pyspark")
if spec is None:
from typing import TypeVar

sparkDataFrame = TypeVar("sparkDataFrame")
else:
from pyspark.sql import DataFrame as sparkDataFrame # type: ignore
from ydata_profiling.model.spark.dataframe_spark import spark_preprocess

from ydata_profiling.config import Settings
from ydata_profiling.model.pandas.dataframe_pandas import pandas_preprocess
from ydata_profiling.model.spark.dataframe_spark import spark_preprocess


def preprocess(config: Settings, df: Any) -> Any:
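The reshuffled imports make pyspark strictly optional: importlib.util.find_spec decides at import time whether the real pyspark DataFrame type and spark_preprocess are loaded, and a TypeVar placeholder keeps the sparkDataFrame name defined otherwise. The guard pattern on its own (a minimal sketch of the technique, not the full module):

    import importlib.util
    from typing import TypeVar

    spec = importlib.util.find_spec("pyspark")
    if spec is None:
        # pyspark is not installed: bind a placeholder so annotations that
        # mention sparkDataFrame still resolve without importing Spark.
        sparkDataFrame = TypeVar("sparkDataFrame")
    else:
        from pyspark.sql import DataFrame as sparkDataFrame  # type: ignore

    print("pyspark available:", spec is not None)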
2 changes: 1 addition & 1 deletion src/ydata_profiling/model/describe.py
@@ -29,7 +29,7 @@

def describe(
config: Settings,
df: Union[pd.DataFrame, "pyspark.sql.DataFrame"], # type: ignore
df: Union[pd.DataFrame, "pyspark.sql.DataFrame"], # type: ignore[name-defined] # noqa: F821
summarizer: BaseSummarizer,
typeset: VisionsTypeset,
sample: Optional[dict] = None,
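The quoted "pyspark.sql.DataFrame" is a forward reference that only type checkers resolve, which is why the line carries the narrowed # type: ignore[name-defined] plus # noqa: F821 when pyspark may never be imported at runtime. For comparison only (this is not what the PR does), the same intent is often written with a TYPE_CHECKING guard:

    from typing import TYPE_CHECKING, Union

    import pandas as pd

    if TYPE_CHECKING:
        # Evaluated by static type checkers only, never at runtime.
        from pyspark.sql import DataFrame as SparkDataFrame


    def describe(df: Union[pd.DataFrame, "SparkDataFrame"]) -> dict:
        # Illustrative stub; the real describe() also takes config, summarizer, typeset, ...
        return {"type": type(df).__name__}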
6 changes: 3 additions & 3 deletions src/ydata_profiling/model/handler.py
@@ -15,13 +15,13 @@ def compose(functions: Sequence[Callable]) -> Callable:
:return: combined function applying all functions in order.
"""

def composed_function(*args):
def composed_function(*args) -> List[Any]:
result = args # Start with the input arguments
for func in functions:
result = func(*result) if isinstance(result, tuple) else func(result)
return result
return result # type: ignore

return composed_function
return composed_function # type: ignore


class Handler:
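compose feeds each function's output into the next, unpacking tuples so a step can pass along multiple values; the added return annotation and # type: ignore comments only quiet mypy about that loosely-typed chain. A stand-alone usage sketch of the same composition behaviour, with toy steps:

    from typing import Callable, Sequence


    def compose(functions: Sequence[Callable]) -> Callable:
        def composed_function(*args):
            result = args  # Start with the input arguments
            for func in functions:
                result = func(*result) if isinstance(result, tuple) else func(result)
            return result

        return composed_function


    def split(values: list) -> tuple:
        return values[:2], values[2:]


    def head_sum(left: list, right: list) -> int:
        return sum(left)


    pipeline = compose([split, head_sum])
    print(pipeline([1, 2, 3, 4]))  # -> 3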
2 changes: 0 additions & 2 deletions src/ydata_profiling/model/pandas/dataframe_pandas.py
@@ -1,5 +1,3 @@
import warnings

import pandas as pd

from ydata_profiling.config import Settings
8 changes: 5 additions & 3 deletions src/ydata_profiling/model/pandas/summary_pandas.py
@@ -1,7 +1,7 @@
"""Compute statistical description of datasets."""
import multiprocessing
from concurrent.futures import ThreadPoolExecutor
from typing import Tuple
from typing import Any, Tuple

import numpy as np
import pandas as pd
@@ -12,6 +12,8 @@
from ydata_profiling.model.typeset import ProfilingTypeSet
from ydata_profiling.utils.dataframe import sort_column_names

BaseSummarizer: Any = "BaseSummarizer" # type: ignore


def _is_cast_type_defined(typeset: VisionsTypeset, series: str) -> bool:
return isinstance(typeset, ProfilingTypeSet) and series in typeset.type_schema
@@ -20,7 +22,7 @@ def _is_cast_type_defined(typeset: VisionsTypeset, series: str) -> bool:
def pandas_describe_1d(
config: Settings,
series: pd.Series,
summarizer: "BaseSummarizer", # type:ignore
summarizer: BaseSummarizer,
typeset: VisionsTypeset,
) -> dict:
"""Describe a series (infer the variable type, then calculate type-specific values).
@@ -67,7 +69,7 @@ def pandas_describe_1d(
def pandas_get_series_descriptions(
config: Settings,
df: pd.DataFrame,
summarizer: "BaseSummarizer", # type:ignore
summarizer: BaseSummarizer,
typeset: VisionsTypeset,
pbar: tqdm,
) -> dict:
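Instead of quoting "BaseSummarizer" in each signature, the module now defines BaseSummarizer: Any = "BaseSummarizer" once at the top: the annotations stay readable, nothing heavy is imported at module load time, and mypy simply sees Any. The trick in isolation (the names below are made up for illustration):

    from typing import Any

    # Placeholder alias: avoids importing the real class (e.g. to break an import
    # cycle) while keeping the parameter annotation readable; type checkers see Any.
    BaseSummarizer: Any = "BaseSummarizer"


    def describe_1d(series: list, summarizer: BaseSummarizer) -> dict:
        return {"n": len(series), "summarizer": type(summarizer).__name__}


    print(describe_1d([1, 2, 3], summarizer=object()))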
2 changes: 1 addition & 1 deletion src/ydata_profiling/model/spark/__init__.py
@@ -29,7 +29,7 @@
for name in dir(module)
if not name.startswith("_")
}
) # type: ignore
) # type: ignore

# Explicitly list all available functions
__all__ = [
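The fragment above is the tail of a dict comprehension that gathers a backend module's public names; combined with the explicit __all__ that follows, this re-exports those names from the package. The call wrapping the comprehension is not visible in the hunk, so the globals().update() below is an assumption, sketched against the standard json module purely for illustration:

    import importlib

    module = importlib.import_module("json")  # stand-in for the real spark backend module

    # Copy every public name from the source module into this namespace.
    globals().update(
        {
            name: getattr(module, name)
            for name in dir(module)
            if not name.startswith("_")
        }
    )

    # Explicitly list the re-exported names that form the public surface.
    __all__ = ["dumps", "loads"]

    print(dumps({"ok": True}))  # noqa: F821 - injected by globals().update above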