Commit 88035d8

committed
chore: fix linting errors
1 parent 3d5301d commit 88035d8

22 files changed: 60 additions & 47 deletions

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-default_stages: [commit, push, manual]
+default_stages: [pre-commit, pre-push, manual]
 
 repos:
   - repo: https://github.com/commitizen-tools/commitizen
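
Note: pre-commit 3.x renamed the legacy stage names `commit` and `push` to `pre-commit` and `pre-push`, so this rename is presumably what silences the tool's deprecation warning about the old names.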

src/ydata_profiling/model/correlations.py

Lines changed: 5 additions & 4 deletions
@@ -1,6 +1,6 @@
 """Correlations between variables."""
 import warnings
-from typing import Dict, List, Optional, Sized
+from typing import Dict, List, Optional, Sized, no_type_check
 
 import numpy as np
 import pandas as pd
@@ -16,20 +16,21 @@
 class CorrelationBackend:
     """Helper class to select and cache the appropriate correlation backend (Pandas or Spark)."""
 
+    @no_type_check
     def __init__(self, df: Sized):
         """Determine backend once and store it for all correlation computations."""
         if isinstance(df, pd.DataFrame):
             from ydata_profiling.model.pandas import (
-                correlations_pandas as correlation_backend, #type: ignore
+                correlations_pandas as correlation_backend,
             )
         else:
             from ydata_profiling.model.spark import (
-                correlations_spark as correlation_backend, # type: ignore
+                correlations_spark as correlation_backend,
             )
 
         self.backend = correlation_backend
 
-    def get_method(self, method_name: str):
+    def get_method(self, method_name: str):  # noqa: ANN201
         """Retrieve the appropriate correlation method class from the backend."""
         if hasattr(self.backend, method_name):
            return getattr(self.backend, method_name)
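
Note: `typing.no_type_check` makes the checker skip the decorated function wholesale, which is why the per-line `# type: ignore` comments on the conditional imports can go, and `ANN201` is flake8-annotations' "missing return type annotation for public function". A runnable toy mirroring the same dispatch pattern, with the standard-library `statistics` and `math` modules standing in for the pandas/Spark correlation backends (the missing-method branch is assumed, since the hunk cuts off after the `hasattr` check):

from typing import Any, no_type_check


class Backend:
    """Pick an implementation module once, then resolve methods by name."""

    @no_type_check  # the type checker skips this body entirely
    def __init__(self, data: Any):
        if isinstance(data, (list, tuple)):
            import statistics as backend  # stand-in for the pandas backend
        else:
            import math as backend  # stand-in for the Spark backend
        self.backend = backend

    def get_method(self, name: str):  # noqa: ANN201 - return type depends on the backend
        if hasattr(self.backend, name):
            return getattr(self.backend, name)
        raise AttributeError(name)


print(Backend([1, 2, 3]).get_method("mean")([1, 2, 3]))  # prints 2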

src/ydata_profiling/model/dataframe.py

Lines changed: 4 additions & 2 deletions
@@ -12,8 +12,10 @@
     from pyspark.sql import DataFrame as sparkDataFrame  # type: ignore
     from ydata_profiling.model.spark.dataframe_spark import spark_preprocess
 
-from ydata_profiling.config import Settings
-from ydata_profiling.model.pandas.dataframe_pandas import pandas_preprocess
+from ydata_profiling.config import Settings  # noqa: E402
+from ydata_profiling.model.pandas.dataframe_pandas import (  # noqa: E402
+    pandas_preprocess,
+)
 
 
 def preprocess(config: Settings, df: Any) -> Any:
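
Note: E402 ("module level import not at top of file") fires when executable code precedes a module-level import, here presumably the guarded pyspark setup above the hunk. A minimal, self-contained illustration of the rule itself, not of this file:

"""Module docstring."""
import sys

print("any executable statement before an import counts as code", file=sys.stderr)

import json  # noqa: E402 - module level import not at top of file

print(json.dumps({"ok": True}))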

src/ydata_profiling/model/describe.py

Lines changed: 3 additions & 2 deletions
@@ -1,6 +1,6 @@
 """Organize the calculation of statistics for each series in this DataFrame."""
 from datetime import datetime
-from typing import Any, Dict, Optional, Union
+from typing import Any, Dict, Optional, Union, no_type_check
 
 import pandas as pd
 from tqdm.auto import tqdm
@@ -27,9 +27,10 @@
 from ydata_profiling.version import __version__
 
 
+@no_type_check
 def describe(
     config: Settings,
-    df: Union[pd.DataFrame, "pyspark.sql.DataFrame"],  # type: ignore
+    df: Union[pd.DataFrame, "pyspark.sql.DataFrame"],  # noqa
     summarizer: BaseSummarizer,
     typeset: VisionsTypeset,
     sample: Optional[dict] = None,
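
Note: `"pyspark.sql.DataFrame"` is quoted so the annotation never has to import pyspark at runtime; `@no_type_check` plus the blanket `# noqa` keep mypy and flake8 from resolving a module that may not be installed. A sketch of the string-annotation idiom (the `TYPE_CHECKING` guard and `row_count` function are illustrative, not this file's code):

from typing import TYPE_CHECKING, Union

import pandas as pd

if TYPE_CHECKING:  # evaluated by type checkers only, never at runtime
    import pyspark.sql


def row_count(df: Union[pd.DataFrame, "pyspark.sql.DataFrame"]) -> int:
    return len(df) if isinstance(df, pd.DataFrame) else df.count()


print(row_count(pd.DataFrame({"a": [1, 2]})))  # prints 2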

src/ydata_profiling/model/handler.py

Lines changed: 2 additions & 2 deletions
@@ -1,7 +1,7 @@
 """
 Auxiliary handler methods for data summary extraction
 """
-from typing import Any, Callable, Dict, List, Sequence
+from typing import Callable, Dict, List, Sequence
 
 import networkx as nx
 from visions import VisionsTypeset
@@ -15,7 +15,7 @@ def compose(functions: Sequence[Callable]) -> Callable:
     :return: combined function applying all functions in order.
     """
 
-    def composed_function(*args):
+    def composed_function(*args):  # noqa: ANN201
         result = args  # Start with the input arguments
         for func in functions:
             result = func(*result) if isinstance(result, tuple) else func(result)
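
Note: `compose` chains the functions left to right, unpacking tuples as positional arguments between steps. A runnable sketch assembled from the hunk (the two `return` lines are assumed, since the hunk cuts off before them):

from typing import Callable, Sequence


def compose(functions: Sequence[Callable]) -> Callable:
    def composed_function(*args):  # noqa: ANN201
        result = args  # Start with the input arguments
        for func in functions:
            result = func(*result) if isinstance(result, tuple) else func(result)
        return result

    return composed_function


double, increment = (lambda x: x * 2), (lambda x: x + 1)
print(compose([double, increment])(3))  # double first, then increment: prints 7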

src/ydata_profiling/model/pandas/dataframe_pandas.py

Lines changed: 0 additions & 2 deletions
@@ -1,5 +1,3 @@
-import warnings
-
 import pandas as pd
 
 from ydata_profiling.config import Settings
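
Note: the dropped `import warnings` was evidently unused in this module, the fix for flake8's F401 ("module imported but unused").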

src/ydata_profiling/model/pandas/summary_pandas.py

Lines changed: 2 additions & 1 deletion
@@ -9,6 +9,7 @@
 from visions import VisionsTypeset
 
 from ydata_profiling.config import Settings
+from ydata_profiling.model.summarizer import BaseSummarizer
 from ydata_profiling.model.typeset import ProfilingTypeSet
 from ydata_profiling.utils.dataframe import sort_column_names
 
@@ -20,7 +21,7 @@ def _is_cast_type_defined(typeset: VisionsTypeset, series: str) -> bool:
 def pandas_describe_1d(
     config: Settings,
     series: pd.Series,
-    summarizer: "BaseSummarizer", # type:ignore
+    summarizer: BaseSummarizer,
     typeset: VisionsTypeset,
 ) -> dict:
     """Describe a series (infer the variable type, then calculate type-specific values).

src/ydata_profiling/model/spark/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -29,7 +29,7 @@
         for name in dir(module)
         if not name.startswith("_")
     }
-) # type: ignore
+)  # type: ignore
 
 # Explicitly list all available functions
 __all__ = [
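
Note: as rendered, the removed and added lines look identical; the difference is presumably whitespace before the inline comment, per pycodestyle E261 ("at least two spaces before inline comment"), which the reconstruction above reflects.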

src/ydata_profiling/model/spark/describe_counts_spark.py

Lines changed: 17 additions & 12 deletions
@@ -2,13 +2,15 @@
 Pyspark counts
 """
 from typing import Tuple
+
 import pandas as pd
+from pyspark.sql import DataFrame
+from pyspark.sql import functions as F
 
 from ydata_profiling.config import Settings
-
-from pyspark.sql import DataFrame, functions as F
 from ydata_profiling.model.summary_algorithms import describe_counts
 
+
 @describe_counts.register
 def describe_counts_spark(
     config: Settings, series: DataFrame, summary: dict
@@ -34,7 +36,9 @@ def describe_counts_spark(
     value_counts_index_sorted = value_counts.orderBy(F.asc(series.columns[0]))
 
     # Count missing values
-    n_missing = value_counts.filter(F.col(series.columns[0]).isNull()).select("count").first()
+    n_missing = (
+        value_counts.filter(F.col(series.columns[0]).isNull()).select("count").first()
+    )
     n_missing = n_missing["count"] if n_missing else 0
 
     # Convert top 200 values to Pandas for frequency table display
@@ -51,11 +55,9 @@ def describe_counts_spark(
 
     column = series.columns[0]
 
-
-    if series.dtypes[0][1] in ('int', 'float', 'bigint', 'double'):
+    if series.dtypes[0][1] in ("int", "float", "bigint", "double"):
         value_counts_no_nan = (
-            value_counts
-            .filter(F.col(column).isNotNull())  # Exclude NaNs
+            value_counts.filter(F.col(column).isNotNull())  # Exclude NaNs
             .filter(~F.isnan(F.col(column)))  # Remove implicit NaNs (if numeric column)
             .groupBy(column)  # Group by unique values
             .count()  # Count occurrences
@@ -64,8 +66,7 @@ def describe_counts_spark(
         )
     else:
         value_counts_no_nan = (
-            value_counts
-            .filter(F.col(column).isNotNull())  # Exclude NULLs
+            value_counts.filter(F.col(column).isNotNull())  # Exclude NULLs
             .groupBy(column)  # Group by unique timestamp values
             .count()  # Count occurrences
             .orderBy(F.desc("count"))  # Sort by most frequent timestamps
@@ -75,8 +76,12 @@ def describe_counts_spark(
     # Convert to Pandas Series, forcing proper structure
     if value_counts_no_nan.count() > 0:
         pdf = value_counts_no_nan.toPandas().set_index(column)["count"]
-        summary["value_counts_without_nan"] = pd.Series(pdf) # Ensures it's always a Series
+        summary["value_counts_without_nan"] = pd.Series(
+            pdf
+        )  # Ensures it's always a Series
     else:
-        summary["value_counts_without_nan"] = pd.Series(dtype=int) # Ensures an empty Series
+        summary["value_counts_without_nan"] = pd.Series(
+            dtype=int
+        )  # Ensures an empty Series
 
-    return config, series, summary
+    return config, series, summary
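
Note: the reflowed method chains are behavior-preserving (line-length and quote-style fixes only). For reference, a self-contained toy showing the same null/NaN-aware counting pattern; the column name `x` and the local session are illustrative:

from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.master("local[1]").getOrCreate()
df = spark.createDataFrame([(1.0,), (1.0,), (float("nan"),), (None,)], ["x"])

value_counts_no_nan = (
    df.filter(F.col("x").isNotNull())  # Exclude NULLs
    .filter(~F.isnan(F.col("x")))  # Exclude implicit NaNs (numeric columns)
    .groupBy("x")  # Group by unique values
    .count()  # Count occurrences
    .orderBy(F.desc("count"))  # Sort by most frequent values
)
value_counts_no_nan.show()  # one row: x=1.0, count=2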

src/ydata_profiling/model/spark/duplicates_spark.py

Lines changed: 1 addition & 0 deletions
@@ -6,6 +6,7 @@
 from ydata_profiling.config import Settings
 from ydata_profiling.model.duplicates import get_duplicates
 
+
 @get_duplicates.register
 def get_duplicates_spark(
     config: Settings, df: DataFrame, supported_columns: Sequence
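
Note: the only change here is a second blank line before the decorated function, satisfying pycodestyle E302 ("expected 2 blank lines").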
