Skip to content

Commit eb8660b

Browse files
aquemy and azory-ydata authored
fix: consider all nan series as numerical (#1513)
* fix: consider all nan series as numerical
* fix(linting): code formatting
---------
Co-authored-by: Azory YData Bot <azory@ydata.ai>
1 parent 5bacd6b commit eb8660b

3 files changed

Lines changed: 49 additions & 35 deletions

File tree

src/ydata_profiling/model/pandas/describe_numeric_pandas.py

Lines changed: 32 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -50,19 +50,30 @@ def numeric_stats_numpy(
5050
index_values = vc.index.values
5151

5252
# FIXME: can be performance optimized by using weights in std, var, kurt and skew...
53-
54-
return {
55-
"mean": np.average(index_values, weights=vc.values),
56-
"std": np.std(present_values, ddof=1),
57-
"variance": np.var(present_values, ddof=1),
58-
"min": np.min(index_values),
59-
"max": np.max(index_values),
60-
# Unbiased kurtosis obtained using Fisher's definition (kurtosis of normal == 0.0). Normalized by N-1.
61-
"kurtosis": series.kurt(),
62-
# Unbiased skew normalized by N-1
63-
"skewness": series.skew(),
64-
"sum": np.dot(index_values, vc.values),
65-
}
53+
if len(index_values):
54+
return {
55+
"mean": np.average(index_values, weights=vc.values),
56+
"std": np.std(present_values, ddof=1),
57+
"variance": np.var(present_values, ddof=1),
58+
"min": np.min(index_values),
59+
"max": np.max(index_values),
60+
# Unbiased kurtosis obtained using Fisher's definition (kurtosis of normal == 0.0). Normalized by N-1.
61+
"kurtosis": series.kurt(),
62+
# Unbiased skew normalized by N-1
63+
"skewness": series.skew(),
64+
"sum": np.dot(index_values, vc.values),
65+
}
66+
else: # Empty numerical series
67+
return {
68+
"mean": np.nan,
69+
"std": 0.0,
70+
"variance": 0.0,
71+
"min": np.nan,
72+
"max": np.nan,
73+
"kurtosis": 0.0,
74+
"skewness": 0.0,
75+
"sum": 0,
76+
}
6677

6778

6879
@describe_numeric_1d.register
@@ -151,13 +162,14 @@ def pandas_describe_numeric_1d(
151162
else:
152163
stats["monotonic"] = 0
153164

154-
stats.update(
155-
histogram_compute(
156-
config,
157-
value_counts[~infinity_index].index.values,
158-
summary["n_distinct"],
159-
weights=value_counts[~infinity_index].values,
165+
if len(value_counts[~infinity_index].index.values) > 0:
166+
stats.update(
167+
histogram_compute(
168+
config,
169+
value_counts[~infinity_index].index.values,
170+
summary["n_distinct"],
171+
weights=value_counts[~infinity_index].values,
172+
)
160173
)
161-
)
162174

163175
return config, series, stats

src/ydata_profiling/report/structure/variables/render_real.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -118,12 +118,12 @@ def render_real(config: Settings, summary: dict) -> dict:
118118
style=config.html.style,
119119
)
120120

121-
if isinstance(summary["histogram"], list):
121+
if isinstance(summary.get("histogram", []), list):
122122
mini_histo = Image(
123123
mini_histogram(
124124
config,
125-
[x[0] for x in summary["histogram"]],
126-
[x[1] for x in summary["histogram"]],
125+
[x[0] for x in summary.get("histogram", [])],
126+
[x[1] for x in summary.get("histogram", [])],
127127
),
128128
image_format=image_format,
129129
alt="Mini histogram",
@@ -243,13 +243,14 @@ def render_real(config: Settings, summary: dict) -> dict:
243243
sequence_type="grid",
244244
)
245245

246-
if isinstance(summary["histogram"], list):
246+
if isinstance(summary.get("histogram", []), list):
247247
hist_data = histogram(
248248
config,
249-
[x[0] for x in summary["histogram"]],
250-
[x[1] for x in summary["histogram"]],
249+
[x[0] for x in summary.get("histogram", [])],
250+
[x[1] for x in summary.get("histogram", [])],
251251
)
252-
hist_caption = f"<strong>Histogram with fixed size bins</strong> (bins={len(summary['histogram'][0][1]) - 1})"
252+
bins = len(summary["histogram"][0][1]) - 1 if "histogram" in summary else 0
253+
hist_caption = f"<strong>Histogram with fixed size bins</strong> (bins={bins})"
253254
else:
254255
hist_data = histogram(config, *summary["histogram"])
255256
hist_caption = f"<strong>Histogram with fixed size bins</strong> (bins={len(summary['histogram'][1]) - 1})"

src/ydata_profiling/visualisation/plot.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -73,14 +73,15 @@ def _plot_histogram(
7373
plot = fig.add_subplot(111)
7474

7575
for idx in reversed(list(range(n_labels))):
76-
diff = np.diff(bins[idx])
77-
plot.bar(
78-
bins[idx][:-1] + diff / 2, # type: ignore
79-
series[idx],
80-
diff,
81-
facecolor=config.html.style.primary_colors[idx],
82-
alpha=0.6,
83-
)
76+
if len(bins):
77+
diff = np.diff(bins[idx])
78+
plot.bar(
79+
bins[idx][:-1] + diff / 2, # type: ignore
80+
series[idx],
81+
diff,
82+
facecolor=config.html.style.primary_colors[idx],
83+
alpha=0.6,
84+
)
8485

8586
if date:
8687
plot.xaxis.set_major_formatter(FuncFormatter(format_fn))

0 commit comments

Comments
 (0)