11"""Logic for alerting the user on possibly problematic patterns in the data (e.g. high number of zeros , constant
22values, high correlations)."""
33from enum import Enum , auto , unique
4- from typing import Any , Dict , List , Optional , Set
4+ from typing import Dict , List , Optional , Set
55
66import numpy as np
77import pandas as pd
88
99from ydata_profiling .config import Settings
1010from ydata_profiling .model .correlations import perform_check_correlation
11+ from ydata_profiling .model .var_description .default import VarDescription
1112
1213
1314def fmt_percent (value : float , edge_cases : bool = True ) -> str :
@@ -143,13 +144,13 @@ def __repr__(self):
143144class ConstantLengthAlert (Alert ):
144145 def __init__ (
145146 self ,
146- values : Optional [ Dict ] = None ,
147+ values : VarDescription ,
147148 column_name : Optional [str ] = None ,
148149 is_empty : bool = False ,
149150 ):
150151 super ().__init__ (
151152 alert_type = AlertType .CONSTANT_LENGTH ,
152- values = values ,
153+ values = values . var_specific ,
153154 column_name = column_name ,
154155 fields = {"composition_min_length" , "composition_max_length" },
155156 is_empty = is_empty ,
@@ -162,15 +163,14 @@ def _get_description(self) -> str:
162163class ConstantAlert (Alert ):
163164 def __init__ (
164165 self ,
165- values : Optional [ Dict ] = None ,
166+ values : VarDescription ,
166167 column_name : Optional [str ] = None ,
167168 is_empty : bool = False ,
168169 ):
169170 super ().__init__ (
170171 alert_type = AlertType .CONSTANT ,
171- values = values ,
172+ values = { "n_distinct" : values . n_distinct } ,
172173 column_name = column_name ,
173- fields = {"n_distinct" },
174174 is_empty = is_empty ,
175175 )
176176
@@ -181,7 +181,7 @@ def _get_description(self) -> str:
181181class DuplicatesAlert (Alert ):
182182 def __init__ (
183183 self ,
184- values : Optional [ Dict ] = None ,
184+ values : dict ,
185185 column_name : Optional [str ] = None ,
186186 is_empty : bool = False ,
187187 ):
@@ -203,15 +203,14 @@ def _get_description(self) -> str:
203203class EmptyAlert (Alert ):
204204 def __init__ (
205205 self ,
206- values : Optional [ Dict ] = None ,
206+ values : VarDescription ,
207207 column_name : Optional [str ] = None ,
208208 is_empty : bool = False ,
209209 ):
210210 super ().__init__ (
211211 alert_type = AlertType .EMPTY ,
212- values = values ,
212+ values = { "n" : values . n } ,
213213 column_name = column_name ,
214- fields = {"n" },
215214 is_empty = is_empty ,
216215 )
217216
@@ -222,15 +221,14 @@ def _get_description(self) -> str:
222221class HighCardinalityAlert (Alert ):
223222 def __init__ (
224223 self ,
225- values : Optional [ Dict ] = None ,
224+ values : VarDescription ,
226225 column_name : Optional [str ] = None ,
227226 is_empty : bool = False ,
228227 ):
229228 super ().__init__ (
230229 alert_type = AlertType .HIGH_CARDINALITY ,
231- values = values ,
230+ values = { "n_distinct" : values . n_distinct } ,
232231 column_name = column_name ,
233- fields = {"n_distinct" },
234232 is_empty = is_empty ,
235233 )
236234
@@ -244,7 +242,7 @@ def _get_description(self) -> str:
244242class HighCorrelationAlert (Alert ):
245243 def __init__ (
246244 self ,
247- values : Optional [ Dict ] = None ,
245+ values : Dict ,
248246 column_name : Optional [str ] = None ,
249247 is_empty : bool = False ,
250248 ):
@@ -270,13 +268,13 @@ def _get_description(self) -> str:
270268class ImbalanceAlert (Alert ):
271269 def __init__ (
272270 self ,
273- values : Optional [ Dict ] = None ,
271+ values : VarDescription ,
274272 column_name : Optional [str ] = None ,
275273 is_empty : bool = False ,
276274 ):
277275 super ().__init__ (
278276 alert_type = AlertType .IMBALANCE ,
279- values = values ,
277+ values = values . var_specific ,
280278 column_name = column_name ,
281279 fields = {"imbalance" },
282280 is_empty = is_empty ,
@@ -293,13 +291,13 @@ def _get_description(self) -> str:
293291class InfiniteAlert (Alert ):
294292 def __init__ (
295293 self ,
296- values : Optional [ Dict ] = None ,
294+ values : VarDescription ,
297295 column_name : Optional [str ] = None ,
298296 is_empty : bool = False ,
299297 ):
300298 super ().__init__ (
301299 alert_type = AlertType .INFINITE ,
302- values = values ,
300+ values = values . var_specific ,
303301 column_name = column_name ,
304302 fields = {"p_infinite" , "n_infinite" },
305303 is_empty = is_empty ,
@@ -315,15 +313,14 @@ def _get_description(self) -> str:
315313class MissingAlert (Alert ):
316314 def __init__ (
317315 self ,
318- values : Optional [ Dict ] = None ,
316+ values : VarDescription ,
319317 column_name : Optional [str ] = None ,
320318 is_empty : bool = False ,
321319 ):
322320 super ().__init__ (
323321 alert_type = AlertType .MISSING ,
324- values = values ,
322+ values = { "p_missing" : values . p_missing , "n_missing" : values . n_missing } ,
325323 column_name = column_name ,
326- fields = {"p_missing" , "n_missing" },
327324 is_empty = is_empty ,
328325 )
329326
@@ -373,13 +370,13 @@ def _get_description(self) -> str:
373370class SkewedAlert (Alert ):
374371 def __init__ (
375372 self ,
376- values : Optional [ Dict ] = None ,
373+ values : VarDescription ,
377374 column_name : Optional [str ] = None ,
378375 is_empty : bool = False ,
379376 ):
380377 super ().__init__ (
381378 alert_type = AlertType .SKEWED ,
382- values = values ,
379+ values = values . var_specific ,
383380 column_name = column_name ,
384381 fields = {"skewness" },
385382 is_empty = is_empty ,
@@ -432,15 +429,19 @@ def _get_description(self) -> str:
432429class UniqueAlert (Alert ):
433430 def __init__ (
434431 self ,
435- values : Optional [ Dict ] = None ,
432+ values : VarDescription ,
436433 column_name : Optional [str ] = None ,
437434 is_empty : bool = False ,
438435 ):
439436 super ().__init__ (
440437 alert_type = AlertType .UNIQUE ,
441- values = values ,
438+ values = {
439+ "n_distinct" : values .n_distinct ,
440+ "p_distinct" : values .p_distinct ,
441+ "n_unique" : values .n_unique ,
442+ "p_unique" : values .p_unique ,
443+ },
442444 column_name = column_name ,
443- fields = {"n_distinct" , "p_distinct" , "n_unique" , "p_unique" },
444445 is_empty = is_empty ,
445446 )
446447
@@ -469,13 +470,13 @@ def _get_description(self) -> str:
469470class ZerosAlert (Alert ):
470471 def __init__ (
471472 self ,
472- values : Optional [ Dict ] = None ,
473+ values : VarDescription ,
473474 column_name : Optional [str ] = None ,
474475 is_empty : bool = False ,
475476 ):
476477 super ().__init__ (
477478 alert_type = AlertType .ZEROS ,
478- values = values ,
479+ values = values . var_specific ,
479480 column_name = column_name ,
480481 fields = {"n_zeros" , "p_zeros" },
481482 is_empty = is_empty ,
@@ -531,7 +532,7 @@ def check_table_alerts(table: dict) -> List[Alert]:
531532 return alerts
532533
533534
534- def numeric_alerts (config : Settings , summary : dict ) -> List [Alert ]:
535+ def numeric_alerts (config : Settings , summary : VarDescription ) -> List [Alert ]:
535536 alerts : List [Alert ] = []
536537
537538 # Skewness
@@ -555,7 +556,7 @@ def numeric_alerts(config: Settings, summary: dict) -> List[Alert]:
555556 return alerts
556557
557558
558- def timeseries_alerts (config : Settings , summary : dict ) -> List [Alert ]:
559+ def timeseries_alerts (config : Settings , summary : VarDescription ) -> List [Alert ]:
559560 alerts : List [Alert ] = numeric_alerts (config , summary )
560561
561562 if not summary ["stationary" ]:
@@ -567,7 +568,7 @@ def timeseries_alerts(config: Settings, summary: dict) -> List[Alert]:
567568 return alerts
568569
569570
570- def categorical_alerts (config : Settings , summary : dict ) -> List [Alert ]:
571+ def categorical_alerts (config : Settings , summary : VarDescription ) -> List [Alert ]:
571572 alerts : List [Alert ] = []
572573
573574 # High cardinality
@@ -585,7 +586,7 @@ def categorical_alerts(config: Settings, summary: dict) -> List[Alert]:
585586
586587 # Constant length
587588 if "composition" in summary and summary ["min_length" ] == summary ["max_length" ]:
588- alerts .append (ConstantLengthAlert ())
589+ alerts .append (ConstantLengthAlert (summary ))
589590
590591 # Imbalance
591592 if (
@@ -596,46 +597,48 @@ def categorical_alerts(config: Settings, summary: dict) -> List[Alert]:
596597 return alerts
597598
598599
def boolean_alerts(config: Settings, summary: VarDescription) -> List[Alert]:
    """Collect the alerts that apply to a boolean variable.

    Args:
        config: Report settings; ``config.vars.bool.imbalance_threshold`` is
            read only when ``summary`` contains an ``"imbalance"`` entry.
        summary: Description of the variable. It must support ``in``
            membership tests and ``summary["imbalance"]`` lookup.

    Returns:
        A list holding one ``ImbalanceAlert`` when the imbalance score is
        strictly greater than the configured threshold, otherwise an empty
        list.
    """
    # Short-circuit keeps the threshold lookup lazy: it is skipped entirely
    # when no imbalance score is present.
    exceeds_threshold = (
        "imbalance" in summary
        and summary["imbalance"] > config.vars.bool.imbalance_threshold
    )
    return [ImbalanceAlert(summary)] if exceeds_threshold else []
608609
609610
def generic_alerts(summary: VarDescription) -> List[Alert]:
    """Collect the alerts that are checked for every variable.

    Args:
        summary: Description of the variable; its ``p_missing`` attribute is
            passed to ``alert_value``.

    Returns:
        A list holding one ``MissingAlert`` when ``alert_value`` returns a
        truthy result for ``summary.p_missing``, otherwise an empty list.
    """
    # Missing values are the only generic check performed here.
    if alert_value(summary.p_missing):
        return [MissingAlert(summary)]
    return []
618619
619620
def supported_alerts(summary: VarDescription) -> List[Alert]:
    """Collect uniqueness and constancy alerts for a variable.

    Args:
        summary: Description of the variable; its ``n_distinct`` and ``n``
            attributes are read.

    Returns:
        A list that contains a ``UniqueAlert`` when ``n_distinct`` equals
        ``n`` and a ``ConstantAlert`` when ``n_distinct`` equals 1. The two
        checks are independent, so both alerts may be present (in that
        order), or neither.
    """
    found: List[Alert] = []
    distinct_count = summary.n_distinct

    # Every value is different from all the others.
    if distinct_count == summary.n:
        found.append(UniqueAlert(summary))

    # Only a single distinct value occurs.
    if distinct_count == 1:
        found.append(ConstantAlert(summary))

    return found
628629
629630
def unsupported_alerts(summary: VarDescription) -> List[Alert]:
    """Return the fixed pair of alerts used for an unsupported variable.

    Args:
        summary: Description of the variable. It is not read by this
            function.

    Returns:
        A new list containing an ``UnsupportedAlert`` followed by a
        ``RejectedAlert``, both constructed without arguments.
    """
    return [UnsupportedAlert(), RejectedAlert()]
636637
637638
638- def check_variable_alerts (config : Settings , col : str , description : dict ) -> List [Alert ]:
639+ def check_variable_alerts (
640+ config : Settings , col : str , description : VarDescription
641+ ) -> List [Alert ]:
639642 """Checks individual variables for alerts.
640643
641644 Args:
@@ -665,7 +668,6 @@ def check_variable_alerts(config: Settings, col: str, description: dict) -> List
665668
666669 for idx in range (len (alerts )):
667670 alerts [idx ].column_name = col
668- alerts [idx ].values = description
669671 return alerts
670672
671673
@@ -693,7 +695,10 @@ def check_correlation_alerts(config: Settings, correlations: dict) -> List[Alert
693695
694696
695697def get_alerts (
696- config : Settings , table_stats : dict , series_description : dict , correlations : dict
698+ config : Settings ,
699+ table_stats : dict ,
700+ series_description : dict [str , VarDescription ],
701+ correlations : dict ,
697702) -> List [Alert ]:
698703 alerts : List [Alert ] = check_table_alerts (table_stats )
699704 for col , description in series_description .items ():
0 commit comments