-
Notifications
You must be signed in to change notification settings - Fork 1.8k
Expand file tree
/
Copy pathdescription.py
More file actions
110 lines (93 loc) · 3.88 KB
/
description.py
File metadata and controls
110 lines (93 loc) · 3.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
from dataclasses import dataclass
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional, Union
from pandas import Timedelta
from ydata_profiling.model.var_description.default import VarDescription
@dataclass
class BaseAnalysis:
    """Description of base analysis module of report.

    Overall info about report. Each date field holds either a single
    ``datetime`` or a list of them; ``duration`` handles both forms.

    Attributes:
        title (str): Title of report.
        date_start (Union[datetime, List[datetime]]): Start of generating
            description.
        date_end (Union[datetime, List[datetime]]): End of generating
            description.
    """

    title: str
    date_start: Union[datetime, List[datetime]]
    date_end: Union[datetime, List[datetime]]

    def __init__(
        self,
        title: str,
        date_start: Union[datetime, List[datetime]],
        date_end: Union[datetime, List[datetime]],
    ) -> None:
        """Store the report title and the start/end timestamps as given.

        Args:
            title: Title of report.
            date_start: Start of generating description (single value or list).
            date_end: End of generating description (single value or list).
        """
        # Values are stored unchanged; type checks happen lazily in `duration`.
        self.title = title
        self.date_start = date_start
        self.date_end = date_end

    @property
    def duration(self) -> Union[timedelta, List[timedelta]]:
        """Elapsed time between ``date_start`` and ``date_end``.

        Returns:
            ``date_end - date_start`` when both are ``datetime`` objects, or a
            list of pairwise differences when both are lists. The list has one
            entry per element of ``date_start``; surplus entries in
            ``date_end`` are ignored.

        Raises:
            IndexError: If both are lists and ``date_end`` has fewer entries
                than ``date_start``.
            TypeError: If the two values are not both ``datetime`` objects or
                both lists.
        """
        start, end = self.date_start, self.date_end

        if isinstance(start, datetime) and isinstance(end, datetime):
            return end - start

        if isinstance(start, list) and isinstance(end, list):
            # Every start needs a matching end; keep IndexError (the exception
            # plain indexing would raise) but say what went wrong.
            if len(end) < len(start):
                raise IndexError(
                    f"date_end has {len(end)} entries but date_start has "
                    f"{len(start)}; each start needs a matching end"
                )
            # zip stops at the shorter list, i.e. after len(start) pairs.
            return [finish - begin for begin, finish in zip(start, end)]

        raise TypeError(
            "date_start and date_end must both be datetime objects or both be "
            f"lists, got {type(start).__name__} and {type(end).__name__}"
        )
@dataclass
class TimeIndexAnalysis:
    """Description of timeseries index analysis module of report.

    Each field holds either a single value or a list of values.

    Attributes:
        n_series (Union[int, List[int]]): Number of time series identified in
            the dataset.
        length (Union[int, List[int]]): Number of data points in the time
            series.
        start (Any): Starting point of the time series.
        end (Any): Ending point of the time series.
        period (Union[float, List[float], Timedelta, List[Timedelta]]): Average
            interval between data points in the time series.
        frequency (Union[Optional[str], List[Optional[str]]]): A string alias
            given to useful common time series frequencies, e.g. H - hours.
    """

    n_series: Union[int, List[int]]
    length: Union[int, List[int]]
    start: Any
    end: Any
    period: Union[float, List[float], Timedelta, List[Timedelta]]
    frequency: Union[Optional[str], List[Optional[str]]]

    def __init__(
        self,
        n_series: Union[int, List[int]],
        length: Union[int, List[int]],
        start: Any,
        end: Any,
        period: Union[float, List[float], Timedelta, List[Timedelta]],
        frequency: Union[Optional[str], List[Optional[str]]] = None,
    ) -> None:
        """Store the time index statistics as given.

        Parameter annotations mirror the field declarations so that every
        value a field is declared to hold can also be passed at construction.

        Args:
            n_series: Number of time series identified in the dataset.
            length: Number of data points in the time series.
            start: Starting point of the time series.
            end: Ending point of the time series.
            period: Average interval between data points.
            frequency: Frequency alias; defaults to ``None``.
        """
        # Plain assignment: no validation or conversion is performed.
        self.n_series = n_series
        self.length = length
        self.start = start
        self.end = end
        self.period = period
        self.frequency = frequency
@dataclass
class BaseDescription:
    """Description of DataFrame.

    Container that groups the outputs of the individual report modules.

    Attributes:
        analysis (BaseAnalysis): Base info about report. Title, start time
            and end time of description generating.
        time_index_analysis (Optional[TimeIndexAnalysis]): Description of
            timeseries index analysis module of report.
        table (Any): DataFrame statistic. Base information about DataFrame.
        variables (Dict[str, VarDescription]): Description of variables
            (columns) of DataFrame. Key is column name, value is the
            description of that variable.
        scatter (Any): Pairwise scatter for all variables. Plot interactions
            between variables.
        correlations (Dict[str, Any]): Prepare correlation matrix for
            DataFrame.
        missing (Dict[str, Any]): Describe missing values.
        alerts (Any): Take alerts from all modules (variables, scatter,
            correlations), and group them.
        package (Dict[str, Any]): Contains version of ydata-profiling and
            config.
        sample (Any): Sample of data.
        duplicates (Any): Description of duplicates.
    """

    # Field order defines the positional order of the generated __init__;
    # keep it stable.
    analysis: BaseAnalysis
    time_index_analysis: Optional[TimeIndexAnalysis]
    table: Any
    variables: Dict[str, VarDescription]
    scatter: Any
    correlations: Dict[str, Any]
    missing: Dict[str, Any]
    alerts: Any
    package: Dict[str, Any]
    sample: Any
    duplicates: Any