|
1 | 1 | import os |
2 | | -from typing import Dict |
| 2 | +from typing import Dict, Sequence |
| 3 | + |
| 4 | +from urllib.parse import urlparse |
3 | 5 |
|
4 | 6 | import numpy as np |
5 | 7 | import pandas as pd |
|
18 | 20 | from ydata_profiling.model.typeset import ProfilingTypeSet |
19 | 21 | from ydata_profiling.profile_report import ProfileReport |
20 | 22 |
|
def get_sequences() -> Dict[str, Sequence]:
    """Return a mapping from fixture name to a small sample sequence.

    The values are plain lists or NumPy arrays holding floats, integers,
    booleans, strings, complex numbers and parsed URLs, several of them
    combined with a missing-value marker (``np.nan`` or ``None``) or with
    infinities.

    A new dictionary with newly created lists and arrays is built on every
    call, so callers may mutate the result freely.

    Returns:
        Dictionary keyed by fixture name.
    """
    nan = np.nan
    inf = np.inf

    # Common complex prefix; every entry below receives its own list so that
    # no two fixtures share the same list object.
    complex_values = [complex(0, 0), complex(1, 2), complex(3, -1)]

    parsed_urls = [
        urlparse(address)
        for address in (
            "http://www.cwi.nl:80/%7Eguido/Python.html",
            "https://github.com/dylan-profiling/hurricane",
        )
    ]

    text_lines = [
        "I was only robbing the register,",
        "I hope you understand",
        "One of us had better call up the cops",
        "In the hot New Jersey night",
    ]

    # Key order is kept exactly as declared: dict ordering is observable by
    # anything that iterates over or merges the result.
    return {
        "complex_series_float": [complex(real, 0) for real in (0, 1, 3, -1)],
        "url_nan_series": parsed_urls + [nan],
        "mixed": [True, False, nan],
        "float_nan_series": [1.0, 2.5, nan],
        "float_series5": [nan, 1.2],
        "float_with_inf": [inf, -inf, 1000000.0, 5.5],
        "inf_series": [inf, -inf],
        "int_nan_series": [1, 2, nan],
        "nan_series": [nan],
        "nan_series_2": [nan, nan, nan, nan],
        "string_num_nan": ["1.0", "2.0", nan],
        "string_with_sep_num_nan": ["1,000.0", "2.1", nan],
        "string_flt_nan": ["1.0", "45.67", nan],
        "string_str_nan": text_lines + [nan],
        "float_series3": np.array([1.2, 2, 3, 4], dtype=np.float64),
        "np_uint32": np.array([1, 2, 3, 4], dtype=np.uint32),
        "string_np_unicode_series": np.array(["upper", "hall"], dtype=np.str_),
        "complex_series": list(complex_values),
        "bool_series3": np.array([1, 0, 0, 1], dtype=np.bool_),
        "complex_series_nan": complex_values + [None],
        "complex_series_nan_2": complex_values + [nan],
        "complex_series_py_nan": complex_values + [nan],
    }
21 | 78 |
|
22 | 79 | def get_series() -> Dict[str, pd.Series]: |
23 | 80 | """ |
24 | 81 | Taken from Vision to remove the `complex_series_nan` that causes an exception due to a bug |
25 | 82 | in pandas 2 and numpy with the value `np.nan * 0j` and `complex(np.nan, np.nan)`. |
26 | 83 | See: https://github.com/numpy/numpy/issues/12919 |
27 | 84 | """ |
28 | | - from visions.backends.numpy.sequences import get_sequences as get_numpy_sequences |
29 | 85 | from visions.backends.pandas.sequences import get_sequences as get_pandas_sequences |
30 | 86 | from visions.backends.python.sequences import get_sequences as get_builtin_sequences |
31 | 87 |
|
32 | 88 | sequences = get_builtin_sequences() |
33 | | - sequences.update(get_numpy_sequences()) |
| 89 | + sequences.update(get_sequences()) |
34 | 90 |
|
35 | 91 | del sequences["complex_series_nan"] |
36 | 92 |
|
|
0 commit comments