Skip to content

Commit 1314e0d

Browse files
Fabiana Clemente
authored and committed
docs: update and fix documentation and package examples
1 parent 2d2ccbc commit 1314e0d

8 files changed

Lines changed: 153 additions & 46 deletions

File tree

docs/getting-started/installation.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,13 @@ If you are in a notebook (locally, LambdaLabs, Google Colab or Kaggle), you can
1212
```python linenums="1"
1313
import sys
1414
!{sys.executable} -m pip install -U ydata-profiling[notebook]
15-
!jupyter nbextension enable --py widgetsnbextension
15+
!pip install jupyter-contrib-nbextensions
1616
```
17+
Afterwards, you can run the following command:
1718

19+
```python linenums="1"
20+
!jupyter nbextension enable --py widgetsnbextension
21+
```
1822
You may have to restart the kernel or runtime for the package to work.
1923

2024
## Using conda

examples/integrations/ydata_fabric_pipelines/data_profiling.ipynb

Lines changed: 1 addition & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,6 @@
1111
"start_time": "2022-10-24T06:29:02.519523",
1212
"status": "completed"
1313
},
14-
"pycharm": {
15-
"name": "#%% md\n"
16-
},
1714
"tags": []
1815
},
1916
"source": [
@@ -31,9 +28,6 @@
3128
"start_time": "2022-10-24T06:29:02.636283",
3229
"status": "completed"
3330
},
34-
"pycharm": {
35-
"name": "#%% md\n"
36-
},
3731
"tags": []
3832
},
3933
"source": [
@@ -57,9 +51,6 @@
5751
"start_time": "2022-10-24T06:29:02.737117",
5852
"status": "completed"
5953
},
60-
"pycharm": {
61-
"name": "#%% md\n"
62-
},
6354
"tags": []
6455
},
6556
"source": [
@@ -77,9 +68,6 @@
7768
"start_time": "2022-10-24T06:29:06.291913",
7869
"status": "completed"
7970
},
80-
"pycharm": {
81-
"name": "#%% md\n"
82-
},
8371
"tags": []
8472
},
8573
"source": [
@@ -98,9 +86,6 @@
9886
"start_time": "2022-10-24T06:29:06.401327",
9987
"status": "completed"
10088
},
101-
"pycharm": {
102-
"name": "#%%\n"
103-
},
10489
"tags": []
10590
},
10691
"outputs": [],
@@ -127,14 +112,11 @@
127112
"start_time": "2022-10-24T06:29:08.641388",
128113
"status": "completed"
129114
},
130-
"pycharm": {
131-
"name": "#%%\n"
132-
},
133115
"tags": []
134116
},
135117
"outputs": [],
136118
"source": [
137-
"dataset = DataSources.get(uid=\"973d95c7-e6bd-4535-a0ea-d3dd1e893b13\").read()"
119+
"dataset = DataSources.get(uid=\"insert-uid\").read()"
138120
]
139121
},
140122
{
@@ -149,9 +131,6 @@
149131
"start_time": "2022-10-24T06:29:14.867782",
150132
"status": "completed"
151133
},
152-
"pycharm": {
153-
"name": "#%%\n"
154-
},
155134
"tags": []
156135
},
157136
"outputs": [],
@@ -171,9 +150,6 @@
171150
"start_time": "2022-10-24T06:29:15.001031",
172151
"status": "completed"
173152
},
174-
"pycharm": {
175-
"name": "#%% md\n"
176-
},
177153
"tags": []
178154
},
179155
"source": [
@@ -192,9 +168,6 @@
192168
"start_time": "2022-10-24T06:29:15.107441",
193169
"status": "completed"
194170
},
195-
"pycharm": {
196-
"name": "#%%\n"
197-
},
198171
"tags": []
199172
},
200173
"outputs": [],
@@ -215,9 +188,6 @@
215188
"start_time": "2022-10-24T06:29:15.322696",
216189
"status": "completed"
217190
},
218-
"pycharm": {
219-
"name": "#%%\n"
220-
},
221191
"tags": []
222192
},
223193
"outputs": [],
@@ -244,9 +214,6 @@
244214
"start_time": "2022-10-24T06:29:51.135648",
245215
"status": "completed"
246216
},
247-
"pycharm": {
248-
"name": "#%%\n"
249-
},
250217
"tags": []
251218
},
252219
"outputs": [],

examples/meteorites/meteorites.ipynb

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,8 @@
4040
"source": [
4141
"import sys\n",
4242
"\n",
43-
"!{sys.executable} -m pip install -U pandas-profiling[notebook]\n",
43+
"!{sys.executable} -m pip install -U ydata-profiling[notebook]\n",
44+
"!pip install jupyter-contrib-nbextensions\n",
4445
"!jupyter nbextension enable --py widgetsnbextension"
4546
]
4647
},

examples/meteorites/meteorites_cloud.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
"metadata": {},
2222
"outputs": [],
2323
"source": [
24-
"!pip install -U pandas-profiling"
24+
"!pip install -U ydata-profiling"
2525
]
2626
},
2727
{

examples/titanic/titanic.ipynb

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@
3232
"source": [
3333
"import sys\n",
3434
"\n",
35-
"!\"{sys.executable}\" -m pip install -U pandas-profiling[notebook]\n",
35+
"!\"{sys.executable}\" -m pip install -U ydata-profiling[notebook]\n",
36+
"!pip install jupyter-contrib-nbextensions\n",
3637
"!jupyter nbextension enable --py widgetsnbextension"
3738
]
3839
},

examples/titanic/titanic_cloud.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
"metadata": {},
1414
"outputs": [],
1515
"source": [
16-
"!pip install -U pandas-profiling"
16+
"!pip install -U ydata-profiling"
1717
]
1818
},
1919
{

examples/usaairquality/usaairquality.ipynb

Lines changed: 121 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
},
2020
{
2121
"cell_type": "code",
22-
"execution_count": null,
22+
"execution_count": 1,
2323
"id": "b514dd38-2ebd-4c96-aed5-4e3695e20fa2",
2424
"metadata": {},
2525
"outputs": [],
@@ -47,6 +47,7 @@
4747
"import sys\n",
4848
"\n",
4949
"!{sys.executable} -m pip install -U pandas-profiling[notebook]\n",
50+
"!pip install jupyter-contrib-nbextensions\n",
5051
"!jupyter nbextension enable --py widgetsnbextension"
5152
]
5253
},
@@ -68,7 +69,7 @@
6869
},
6970
{
7071
"cell_type": "code",
71-
"execution_count": null,
72+
"execution_count": 1,
7273
"id": "b33a26ed-4e1e-4689-93ce-fa0f98f48e89",
7374
"metadata": {},
7475
"outputs": [],
@@ -89,7 +90,7 @@
8990
},
9091
{
9192
"cell_type": "code",
92-
"execution_count": null,
93+
"execution_count": 2,
9394
"id": "7dab0b47-537d-4402-af71-1bdfd0cf6cdd",
9495
"metadata": {},
9596
"outputs": [],
@@ -139,10 +140,41 @@
139140
},
140141
{
141142
"cell_type": "code",
142-
"execution_count": null,
143+
"execution_count": 3,
143144
"id": "15e613a6",
144145
"metadata": {},
145-
"outputs": [],
146+
"outputs": [
147+
{
148+
"name": "stderr",
149+
"output_type": "stream",
150+
"text": [
151+
"/Users/fabianaclemente/miniconda3/envs/yprof/lib/python3.11/site-packages/ydata_profiling/visualisation/plot.py:835: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n",
152+
" df = df.groupby([entity_column, \"__bins\"])[sortbykey].count()\n",
153+
"/Users/fabianaclemente/miniconda3/envs/yprof/lib/python3.11/site-packages/ydata_profiling/visualisation/plot.py:836: FutureWarning: The default value of observed=False is deprecated and will change to observed=True in a future version of pandas. Specify observed=False to silence this warning and retain the current behavior\n",
154+
" df = df.reset_index().pivot_table(entity_column, \"__bins\", sortbykey).T\n"
155+
]
156+
},
157+
{
158+
"data": {
159+
"text/plain": [
160+
"<Axes: xlabel='Time'>"
161+
]
162+
},
163+
"execution_count": 3,
164+
"metadata": {},
165+
"output_type": "execute_result"
166+
},
167+
{
168+
"data": {
169+
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA8gAAABvCAYAAAAwo+nJAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAMW0lEQVR4nO3dXYhc5cEH8P9kqxPNzowfIashK5YmjViTGO0HacGmNkaDhHrTllZsuheCssaG3MiCRCzYtTelSkRFEPtCxVBKWii4QSQqSK35wNYPiG9CLxZidmuLM5vty7TsznvVLYtG92x2ZjfZ3w8O7Dl5zj7/uZnln3POc0qtVqsVAAAAWOSWzHcAAAAAWAgUZAAAAIiCDAAAAEkUZAAAAEiiIAMAAEASBRkAAACSKMgAAACQREEGAACAJMnnOj3h5ORkTp48mUqlklKp1OnpAQAAWGRarVbGxsaycuXKLFly5uvEHS/IJ0+eTG9vb6enBQAAYJEbHh7OqlWrzvjvHS/IlUolSbLuvv9JV/niGZ/36p5b2hUJgHPIUy/9b+Fz7rllTRuScK45MTpW+JwvrKi0IQmcX4p+L/tOZj40Go309vZO9dEz6XhB/s9t1V3li9NVXjbj86rVarsiAXAOWbqsu/A5/oaQJJX/K/5oV7WqIMNnKfq97DuZ+fRZj/nOapGuJ554IldffXWWLl2ar33ta3nzzTdnFQ4AAAAWisIFed++fdm9e3ceeuihHD16NBs2bMitt96a0dHRduQDAACAjihckH/xi1/k7rvvTl9fX6699to89dRTufjii/Pss8+2Ix8AAAB0RKGC/K9//StHjhzJli1b/vsLlizJli1b8sc//vETz2k2m2k0GtM2AAAAWGgKFeQPP/wwExMT6enpmXa8p6cnp06d+sRzBgcHU6vVpjaveAIAAGAhmtUiXUUMDAykXq9PbcPDw+2eEgAAAAor9Jqn5cuXp6urKyMjI9OOj4yM5IorrvjEc8rlcsrl8uwTAgAAQAcUuoJ84YUX5sYbb8zLL788dWxycjIvv/xyNm3aNOfhAAAAoFMKXUFOkt27d2fHjh358pe/nK9+9av55S9/mfHx8fT19bUjHwAAAHRE4YL8/e9/P3/729+yZ8+enDp1Ktdff32GhoY+tnAXAAAAnEtKrVar1ckJG41GarVa6vV6qtVqJ6cGAABgEZppD237KtYAAABwLih8i/Vc+eHe13PB0mUzHr9/901tTAMAAHPv8aFjhcbff9vaNiXhXHN8ZKzQ+NU9lTYlWVxcQQYAAIDMoiC/9tpr2b59e1auXJlSqZTf/e53bYgFAAAAnVW4II+Pj2fDhg154okn2pEHAAAA5kXhZ5C3bduWbdu2tSMLAAAAzJu2L9LVbDbTbDan9huNRrunBAAAgMLavkjX4OBgarXa1Nbb29vuKQEAAKCwthfkgYGB1Ov1qW14eLjdUwIAAEBhbb/Fulwup1wut3saAAAAOCvegwwAAACZxRXk06dP5/jx41P7f/3rX/PWW2/lsssuy1VXXTWn4QAAAKBTChfkw4cP51vf+tbU/u7du5MkO3bsyHPPPTdnwQAAAKCTChfkzZs3p9VqtSMLAAAAzJtSq8Ntt9FopFarpV6vp1qtdnJqAAAAFqGZ9lCLdAEAAEA68JqnM/nmT19KV3nZjMcffuS2NqYBoNMeHzpWaPz9t61tUxJgsfC9s7AcHxkrfM7qnkobksB/FbqCPDg4mK985SupVCpZsWJF7rjjjhw7VuyLBgAAABaiQgX51VdfTX9/f95444289NJL+fe//52tW7dmfHy8XfkAAACgIwrdYj00NDRt/7nnnsuKFSty5MiR3HTTTXMaDAAAADrprBbpqtfrSZLLLrtsTsIAAADAfJn1Il2Tk5PZtWtXvvGNb+S6664747hms5lmszm132g0ZjslAAAAtM2sryD39/fnnXfeyQsvvPCp4wYHB1Or1aa23t
7e2U4JAAAAbTOrgnzfffflD3/4Qw4ePJhVq1Z96tiBgYHU6/WpbXh4eFZBAQAAoJ0K3WLdarWyc+fO7N+/P6+88ko+//nPf+Y55XI55XJ51gEBAACgEwoV5P7+/jz//PP5/e9/n0qlklOnTiVJarVaLrroorYEBAAAgE4odIv1k08+mXq9ns2bN+fKK6+c2vbt29eufAAAANARhW+xBgAAgPNRqdXh1ttoNFKr1VKv11OtVjs5NQAAAIvQTHvorF/zBAAAAOeTQrdYz6Vv/vSldJWXzXj84Udua2MaAFjcjo+MFRq/uqdy1nM+PnSs0Pj7b1t71nMCzIbvq8XDFWQAAADILFaxXr9+farVaqrVajZt2pQXX3yxXdkAAACgYwoV5FWrVuXRRx/NkSNHcvjw4dx88835zne+k3fffbdd+QAAAKAjCj2DvH379mn7jzzySJ588sm88cYb+dKXvjSnwQAAAKCTZr1I18TERH7zm99kfHw8mzZtOuO4ZrOZZrM5td9oNGY7JQAAALRN4UW63n777XR3d6dcLueee+7J/v37c+21155x/ODgYGq12tTW29t7VoEBAACgHQoX5LVr1+att97Kn/70p9x7773ZsWNH3nvvvTOOHxgYSL1en9qGh4fPKjAAAAC0Q+FbrC+88MKsXr06SXLjjTfm0KFDeeyxx/L0009/4vhyuZxyuXx2KQEAAKDNzvo9yJOTk9OeMQYAAIBzUaEryAMDA9m2bVuuuuqqjI2N5fnnn88rr7ySAwcOtCsfAAAAdEShgjw6Opof/ehH+eCDD1Kr1bJ+/focOHAgt9xyS7vyAQAAQEeUWq1Wq5MTNhqN1Gq11Ov1VKvVTk4NAADAIjTTHnrWzyADAADA+aDwKtZz5Yd7X88FS5fNePz+3Te1MQ0AANBJjw8dK3zO/betbUOShen4yFih8at7Km1Ksri4ggwAAAA5y4L86KOPplQqZdeuXXMUBwAAAObHrAvyoUOH8vTTT2f9+vVzmQcAAADmxawK8unTp3PnnXfmmWeeyaWXXjrXmQAAAKDjZlWQ+/v7c/vtt2fLli2fObbZbKbRaEzbAAAAYKEpvIr1Cy+8kKNHj+bQoUMzGj84OJiHH364cDAAAADopEJXkIeHh/OTn/wkv/71r7N06dIZnTMwMJB6vT61DQ8PzyooAAAAtFOhK8hHjhzJ6OhobrjhhqljExMTee2117J37940m810dXVNO6dcLqdcLs9NWgAAAGiTQgX529/+dt5+++1px/r6+nLNNdfkgQce+Fg5BgAAgHNFoYJcqVRy3XXXTTu2bNmyXH755R87DgAAAOeSWb8HGQAAAM4npVar1erkhPV6PZdcckmGh4dTrVY7OTUAAACLUKPRSG9vbz766KPUarUzjiv8mqezNTY2liTp7e3t9NQAAAAsYmNjY59akDt+BXlycjInT55MpVJJqVTq5NQAAAAsQq1WK2NjY1m5cmWWLDnzk8YdL8gAAACwEFmkCwAAAKIgAwAAQBIFGQAAAJIoyACwoPz4xz/OHXfcMd8xAGBR6vhrngBgsfqstzc89NBDeeyxx2L9TACYHwoyAHTIBx98MPXzvn37smfPnhw7dmzqWHd3d7q7u+cjGgAQt1gDQMdcccUVU1utVkupVJp2rLu7+2O3WG/evDk7d+7Mrl27cumll6anpyfPPPNMxsfH09fXl0qlktWrV+fFF1+cNtc777yTbdu2pbu7Oz09Pbnrrrvy4YcfdvgTA8C5RUEGgAXuV7/6VZYvX54333wzO3fuzL333pvvfve7+frXv56jR49m69atueuuu/LPf/4zSfLRRx/l5ptvzsaNG3P48OEMDQ1lZGQk3/ve9+b5kwDAwqYgA8ACt2HDhjz44INZs2ZNBgYGsnTp0ixfvjx333131qxZkz179uTvf/97/vKXvyRJ9u7dm40bN+ZnP/tZrrnmmmzcuDHPPvtsDh48mPfff3+ePw0ALFyeQQaABW
79+vVTP3d1deXyyy/PunXrpo719PQkSUZHR5Mkf/7zn3Pw4MFPfJ75xIkT+eIXv9jmxABwblKQAWCBu+CCC6btl0qlacf+szr25ORkkuT06dPZvn17fv7zn3/sd1155ZVtTAoA5zYFGQDOMzfccEN++9vf5uqrr87nPudPPQDMlGeQAeA809/fn3/84x/5wQ9+kEOHDuXEiRM5cOBA+vr6MjExMd/xAGDBUpAB4DyzcuXKvP7665mYmMjWrVuzbt267Nq1K5dcckmWLPGnHwDOpNRqtVrzHQIAAADmm/9GBgAAgCjIAAAAkERBBgAAgCQKMgAAACRRkAEAACCJggwAAABJFGQAAABIoiADAABAEgUZAAAAkijIAAAAkERBBgAAgCQKMgAAACRJ/h8Hkxjnl6dfBgAAAABJRU5ErkJggg==",
170+
"text/plain": [
171+
"<Figure size 1200x500 with 1 Axes>"
172+
]
173+
},
174+
"metadata": {},
175+
"output_type": "display_data"
176+
}
177+
],
146178
"source": [
147179
"from ydata_profiling.visualisation.plot import timeseries_heatmap\n",
148180
"\n",
@@ -151,7 +183,7 @@
151183
},
152184
{
153185
"cell_type": "code",
154-
"execution_count": null,
186+
"execution_count": 4,
155187
"id": "b29a7e78-d52d-458d-ac9a-e509ffd373d1",
156188
"metadata": {},
157189
"outputs": [],
@@ -168,6 +200,88 @@
168200
"\n",
169201
" profile.to_file(f\"Ts_Profile_{group[0]}.html\")"
170202
]
203+
},
204+
{
205+
"cell_type": "code",
206+
"execution_count": 5,
207+
"id": "7327cb70-3db8-441e-837e-4ac2a5a57eaa",
208+
"metadata": {},
209+
"outputs": [
210+
{
211+
"data": {
212+
"application/vnd.jupyter.widget-view+json": {
213+
"model_id": "ac531d9e9574493083522ec56b68c3cc",
214+
"version_major": 2,
215+
"version_minor": 0
216+
},
217+
"text/plain": [
218+
"Summarize dataset: 0%| | 0/5 [00:00<?, ?it/s]"
219+
]
220+
},
221+
"metadata": {},
222+
"output_type": "display_data"
223+
},
224+
{
225+
"data": {
226+
"application/vnd.jupyter.widget-view+json": {
227+
"model_id": "6ccba6e512c84b01be36afb26e250000",
228+
"version_major": 2,
229+
"version_minor": 0
230+
},
231+
"text/plain": [
232+
"Generate report structure: 0%| | 0/1 [00:00<?, ?it/s]"
233+
]
234+
},
235+
"metadata": {},
236+
"output_type": "display_data"
237+
},
238+
{
239+
"data": {
240+
"application/vnd.jupyter.widget-view+json": {
241+
"model_id": "",
242+
"version_major": 2,
243+
"version_minor": 0
244+
},
245+
"text/plain": [
246+
"Render widgets: 0%| | 0/1 [00:00<?, ?it/s]"
247+
]
248+
},
249+
"metadata": {},
250+
"output_type": "display_data"
251+
},
252+
{
253+
"data": {
254+
"application/vnd.jupyter.widget-view+json": {
255+
"model_id": "427e682fb36b4017a8f1db4f714bb5e3",
256+
"version_major": 2,
257+
"version_minor": 0
258+
},
259+
"text/plain": [
260+
"VBox(children=(Tab(children=(Tab(children=(GridBox(children=(VBox(children=(GridspecLayout(children=(HTML(valu…"
261+
]
262+
},
263+
"metadata": {},
264+
"output_type": "display_data"
265+
}
266+
],
267+
"source": [
268+
"profile = ProfileReport(\n",
269+
" group[1],\n",
270+
" tsmode=True,\n",
271+
" sortby=\"Date Local\",\n",
272+
" # title=f\"Air Quality profiling - Site Num: {group[0]}\"\n",
273+
")\n",
274+
"\n",
275+
"profile.to_file()"
276+
]
277+
},
278+
{
279+
"cell_type": "code",
280+
"execution_count": null,
281+
"id": "3c53d119-d014-4104-a5b2-76fd7a45181e",
282+
"metadata": {},
283+
"outputs": [],
284+
"source": []
171285
}
172286
],
173287
"metadata": {
@@ -186,7 +300,7 @@
186300
"name": "python",
187301
"nbconvert_exporter": "python",
188302
"pygments_lexer": "ipython3",
189-
"version": "3.8.12"
303+
"version": "3.11.9"
190304
},
191305
"varInspector": {
192306
"cols": {

examples/zero_division.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
import pandas as pd
2+
from ydata_profiling import ProfileReport
3+
4+
5+
if __name__ == '__main__':
6+
import numpy as np
7+
8+
df = pd.read_csv("c6cb4c3d-e735-4b55-bd5c-b7c78ab152aa.csv", sep=',', encoding = "latin")
9+
#df['empty_col'] = [None]*len(df)
10+
11+
df.sample(10000)
12+
13+
df.to_csv('Validation.csv')
14+
15+
16+
#df.to_csv('teste.csv')
17+
18+
report = ProfileReport(df, title='Testing the null values')
19+
report.to_file('report.html')
20+

0 commit comments

Comments
 (0)