|
8 | 8 | "outputs": [], |
9 | 9 | "source": [ |
10 | 10 | "# Import required libraries\n", |
| 11 | + "import pyarrow as pa\n", |
| 12 | + "\n", |
11 | 13 | "import pyiceberg\n", |
12 | 14 | "from pyiceberg.catalog import load_catalog\n", |
13 | | - "from pyiceberg.table import Table\n", |
14 | | - "import pandas as pd\n", |
15 | | - "import pyarrow as pa\n", |
| 15 | + "\n", |
16 | 16 | "print(f\"PyIceberg version: {pyiceberg.__version__}\")" |
17 | 17 | ] |
18 | 18 | }, |
|
34 | 34 | "outputs": [], |
35 | 35 | "source": [ |
36 | 36 | "# Import required libraries\n", |
37 | | - "from pyiceberg.catalog import load_catalog\n", |
38 | | - "import pyarrow.parquet as pq\n", |
39 | | - "import pyarrow.compute as pc\n", |
| 37 | + "import os\n", |
40 | 38 | "import tempfile\n", |
41 | | - "import os" |
| 39 | + "\n", |
| 40 | + "import pyarrow.compute as pc" |
42 | 41 | ] |
43 | 42 | }, |
44 | 43 | { |
|
63 | 62 | "# Configure and load the catalog\n", |
64 | 63 | "catalog = load_catalog(\n", |
65 | 64 | " \"default\",\n", |
66 | | - " **{\n", |
67 | | - " 'type': 'sql',\n", |
68 | | - " \"uri\": f\"sqlite:///{warehouse_path}/pyiceberg_catalog.db\",\n", |
69 | | - " \"warehouse\": f\"file://{warehouse_path}\",\n", |
70 | | - " },\n", |
| 65 | + " type=\"sql\",\n", |
| 66 | + " uri=f\"sqlite:///{warehouse_path}/pyiceberg_catalog.db\",\n", |
| 67 | + " warehouse=f\"file://{warehouse_path}\",\n", |
71 | 68 | ")\n", |
72 | 69 | "\n", |
73 | 70 | "print(\"Catalog loaded successfully!\")\n", |
|
114 | 111 | "outputs": [], |
115 | 112 | "source": [ |
116 | 113 | "# Create sample data using PyArrow\n", |
117 | | - "import pyarrow as pa\n", |
118 | 114 | "\n", |
119 | 115 | "# Sample taxi-like data\n", |
120 | 116 | "data = {\n", |
121 | | - " 'vendor_id': [1, 2, 1, 2, 1],\n", |
122 | | - " 'trip_distance': [1.5, 2.3, 0.8, 5.2, 3.1],\n", |
123 | | - " 'fare_amount': [10.0, 15.5, 6.0, 22.0, 18.0],\n", |
124 | | - " 'tip_amount': [2.0, 3.0, 1.0, 4.5, 3.5],\n", |
125 | | - " 'passenger_count': [1, 2, 1, 3, 2]\n", |
| 117 | + " \"vendor_id\": [1, 2, 1, 2, 1],\n", |
| 118 | + " \"trip_distance\": [1.5, 2.3, 0.8, 5.2, 3.1],\n", |
| 119 | + " \"fare_amount\": [10.0, 15.5, 6.0, 22.0, 18.0],\n", |
| 120 | + " \"tip_amount\": [2.0, 3.0, 1.0, 4.5, 3.5],\n", |
| 121 | + " \"passenger_count\": [1, 2, 1, 3, 2],\n", |
126 | 122 | "}\n", |
127 | 123 | "\n", |
128 | 124 | "df = pa.table(data)\n", |
|
313 | 309 | "outputs": [], |
314 | 310 | "source": [ |
315 | 311 | "# List all files in the warehouse\n", |
316 | | - "import os\n", |
317 | | - "for root, dirs, files in os.walk(warehouse_path):\n", |
318 | | - " level = root.replace(warehouse_path, '').count(os.sep)\n", |
319 | | - " indent = ' ' * 2 * level\n", |
320 | | - " print(f'{indent}{os.path.basename(root)}/')\n", |
321 | | - " subindent = ' ' * 2 * (level + 1)\n", |
| 312 | + "for root, _dirs, files in os.walk(warehouse_path):\n", |
| 313 | + " level = root.replace(warehouse_path, \"\").count(os.sep)\n", |
| 314 | + " indent = \" \" * 2 * level\n", |
| 315 | + " print(f\"{indent}{os.path.basename(root)}/\")\n", |
| 316 | + " subindent = \" \" * 2 * (level + 1)\n", |
322 | 317 | " for file in files:\n", |
323 | | - " print(f'{subindent}{file}')" |
| 318 | + " print(f\"{subindent}{file}\")" |
324 | 319 | ] |
325 | 320 | }, |
326 | 321 | { |
|
0 commit comments