Skip to content

Commit cec6bae

Browse files
committed
fix: handle null_values in the CSV parsing step
1 parent 7037224 commit cec6bae

1 file changed

Lines changed: 8 additions & 0 deletions

File tree

src/polars_access_mdbtools/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import re
77
import subprocess
88
import warnings
9+
from collections.abc import Sequence
910
from pathlib import Path
1011

1112
import polars as pl
@@ -192,13 +193,15 @@ def read_table(
192193
table_name: str,
193194
*,
194195
implicit_string: bool = True,
196+
null_values: Sequence[str] = (),
195197
) -> pl.DataFrame:
196198
"""Read a MS Access database as a Polars DataFrame.
197199
198200
:param db_path: The MS Access database file.
199201
:param table_name: The name of the table to process.
200202
:param implicit_string: If True, mark strings and unknown datatypes as `pl.String`.
201203
Otherwise, raise an error on unhandled SQL data types.
204+
:param null_values: Extra strings to read as null, in addition to the built-in invalid date placeholders.
202205
:return: a `pl.DataFrame`
203206
"""
204207
mdb_schema = _read_table_mdb_schema(db_path, table_name)
@@ -262,6 +265,11 @@ def read_table(
262265
df = pl.read_csv(
263266
csv_io,
264267
schema=pl_schema_read,
268+
null_values=[
269+
"1900-01-00T00:00:00", # Insane datetime value.
270+
"1900-01-00", # Insane date value.
271+
*null_values,
272+
],
265273
)
266274

267275
# Convert binary columns to hex.

0 commit comments

Comments
 (0)