Skip to content

Commit 0877ac5

Browse files
test: add test case for non-ASCII characters (#7)
* test: add test case for non-ASCII characters
* fix: non-ASCII table names fail on Windows
1 parent 833341b commit 0877ac5

6 files changed

Lines changed: 73 additions & 8 deletions

File tree

.github/workflows/ci.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,15 @@ jobs:
2828
- os: macos-latest
2929
python-version: "3.12"
3030
polars-version: "1.31"
31+
- os: macos-latest # Another mac variant.
32+
python-version: "3.11"
33+
polars-version: "1.34"
3134
- os: windows-latest
3235
python-version: "3.12"
3336
polars-version: "1.31"
37+
- os: windows-latest # Another Windows variant.
38+
python-version: "3.13"
39+
polars-version: "1.34"
3440

3541
# Python variations
3642
- os: ubuntu-latest

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ A library for reading tables from an Access database into Polars dataframes, usi
55

66
A tiny, `subprocess`-based tool for reading an
77
[MS Access](https://products.office.com/en-us/access)
8-
database (`.rdb` or `.accdb`) as a [Python Polars Dataframe](https://docs.pola.rs).
8+
database (`.mdb`, `.accdb`, `.rdb`) as a [Python Polars Dataframe](https://docs.pola.rs).
99

1010
## Installation
1111

src/polars_access_mdbtools/__init__.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ def list_table_names(db_path: str | Path) -> list[str]:
4141
subprocess.check_output( # noqa: S603
4242
["mdb-tables", "--single-column", _path_to_cmd_str(db_path)], # noqa: S607
4343
)
44-
.decode()
44+
.decode(locale.getpreferredencoding())
4545
.replace("\r\n", "\n")
4646
.strip()
4747
)
@@ -106,12 +106,10 @@ def _extract_data_type_definitions(defs_str: str) -> dict[str, str]:
106106
def _read_table_mdb_schema(
107107
db_path: str | Path,
108108
table_name: str,
109-
encoding: str = "utf-8",
110109
) -> dict[str, str]:
111110
"""Read the schema of a given table into a dictionary built from the mdb-schema output.
112111
113112
:param db_path: The MS Access database file.
114-
:param encoding: The schema encoding.
115113
:return: a dictionary of `{column_name: access_data_type}`
116114
"""
117115
cmd = [
@@ -135,7 +133,7 @@ def _read_table_mdb_schema(
135133
raise ValueError(msg) from e
136134
raise
137135

138-
cmd_output = cmd_output.decode(encoding)
136+
cmd_output = cmd_output.decode(locale.getpreferredencoding())
139137
lines = cmd_output.splitlines()
140138
schema_ddl = "\n".join(line for line in lines if line and not line.startswith("-"))
141139

@@ -203,8 +201,7 @@ def read_table(
203201
Otherwise, raise an error on unhandled SQL data types.
204202
:return: a `pl.DataFrame`
205203
"""
206-
schema_encoding = "utf-8"
207-
mdb_schema = _read_table_mdb_schema(db_path, table_name, schema_encoding)
204+
mdb_schema = _read_table_mdb_schema(db_path, table_name)
208205
pl_schema_target = _convert_mdb_schema_to_polars_schema(
209206
mdb_schema,
210207
implicit_string=implicit_string,

tests/conftest.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,3 +43,26 @@ def sample_db_1() -> Iterator[Path]:
4343

4444
yield db_path # Provide file path to test.
4545
# Tempdir automatically cleaned up on exit.
46+
47+
48+
@pytest.fixture
49+
def sample_db_2() -> Iterator[Path]:
50+
"""Provide a second sample Access database file for tests.
51+
52+
Download the Access Sample .mdb file to a temporary directory,
53+
yield its path for tests, and delete it afterward.
54+
"""
55+
url = "https://github.com/el3um4s/mdbtools/raw/refs/heads/main/src/__tests__/test%202.mdb"
56+
with tempfile.TemporaryDirectory() as temp_dir_str:
57+
db_path = Path(temp_dir_str) / "file_example_MDB_250kB.mdb"
58+
response = requests.get(url, stream=True, timeout=10)
59+
response.raise_for_status()
60+
61+
db_path.write_bytes(response.content)
62+
assert (
63+
_sha256_checksum(db_path)
64+
== "560bfd44ad5a6efbab4c86622c92a7071eda9d73c3b453e4bba227d82d725fec"
65+
), "Downloaded file checksum does not match expected value."
66+
67+
yield db_path # Provide file path to test.
68+
# Tempdir automatically cleaned up on exit.

tests/test_list_table_names.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,16 @@ def test_list_table_names_db_1(sample_db_1: Path) -> None:
2121
"tblFileList",
2222
"USysRibbons",
2323
]
24+
25+
26+
def test_list_table_names_db_2(sample_db_2: Path) -> None:
27+
table_names = list_table_names(sample_db_2)
28+
assert isinstance(table_names, list)
29+
assert table_names == [
30+
"Colors",
31+
"Colors Table Two",
32+
"Colors-Table Others",
33+
"Dictionary",
34+
"Users",
35+
"Colors 1°à",
36+
]

tests/test_read_table.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,4 +62,30 @@ def test_reading_specific_table_1a(sample_db_1: Path) -> None:
6262
},
6363
)
6464
assert_frame_equal(df, df_expected)
65-
assert df.schema == df_expected.schema
65+
66+
67+
def test_reading_specific_table_2a(sample_db_2: Path) -> None:
68+
"""Test reading a specific table and checking its schema.
69+
70+
Table contains string columns with non-ASCII characters.
71+
"""
72+
df = read_table(sample_db_2, table_name="Colors-Table Others")
73+
74+
df_expected = pl.DataFrame(
75+
[
76+
{"Colors": "Red", "Value": 10, "Second Value": 5, "Others-A": "à"},
77+
{"Colors": "Green", "Value": 5, "Second Value": 3, "Others-A": "1a"},
78+
{"Colors": "Blue", "Value": 16, "Second Value": 4, "Others-A": "ò"},
79+
{"Colors": "Black", "Value": 1, "Second Value": 3, "Others-A": "2°"},
80+
{"Colors": "Yellow", "Value": 12, "Second Value": 3, "Others-A": "Y"},
81+
{"Colors": "White", "Value": 10, "Second Value": 1, "Others-A": "W"},
82+
{"Colors": "Others", "Value": 0, "Second Value": 0, "Others-A": "A"},
83+
],
84+
schema={
85+
"Colors": pl.String,
86+
"Value": pl.Int64,
87+
"Second Value": pl.Int64,
88+
"Others-A": pl.String,
89+
},
90+
)
91+
assert_frame_equal(df, df_expected)

0 commit comments

Comments
 (0)