Skip to content

Commit d7949e2

Browse files
committed
add scan tests
1 parent 1a5e32a commit d7949e2

1 file changed

Lines changed: 67 additions & 0 deletions

File tree

tests/io/test_pyarrow.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,12 @@
5050
BoundNotStartsWith,
5151
BoundReference,
5252
BoundStartsWith,
53+
EqualTo,
5354
GreaterThan,
55+
IsNull,
5456
Not,
57+
NotEqualTo,
58+
NotNull,
5559
Or,
5660
)
5761
from pyiceberg.expressions.literals import literal
@@ -2317,3 +2321,66 @@ def test_pyarrow_io_multi_fs() -> None:
23172321

23182322
# Same PyArrowFileIO instance resolves local file input to LocalFileSystem
23192323
assert isinstance(pyarrow_file_io.new_input("file:///path/to/file")._filesystem, LocalFileSystem)
2324+
2325+
2326+
def test_scan_nulls(catalog: InMemoryCatalog, arrow_table_with_null: pa.Table) -> None:
    """Pin the row counts of null-related scan filters in several equivalent spellings.

    Every predicate is issued as a row-filter string, as a PyIceberg expression
    object, and as a PyArrow compute expression applied to an unfiltered scan,
    so the spellings can be compared side by side.
    """
    import pyarrow.compute as pc

    identifier = "default.test_scan_nulls"
    catalog.create_namespace("default")
    table = catalog.create_table(identifier, schema=arrow_table_with_null.schema)
    table.append(arrow_table_with_null)

    string_field = pc.field("string")
    equals_a = EqualTo(term="string", literal="a")
    differs_from_a = NotEqualTo(term="string", literal="a")

    # Original note on the fixture data: "string": ["a", None, "z"]

    # IS NULL
    assert len(table.scan(row_filter="string is null").to_arrow()) == 1
    assert len(table.scan(row_filter=IsNull("string")).to_arrow()) == 1
    assert len(table.scan().to_arrow().filter(string_field.is_null())) == 1

    # IS NOT NULL
    assert len(table.scan(row_filter="string is not null").to_arrow()) == 2
    assert len(table.scan(row_filter=NotNull("string")).to_arrow()) == 2
    assert len(table.scan().to_arrow().filter(string_field.is_valid())) == 2

    # Equality against a non-null literal
    assert len(table.scan(row_filter="string == 'a'").to_arrow()) == 1
    assert len(table.scan(row_filter=equals_a).to_arrow()) == 1
    assert len(table.scan().to_arrow().filter(string_field == "a")) == 1

    # NOTE(review): the original author flagged that these counts "should be 2".
    # The assertions below pin 1 for all four spellings of the inequality.
    assert len(table.scan(row_filter="string != 'a'").to_arrow()) == 1
    assert len(table.scan(row_filter=differs_from_a).to_arrow()) == 1
    assert len(table.scan(row_filter=Not(equals_a)).to_arrow()) == 1
    assert len(table.scan().to_arrow().filter(string_field != "a")) == 1
2354+
2355+
2356+
def test_scan_kleene(catalog: InMemoryCatalog, arrow_table_with_null: pa.Table) -> None:
    """Pin the row counts when a null check is combined with an equality via AND / OR.

    The trailing comment on each assertion lists the values the filter is
    expected to return from the "string" column.
    """
    catalog.create_namespace("default")
    table = catalog.create_table(
        # The identifier is named after this test so it cannot be confused with
        # (or collide with) the table created by another scan test.
        "default.test_scan_kleene",
        schema=arrow_table_with_null.schema,
    )
    table.append(arrow_table_with_null)

    # "string": ["a", None, "z"]
    assert len(table.scan(row_filter="string is null OR string = 'a'").to_arrow()) == 2  # {null, a}
    assert len(table.scan(row_filter="string is null AND string = 'a'").to_arrow()) == 0  # {}
    assert len(table.scan(row_filter="string is not null OR string = 'a'").to_arrow()) == 2  # {a, z}
    assert len(table.scan(row_filter="string is not null AND string = 'a'").to_arrow()) == 1  # {a}
2369+
2370+
2371+
def test_scan_complements(catalog: InMemoryCatalog, arrow_table_with_null: pa.Table) -> None:
    """Pin the row count selected by the complementary PyArrow filter of an equality.

    The equality is first used directly as a scan row filter, then bound to the
    table schema and converted with ``_expression_to_complementary_pyarrow``;
    the resulting expression is applied to an unfiltered scan.
    """
    from pyiceberg.expressions.visitors import bind
    from pyiceberg.io.pyarrow import _expression_to_complementary_pyarrow

    catalog.create_namespace("default")
    table = catalog.create_table("default.test_scan_complements", schema=arrow_table_with_null.schema)
    table.append(arrow_table_with_null)

    equals_a = EqualTo(term="string", literal="a")
    matching_rows = table.scan(row_filter=equals_a).to_arrow()
    assert len(matching_rows) == 1

    bound_equals_a = bind(table.schema(), equals_a, case_sensitive=False)
    complement_filter = _expression_to_complementary_pyarrow(bound_equals_a)

    all_rows = table.scan().to_arrow()
    # The complement handles null correctly: 2 rows are selected.
    # Presumably these are the null row and the other non-matching row;
    # confirm against the arrow_table_with_null fixture.
    assert len(all_rows.filter(complement_filter)) == 2

0 commit comments

Comments
 (0)