From 219b8197b5b4d85adf944a99d9a5385046516a71 Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Wed, 29 Apr 2026 14:35:23 -0700 Subject: [PATCH] Add timestamp ns literal --- pyiceberg/conversions.py | 43 ++++++++++++++++++ pyiceberg/expressions/literals.py | 70 ++++++++++++++++++++++++++++++ pyiceberg/utils/datetime.py | 40 +++++++++++++++++ tests/expressions/test_literals.py | 45 +++++++++++++++++++ tests/test_conversions.py | 4 ++ 5 files changed, 202 insertions(+) diff --git a/pyiceberg/conversions.py b/pyiceberg/conversions.py index 0311e76d89..c8cc6db694 100644 --- a/pyiceberg/conversions.py +++ b/pyiceberg/conversions.py @@ -76,11 +76,15 @@ time_str_to_micros, time_to_micros, timestamp_to_micros, + timestamp_to_nanos, timestamptz_to_micros, + timestamptz_to_nanos, to_human_day, to_human_time, to_human_timestamp, + to_human_timestamp_ns, to_human_timestamptz, + to_human_timestamptz_ns, ) from pyiceberg.utils.decimal import decimal_to_bytes, unscaled_to_decimal @@ -439,6 +443,15 @@ def _(_: PrimitiveType, val: int | datetime) -> str: return to_human_timestamp(val) +@to_json.register(TimestampNanoType) +def _(_: TimestampNanoType, val: int | datetime) -> str: + """Python datetime (without timezone) or nanoseconds since epoch serializes into an ISO8601 timestamp.""" + if isinstance(val, datetime): + val = datetime_to_nanos(val) + + return to_human_timestamp_ns(val) + + @to_json.register(TimestamptzType) def _(_: TimestamptzType, val: int | datetime) -> str: """Python datetime (with timezone) or microseconds since epoch serializes into an ISO8601 timestamp.""" @@ -447,6 +460,14 @@ def _(_: TimestamptzType, val: int | datetime) -> str: return to_human_timestamptz(val) +@to_json.register(TimestamptzNanoType) +def _(_: TimestamptzNanoType, val: int | datetime) -> str: + """Python datetime (with timezone) or nanoseconds since epoch serializes into an ISO8601 timestamp.""" + if isinstance(val, datetime): + val = datetime_to_nanos(val) + return to_human_timestamptz_ns(val) + + @to_json.register(FloatType) @to_json.register(DoubleType) def _(_: FloatType | DoubleType, val: float) -> float: @@ -586,6 +607,17 @@ def _(_: PrimitiveType, val: str | int | datetime) -> datetime: return val +@from_json.register(TimestampNanoType) +def _(_: TimestampNanoType, val: str | int | datetime) -> int | datetime: + """JSON ISO8601 string into nanoseconds since epoch.""" + if isinstance(val, str): + return timestamp_to_nanos(val) + elif isinstance(val, int): + return val + else: + return val + + @from_json.register(TimestamptzType) def _(_: TimestamptzType, val: str | int | datetime) -> datetime: """JSON ISO8601 string into Python datetime.""" @@ -597,6 +629,17 @@ def _(_: TimestamptzType, val: str | int | datetime) -> datetime: return val +@from_json.register(TimestamptzNanoType) +def _(_: TimestamptzNanoType, val: str | int | datetime) -> int | datetime: + """JSON ISO8601 string into nanoseconds since epoch.""" + if isinstance(val, str): + return timestamptz_to_nanos(val) + elif isinstance(val, int): + return val + else: + return val + + @from_json.register(FloatType) @from_json.register(DoubleType) def _(_: FloatType | DoubleType, val: float) -> float: diff --git a/pyiceberg/expressions/literals.py b/pyiceberg/expressions/literals.py index 5bf70990b9..5c6c25f87d 100644 --- a/pyiceberg/expressions/literals.py +++ b/pyiceberg/expressions/literals.py @@ -45,7 +45,9 @@ IntegerType, LongType, StringType, + TimestampNanoType, TimestampType, + TimestamptzNanoType, TimestamptzType, TimeType, UUIDType, @@ -57,10 +59,15 @@ days_to_date, micros_to_days, micros_to_timestamp, + nanos_to_days, + nanos_to_micros, time_str_to_micros, time_to_micros, timestamp_to_micros, + timestamp_to_nanos, timestamptz_to_micros, + timestamptz_to_nanos, + to_human_timestamp_ns, ) from pyiceberg.utils.decimal import decimal_to_unscaled, unscaled_to_decimal from pyiceberg.utils.singleton import Singleton @@ -332,6 +339,14 @@ def _(self, _: TimestampType) -> Literal[int]: def _(self, _: TimestamptzType) -> Literal[int]: return TimestampLiteral(self.value) + @to.register(TimestampNanoType) + def _(self, _: TimestampNanoType) -> Literal[int]: + return TimestampNanoLiteral(self.value) + + @to.register(TimestamptzNanoType) + def _(self, _: TimestamptzNanoType) -> Literal[int]: + return TimestampNanoLiteral(self.value) + @to.register(DecimalType) def _(self, type_var: DecimalType) -> Literal[Decimal]: unscaled = Decimal(self.value) @@ -476,11 +491,58 @@ def _(self, _: TimestampType) -> Literal[int]: def _(self, _: TimestamptzType) -> Literal[int]: return self + @to.register(TimestampNanoType) + def _(self, _: TimestampNanoType) -> Literal[int]: + return TimestampNanoLiteral(self.value * 1000) + + @to.register(TimestamptzNanoType) + def _(self, _: TimestamptzNanoType) -> Literal[int]: + return TimestampNanoLiteral(self.value * 1000) + @to.register(DateType) def _(self, _: DateType) -> Literal[int]: return DateLiteral(micros_to_days(self.value)) +class TimestampNanoLiteral(Literal[int]): + def __init__(self, value: int) -> None: + super().__init__(value, int) + + @model_serializer + def ser_model(self) -> str: + return to_human_timestamp_ns(self.root) + + def increment(self) -> Literal[int]: + return TimestampNanoLiteral(self.value + 1) + + def decrement(self) -> Literal[int]: + return TimestampNanoLiteral(self.value - 1) + + @singledispatchmethod + def to(self, type_var: IcebergType) -> Literal: # type: ignore + raise TypeError(f"Cannot convert TimestampNanoLiteral into {type_var}") + + @to.register(TimestampNanoType) + def _(self, _: TimestampNanoType) -> Literal[int]: + return self + + @to.register(TimestamptzNanoType) + def _(self, _: TimestamptzNanoType) -> Literal[int]: + return self + + @to.register(TimestampType) + def _(self, _: TimestampType) -> Literal[int]: + return TimestampLiteral(nanos_to_micros(self.value)) + + @to.register(TimestamptzType) + def _(self, _: TimestamptzType) -> Literal[int]: + return TimestampLiteral(nanos_to_micros(self.value)) + + @to.register(DateType) + def _(self, _: DateType) -> Literal[int]: + return DateLiteral(nanos_to_days(self.value)) + + class DecimalLiteral(Literal[Decimal]): def __init__(self, value: Decimal) -> None: super().__init__(value, Decimal) @@ -600,6 +662,14 @@ def _(self, _: TimestampType) -> Literal[int]: def _(self, _: TimestamptzType) -> Literal[int]: return TimestampLiteral(timestamptz_to_micros(self.value)) + @to.register(TimestampNanoType) + def _(self, _: TimestampNanoType) -> Literal[int]: + return TimestampNanoLiteral(timestamp_to_nanos(self.value)) + + @to.register(TimestamptzNanoType) + def _(self, _: TimestamptzNanoType) -> Literal[int]: + return TimestampNanoLiteral(timestamptz_to_nanos(self.value)) + @to.register(UUIDType) def _(self, _: UUIDType) -> Literal[bytes]: return UUIDLiteral(UUID(self.value).bytes) diff --git a/pyiceberg/utils/datetime.py b/pyiceberg/utils/datetime.py index b5cde34f26..60d7c436aa 100644 --- a/pyiceberg/utils/datetime.py +++ b/pyiceberg/utils/datetime.py @@ -218,6 +218,46 @@ def to_human_timestamp(timestamp_micros: int) -> str: return (EPOCH_TIMESTAMP + timedelta(microseconds=timestamp_micros)).isoformat() +def to_human_timestamp_ns(timestamp_nanos: int) -> str: + """Convert a TimestampNanoType value to human string.""" + micros = timestamp_nanos // 1000 + nanos = timestamp_nanos % 1000 + ts_str = (EPOCH_TIMESTAMP + timedelta(microseconds=micros)).isoformat() + if "." not in ts_str: + ts_str += ".000000" + return f"{ts_str}{nanos:03d}" + + +def to_human_timestamptz_ns(timestamp_nanos: int) -> str: + """Convert a TimestamptzNanoType value to human string.""" + micros = timestamp_nanos // 1000 + nanos = timestamp_nanos % 1000 + dt = EPOCH_TIMESTAMPTZ + timedelta(microseconds=micros) + # dt.isoformat() will look like 2023-01-01T00:00:00+00:00 or 2023-01-01T00:00:00.000000+00:00 + ts_str = dt.isoformat() + if "." not in ts_str: + # Insert .000000 before the timezone offset + if "+" in ts_str: + parts = ts_str.split("+") + ts_str = f"{parts[0]}.000000{nanos:03d}+{parts[1]}" + else: + parts = ts_str.split("-") + # Be careful with negative years if any, but Iceberg epoch is 1970 + # Expected format: YYYY-MM-DDTHH:MM:SS-HH:MM + # The last '-' is the TZ separator. + last_dash_idx = ts_str.rfind("-") + ts_str = f"{ts_str[:last_dash_idx]}.000000{nanos:03d}{ts_str[last_dash_idx:]}" + else: + # Append nanos before the timezone offset + if "+" in ts_str: + parts = ts_str.split("+") + ts_str = f"{parts[0]}{nanos:03d}+{parts[1]}" + else: + last_dash_idx = ts_str.rfind("-") + ts_str = f"{ts_str[:last_dash_idx]}{nanos:03d}{ts_str[last_dash_idx:]}" + return ts_str + + def micros_to_hours(micros: int) -> int: """Convert a timestamp in microseconds to hours from 1970-01-01T00:00.""" return micros // 3_600_000_000 diff --git a/tests/expressions/test_literals.py b/tests/expressions/test_literals.py index c3ace5d368..3b67570d9c 100644 --- a/tests/expressions/test_literals.py +++ b/tests/expressions/test_literals.py @@ -43,6 +43,7 @@ StringLiteral, TimeLiteral, TimestampLiteral, + TimestampNanoLiteral, literal, ) from pyiceberg.types import ( @@ -57,7 +58,9 @@ LongType, PrimitiveType, StringType, + TimestampNanoType, TimestampType, + TimestamptzNanoType, TimestamptzType, TimeType, UUIDType, @@ -86,6 +89,7 @@ def test_literal_from_nan_error() -> None: DateLiteral, TimeLiteral, TimestampLiteral, + TimestampNanoLiteral, DecimalLiteral, StringLiteral, FixedLiteral, @@ -278,6 +282,47 @@ def test_timestamp_to_date() -> None: assert date_lit.value == 0 +def test_timestamp_ns_to_date() -> None: + epoch_lit = TimestampNanoLiteral(0) + date_lit = epoch_lit.to(DateType()) + + assert date_lit.value == 0 + + +def test_timestamp_to_timestamp_ns() -> None: + ts_lit = TimestampLiteral(1000) + ts_ns_lit = ts_lit.to(TimestampNanoType()) + + assert isinstance(ts_ns_lit, TimestampNanoLiteral) + assert ts_ns_lit.value == 1000000 + + +def test_timestamp_ns_to_timestamp() -> None: + ts_ns_lit = TimestampNanoLiteral(1000000) + ts_lit = ts_ns_lit.to(TimestampType()) + + assert isinstance(ts_lit, TimestampLiteral) + assert ts_lit.value == 1000 + + +def test_string_to_timestamp_ns_literal() -> None: + assert StringLiteral("1970-01-01T00:00:00.000000001").to(TimestampNanoType()) == TimestampNanoLiteral(1) + assert StringLiteral("1970-01-01T00:00:00.000000001+00:00").to(TimestamptzNanoType()) == TimestampNanoLiteral(1) + assert StringLiteral("1970-01-01T00:00:00.123456789").to(TimestampNanoType()) == TimestampNanoLiteral(123456789) + assert StringLiteral("1970-01-01T00:00:00.123456789+00:00").to(TimestamptzNanoType()) == TimestampNanoLiteral(123456789) + + +def test_long_to_timestamp_ns_literal() -> None: + assert LongLiteral(123456789).to(TimestampNanoType()) == TimestampNanoLiteral(123456789) + assert LongLiteral(123456789).to(TimestamptzNanoType()) == TimestampNanoLiteral(123456789) + + +def test_timestamp_ns_increment_decrement() -> None: + lit = TimestampNanoLiteral(123) + assert lit.increment() == TimestampNanoLiteral(124) + assert lit.decrement() == TimestampNanoLiteral(122) + + def test_string_literal() -> None: sqrt2 = literal("1.414").to(StringType()) pi = literal("3.141").to(StringType()) diff --git a/tests/test_conversions.py b/tests/test_conversions.py index e38bdbd6f2..871a155463 100644 --- a/tests/test_conversions.py +++ b/tests/test_conversions.py @@ -570,7 +570,9 @@ def test_datetime_obj_to_bytes(primitive_type: PrimitiveType, value: datetime | (DateType(), date(2017, 11, 16), "2017-11-16"), (TimeType(), time(22, 31, 8, 123456), "22:31:08.123456"), (TimestampType(), datetime(2017, 11, 16, 22, 31, 8, 123456), "2017-11-16T22:31:08.123456"), + (TimestampNanoType(), 1510871468123456789, "2017-11-16T22:31:08.123456789"), (TimestamptzType(), datetime(2017, 11, 16, 22, 31, 8, 123456, tzinfo=timezone.utc), "2017-11-16T22:31:08.123456+00:00"), + (TimestamptzNanoType(), 1510871468123456789, "2017-11-16T22:31:08.123456789+00:00"), (StringType(), "iceberg", "iceberg"), (BinaryType(), b"\x01\x02\x03\xff", "010203ff"), (FixedType(4), b"\x01\x02\x03\xff", "010203ff"), @@ -595,7 +597,9 @@ def test_json_single_serialization(primitive_type: PrimitiveType, value: Any, ex (DateType(), date(2017, 11, 16)), (TimeType(), time(22, 31, 8, 123456)), (TimestampType(), datetime(2017, 11, 16, 22, 31, 8, 123456)), + (TimestampNanoType(), 1510871468123456789), (TimestamptzType(), datetime(2017, 11, 16, 22, 31, 8, 123456, tzinfo=timezone.utc)), + (TimestamptzNanoType(), 1510871468123456789), (StringType(), "iceberg"), (BinaryType(), b"\x01\x02\x03\xff"), (FixedType(4), b"\x01\x02\x03\xff"),