Skip to content

Commit 219b819

Browse files
committed
Add timestamp ns literal
1 parent df258f5 commit 219b819

5 files changed

Lines changed: 202 additions & 0 deletions

File tree

pyiceberg/conversions.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,11 +76,15 @@
7676
time_str_to_micros,
7777
time_to_micros,
7878
timestamp_to_micros,
79+
timestamp_to_nanos,
7980
timestamptz_to_micros,
81+
timestamptz_to_nanos,
8082
to_human_day,
8183
to_human_time,
8284
to_human_timestamp,
85+
to_human_timestamp_ns,
8386
to_human_timestamptz,
87+
to_human_timestamptz_ns,
8488
)
8589
from pyiceberg.utils.decimal import decimal_to_bytes, unscaled_to_decimal
8690

@@ -439,6 +443,15 @@ def _(_: PrimitiveType, val: int | datetime) -> str:
439443
return to_human_timestamp(val)
440444

441445

446+
@to_json.register(TimestampNanoType)
def _(_: TimestampNanoType, val: int | datetime) -> str:
    """Serialize a datetime (without timezone) or a nanoseconds-since-epoch integer as an ISO8601 timestamp."""
    # Datetimes are first reduced to an integer nanosecond count; integers are used as given.
    nanos = datetime_to_nanos(val) if isinstance(val, datetime) else val
    return to_human_timestamp_ns(nanos)
453+
454+
442455
@to_json.register(TimestamptzType)
443456
def _(_: TimestamptzType, val: int | datetime) -> str:
444457
"""Python datetime (with timezone) or microseconds since epoch serializes into an ISO8601 timestamp."""
@@ -447,6 +460,14 @@ def _(_: TimestamptzType, val: int | datetime) -> str:
447460
return to_human_timestamptz(val)
448461

449462

463+
@to_json.register(TimestamptzNanoType)
def _(_: TimestamptzNanoType, val: int | datetime) -> str:
    """Serialize a datetime (with timezone) or a nanoseconds-since-epoch integer as an ISO8601 timestamp."""
    # Datetimes are first reduced to an integer nanosecond count; integers are used as given.
    nanos = datetime_to_nanos(val) if isinstance(val, datetime) else val
    return to_human_timestamptz_ns(nanos)
469+
470+
450471
@to_json.register(FloatType)
451472
@to_json.register(DoubleType)
452473
def _(_: FloatType | DoubleType, val: float) -> float:
@@ -586,6 +607,17 @@ def _(_: PrimitiveType, val: str | int | datetime) -> datetime:
586607
return val
587608

588609

610+
@from_json.register(TimestampNanoType)
def _(_: TimestampNanoType, val: str | int | datetime) -> int | datetime:
    """Parse a JSON ISO8601 string into nanoseconds since epoch.

    Only strings are converted; any other value (an integer or a datetime)
    is handed back to the caller unchanged.
    """
    if not isinstance(val, str):
        return val
    return timestamp_to_nanos(val)
619+
620+
589621
@from_json.register(TimestamptzType)
590622
def _(_: TimestamptzType, val: str | int | datetime) -> datetime:
591623
"""JSON ISO8601 string into Python datetime."""
@@ -597,6 +629,17 @@ def _(_: TimestamptzType, val: str | int | datetime) -> datetime:
597629
return val
598630

599631

632+
@from_json.register(TimestamptzNanoType)
def _(_: TimestamptzNanoType, val: str | int | datetime) -> int | datetime:
    """Parse a JSON ISO8601 string (with offset) into nanoseconds since epoch.

    Only strings are converted; any other value (an integer or a datetime)
    is handed back to the caller unchanged.
    """
    if not isinstance(val, str):
        return val
    return timestamptz_to_nanos(val)
641+
642+
600643
@from_json.register(FloatType)
601644
@from_json.register(DoubleType)
602645
def _(_: FloatType | DoubleType, val: float) -> float:

pyiceberg/expressions/literals.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,9 @@
4545
IntegerType,
4646
LongType,
4747
StringType,
48+
TimestampNanoType,
4849
TimestampType,
50+
TimestamptzNanoType,
4951
TimestamptzType,
5052
TimeType,
5153
UUIDType,
@@ -57,10 +59,15 @@
5759
days_to_date,
5860
micros_to_days,
5961
micros_to_timestamp,
62+
nanos_to_days,
63+
nanos_to_micros,
6064
time_str_to_micros,
6165
time_to_micros,
6266
timestamp_to_micros,
67+
timestamp_to_nanos,
6368
timestamptz_to_micros,
69+
timestamptz_to_nanos,
70+
to_human_timestamp_ns,
6471
)
6572
from pyiceberg.utils.decimal import decimal_to_unscaled, unscaled_to_decimal
6673
from pyiceberg.utils.singleton import Singleton
@@ -332,6 +339,14 @@ def _(self, _: TimestampType) -> Literal[int]:
332339
def _(self, _: TimestamptzType) -> Literal[int]:
333340
return TimestampLiteral(self.value)
334341

342+
@to.register(TimestampNanoType)
def _(self, _: TimestampNanoType) -> Literal[int]:
    """Wrap this literal's integer value, unscaled, in a TimestampNanoLiteral."""
    nanos = self.value
    return TimestampNanoLiteral(nanos)
345+
346+
@to.register(TimestamptzNanoType)
def _(self, _: TimestamptzNanoType) -> Literal[int]:
    """Wrap this literal's integer value, unscaled, in a TimestampNanoLiteral."""
    nanos = self.value
    return TimestampNanoLiteral(nanos)
349+
335350
@to.register(DecimalType)
336351
def _(self, type_var: DecimalType) -> Literal[Decimal]:
337352
unscaled = Decimal(self.value)
@@ -476,11 +491,58 @@ def _(self, _: TimestampType) -> Literal[int]:
476491
def _(self, _: TimestamptzType) -> Literal[int]:
477492
return self
478493

494+
@to.register(TimestampNanoType)
def _(self, _: TimestampNanoType) -> Literal[int]:
    """Scale this literal's value by 1000 and wrap it in a TimestampNanoLiteral."""
    scaled = self.value * 1000
    return TimestampNanoLiteral(scaled)
497+
498+
@to.register(TimestamptzNanoType)
def _(self, _: TimestamptzNanoType) -> Literal[int]:
    """Scale this literal's value by 1000 and wrap it in a TimestampNanoLiteral."""
    scaled = self.value * 1000
    return TimestampNanoLiteral(scaled)
501+
479502
@to.register(DateType)
def _(self, _: DateType) -> Literal[int]:
    """Convert this literal's value with micros_to_days and wrap the result in a DateLiteral."""
    days = micros_to_days(self.value)
    return DateLiteral(days)
482505

483506

507+
# Literal whose integer value is a nanosecond timestamp; the same class is
# returned for both TimestampNanoType and TimestamptzNanoType conversions.
class TimestampNanoLiteral(Literal[int]):
    def __init__(self, value: int) -> None:
        super().__init__(value, int)

    @model_serializer
    def ser_model(self) -> str:
        """Serialize the literal as the string produced by to_human_timestamp_ns."""
        return to_human_timestamp_ns(self.root)

    def increment(self) -> Literal[int]:
        """Return a new literal whose value is one greater."""
        successor = self.value + 1
        return TimestampNanoLiteral(successor)

    def decrement(self) -> Literal[int]:
        """Return a new literal whose value is one smaller."""
        predecessor = self.value - 1
        return TimestampNanoLiteral(predecessor)

    @singledispatchmethod
    def to(self, type_var: IcebergType) -> Literal:  # type: ignore
        """Convert this literal to the given type; types without a registered handler raise TypeError."""
        raise TypeError(f"Cannot convert TimestampNanoLiteral into {type_var}")

    # Nanosecond targets: the literal is already in the right unit.
    @to.register(TimestampNanoType)
    @to.register(TimestamptzNanoType)
    def _(self, _: TimestampNanoType | TimestamptzNanoType) -> Literal[int]:
        return self

    # Microsecond targets: convert the value with nanos_to_micros.
    @to.register(TimestampType)
    @to.register(TimestamptzType)
    def _(self, _: TimestampType | TimestamptzType) -> Literal[int]:
        micros = nanos_to_micros(self.value)
        return TimestampLiteral(micros)

    # Date target: convert the value with nanos_to_days.
    @to.register(DateType)
    def _(self, _: DateType) -> Literal[int]:
        days = nanos_to_days(self.value)
        return DateLiteral(days)
544+
545+
484546
class DecimalLiteral(Literal[Decimal]):
485547
def __init__(self, value: Decimal) -> None:
486548
super().__init__(value, Decimal)
@@ -600,6 +662,14 @@ def _(self, _: TimestampType) -> Literal[int]:
600662
def _(self, _: TimestamptzType) -> Literal[int]:
601663
return TimestampLiteral(timestamptz_to_micros(self.value))
602664

665+
@to.register(TimestampNanoType)
def _(self, _: TimestampNanoType) -> Literal[int]:
    """Parse this literal's string with timestamp_to_nanos and wrap the result in a TimestampNanoLiteral."""
    nanos = timestamp_to_nanos(self.value)
    return TimestampNanoLiteral(nanos)
668+
669+
@to.register(TimestamptzNanoType)
def _(self, _: TimestamptzNanoType) -> Literal[int]:
    """Parse this literal's string with timestamptz_to_nanos and wrap the result in a TimestampNanoLiteral."""
    nanos = timestamptz_to_nanos(self.value)
    return TimestampNanoLiteral(nanos)
672+
603673
@to.register(UUIDType)
def _(self, _: UUIDType) -> Literal[bytes]:
    """Parse this literal's string as a UUID and wrap its 16-byte form in a UUIDLiteral."""
    parsed = UUID(self.value)
    return UUIDLiteral(parsed.bytes)

pyiceberg/utils/datetime.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,46 @@ def to_human_timestamp(timestamp_micros: int) -> str:
218218
return (EPOCH_TIMESTAMP + timedelta(microseconds=timestamp_micros)).isoformat()
219219

220220

221+
def to_human_timestamp_ns(timestamp_nanos: int) -> str:
    """Convert a TimestampNanoType value to human string.

    The result is the ISO-8601 rendering of the microsecond part with the
    remaining three nanosecond digits appended, giving nine fractional digits.
    """
    # divmod floors, so the sub-microsecond remainder is always in 0..999.
    whole_micros, sub_micro_nanos = divmod(timestamp_nanos, 1000)
    moment = EPOCH_TIMESTAMP + timedelta(microseconds=whole_micros)
    # Forcing microsecond precision always yields six fractional digits,
    # even when the microsecond component is zero.
    six_digit_iso = moment.isoformat(timespec="microseconds")
    return f"{six_digit_iso}{sub_micro_nanos:03d}"
229+
230+
231+
def to_human_timestamptz_ns(timestamp_nanos: int) -> str:
    """Convert a TimestamptzNanoType value to human string.

    Args:
        timestamp_nanos: Nanoseconds relative to EPOCH_TIMESTAMPTZ.

    Returns:
        An ISO-8601 string with nine fractional-second digits followed by the
        UTC offset, e.g. ``2017-11-16T22:31:08.123456789+00:00``.
    """
    # divmod floors, so the sub-microsecond remainder is always in 0..999.
    micros, nanos = divmod(timestamp_nanos, 1000)
    dt = EPOCH_TIMESTAMPTZ + timedelta(microseconds=micros)

    # Render the wall-clock part with a fixed six fractional digits so the
    # nanosecond digits can simply be appended, with no padding special case.
    local_part = dt.replace(tzinfo=None).isoformat(timespec="microseconds")

    # Whatever the aware rendering adds after the wall-clock part is the offset
    # suffix; taking it by length avoids searching the string for '+' or '-'.
    offset_part = dt.isoformat(timespec="microseconds")[len(local_part):]

    return f"{local_part}{nanos:03d}{offset_part}"
259+
260+
221261
def micros_to_hours(micros: int) -> int:
    """Convert a timestamp in microseconds to hours from 1970-01-01T00:00."""
    micros_per_hour = 3_600_000_000
    # Floor division: a partial hour before the epoch rounds toward the earlier hour.
    return micros // micros_per_hour

tests/expressions/test_literals.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
StringLiteral,
4444
TimeLiteral,
4545
TimestampLiteral,
46+
TimestampNanoLiteral,
4647
literal,
4748
)
4849
from pyiceberg.types import (
@@ -57,7 +58,9 @@
5758
LongType,
5859
PrimitiveType,
5960
StringType,
61+
TimestampNanoType,
6062
TimestampType,
63+
TimestamptzNanoType,
6164
TimestamptzType,
6265
TimeType,
6366
UUIDType,
@@ -86,6 +89,7 @@ def test_literal_from_nan_error() -> None:
8689
DateLiteral,
8790
TimeLiteral,
8891
TimestampLiteral,
92+
TimestampNanoLiteral,
8993
DecimalLiteral,
9094
StringLiteral,
9195
FixedLiteral,
@@ -278,6 +282,47 @@ def test_timestamp_to_date() -> None:
278282
assert date_lit.value == 0
279283

280284

285+
def test_timestamp_ns_to_date() -> None:
    """Casting a zero-valued nanosecond timestamp literal to DateType yields a literal with value 0."""
    as_date = TimestampNanoLiteral(0).to(DateType())

    assert as_date.value == 0
290+
291+
292+
def test_timestamp_to_timestamp_ns() -> None:
    """Casting TimestampLiteral(1000) to TimestampNanoType gives a nanosecond literal of 1000000."""
    converted = TimestampLiteral(1000).to(TimestampNanoType())

    assert isinstance(converted, TimestampNanoLiteral)
    assert converted.value == 1000000
298+
299+
300+
def test_timestamp_ns_to_timestamp() -> None:
    """Casting TimestampNanoLiteral(1000000) to TimestampType gives a timestamp literal of 1000."""
    converted = TimestampNanoLiteral(1000000).to(TimestampType())

    assert isinstance(converted, TimestampLiteral)
    assert converted.value == 1000
306+
307+
308+
def test_string_to_timestamp_ns_literal() -> None:
    """ISO-8601 strings with nanosecond fractions convert to the matching nanosecond literals."""
    # (input string, target type, expected literal value)
    cases = [
        ("1970-01-01T00:00:00.000000001", TimestampNanoType(), 1),
        ("1970-01-01T00:00:00.000000001+00:00", TimestamptzNanoType(), 1),
        ("1970-01-01T00:00:00.123456789", TimestampNanoType(), 123456789),
        ("1970-01-01T00:00:00.123456789+00:00", TimestamptzNanoType(), 123456789),
    ]
    for text, target_type, expected_nanos in cases:
        assert StringLiteral(text).to(target_type) == TimestampNanoLiteral(expected_nanos)
313+
314+
315+
def test_long_to_timestamp_ns_literal() -> None:
    """A long literal converts to either nanosecond timestamp type with its value unchanged."""
    expected = TimestampNanoLiteral(123456789)
    for target_type in (TimestampNanoType(), TimestamptzNanoType()):
        assert LongLiteral(123456789).to(target_type) == expected
318+
319+
320+
def test_timestamp_ns_increment_decrement() -> None:
    """increment() and decrement() each move a nanosecond literal's value by exactly one."""
    start = TimestampNanoLiteral(123)
    after_increment = start.increment()
    after_decrement = start.decrement()

    assert after_increment == TimestampNanoLiteral(124)
    assert after_decrement == TimestampNanoLiteral(122)
324+
325+
281326
def test_string_literal() -> None:
282327
sqrt2 = literal("1.414").to(StringType())
283328
pi = literal("3.141").to(StringType())

tests/test_conversions.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -570,7 +570,9 @@ def test_datetime_obj_to_bytes(primitive_type: PrimitiveType, value: datetime |
570570
(DateType(), date(2017, 11, 16), "2017-11-16"),
571571
(TimeType(), time(22, 31, 8, 123456), "22:31:08.123456"),
572572
(TimestampType(), datetime(2017, 11, 16, 22, 31, 8, 123456), "2017-11-16T22:31:08.123456"),
573+
(TimestampNanoType(), 1510871468123456789, "2017-11-16T22:31:08.123456789"),
573574
(TimestamptzType(), datetime(2017, 11, 16, 22, 31, 8, 123456, tzinfo=timezone.utc), "2017-11-16T22:31:08.123456+00:00"),
575+
(TimestamptzNanoType(), 1510871468123456789, "2017-11-16T22:31:08.123456789+00:00"),
574576
(StringType(), "iceberg", "iceberg"),
575577
(BinaryType(), b"\x01\x02\x03\xff", "010203ff"),
576578
(FixedType(4), b"\x01\x02\x03\xff", "010203ff"),
@@ -595,7 +597,9 @@ def test_json_single_serialization(primitive_type: PrimitiveType, value: Any, ex
595597
(DateType(), date(2017, 11, 16)),
596598
(TimeType(), time(22, 31, 8, 123456)),
597599
(TimestampType(), datetime(2017, 11, 16, 22, 31, 8, 123456)),
600+
(TimestampNanoType(), 1510871468123456789),
598601
(TimestamptzType(), datetime(2017, 11, 16, 22, 31, 8, 123456, tzinfo=timezone.utc)),
602+
(TimestamptzNanoType(), 1510871468123456789),
599603
(StringType(), "iceberg"),
600604
(BinaryType(), b"\x01\x02\x03\xff"),
601605
(FixedType(4), b"\x01\x02\x03\xff"),

0 commit comments

Comments
 (0)