Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions pyiceberg/conversions.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,11 +76,15 @@
time_str_to_micros,
time_to_micros,
timestamp_to_micros,
timestamp_to_nanos,
timestamptz_to_micros,
timestamptz_to_nanos,
to_human_day,
to_human_time,
to_human_timestamp,
to_human_timestamp_ns,
to_human_timestamptz,
to_human_timestamptz_ns,
)
from pyiceberg.utils.decimal import decimal_to_bytes, unscaled_to_decimal

Expand Down Expand Up @@ -439,6 +443,15 @@ def _(_: PrimitiveType, val: int | datetime) -> str:
return to_human_timestamp(val)


@to_json.register(TimestampNanoType)
def _(_: TimestampNanoType, val: int | datetime) -> str:
"""Python datetime (without timezone) or nanoseconds since epoch serializes into an ISO8601 timestamp."""
if isinstance(val, datetime):
val = datetime_to_nanos(val)

return to_human_timestamp_ns(val)


@to_json.register(TimestamptzType)
def _(_: TimestamptzType, val: int | datetime) -> str:
"""Python datetime (with timezone) or microseconds since epoch serializes into an ISO8601 timestamp."""
Expand All @@ -447,6 +460,14 @@ def _(_: TimestamptzType, val: int | datetime) -> str:
return to_human_timestamptz(val)


@to_json.register(TimestamptzNanoType)
def _(_: TimestamptzNanoType, val: int | datetime) -> str:
"""Python datetime (with timezone) or nanoseconds since epoch serializes into an ISO8601 timestamp."""
if isinstance(val, datetime):
val = datetime_to_nanos(val)
return to_human_timestamptz_ns(val)


@to_json.register(FloatType)
@to_json.register(DoubleType)
def _(_: FloatType | DoubleType, val: float) -> float:
Expand Down Expand Up @@ -586,6 +607,17 @@ def _(_: PrimitiveType, val: str | int | datetime) -> datetime:
return val


@from_json.register(TimestampNanoType)
def _(_: TimestampNanoType, val: str | int | datetime) -> int | datetime:
"""JSON ISO8601 string into nanoseconds since epoch."""
if isinstance(val, str):
return timestamp_to_nanos(val)
elif isinstance(val, int):
return val
else:
return val


@from_json.register(TimestamptzType)
def _(_: TimestamptzType, val: str | int | datetime) -> datetime:
"""JSON ISO8601 string into Python datetime."""
Expand All @@ -597,6 +629,17 @@ def _(_: TimestamptzType, val: str | int | datetime) -> datetime:
return val


@from_json.register(TimestamptzNanoType)
def _(_: TimestamptzNanoType, val: str | int | datetime) -> int | datetime:
"""JSON ISO8601 string into nanoseconds since epoch."""
if isinstance(val, str):
return timestamptz_to_nanos(val)
elif isinstance(val, int):
return val
else:
return val


@from_json.register(FloatType)
@from_json.register(DoubleType)
def _(_: FloatType | DoubleType, val: float) -> float:
Expand Down
70 changes: 70 additions & 0 deletions pyiceberg/expressions/literals.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@
IntegerType,
LongType,
StringType,
TimestampNanoType,
TimestampType,
TimestamptzNanoType,
TimestamptzType,
TimeType,
UUIDType,
Expand All @@ -57,10 +59,15 @@
days_to_date,
micros_to_days,
micros_to_timestamp,
nanos_to_days,
nanos_to_micros,
time_str_to_micros,
time_to_micros,
timestamp_to_micros,
timestamp_to_nanos,
timestamptz_to_micros,
timestamptz_to_nanos,
to_human_timestamp_ns,
)
from pyiceberg.utils.decimal import decimal_to_unscaled, unscaled_to_decimal
from pyiceberg.utils.singleton import Singleton
Expand Down Expand Up @@ -332,6 +339,14 @@ def _(self, _: TimestampType) -> Literal[int]:
def _(self, _: TimestamptzType) -> Literal[int]:
return TimestampLiteral(self.value)

@to.register(TimestampNanoType)
def _(self, _: TimestampNanoType) -> Literal[int]:
return TimestampNanoLiteral(self.value)

@to.register(TimestamptzNanoType)
def _(self, _: TimestamptzNanoType) -> Literal[int]:
return TimestampNanoLiteral(self.value)

@to.register(DecimalType)
def _(self, type_var: DecimalType) -> Literal[Decimal]:
unscaled = Decimal(self.value)
Expand Down Expand Up @@ -476,11 +491,58 @@ def _(self, _: TimestampType) -> Literal[int]:
def _(self, _: TimestamptzType) -> Literal[int]:
return self

@to.register(TimestampNanoType)
def _(self, _: TimestampNanoType) -> Literal[int]:
return TimestampNanoLiteral(self.value * 1000)

@to.register(TimestamptzNanoType)
def _(self, _: TimestamptzNanoType) -> Literal[int]:
return TimestampNanoLiteral(self.value * 1000)

@to.register(DateType)
def _(self, _: DateType) -> Literal[int]:
return DateLiteral(micros_to_days(self.value))


class TimestampNanoLiteral(Literal[int]):
def __init__(self, value: int) -> None:
super().__init__(value, int)

@model_serializer
def ser_model(self) -> str:
return to_human_timestamp_ns(self.root)

def increment(self) -> Literal[int]:
return TimestampNanoLiteral(self.value + 1)

def decrement(self) -> Literal[int]:
return TimestampNanoLiteral(self.value - 1)

@singledispatchmethod
def to(self, type_var: IcebergType) -> Literal: # type: ignore
raise TypeError(f"Cannot convert TimestampNanoLiteral into {type_var}")

@to.register(TimestampNanoType)
def _(self, _: TimestampNanoType) -> Literal[int]:
return self

@to.register(TimestamptzNanoType)
def _(self, _: TimestamptzNanoType) -> Literal[int]:
return self

@to.register(TimestampType)
def _(self, _: TimestampType) -> Literal[int]:
return TimestampLiteral(nanos_to_micros(self.value))

@to.register(TimestamptzType)
def _(self, _: TimestamptzType) -> Literal[int]:
return TimestampLiteral(nanos_to_micros(self.value))

@to.register(DateType)
def _(self, _: DateType) -> Literal[int]:
return DateLiteral(nanos_to_days(self.value))


class DecimalLiteral(Literal[Decimal]):
def __init__(self, value: Decimal) -> None:
super().__init__(value, Decimal)
Expand Down Expand Up @@ -600,6 +662,14 @@ def _(self, _: TimestampType) -> Literal[int]:
def _(self, _: TimestamptzType) -> Literal[int]:
return TimestampLiteral(timestamptz_to_micros(self.value))

@to.register(TimestampNanoType)
def _(self, _: TimestampNanoType) -> Literal[int]:
return TimestampNanoLiteral(timestamp_to_nanos(self.value))

@to.register(TimestamptzNanoType)
def _(self, _: TimestamptzNanoType) -> Literal[int]:
return TimestampNanoLiteral(timestamptz_to_nanos(self.value))

@to.register(UUIDType)
def _(self, _: UUIDType) -> Literal[bytes]:
return UUIDLiteral(UUID(self.value).bytes)
Expand Down
40 changes: 40 additions & 0 deletions pyiceberg/utils/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,46 @@ def to_human_timestamp(timestamp_micros: int) -> str:
return (EPOCH_TIMESTAMP + timedelta(microseconds=timestamp_micros)).isoformat()


def to_human_timestamp_ns(timestamp_nanos: int) -> str:
"""Convert a TimestampNanoType value to human string."""
micros = timestamp_nanos // 1000
nanos = timestamp_nanos % 1000
ts_str = (EPOCH_TIMESTAMP + timedelta(microseconds=micros)).isoformat()
if "." not in ts_str:
ts_str += ".000000"
return f"{ts_str}{nanos:03d}"


def to_human_timestamptz_ns(timestamp_nanos: int) -> str:
"""Convert a TimestamptzNanoType value to human string."""
micros = timestamp_nanos // 1000
nanos = timestamp_nanos % 1000
dt = EPOCH_TIMESTAMPTZ + timedelta(microseconds=micros)
# dt.isoformat() will look like 2023-01-01T00:00:00+00:00 or 2023-01-01T00:00:00.000000+00:00
ts_str = dt.isoformat()
if "." not in ts_str:
# Insert .000000 before the timezone offset
if "+" in ts_str:
parts = ts_str.split("+")
ts_str = f"{parts[0]}.000000{nanos:03d}+{parts[1]}"
else:
parts = ts_str.split("-")
# Be careful with negative years if any, but Iceberg epoch is 1970
# Expected format: YYYY-MM-DDTHH:MM:SS-HH:MM
# The last '-' is the TZ separator.
last_dash_idx = ts_str.rfind("-")
ts_str = f"{ts_str[:last_dash_idx]}.000000{nanos:03d}{ts_str[last_dash_idx:]}"
else:
# Append nanos before the timezone offset
if "+" in ts_str:
parts = ts_str.split("+")
ts_str = f"{parts[0]}{nanos:03d}+{parts[1]}"
else:
last_dash_idx = ts_str.rfind("-")
ts_str = f"{ts_str[:last_dash_idx]}{nanos:03d}{ts_str[last_dash_idx:]}"
return ts_str


def micros_to_hours(micros: int) -> int:
"""Convert a timestamp in microseconds to hours from 1970-01-01T00:00."""
return micros // 3_600_000_000
Expand Down
45 changes: 45 additions & 0 deletions tests/expressions/test_literals.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
StringLiteral,
TimeLiteral,
TimestampLiteral,
TimestampNanoLiteral,
literal,
)
from pyiceberg.types import (
Expand All @@ -57,7 +58,9 @@
LongType,
PrimitiveType,
StringType,
TimestampNanoType,
TimestampType,
TimestamptzNanoType,
TimestamptzType,
TimeType,
UUIDType,
Expand Down Expand Up @@ -86,6 +89,7 @@ def test_literal_from_nan_error() -> None:
DateLiteral,
TimeLiteral,
TimestampLiteral,
TimestampNanoLiteral,
DecimalLiteral,
StringLiteral,
FixedLiteral,
Expand Down Expand Up @@ -278,6 +282,47 @@ def test_timestamp_to_date() -> None:
assert date_lit.value == 0


def test_timestamp_ns_to_date() -> None:
epoch_lit = TimestampNanoLiteral(0)
date_lit = epoch_lit.to(DateType())

assert date_lit.value == 0


def test_timestamp_to_timestamp_ns() -> None:
ts_lit = TimestampLiteral(1000)
ts_ns_lit = ts_lit.to(TimestampNanoType())

assert isinstance(ts_ns_lit, TimestampNanoLiteral)
assert ts_ns_lit.value == 1000000


def test_timestamp_ns_to_timestamp() -> None:
ts_ns_lit = TimestampNanoLiteral(1000000)
ts_lit = ts_ns_lit.to(TimestampType())

assert isinstance(ts_lit, TimestampLiteral)
assert ts_lit.value == 1000


def test_string_to_timestamp_ns_literal() -> None:
assert StringLiteral("1970-01-01T00:00:00.000000001").to(TimestampNanoType()) == TimestampNanoLiteral(1)
assert StringLiteral("1970-01-01T00:00:00.000000001+00:00").to(TimestamptzNanoType()) == TimestampNanoLiteral(1)
assert StringLiteral("1970-01-01T00:00:00.123456789").to(TimestampNanoType()) == TimestampNanoLiteral(123456789)
assert StringLiteral("1970-01-01T00:00:00.123456789+00:00").to(TimestamptzNanoType()) == TimestampNanoLiteral(123456789)


def test_long_to_timestamp_ns_literal() -> None:
assert LongLiteral(123456789).to(TimestampNanoType()) == TimestampNanoLiteral(123456789)
assert LongLiteral(123456789).to(TimestamptzNanoType()) == TimestampNanoLiteral(123456789)


def test_timestamp_ns_increment_decrement() -> None:
lit = TimestampNanoLiteral(123)
assert lit.increment() == TimestampNanoLiteral(124)
assert lit.decrement() == TimestampNanoLiteral(122)


def test_string_literal() -> None:
sqrt2 = literal("1.414").to(StringType())
pi = literal("3.141").to(StringType())
Expand Down
4 changes: 4 additions & 0 deletions tests/test_conversions.py
Original file line number Diff line number Diff line change
Expand Up @@ -570,7 +570,9 @@ def test_datetime_obj_to_bytes(primitive_type: PrimitiveType, value: datetime |
(DateType(), date(2017, 11, 16), "2017-11-16"),
(TimeType(), time(22, 31, 8, 123456), "22:31:08.123456"),
(TimestampType(), datetime(2017, 11, 16, 22, 31, 8, 123456), "2017-11-16T22:31:08.123456"),
(TimestampNanoType(), 1510871468123456789, "2017-11-16T22:31:08.123456789"),
(TimestamptzType(), datetime(2017, 11, 16, 22, 31, 8, 123456, tzinfo=timezone.utc), "2017-11-16T22:31:08.123456+00:00"),
(TimestamptzNanoType(), 1510871468123456789, "2017-11-16T22:31:08.123456789+00:00"),
(StringType(), "iceberg", "iceberg"),
(BinaryType(), b"\x01\x02\x03\xff", "010203ff"),
(FixedType(4), b"\x01\x02\x03\xff", "010203ff"),
Expand All @@ -595,7 +597,9 @@ def test_json_single_serialization(primitive_type: PrimitiveType, value: Any, ex
(DateType(), date(2017, 11, 16)),
(TimeType(), time(22, 31, 8, 123456)),
(TimestampType(), datetime(2017, 11, 16, 22, 31, 8, 123456)),
(TimestampNanoType(), 1510871468123456789),
(TimestamptzType(), datetime(2017, 11, 16, 22, 31, 8, 123456, tzinfo=timezone.utc)),
(TimestamptzNanoType(), 1510871468123456789),
(StringType(), "iceberg"),
(BinaryType(), b"\x01\x02\x03\xff"),
(FixedType(4), b"\x01\x02\x03\xff"),
Expand Down
Loading