pyiceberg/utils/datetime.py (141 lines of code) (raw):
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Helper methods for working with date/time representations."""
from __future__ import annotations
import re
from datetime import (
date,
datetime,
time,
timedelta,
)
# Reference instants that every conversion in this module is measured from.
EPOCH_DATE = date(1970, 1, 1)
EPOCH_TIMESTAMP = datetime(1970, 1, 1)  # naive (no tzinfo)

# Patterns used to pre-validate timestamp strings before handing them to
# datetime.fromisoformat.
# NOTE: the "." in front of the fractional digits is not escaped, so it matches
# any single character, not only a literal dot.
ISO_TIMESTAMP = re.compile(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(.\d{1,6})?")
# Groups: (1) date and time to the second, (2) up to six fractional digits,
# (3) up to three further digits.
ISO_TIMESTAMP_NANO = re.compile(r"(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})(.\d{1,6})?(\d{1,3})?")

# Timezone-aware counterpart of EPOCH_TIMESTAMP, fixed at offset +00:00.
EPOCH_TIMESTAMPTZ = datetime.fromisoformat("1970-01-01T00:00:00.000000+00:00")
ISO_TIMESTAMPTZ = re.compile(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(.\d{1,6})?[-+]\d{2}:\d{2}")
# Same groups as ISO_TIMESTAMP_NANO plus (4) the mandatory "+HH:MM" / "-HH:MM" offset.
ISO_TIMESTAMPTZ_NANO = re.compile(r"(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})(.\d{1,6})?(\d{1,3})?([-+]\d{2}:\d{2})")
def micros_to_days(timestamp: int) -> int:
    """Return the whole number of days contained in a microsecond count.

    The count is normalised through ``timedelta``, so negative values are
    floored (``-1`` microsecond is day ``-1``).
    """
    elapsed = timedelta(microseconds=timestamp)
    return elapsed.days
def micros_to_time(micros: int) -> time:
    """Build a ``datetime.time`` from a count of microseconds since midnight.

    The value is peeled apart from the smallest unit upwards; the resulting
    hour is passed to ``time`` unchanged, so out-of-range inputs (negative, or
    24 hours and beyond) are rejected by the ``time`` constructor.
    """
    total_seconds, microsecond = divmod(micros, 1000000)
    total_minutes, second = divmod(total_seconds, 60)
    hour, minute = divmod(total_minutes, 60)
    return time(hour=hour, minute=minute, second=second, microsecond=microsecond)
def date_str_to_days(date_str: str) -> int:
    """Parse an ISO-8601 date string and return its offset in days from 1970-01-01."""
    parsed = date.fromisoformat(date_str)
    # The difference of proleptic ordinals equals the day count between two dates.
    return parsed.toordinal() - EPOCH_DATE.toordinal()
def date_to_days(date_val: date) -> int:
    """Return how many days ``date_val`` lies after (or, if negative, before) 1970-01-01."""
    since_epoch = date_val - EPOCH_DATE
    return since_epoch.days
def days_to_date(days: int) -> date:
    """Return the calendar date that lies ``days`` days from 1970-01-01."""
    offset = timedelta(days)
    return EPOCH_DATE + offset
def time_str_to_micros(time_str: str) -> int:
    """Parse an ISO-8601 time string and return microseconds elapsed since midnight."""
    parsed = time.fromisoformat(time_str)
    return time_to_micros(parsed)
def time_to_micros(t: time) -> int:
    """Return the microseconds elapsed since midnight for a ``datetime.time``."""
    whole_seconds = t.hour * 3600 + t.minute * 60 + t.second
    return whole_seconds * 1_000_000 + t.microsecond
def datetime_to_micros(dt: datetime) -> int:
    """Return microseconds between the 1970-01-01T00:00:00 epoch and ``dt``.

    A datetime carrying a ``tzinfo`` is measured against the +00:00 epoch; one
    without is measured against the naive epoch.
    """
    epoch = EPOCH_TIMESTAMPTZ if dt.tzinfo else EPOCH_TIMESTAMP
    elapsed = dt - epoch
    whole_seconds = elapsed.days * 86400 + elapsed.seconds
    return whole_seconds * 1_000_000 + elapsed.microseconds
def timestamp_to_micros(timestamp_str: str) -> int:
    """Convert an ISO-8601 formatted timestamp without zone to microseconds from 1970-01-01T00:00:00.000000.

    Raises:
        ValueError: If the string carries a zone offset or is not matched by
            the zone-less timestamp pattern at all.
    """
    if ISO_TIMESTAMP.fullmatch(timestamp_str) is not None:
        parsed = datetime.fromisoformat(timestamp_str)
        return datetime_to_micros(parsed)

    if ISO_TIMESTAMPTZ.fullmatch(timestamp_str) is not None:
        # The text is a well-formed timestamp *with* a zone, so report that
        # specifically instead of the generic "invalid" error.
        raise ValueError(f"Zone offset provided, but not expected: {timestamp_str}")

    raise ValueError(f"Invalid timestamp without zone: {timestamp_str} (must be ISO-8601)")
def time_str_to_nanos(time_str: str) -> int:
    """Parse an ISO-8601 time string and return nanoseconds elapsed since midnight."""
    parsed = time.fromisoformat(time_str)
    return time_to_nanos(parsed)
def time_to_nanos(t: time) -> int:
    """Return the nanoseconds elapsed since midnight for a ``datetime.time``.

    ``datetime.time`` only stores microseconds
    (https://github.com/python/cpython/issues/59648), so the result is always
    a multiple of 1000.
    """
    whole_seconds = t.hour * 3600 + t.minute * 60 + t.second
    micros = whole_seconds * 1_000_000 + t.microsecond
    return micros * 1_000
def datetime_to_nanos(dt: datetime) -> int:
    """Return nanoseconds between the 1970-01-01T00:00:00 epoch and ``dt``.

    A datetime carrying a ``tzinfo`` is measured against the +00:00 epoch; one
    without is measured against the naive epoch. ``datetime`` only stores
    microseconds (https://github.com/python/cpython/issues/59648), so the
    result is always a multiple of 1000.
    """
    epoch = EPOCH_TIMESTAMPTZ if dt.tzinfo else EPOCH_TIMESTAMP
    elapsed = dt - epoch
    whole_seconds = elapsed.days * 86400 + elapsed.seconds
    micros = whole_seconds * 1_000_000 + elapsed.microseconds
    return micros * 1_000
def timestamp_to_nanos(timestamp_str: str) -> int:
    """Convert an ISO-8601 formatted timestamp without zone to nanoseconds from 1970-01-01T00:00:00.000000000.

    Args:
        timestamp_str: Timestamp text such as ``2023-03-04T13:41:51.123456789``.

    Returns:
        Nanoseconds between the naive epoch and the parsed instant.

    Raises:
        ValueError: If the string carries a zone offset, or is not a valid
            zone-less timestamp.
    """
    if match := ISO_TIMESTAMP_NANO.fullmatch(timestamp_str):
        # Python datetime has no native nanosecond support, so the part up to
        # microseconds is parsed by datetime and the remaining digits are
        # added by hand.
        seconds_part, micros_part, nanos_part = match.groups()
        # The extra digits are positional (7th-9th fractional digit): a lone
        # "7" means 700ns, so right-pad to three digits before converting.
        extra_nanos = int((nanos_part or "").ljust(3, "0"))
        micros_precision_str = seconds_part + (micros_part or "")
        return datetime_to_nanos(datetime.fromisoformat(micros_precision_str)) + extra_nanos
    if ISO_TIMESTAMPTZ_NANO.fullmatch(timestamp_str):
        # The text is a well-formed timestamp *with* a zone, so give a more specific error
        raise ValueError(f"Zone offset provided, but not expected: {timestamp_str}")
    raise ValueError(f"Invalid timestamp without zone: {timestamp_str} (must be ISO-8601)")
def timestamptz_to_nanos(timestamptz_str: str) -> int:
    """Convert an ISO-8601 formatted timestamp with zone to nanoseconds from 1970-01-01T00:00:00.000000000+00:00.

    Args:
        timestamptz_str: Timestamp text with an offset, such as
            ``2023-03-04T13:41:51.123456789+00:00``.

    Returns:
        Nanoseconds between the +00:00 epoch and the parsed instant.

    Raises:
        ValueError: If the zone offset is missing, or the string is not a
            valid timestamp with zone.
    """
    if match := ISO_TIMESTAMPTZ_NANO.fullmatch(timestamptz_str):
        # Python datetime has no native nanosecond support, so the part up to
        # microseconds is parsed by datetime and the remaining digits are
        # added by hand.
        seconds_part, micros_part, nanos_part, offset_part = match.groups()
        # The extra digits are positional (7th-9th fractional digit): a lone
        # "7" means 700ns, so right-pad to three digits before converting.
        extra_nanos = int((nanos_part or "").ljust(3, "0"))
        micros_precision_str = seconds_part + (micros_part or "") + offset_part
        return datetime_to_nanos(datetime.fromisoformat(micros_precision_str)) + extra_nanos
    if ISO_TIMESTAMP_NANO.fullmatch(timestamptz_str):
        # The text is a well-formed timestamp *without* a zone, so give a more specific error
        raise ValueError(f"Missing zone offset: {timestamptz_str} (must be ISO-8601)")
    raise ValueError(f"Invalid timestamp with zone: {timestamptz_str} (must be ISO-8601)")
def datetime_to_millis(dt: datetime) -> int:
    """Return milliseconds between the 1970-01-01T00:00:00 epoch and ``dt``.

    A datetime carrying a ``tzinfo`` is measured against the +00:00 epoch; one
    without is measured against the naive epoch. Sub-millisecond microseconds
    are dropped by floor division.
    """
    epoch = EPOCH_TIMESTAMPTZ if dt.tzinfo else EPOCH_TIMESTAMP
    elapsed = dt - epoch
    whole_seconds = elapsed.days * 86400 + elapsed.seconds
    return whole_seconds * 1_000 + elapsed.microseconds // 1_000
def millis_to_datetime(millis: int) -> datetime:
    """Return the naive datetime lying ``millis`` milliseconds from the epoch."""
    return EPOCH_TIMESTAMP + timedelta(milliseconds=millis)
def timestamptz_to_micros(timestamptz_str: str) -> int:
    """Convert an ISO-8601 formatted timestamp with zone to microseconds from 1970-01-01T00:00:00.000000+00:00.

    Raises:
        ValueError: If the zone offset is missing or the string is not matched
            by the zoned timestamp pattern at all.
    """
    if ISO_TIMESTAMPTZ.fullmatch(timestamptz_str) is not None:
        parsed = datetime.fromisoformat(timestamptz_str)
        return datetime_to_micros(parsed)

    if ISO_TIMESTAMP.fullmatch(timestamptz_str) is not None:
        # The text is a well-formed timestamp lacking only the zone, so report
        # that specifically instead of the generic "invalid" error.
        raise ValueError(f"Missing zone offset: {timestamptz_str} (must be ISO-8601)")

    raise ValueError(f"Invalid timestamp with zone: {timestamptz_str} (must be ISO-8601)")
def micros_to_timestamp(micros: int) -> datetime:
    """Return the naive datetime lying ``micros`` microseconds from the epoch."""
    return EPOCH_TIMESTAMP + timedelta(microseconds=micros)
def micros_to_timestamptz(micros: int) -> datetime:
    """Return the +00:00 aware datetime lying ``micros`` microseconds from the epoch."""
    return EPOCH_TIMESTAMPTZ + timedelta(microseconds=micros)
def to_human_year(year_ordinal: int) -> str:
    """Render a count of years from the epoch year as a zero-padded, four-character year."""
    calendar_year = EPOCH_TIMESTAMP.year + year_ordinal
    return format(calendar_year, "0=4d")
def to_human_month(month_ordinal: int) -> str:
    """Render a count of months from the epoch month as a ``YYYY-MM`` string."""
    # Floor division/modulo keep the month index in 0..11 even for negative ordinals.
    year_offset, month_index = divmod(month_ordinal, 12)
    calendar_year = EPOCH_TIMESTAMP.year + year_offset
    calendar_month = 1 + month_index
    return f"{calendar_year:0=4d}-{calendar_month:0=2d}"
def to_human_day(day_ordinal: int) -> str:
    """Render a count of days from 1970-01-01 as an ISO ``YYYY-MM-DD`` string."""
    calendar_day = EPOCH_DATE + timedelta(days=day_ordinal)
    return calendar_day.isoformat()
def to_human_hour(hour_ordinal: int) -> str:
    """Render a count of hours from the epoch as a ``YYYY-MM-DD-HH`` string."""
    instant = EPOCH_TIMESTAMP + timedelta(hours=hour_ordinal)
    # "-" separates date and hour; the "hours" timespec truncates the output after HH.
    return instant.isoformat(sep="-", timespec="hours")
def to_human_time(micros_from_midnight: int) -> str:
    """Render microseconds since midnight as an ISO-formatted time string."""
    as_time = micros_to_time(micros_from_midnight)
    return as_time.isoformat()
def to_human_timestamptz(timestamp_micros: int) -> str:
    """Render microseconds from the +00:00 epoch as an ISO-formatted, offset-bearing timestamp string."""
    instant = EPOCH_TIMESTAMPTZ + timedelta(microseconds=timestamp_micros)
    return instant.isoformat()
def to_human_timestamp(timestamp_micros: int) -> str:
    """Render microseconds from the naive epoch as an ISO-formatted timestamp string."""
    instant = EPOCH_TIMESTAMP + timedelta(microseconds=timestamp_micros)
    return instant.isoformat()
def micros_to_hours(micros: int) -> int:
    """Return whole hours from 1970-01-01T00:00 for a microsecond timestamp (floored)."""
    hours, _ = divmod(micros, 3_600_000_000)
    return hours
def days_to_months(days: int) -> int:
    """Return the number of calendar months between 1970-01 and the date ``days`` days from the epoch."""
    as_date = days_to_date(days)
    year_delta = as_date.year - EPOCH_DATE.year
    month_delta = as_date.month - EPOCH_DATE.month
    return year_delta * 12 + month_delta
def micros_to_months(micros: int) -> int:
    """Return the number of calendar months between 1970-01 and a microsecond timestamp."""
    instant = micros_to_timestamp(micros)
    year_delta = instant.year - EPOCH_TIMESTAMP.year
    month_delta = instant.month - EPOCH_TIMESTAMP.month
    return year_delta * 12 + month_delta
def days_to_years(days: int) -> int:
    """Return the calendar-year difference between 1970 and the date ``days`` days from the epoch."""
    as_date = days_to_date(days)
    return as_date.year - EPOCH_DATE.year
def micros_to_years(micros: int) -> int:
    """Return the calendar-year difference between 1970 and a microsecond timestamp."""
    instant = micros_to_timestamp(micros)
    return instant.year - EPOCH_TIMESTAMP.year
def nanos_to_timestamp(nanos: int) -> datetime:
    """Return the naive datetime for a nanosecond timestamp, at microsecond precision.

    The nanoseconds are first reduced to microseconds, because ``datetime``
    cannot hold anything finer.
    """
    micros = nanos_to_micros(nanos)
    return EPOCH_TIMESTAMP + timedelta(microseconds=micros)
def nanos_to_years(nanos: int) -> int:
    """Return the calendar-year difference between 1970 and a nanosecond timestamp."""
    instant = nanos_to_timestamp(nanos)
    return instant.year - EPOCH_TIMESTAMP.year
def nanos_to_months(nanos: int) -> int:
    """Return the number of calendar months between 1970-01 and a nanosecond timestamp."""
    instant = nanos_to_timestamp(nanos)
    year_delta = instant.year - EPOCH_TIMESTAMP.year
    month_delta = instant.month - EPOCH_TIMESTAMP.month
    return year_delta * 12 + month_delta
def nanos_to_days(nanos: int) -> int:
    """Return the whole number of days contained in a nanosecond count.

    The value is floored to microseconds and then normalised through
    ``timedelta``, so negative values round towards minus infinity.
    """
    micros = nanos // 1000
    elapsed = timedelta(microseconds=micros)
    return elapsed.days
def nanos_to_time(nanos: int) -> time:
    """Build a microsecond-precision ``datetime.time`` from nanoseconds since midnight.

    The value is reduced to microseconds and then peeled apart from the
    smallest unit upwards; the resulting hour is passed to ``time`` unchanged,
    so out-of-range inputs are rejected by the ``time`` constructor.
    """
    total_micros = nanos_to_micros(nanos)
    total_seconds, microsecond = divmod(total_micros, 1000000)
    total_minutes, second = divmod(total_seconds, 60)
    hour, minute = divmod(total_minutes, 60)
    return time(hour=hour, minute=minute, second=second, microsecond=microsecond)
def nanos_to_hours(nanos: int) -> int:
    """Convert a timestamp in nanoseconds to hours from 1970-01-01T00:00.

    Args:
        nanos: Nanoseconds from the epoch; may be negative.

    Returns:
        Whole hours from the epoch, floored towards minus infinity.
    """
    # One hour = 3600 s * 1_000_000_000 ns/s. The previous divisor carried an
    # extra zero (3.6e13), which made every result ten times too small.
    return nanos // 3_600_000_000_000
def nanos_to_micros(nanos: int) -> int:
    """Reduce a nanosecond timestamp to microseconds, discarding the sub-microsecond part by flooring."""
    micros, _ = divmod(nanos, 1000)
    return micros