pyiceberg/expressions/literals.py (517 lines of code) (raw):

# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. # pylint: disable=W0613 from __future__ import annotations import struct from abc import ABC, abstractmethod from datetime import date, datetime, time from decimal import ROUND_HALF_UP, Decimal from functools import singledispatchmethod from math import isnan from typing import Any, Generic, Type from uuid import UUID from pyiceberg.typedef import L from pyiceberg.types import ( BinaryType, BooleanType, DateType, DecimalType, DoubleType, FixedType, FloatType, IcebergType, IntegerType, LongType, StringType, TimestampType, TimestamptzType, TimeType, UUIDType, ) from pyiceberg.utils.datetime import ( date_str_to_days, date_to_days, datetime_to_micros, micros_to_days, time_str_to_micros, time_to_micros, timestamp_to_micros, timestamptz_to_micros, ) from pyiceberg.utils.decimal import decimal_to_unscaled, unscaled_to_decimal from pyiceberg.utils.singleton import Singleton UUID_BYTES_LENGTH = 16 class Literal(Generic[L], ABC): """Literal which has a value and can be converted between types.""" _value: L def __init__(self, value: L, value_type: Type[L]): if value is None or not isinstance(value, value_type): raise TypeError(f"Invalid literal value: {value!r} (not a {value_type})") if isinstance(value, float) and isnan(value): raise ValueError("Cannot create expression literal from NaN.") self._value = value @property def value(self) -> L: return self._value @singledispatchmethod @abstractmethod def to(self, type_var: IcebergType) -> Literal[L]: ... # pragma: no cover def __repr__(self) -> str: """Return the string representation of the Literal class.""" return f"{type(self).__name__}({self.value!r})" def __str__(self) -> str: """Return the string representation of the Literal class.""" return str(self.value) def __hash__(self) -> int: """Return a hashed representation of the Literal class.""" return hash(self.value) def __eq__(self, other: Any) -> bool: """Return the equality of two instances of the Literal class.""" if not isinstance(other, Literal): return False return self.value == other.value def __ne__(self, other: Any) -> bool: """Return the inequality of two instances of the Literal class.""" return not self.__eq__(other) def __lt__(self, other: Any) -> bool: """Return if one instance of the Literal class is less than another instance.""" return self.value < other.value def __gt__(self, other: Any) -> bool: """Return if one instance of the Literal class is greater than another instance.""" return self.value > other.value def __le__(self, other: Any) -> bool: """Return if one instance of the Literal class is less than or equal to another instance.""" return self.value <= other.value def __ge__(self, other: Any) -> bool: """Return if one instance of the Literal class is greater than or equal to another instance.""" return self.value >= other.value def literal(value: L) -> Literal[L]: """ Construct an Iceberg Literal based on Python primitive data type. Args: value (Python primitive type): the value to be associated with literal. Example: from pyiceberg.expressions.literals import literal. >>> literal(123) LongLiteral(123) """ if isinstance(value, float): return DoubleLiteral(value) # type: ignore elif isinstance(value, bool): return BooleanLiteral(value) elif isinstance(value, int): return LongLiteral(value) elif isinstance(value, str): return StringLiteral(value) elif isinstance(value, UUID): return UUIDLiteral(value.bytes) # type: ignore elif isinstance(value, bytes): return BinaryLiteral(value) elif isinstance(value, Decimal): return DecimalLiteral(value) elif isinstance(value, datetime): return TimestampLiteral(datetime_to_micros(value)) # type: ignore elif isinstance(value, date): return DateLiteral(date_to_days(value)) # type: ignore elif isinstance(value, time): return TimeLiteral(time_to_micros(value)) # type: ignore else: raise TypeError(f"Invalid literal value: {repr(value)}") class AboveMax(Literal[L]): def __repr__(self) -> str: """Return the string representation of the AboveMax class.""" return f"{self.__class__.__name__}()" def __str__(self) -> str: """Return the string representation of the AboveMax class.""" return self.__class__.__name__ class BelowMin(Literal[L]): def __repr__(self) -> str: """Return the string representation of the BelowMin class.""" return f"{self.__class__.__name__}()" def __str__(self) -> str: """Return the string representation of the BelowMin class.""" return self.__class__.__name__ class FloatAboveMax(AboveMax[float], Singleton): def __init__(self) -> None: super().__init__(FloatType.max, float) @singledispatchmethod def to(self, type_var: IcebergType) -> Literal: # type: ignore raise TypeError("Cannot change the type of FloatAboveMax") @to.register(FloatType) def _(self, _: FloatType) -> Literal[float]: return self class FloatBelowMin(BelowMin[float], Singleton): def __init__(self) -> None: super().__init__(FloatType.min, float) @singledispatchmethod def to(self, type_var: IcebergType) -> Literal: # type: ignore raise TypeError("Cannot change the type of FloatBelowMin") @to.register(FloatType) def _(self, _: FloatType) -> Literal[float]: return self class IntAboveMax(AboveMax[int], Singleton): def __init__(self) -> None: super().__init__(IntegerType.max, int) @singledispatchmethod def to(self, type_var: IcebergType) -> Literal: # type: ignore raise TypeError("Cannot change the type of IntAboveMax") @to.register(IntegerType) def _(self, _: IntegerType) -> Literal[int]: return self class IntBelowMin(BelowMin[int], Singleton): def __init__(self) -> None: super().__init__(IntegerType.min, int) @singledispatchmethod def to(self, type_var: IcebergType) -> Literal: # type: ignore raise TypeError("Cannot change the type of IntBelowMin") @to.register(IntegerType) def _(self, _: IntegerType) -> Literal[int]: return self class LongAboveMax(AboveMax[int], Singleton): def __init__(self) -> None: super().__init__(LongType.max, int) @singledispatchmethod def to(self, type_var: IcebergType) -> Literal: # type: ignore raise TypeError("Cannot change the type of IntAboveMax") @to.register(LongType) def _(self, _: LongType) -> Literal[int]: return self class LongBelowMin(BelowMin[int], Singleton): def __init__(self) -> None: super().__init__(LongType.min, int) @singledispatchmethod def to(self, type_var: IcebergType) -> Literal: # type: ignore raise TypeError("Cannot change the type of IntBelowMin") @to.register(LongType) def _(self, _: LongType) -> Literal[int]: return self class BooleanLiteral(Literal[bool]): def __init__(self, value: bool) -> None: super().__init__(value, bool) @singledispatchmethod def to(self, type_var: IcebergType) -> Literal[bool]: # type: ignore raise TypeError(f"Cannot convert BooleanLiteral into {type_var}") @to.register(BooleanType) def _(self, _: BooleanType) -> Literal[bool]: return self class LongLiteral(Literal[int]): def __init__(self, value: int) -> None: super().__init__(value, int) @singledispatchmethod def to(self, type_var: IcebergType) -> Literal: # type: ignore raise TypeError(f"Cannot convert LongLiteral into {type_var}") def increment(self) -> Literal[int]: return LongLiteral(self.value + 1) def decrement(self) -> Literal[int]: return LongLiteral(self.value - 1) @to.register(LongType) def _(self, _: LongType) -> Literal[int]: if LongType.max < self.value: return LongAboveMax() elif LongType.min > self.value: return LongBelowMin() else: return self @to.register(IntegerType) def _(self, _: IntegerType) -> Literal[int]: if IntegerType.max < self.value: return IntAboveMax() elif IntegerType.min > self.value: return IntBelowMin() return self @to.register(FloatType) def _(self, _: FloatType) -> Literal[float]: return FloatLiteral(float(self.value)) @to.register(DoubleType) def _(self, _: DoubleType) -> Literal[float]: return DoubleLiteral(float(self.value)) @to.register(DateType) def _(self, _: DateType) -> Literal[int]: return DateLiteral(self.value) @to.register(TimeType) def _(self, _: TimeType) -> Literal[int]: return TimeLiteral(self.value) @to.register(TimestampType) def _(self, _: TimestampType) -> Literal[int]: return TimestampLiteral(self.value) @to.register(TimestamptzType) def _(self, _: TimestamptzType) -> Literal[int]: return TimestampLiteral(self.value) @to.register(DecimalType) def _(self, type_var: DecimalType) -> Literal[Decimal]: unscaled = Decimal(self.value) if type_var.scale == 0: return DecimalLiteral(unscaled) else: sign, digits, _ = unscaled.as_tuple() zeros = (0,) * type_var.scale return DecimalLiteral(Decimal((sign, digits + zeros, -type_var.scale))) class FloatLiteral(Literal[float]): def __init__(self, value: float) -> None: super().__init__(value, float) self._value32 = struct.unpack("<f", struct.pack("<f", value))[0] def __eq__(self, other: Any) -> bool: """Return the equality of two instances of the FloatLiteral class.""" return self._value32 == other def __lt__(self, other: Any) -> bool: """Return if one instance of the FloatLiteral class is less than another instance.""" return self._value32 < other def __gt__(self, other: Any) -> bool: """Return if one instance of the FloatLiteral class is greater than another instance.""" return self._value32 > other def __le__(self, other: Any) -> bool: """Return if one instance of the FloatLiteral class is less than or equal to another instance.""" return self._value32 <= other def __ge__(self, other: Any) -> bool: """Return if one instance of the FloatLiteral class is greater than or equal to another instance.""" return self._value32 >= other def __hash__(self) -> int: """Return a hashed representation of the FloatLiteral class.""" return hash(self._value32) @singledispatchmethod def to(self, type_var: IcebergType) -> Literal: # type: ignore raise TypeError(f"Cannot convert FloatLiteral into {type_var}") @to.register(FloatType) def _(self, _: FloatType) -> Literal[float]: return self @to.register(DoubleType) def _(self, _: DoubleType) -> Literal[float]: return DoubleLiteral(self.value) @to.register(DecimalType) def _(self, type_var: DecimalType) -> Literal[Decimal]: return DecimalLiteral(Decimal(self.value).quantize(Decimal((0, (1,), -type_var.scale)), rounding=ROUND_HALF_UP)) class DoubleLiteral(Literal[float]): def __init__(self, value: float) -> None: super().__init__(value, float) @singledispatchmethod def to(self, type_var: IcebergType) -> Literal: # type: ignore raise TypeError(f"Cannot convert DoubleLiteral into {type_var}") @to.register(DoubleType) def _(self, _: DoubleType) -> Literal[float]: return self @to.register(FloatType) def _(self, _: FloatType) -> Literal[float]: if FloatType.max < self.value: return FloatAboveMax() elif FloatType.min > self.value: return FloatBelowMin() return FloatLiteral(self.value) @to.register(DecimalType) def _(self, type_var: DecimalType) -> Literal[Decimal]: return DecimalLiteral(Decimal(self.value).quantize(Decimal((0, (1,), -type_var.scale)), rounding=ROUND_HALF_UP)) class DateLiteral(Literal[int]): def __init__(self, value: int) -> None: super().__init__(value, int) def increment(self) -> Literal[int]: return DateLiteral(self.value + 1) def decrement(self) -> Literal[int]: return DateLiteral(self.value - 1) @singledispatchmethod def to(self, type_var: IcebergType) -> Literal: # type: ignore raise TypeError(f"Cannot convert DateLiteral into {type_var}") @to.register(DateType) def _(self, _: DateType) -> Literal[int]: return self class TimeLiteral(Literal[int]): def __init__(self, value: int) -> None: super().__init__(value, int) @singledispatchmethod def to(self, type_var: IcebergType) -> Literal: # type: ignore raise TypeError(f"Cannot convert TimeLiteral into {type_var}") @to.register(TimeType) def _(self, _: TimeType) -> Literal[int]: return self class TimestampLiteral(Literal[int]): def __init__(self, value: int) -> None: super().__init__(value, int) def increment(self) -> Literal[int]: return TimestampLiteral(self.value + 1) def decrement(self) -> Literal[int]: return TimestampLiteral(self.value - 1) @singledispatchmethod def to(self, type_var: IcebergType) -> Literal: # type: ignore raise TypeError(f"Cannot convert TimestampLiteral into {type_var}") @to.register(TimestampType) def _(self, _: TimestampType) -> Literal[int]: return self @to.register(TimestamptzType) def _(self, _: TimestamptzType) -> Literal[int]: return self @to.register(DateType) def _(self, _: DateType) -> Literal[int]: return DateLiteral(micros_to_days(self.value)) class DecimalLiteral(Literal[Decimal]): def __init__(self, value: Decimal) -> None: super().__init__(value, Decimal) def increment(self) -> Literal[Decimal]: original_scale = abs(int(self.value.as_tuple().exponent)) unscaled = decimal_to_unscaled(self.value) return DecimalLiteral(unscaled_to_decimal(unscaled + 1, original_scale)) def decrement(self) -> Literal[Decimal]: original_scale = abs(int(self.value.as_tuple().exponent)) unscaled = decimal_to_unscaled(self.value) return DecimalLiteral(unscaled_to_decimal(unscaled - 1, original_scale)) @singledispatchmethod def to(self, type_var: IcebergType) -> Literal: # type: ignore raise TypeError(f"Cannot convert DecimalLiteral into {type_var}") @to.register(DecimalType) def _(self, type_var: DecimalType) -> Literal[Decimal]: if type_var.scale == abs(int(self.value.as_tuple().exponent)): return self raise ValueError(f"Could not convert {self.value} into a {type_var}") @to.register(IntegerType) def _(self, _: IntegerType) -> Literal[int]: value_int = int(self.value.to_integral_value()) if value_int > IntegerType.max: return IntAboveMax() elif value_int < IntegerType.min: return IntBelowMin() else: return LongLiteral(value_int) @to.register(LongType) def _(self, _: LongType) -> Literal[int]: value_int = int(self.value.to_integral_value()) if value_int > LongType.max: return IntAboveMax() elif value_int < LongType.min: return IntBelowMin() else: return LongLiteral(value_int) @to.register(FloatType) def _(self, _: FloatType) -> Literal[float]: value_float = float(self.value) if value_float > FloatType.max: return FloatAboveMax() elif value_float < FloatType.min: return FloatBelowMin() else: return FloatLiteral(value_float) @to.register(DoubleType) def _(self, _: DoubleLiteral) -> Literal[float]: return DoubleLiteral(float(self.value)) class StringLiteral(Literal[str]): def __init__(self, value: str) -> None: super().__init__(value, str) @singledispatchmethod def to(self, type_var: IcebergType) -> Literal: # type: ignore raise TypeError(f"Cannot convert StringLiteral into {type_var}") @to.register(StringType) def _(self, _: StringType) -> Literal[str]: return self @to.register(IntegerType) def _(self, type_var: IntegerType) -> Literal[int]: try: number = int(float(self.value)) if IntegerType.max < number: return IntAboveMax() elif IntegerType.min > number: return IntBelowMin() return LongLiteral(number) except ValueError as e: raise ValueError(f"Could not convert {self.value} into a {type_var}") from e @to.register(LongType) def _(self, type_var: LongType) -> Literal[int]: try: long_value = int(float(self.value)) if LongType.max < long_value: return LongAboveMax() elif LongType.min > long_value: return LongBelowMin() else: return LongLiteral(long_value) except (TypeError, ValueError) as e: raise ValueError(f"Could not convert {self.value} into a {type_var}") from e @to.register(DateType) def _(self, type_var: DateType) -> Literal[int]: try: return DateLiteral(date_str_to_days(self.value)) except (TypeError, ValueError) as e: raise ValueError(f"Could not convert {self.value} into a {type_var}") from e @to.register(TimeType) def _(self, type_var: TimeType) -> Literal[int]: try: return TimeLiteral(time_str_to_micros(self.value)) except (TypeError, ValueError) as e: raise ValueError(f"Could not convert {self.value} into a {type_var}") from e @to.register(TimestampType) def _(self, _: TimestampType) -> Literal[int]: return TimestampLiteral(timestamp_to_micros(self.value)) @to.register(TimestamptzType) def _(self, _: TimestamptzType) -> Literal[int]: return TimestampLiteral(timestamptz_to_micros(self.value)) @to.register(UUIDType) def _(self, _: UUIDType) -> Literal[bytes]: return UUIDLiteral(UUID(self.value).bytes) @to.register(DecimalType) def _(self, type_var: DecimalType) -> Literal[Decimal]: dec = Decimal(self.value) scale = abs(int(dec.as_tuple().exponent)) if type_var.scale == scale: return DecimalLiteral(dec) else: raise ValueError(f"Could not convert {self.value} into a {type_var}, scales differ {type_var.scale} <> {scale}") @to.register(BooleanType) def _(self, type_var: BooleanType) -> Literal[bool]: value_upper = self.value.upper() if value_upper in ["TRUE", "FALSE"]: return BooleanLiteral(value_upper == "TRUE") else: raise ValueError(f"Could not convert {self.value} into a {type_var}") @to.register(FloatType) def _(self, type_var: FloatType) -> Literal[float]: try: number = float(self.value) if FloatType.max < number: return FloatAboveMax() elif FloatType.min > number: return FloatBelowMin() return FloatLiteral(number) except ValueError as e: raise ValueError(f"Could not convert {self.value} into a {type_var}") from e @to.register(DoubleType) def _(self, type_var: DoubleType) -> Literal[float]: try: number = float(self.value) return DoubleLiteral(number) except ValueError as e: raise ValueError(f"Could not convert {self.value} into a {type_var}") from e def __repr__(self) -> str: """Return the string representation of the StringLiteral class.""" return f"literal({repr(self.value)})" class UUIDLiteral(Literal[bytes]): def __init__(self, value: bytes) -> None: super().__init__(value, bytes) @singledispatchmethod def to(self, type_var: IcebergType) -> Literal: # type: ignore raise TypeError(f"Cannot convert UUIDLiteral into {type_var}") @to.register(UUIDType) def _(self, _: UUIDType) -> Literal[bytes]: return self @to.register(FixedType) def _(self, type_var: FixedType) -> Literal[bytes]: if len(type_var) == UUID_BYTES_LENGTH: return FixedLiteral(self.value) else: raise TypeError( f"Cannot convert UUIDLiteral into {type_var}, different length: {len(type_var)} <> {UUID_BYTES_LENGTH}" ) @to.register(BinaryType) def _(self, _: BinaryType) -> Literal[bytes]: return BinaryLiteral(self.value) class FixedLiteral(Literal[bytes]): def __init__(self, value: bytes) -> None: super().__init__(value, bytes) @singledispatchmethod def to(self, type_var: IcebergType) -> Literal: # type: ignore raise TypeError(f"Cannot convert FixedLiteral into {type_var}") @to.register(FixedType) def _(self, type_var: FixedType) -> Literal[bytes]: if len(self.value) == len(type_var): return self else: raise ValueError( f"Could not convert {self.value!r} into a {type_var}, lengths differ {len(self.value)} <> {len(type_var)}" ) @to.register(BinaryType) def _(self, _: BinaryType) -> Literal[bytes]: return BinaryLiteral(self.value) @to.register(UUIDType) def _(self, type_var: UUIDType) -> Literal[bytes]: if len(self.value) == UUID_BYTES_LENGTH: return UUIDLiteral(self.value) else: raise TypeError( f"Could not convert {self.value!r} into a {type_var}, lengths differ {len(self.value)} <> {UUID_BYTES_LENGTH}" ) class BinaryLiteral(Literal[bytes]): def __init__(self, value: bytes) -> None: super().__init__(value, bytes) @singledispatchmethod def to(self, type_var: IcebergType) -> Literal: # type: ignore raise TypeError(f"Cannot convert BinaryLiteral into {type_var}") @to.register(BinaryType) def _(self, _: BinaryType) -> Literal[bytes]: return self @to.register(FixedType) def _(self, type_var: FixedType) -> Literal[bytes]: if len(type_var) == len(self.value): return FixedLiteral(self.value) else: raise TypeError( f"Cannot convert BinaryLiteral into {type_var}, different length: {len(type_var)} <> {len(self.value)}" ) @to.register(UUIDType) def _(self, type_var: UUIDType) -> Literal[bytes]: if len(self.value) == UUID_BYTES_LENGTH: return UUIDLiteral(self.value) else: raise TypeError( f"Cannot convert BinaryLiteral into {type_var}, different length: {UUID_BYTES_LENGTH} <> {len(self.value)}" )