in pyiceberg/transforms.py [0:0]
def transform(self, source: IcebergType, bucket: bool = True) -> Callable[[Optional[Any]], Optional[int]]:
    """Build a callable that hashes, and optionally buckets, values of ``source`` type.

    Args:
        source: Iceberg type of the values; it selects how a value is turned
            into the input handed to ``mmh3.hash``.
        bucket: When True (the default) the returned callable maps ``None`` to
            ``None`` and any other value to
            ``(hash & IntegerType.max) % self._num_buckets``. When False the
            bare hash function is returned, which does no ``None`` handling.

    Returns:
        A one-argument callable behaving as described for ``bucket``.

    Raises:
        ValueError: If ``source`` is none of the types handled here.
    """

    def _hash_long(number: Any) -> int:
        # Shared by every branch that hashes a "<q"-packed value.
        return mmh3.hash(struct.pack("<q", number))

    if isinstance(source, TimeType):

        def hash_func(v: Any) -> int:
            # Python time objects go through time_to_micros; anything else is packed as given.
            return _hash_long(datetime.time_to_micros(v) if isinstance(v, py_datetime.time) else v)

    elif isinstance(source, DateType):

        def hash_func(v: Any) -> int:
            # Python date objects go through date_to_days; anything else is packed as given.
            return _hash_long(datetime.date_to_days(v) if isinstance(v, py_datetime.date) else v)

    elif isinstance(source, (TimestampType, TimestamptzType)):

        def hash_func(v: Any) -> int:
            # Python datetime objects go through datetime_to_micros; anything else is packed as given.
            return _hash_long(datetime.datetime_to_micros(v) if isinstance(v, py_datetime.datetime) else v)

    elif isinstance(source, (TimestampNanoType, TimestamptzNanoType)):

        def hash_func(v: Any) -> int:
            # Nanosecond timestamps are reduced to micros before hashing so
            # that TimestampNano buckets the same as Timestamp.
            if isinstance(v, py_datetime.datetime):
                return _hash_long(datetime.datetime_to_micros(v))
            return _hash_long(datetime.nanos_to_micros(v))

    elif isinstance(source, (IntegerType, LongType)):

        def hash_func(v: Any) -> int:
            return _hash_long(v)

    elif isinstance(source, DecimalType):

        def hash_func(v: Any) -> int:
            encoded = decimal_to_bytes(v)
            return mmh3.hash(encoded)

    elif isinstance(source, (StringType, FixedType, BinaryType)):

        def hash_func(v: Any) -> int:
            # These values are passed to mmh3 unchanged.
            return mmh3.hash(v)

    elif isinstance(source, UUIDType):

        def hash_func(v: Any) -> int:
            # UUID objects contribute their ``bytes``; other inputs are hashed as given.
            return mmh3.hash(v.bytes if isinstance(v, UUID) else v)

    else:
        raise ValueError(f"Unknown type {source}")

    if not bucket:
        return hash_func

    def bucket_of(v: Optional[Any]) -> Optional[int]:
        # None passes through untouched; everything else is hashed, masked and reduced.
        if v is None:
            return None
        return (hash_func(v) & IntegerType.max) % self._num_buckets

    return bucket_of