def transform()

in pyiceberg/transforms.py [0:0]


    def transform(self, source: IcebergType, bucket: bool = True) -> Callable[[Optional[Any]], Optional[int]]:
        """Build the callable that hashes (and optionally buckets) values of ``source``.

        Every supported type is first reduced to a payload that ``mmh3.hash``
        accepts; a single shared closure then hashes that payload.

        Args:
            source: The Iceberg type of the values the returned callable will receive.
            bucket: When True (the default), the returned callable maps ``None`` to
                ``None`` and any other value to
                ``(hash & IntegerType.max) % self._num_buckets``. When False, the raw
                hash function is returned; it does not special-case ``None``.

        Returns:
            A callable taking a single value and returning its bucket number, or its
            raw hash when ``bucket`` is False.

        Raises:
            ValueError: If ``source`` is not one of the types handled below.
        """

        def pack_long(number: Any) -> bytes:
            # Integral values are hashed from their 8-byte little-endian signed form.
            return struct.pack("<q", number)

        if isinstance(source, TimeType):

            def encode(value: Any) -> Any:
                micros = datetime.time_to_micros(value) if isinstance(value, py_datetime.time) else value
                return pack_long(micros)

        elif isinstance(source, DateType):

            def encode(value: Any) -> Any:
                days = datetime.date_to_days(value) if isinstance(value, py_datetime.date) else value
                return pack_long(days)

        elif isinstance(source, (TimestampType, TimestamptzType)):

            def encode(value: Any) -> Any:
                micros = datetime.datetime_to_micros(value) if isinstance(value, py_datetime.datetime) else value
                return pack_long(micros)

        elif isinstance(source, (TimestampNanoType, TimestamptzNanoType)):

            def encode(value: Any) -> Any:
                # Nanosecond timestamps are reduced to microseconds before hashing
                # so that they bucket the same way as microsecond timestamps.
                if isinstance(value, py_datetime.datetime):
                    micros = datetime.datetime_to_micros(value)
                else:
                    micros = datetime.nanos_to_micros(value)
                return pack_long(micros)

        elif isinstance(source, (IntegerType, LongType)):

            def encode(value: Any) -> Any:
                return pack_long(value)

        elif isinstance(source, DecimalType):

            def encode(value: Any) -> Any:
                return decimal_to_bytes(value)

        elif isinstance(source, (StringType, FixedType, BinaryType)):

            def encode(value: Any) -> Any:
                # These values are handed to the hash unchanged.
                return value

        elif isinstance(source, UUIDType):

            def encode(value: Any) -> Any:
                # UUID objects contribute their raw bytes; anything else is hashed as given.
                return value.bytes if isinstance(value, UUID) else value

        else:
            raise ValueError(f"Unknown type {source}")

        def hash_value(value: Any) -> int:
            return mmh3.hash(encode(value))

        if not bucket:
            return hash_value

        def bucket_value(value: Optional[Any]) -> Optional[int]:
            if value is None:
                return None
            # Mask with IntegerType.max before the modulo, as the original expression does.
            return (hash_value(value) & IntegerType.max) % self._num_buckets

        return bucket_value