def _cast_if_needed()

in pyiceberg/io/pyarrow.py [0:0]
36 lines of code
25 McCabe index (conditional complexity)

    def _cast_if_needed(self, field: NestedField, values: pa.Array) -> pa.Array:
        file_field = self._file_schema.find_field(field.field_id)

        if field.field_type.is_primitive:
            if field.field_type != file_field.field_type:
                target_schema = schema_to_pyarrow(
                    promote(file_field.field_type, field.field_type), include_field_ids=self._include_field_ids
                )
                if self._use_large_types is False:
                    target_schema = _pyarrow_schema_ensure_small_types(target_schema)
                return values.cast(target_schema)
            elif (target_type := schema_to_pyarrow(field.field_type, include_field_ids=self._include_field_ids)) != values.type:
                if field.field_type == TimestampType():
                    # Downcasting of nanoseconds to microseconds
                    if (
                        pa.types.is_timestamp(target_type)
                        and not target_type.tz
                        and pa.types.is_timestamp(values.type)
                        and not values.type.tz
                    ):
                        if target_type.unit == "us" and values.type.unit == "ns" and self._downcast_ns_timestamp_to_us:
                            return values.cast(target_type, safe=False)
                        elif target_type.unit == "us" and values.type.unit in {"s", "ms"}:
                            return values.cast(target_type)
                    raise ValueError(f"Unsupported schema projection from {values.type} to {target_type}")
                elif field.field_type == TimestamptzType():
                    if (
                        pa.types.is_timestamp(target_type)
                        and target_type.tz == "UTC"
                        and pa.types.is_timestamp(values.type)
                        and values.type.tz in UTC_ALIASES
                    ):
                        if target_type.unit == "us" and values.type.unit == "ns" and self._downcast_ns_timestamp_to_us:
                            return values.cast(target_type, safe=False)
                        elif target_type.unit == "us" and values.type.unit in {"s", "ms", "us"}:
                            return values.cast(target_type)
                    raise ValueError(f"Unsupported schema projection from {values.type} to {target_type}")
        return values