def convert_type_to_large()

in smallpond/io/arrow.py


import pyarrow as arrow


def convert_type_to_large(type_: arrow.DataType) -> arrow.DataType:
    """
    Convert all string and binary types to large types recursively.
    """
    # Arrow uses 32-bit signed offsets for string and binary types, so convert all string and binary columns
    # to large_string and large_binary to avoid offset overflow; see https://issues.apache.org/jira/browse/ARROW-17828.
    if arrow.types.is_string(type_):
        return arrow.large_string()
    elif arrow.types.is_binary(type_):
        return arrow.large_binary()
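    # Recurse into nested types so that string/binary children of lists, structs, and maps are widened too.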
    elif isinstance(type_, arrow.ListType):
        return arrow.list_(convert_type_to_large(type_.value_type))
    elif isinstance(type_, arrow.StructType):
        return arrow.struct(
            [
                arrow.field(
                    field.name,
                    convert_type_to_large(field.type),
                    nullable=field.nullable,
                )
                for field in type_
            ]
        )
    elif isinstance(type_, arrow.MapType):
        return arrow.map_(
            convert_type_to_large(type_.key_type),
            convert_type_to_large(type_.item_type),
        )
    else:
        return type_
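
A minimal usage sketch, not part of the source file: it assumes pyarrow is imported as arrow (as above) and uses a hypothetical schema to show how convert_type_to_large can widen every field of a schema before casting a table, so string and binary columns end up with 64-bit offsets.

# Hypothetical example: widen an existing schema, then cast a table to it.
schema = arrow.schema(
    [
        arrow.field("name", arrow.string()),
        arrow.field("tags", arrow.list_(arrow.string())),
        arrow.field("payload", arrow.binary()),
    ]
)
large_schema = arrow.schema(
    [
        arrow.field(f.name, convert_type_to_large(f.type), nullable=f.nullable)
        for f in schema
    ]
)

table = arrow.table(
    {"name": ["a"], "tags": [["x", "y"]], "payload": [b"\x00"]},
    schema=schema,
)
# string -> large_string, binary -> large_binary, list<string> -> list<large_string>
table = table.cast(large_schema)

The cast keeps the data unchanged; only the offset width of the string and binary buffers grows from 32 to 64 bits.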