def embed_array_storage()

in src/datasets/table.py [0:0]


def embed_array_storage(array: pa.Array, feature: "FeatureType", token_per_repo_id=None):
    """Embed data into an array's storage.
    For custom features like Audio or Image, it takes into account the "embed_storage" methods
    they define to embed external data (e.g. an image file) into an array.
    Struct, list, large list and fixed-size list arrays are traversed recursively
    together with their (nested) feature.

    <Added version="2.4.0"/>

    Args:
        array (`pa.Array`):
            The PyArrow array in which to embed data.
            Extension arrays are replaced by their storage array before processing.
        feature (`datasets.features.FeatureType`):
            Array features.
        token_per_repo_id (*optional*, defaults to `None`):
            Passed through unchanged to every `embed_storage` call and to the recursive calls
            made for nested arrays.

    Raises:
        `TypeError`: if `feature` is a nested feature (a `dict`, `List` or `LargeList`)
            that is not compatible with the type of `array`, e.g.

            - a `dict` feature with an array that is not a struct array
            - a variable-length `List` feature with a fixed-size list array

    Returns:
         array (`pyarrow.Array`): the array with the embedded data. If `feature` is neither nested
         nor defines `embed_storage`, the (storage) array is returned as is.
    """
    from .features import LargeList, List

    # Recursive call that keeps forwarding the same token mapping.
    _e = partial(embed_array_storage, token_per_repo_id=token_per_repo_id)

    if isinstance(array, pa.ExtensionArray):
        array = array.storage
    if hasattr(feature, "embed_storage"):
        # The feature knows how to embed its own data: delegate to it.
        return feature.embed_storage(array, token_per_repo_id=token_per_repo_id)
    elif pa.types.is_struct(array.type):
        # feature must be a dict
        if isinstance(feature, dict):
            arrays = [_e(array.field(name), subfeature) for name, subfeature in feature.items()]
            return pa.StructArray.from_arrays(arrays, names=list(feature), mask=array.is_null())
    elif pa.types.is_list(array.type):
        # feature must be a variable-length List(subfeature)
        if isinstance(feature, List) and feature.length == -1:
            # Merge offsets with the null bitmap to avoid the "Null bitmap with offsets slice not supported" ArrowNotImplementedError
            array_offsets = _combine_list_array_offsets_with_mask(array)
            return pa.ListArray.from_arrays(array_offsets, _e(array.values, feature.feature))
    elif pa.types.is_large_list(array.type):
        # feature must be LargeList(subfeature)
        # Only read `feature.feature` from features that define it. Any other feature falls through
        # to the shared handling below (returned as is for leaf features, TypeError for nested ones)
        # instead of failing with an AttributeError. `List` is tolerated as well since it exposes
        # the same `.feature` attribute and was accepted here before the check was added.
        if isinstance(feature, (LargeList, List)):
            # Merge offsets with the null bitmap to avoid the "Null bitmap with offsets slice not supported" ArrowNotImplementedError
            array_offsets = _combine_list_array_offsets_with_mask(array)
            return pa.LargeListArray.from_arrays(array_offsets, _e(array.values, feature.feature))
    elif pa.types.is_fixed_size_list(array.type):
        # feature must be a fixed-length List(subfeature)
        if isinstance(feature, List) and feature.length > -1:
            # Restrict the flat values to the window covered by this (possibly sliced) array.
            array_values = array.values[
                array.offset * array.type.list_size : (array.offset + len(array)) * array.type.list_size
            ]
            embedded_array_values = _e(array_values, feature.feature)
            return pa.FixedSizeListArray.from_arrays(embedded_array_values, feature.length, mask=array.is_null())
    # Leaf features without `embed_storage` have nothing to embed.
    if not isinstance(feature, (List, LargeList, dict)):
        return array
    # A nested feature reached this point without matching the array type.
    raise TypeError(f"Couldn't embed array of type\n{_short_str(array.type)}\nwith\n{_short_str(feature)}")