in src/datasets/table.py [0:0]
def embed_array_storage(array: pa.Array, feature: "FeatureType", token_per_repo_id=None):
    """Embed data into an array's storage.

    For custom features like Audio or Image, it takes into account the "embed_storage" methods
    they define to embed external data (e.g. an image file) into an array.
    Nested arrays (struct, list, large list, fixed-size list) are traversed recursively
    alongside the matching nested feature.

    <Added version="2.4.0"/>

    Args:
        array (`pa.Array`):
            The PyArrow array in which to embed data. Extension arrays are replaced
            by their storage array before anything else happens.
        feature (`datasets.features.FeatureType`):
            Array features.
        token_per_repo_id (optional):
            Forwarded unchanged to every `embed_storage` call and to the recursive
            calls made on nested arrays.

    Raises:
        `TypeError`: if the array type and a container feature do not match, e.g.

            - if a struct array is paired with a `List` or `LargeList` feature
            - if a list array is paired with a dict or with a fixed-length `List`

    Returns:
        array (`pyarrow.Array`): the array with embedded data. When the feature defines no
        `embed_storage` and is not a `List`, `LargeList` or dict, the (storage) array is
        returned as is.
    """
    from .features import LargeList, List

    # Recursive calls must carry the same token mapping down to nested features.
    _e = partial(embed_array_storage, token_per_repo_id=token_per_repo_id)

    if isinstance(array, pa.ExtensionArray):
        array = array.storage
    if hasattr(feature, "embed_storage"):
        return feature.embed_storage(array, token_per_repo_id=token_per_repo_id)
    elif pa.types.is_struct(array.type):
        # feature must be a dict
        if isinstance(feature, dict):
            arrays = [_e(array.field(name), subfeature) for name, subfeature in feature.items()]
            return pa.StructArray.from_arrays(arrays, names=list(feature), mask=array.is_null())
    elif pa.types.is_list(array.type):
        # feature must be a variable-length List(subfeature)
        if isinstance(feature, List) and feature.length == -1:
            # Merge offsets with the null bitmap to avoid the "Null bitmap with offsets slice not supported" ArrowNotImplementedError
            array_offsets = _combine_list_array_offsets_with_mask(array)
            return pa.ListArray.from_arrays(array_offsets, _e(array.values, feature.feature))
    elif pa.types.is_large_list(array.type):
        # feature must be LargeList(subfeature).
        # The type check keeps a mismatching feature (e.g. a dict) from failing with an
        # AttributeError on `feature.feature`; it falls through to the shared handling below
        # instead. `List` is tolerated as well because it was embedded through this branch
        # before the check existed.
        if isinstance(feature, (LargeList, List)):
            # Merge offsets with the null bitmap to avoid the "Null bitmap with offsets slice not supported" ArrowNotImplementedError
            array_offsets = _combine_list_array_offsets_with_mask(array)
            return pa.LargeListArray.from_arrays(array_offsets, _e(array.values, feature.feature))
    elif pa.types.is_fixed_size_list(array.type):
        # feature must be a fixed-length List(subfeature)
        if isinstance(feature, List) and feature.length > -1:
            # `array.values` is indexed in units of child elements, so the array's own
            # offset and length are scaled by the list size to select this array's values.
            array_values = array.values[
                array.offset * array.type.list_size : (array.offset + len(array)) * array.type.list_size
            ]
            embedded_array_values = _e(array_values, feature.feature)
            return pa.FixedSizeListArray.from_arrays(embedded_array_values, feature.length, mask=array.is_null())
    # Nothing to embed for features that are neither containers nor define `embed_storage`.
    if not isinstance(feature, (List, LargeList, dict)):
        return array
    raise TypeError(f"Couldn't embed array of type\n{_short_str(array.type)}\nwith\n{_short_str(feature)}")