in src/datasets/features/features.py [0:0]
def decode_nested_example(schema, obj, token_per_repo_id: Optional[dict[str, Union[str, bool, None]]] = None):
    """Decode a nested example.

    This is used since some features (in particular Audio and Image) have some logic during decoding.

    To avoid iterating over possibly long lists, it first checks (recursively) if the first element
    that is not None or empty (if it is a sequence) has to be decoded.
    If the first element needs to be decoded, then all the elements of the list will be decoded,
    otherwise they'll stay the same.

    Args:
        schema: Feature description of ``obj``. Handled shapes are a ``dict`` of sub-schemas,
            a ``list``/``tuple`` whose first item is the element schema, a ``LargeList``/``List``
            (element schema read from ``.feature``), or any object exposing ``decode_example``.
        obj: The value to decode. ``None`` is returned as ``None`` for every schema kind.
        token_per_repo_id: Forwarded as a keyword argument to ``schema.decode_example`` so that
            files from private repositories can be read in streaming mode. It is propagated to
            every nested level, so features nested inside dicts or lists receive it too.

    Returns:
        The decoded value: a new ``dict`` for dict schemas, a new ``list`` for list-like schemas,
        the result of ``schema.decode_example`` for decodable features, and ``obj`` unchanged for
        anything else.
    """
    # Nested structures: we allow dict, list/tuples, sequences
    if isinstance(schema, dict):
        if obj is None:
            return None
        return {
            k: decode_nested_example(sub_schema, sub_obj, token_per_repo_id=token_per_repo_id)
            for k, (sub_schema, sub_obj) in zip_dict(schema, obj)
        }
    if isinstance(schema, (list, tuple)):
        # The element schema is read before the None check on purpose: this keeps the
        # original evaluation order (an empty schema raises IndexError even when obj is None).
        sub_schema = schema[0]
        if obj is None:
            return None
        return _decode_nested_list(sub_schema, obj, token_per_repo_id)
    if isinstance(schema, (LargeList, List)):
        if obj is None:
            return None
        return _decode_nested_list(schema.feature, obj, token_per_repo_id)
    # Object with special decoding:
    if hasattr(schema, "decode_example") and getattr(schema, "decode", True):
        if obj is None:
            return None
        # we pass the token to read and decode files from private repositories in streaming mode
        return schema.decode_example(obj, token_per_repo_id=token_per_repo_id)
    return obj


def _decode_nested_list(sub_schema, obj, token_per_repo_id: Optional[dict[str, Union[str, bool, None]]] = None):
    """Decode a non-None list-like ``obj`` whose elements all follow ``sub_schema``.

    Only one probe element is decoded up front; the whole list is decoded only if decoding
    changed that probe. Always returns a new ``list``.
    """
    if len(obj) > 0:
        # Probe element: the first one accepted by `_check_non_null_non_empty_recursive`,
        # or the last element of `obj` if none is accepted.
        for first_elmt in obj:
            if _check_non_null_non_empty_recursive(first_elmt, sub_schema):
                break
        if decode_nested_example(sub_schema, first_elmt, token_per_repo_id=token_per_repo_id) != first_elmt:
            return [decode_nested_example(sub_schema, o, token_per_repo_id=token_per_repo_id) for o in obj]
    return list(obj)