in src/datasets/features/features.py [0:0]
def encode_nested_example(schema, obj, level=0):
    """Recursively encode ``obj`` so that it matches ``schema``.

    Some features (in particular ClassLabel) apply their own logic when a value
    is encoded, so the example has to be walked alongside its schema.

    For list-like schemas the whole list is not encoded blindly: a single
    "probe" element is picked (the first one accepted by
    ``_check_non_null_non_empty_recursive``, or the last element when none is
    accepted) and encoded on its own. Only when that encoding differs from the
    probe is every element of the list encoded; otherwise the elements are
    returned as they are, in a new list.

    Args:
        schema: A dict of sub-schemas, a ``List``/``LargeList`` feature, an
            object exposing ``encode_example``, or any other feature object.
        obj: The value to encode. ``None`` is passed through as ``None``,
            except for a dict schema at ``level == 0``.
        level: Current recursion depth; ``0`` for the outermost call.

    Returns:
        The encoded value: a dict keyed like ``schema``, a list, the result of
        ``schema.encode_example(obj)``, ``None``, or ``obj`` itself.

    Raises:
        ValueError: If ``schema`` is a dict and ``obj`` is ``None`` at the top
            level (``level == 0``).
    """
    # Mapping schema: encode every key declared by the schema. Keys missing
    # from ``obj`` are looked up with ``.get`` and therefore encoded from None.
    if isinstance(schema, dict):
        if obj is None:
            if level == 0:
                raise ValueError("Got None but expected a dictionary instead")
            return None
        encoded = {}
        for key in schema:
            encoded[key] = encode_nested_example(schema[key], obj.get(key), level=level + 1)
        return encoded

    # List-like schema: probe one element to decide whether encoding is needed.
    if isinstance(schema, (LargeList, List)):
        if obj is None:
            return None
        if len(obj) > 0:
            item_schema = schema.feature
            # After the loop ``probe`` is the first element accepted by the
            # check, or the last element of ``obj`` if none was accepted.
            for probe in obj:
                if _check_non_null_non_empty_recursive(probe, item_schema):
                    break
            try:
                needs_encoding = bool(encode_nested_example(item_schema, probe, level=level + 1) != probe)
            except ValueError:
                # Can happen when comparing arrays. Note that a ValueError
                # raised while encoding the probe is caught here as well.
                needs_encoding = False
            if needs_encoding:
                return [encode_nested_example(item_schema, item, level=level + 1) for item in obj]
        return list(obj)

    # Feature with its own encoding logic (e.g. ClassLabel converts from
    # string to int); None is never handed to ``encode_example``.
    if hasattr(schema, "encode_example"):
        if obj is None:
            return None
        return schema.encode_example(obj)

    # Anything else is expected to be directly convertible to a native Arrow
    # type and is returned untouched.
    return obj