in src/datasets/features/features.py [0:0]
def _from_yaml_list(cls, yaml_data: list) -> "Features":
    """Build a ``Features`` object from its YAML list representation.

    The YAML list is expanded into the nested dict representation that is
    used for the JSON dump, and that dict is handed to ``cls.from_dict``.
    The conversion runs on a deep copy, so ``yaml_data`` itself is not
    modified.

    Args:
        yaml_data: List of feature entries; each entry is a dict holding a
            ``name`` key next to its type description.

    Returns:
        The result of ``cls.from_dict`` applied to the expanded dict.

    Raises:
        TypeError: If a node is neither a dict nor a list where one is
            required.
        ValueError: If a ``class_label`` ``names`` mapping does not provide
            a name for every id from 0 up to its largest id.
    """

    def unsimplify(feature: dict) -> dict:
        """Expand the YAML shorthands of ``feature`` in place and return it."""
        if not isinstance(feature, dict):
            raise TypeError(f"Expected a dict but got a {type(feature)}: {feature}")

        # A bare dtype string under a list-like key is shorthand for a dtype dict:
        #
        #   list: int32      ->      list:
        #                              dtype: int32
        #
        for list_key in ("large_list", "list", "sequence"):
            shorthand = feature.get(list_key)
            if isinstance(shorthand, str):
                feature[list_key] = {"dtype": shorthand}

        # Class label names keyed by id become a plain list ordered by id:
        #
        #   class_label:             ->      class_label:
        #     names:                 ->        names:
        #       '0': negative        ->          - negative
        #       '1': positive        ->          - positive
        #
        class_label = feature.get("class_label")
        if not isinstance(class_label, dict):
            return feature
        names_by_id = class_label.get("names")
        if not isinstance(names_by_id, dict):
            return feature

        label_ids = sorted(names_by_id, key=int)
        if label_ids:
            # Every id from 0 to the largest one must be present exactly once.
            expected_ids = list(range(int(label_ids[-1]) + 1))
            if [int(label_id) for label_id in label_ids] != expected_ids:
                raise ValueError(
                    f"ClassLabel expected a value for all label ids [0:{int(label_ids[-1]) + 1}] but some ids are missing."
                )
        class_label["names"] = [names_by_id[label_id] for label_id in label_ids]
        return feature

    def from_yaml_inner(obj: Union[dict, list]) -> Union[dict, list]:
        """Recursively expand one YAML node into its dict representation."""
        if isinstance(obj, list):
            # A list of named entries becomes a mapping name -> expanded entry.
            # All names are popped up front, before any entry is expanded.
            field_names = [entry.pop("name") for entry in obj]
            expanded = {}
            for field_name, entry in zip(field_names, obj):
                expanded[field_name] = from_yaml_inner(entry)
            return expanded

        if not isinstance(obj, dict):
            raise TypeError(f"Expected a dict or a list but got {type(obj)}: {obj}")
        if not obj:
            return {}

        # The first key of the dict decides how the node is interpreted.
        kind = next(iter(obj))

        if kind in ("large_list", "list", "sequence"):
            # "sequence" is kept for backward compatibility; when it wraps a
            # list of named entries, each entry gets its own "List" wrapper.
            # This must be decided before the key is popped below.
            wraps_named_entries = kind == "sequence" and isinstance(obj[kind], list)
            inner = from_yaml_inner(unsimplify(obj).pop(kind))
            # After the pop, ``obj`` only holds the remaining sibling keys.
            if wraps_named_entries:
                return {
                    entry_name: {"feature": entry_feature, **obj, "_type": "List"}
                    for entry_name, entry_feature in inner.items()
                }
            wrapper_type = "LargeList" if kind == "large_list" else "List"
            return {"feature": inner, **obj, "_type": wrapper_type}

        if kind == "struct":
            return from_yaml_inner(obj["struct"])

        if kind == "dtype":
            dtype = obj["dtype"]
            if not isinstance(dtype, str):
                return from_yaml_inner(dtype)
            try:
                # e.g. int32, float64, string, audio, image
                Value(dtype)
            except ValueError:
                # e.g. Audio, Image, ArrayXD
                return {"_type": snakecase_to_camelcase(dtype)}
            return {**obj, "_type": "Value"}

        # Any other key names the feature type; its value holds the parameters.
        type_name = snakecase_to_camelcase(kind)
        return {"_type": type_name, **unsimplify(obj)[kind]}

    # Work on a private copy: the helpers above pop and rewrite keys in place.
    yaml_data = copy.deepcopy(yaml_data)
    return cls.from_dict(from_yaml_inner(yaml_data))