in src/datasets/features/features.py [0:0]
def _to_yaml_list(self) -> list:
    """Return the features as a list of YAML-friendly field descriptions.

    The list is computed from ``self.to_dict()`` (the dict representation
    that is used for the JSON dump). The top-level dict is converted with
    ``to_yaml_inner`` and its ``"struct"`` entry is returned, with every
    tuple found along the way turned into a list.

    Returns:
        The ``"struct"`` entry of the converted top-level dict. This assumes
        the top-level dict carries no truthy ``"_type"`` entry, otherwise no
        ``"struct"`` key is produced.

    Raises:
        TypeError: If a nested value is neither a dict, a list nor a tuple.
    """

    def simplify(feature: dict) -> dict:
        """Rewrite ``feature`` in place into its compact form and return it."""
        if not isinstance(feature, dict):
            raise TypeError(f"Expected a dict but got a {type(feature)}: {feature}")

        # Collapse list-like entries whose value is a single-key dict:
        #
        #   list: {dtype: int32}                 ->  list: int32
        #   list: {struct: [{name: foo, ...}]}   ->  list: [{name: foo, ...}]
        #
        # The value is re-read before each check, so the "struct" check sees
        # whatever the "dtype" collapse may just have stored.
        for container_key in ("large_list", "list", "sequence"):
            for sole_key in ("dtype", "struct"):
                inner = feature.get(container_key)
                if isinstance(inner, dict) and list(inner) == [sole_key]:
                    feature[container_key] = inner[sole_key]

        # Turn a list of label names into a mapping keyed by position:
        #
        #   class_label: {names: [negative, positive]}
        #     ->  class_label: {names: {'0': negative, '1': positive}}
        #
        class_label = feature.get("class_label")
        if isinstance(class_label, dict):
            names = class_label.get("names")
            if isinstance(names, list):
                # server-side requirement: keys must be strings
                class_label["names"] = {str(position): name for position, name in enumerate(names)}
        return feature

    def to_yaml_inner(obj: Union[dict, list]) -> dict:
        """Convert one node of the dict representation to its YAML form."""
        if isinstance(obj, (list, tuple)):
            # A list or tuple is described by its first element only.
            return simplify({"list": simplify(to_yaml_inner(obj[0]))})
        if not isinstance(obj, dict):
            raise TypeError(f"Expected a dict or a list but got {type(obj)}: {obj}")

        # NOTE: the "_type" (and "feature") entries are removed from ``obj`` itself.
        type_tag = obj.pop("_type", None)

        for tag, yaml_key in (("LargeList", "large_list"), ("List", "list")):
            if type_tag == tag:
                inner_feature = obj.pop("feature")
                # Remaining entries of ``obj`` are kept next to the converted feature.
                return simplify({yaml_key: to_yaml_inner(inner_feature), **obj})

        if type_tag == "Value":
            return obj

        if not type_tag:
            # No type tag: every entry becomes a named field of a struct.
            return {"struct": [{"name": name, **to_yaml_inner(sub_feature)} for name, sub_feature in obj.items()]}

        dtype_name = camelcase_to_snakecase(type_tag)
        if not obj:
            # A typed node without remaining parameters is just its name.
            return {"dtype": dtype_name}
        return {"dtype": simplify({dtype_name: obj})}

    def to_yaml_types(obj: dict) -> dict:
        """Recursively rebuild dicts and lists, turning tuples into lists."""
        if isinstance(obj, dict):
            return {key: to_yaml_types(value) for key, value in obj.items()}
        if isinstance(obj, (list, tuple)):
            return [to_yaml_types(item) for item in obj]
        return obj

    features_dict = self.to_dict()
    top_level = to_yaml_inner(features_dict)
    return to_yaml_types(top_level["struct"])