in manage.py [0:0]
def resolve_dataset(args, task: str):
    """Complete the dataset settings held on ``args`` using the dataset builder.

    The builder for ``args.dataset_name`` is loaded (passing ``args.token`` as
    ``use_auth_token``) and then used to:

    * set ``args.dataset_config`` to the builder's ``config_id`` when it is
      ``None``;
    * validate ``args.dataset_split`` against the builder's splits, when the
      builder reports any;
    * set ``args.dataset_column`` to the ``audio_file_path_column`` of each
      task template whose ``task`` equals ``task`` (the loop does not stop at
      the first match, so the last matching template wins).

    Args:
        args: Object exposing ``dataset_name``, ``token``, ``dataset_config``,
            ``dataset_split`` and ``dataset_column``. It is mutated in place.
        task: Task name compared against each task template's ``task``.

    Returns:
        The tuple ``(dataset_name, dataset_config, dataset_split,
        dataset_column)`` read back from ``args``.

    Raises:
        ValueError: If the builder reports splits and ``args.dataset_split``
            is not one of them.
    """
    # Imported lazily so the dependency is only needed when this runs.
    import datasets

    dataset_builder = datasets.load_dataset_builder(
        args.dataset_name, use_auth_token=args.token
    )

    # Only infer the config when the caller did not provide one.
    if args.dataset_config is None:
        args.dataset_config = dataset_builder.config_id
        print(f"Inferred dataset_config {args.dataset_config}")

    # A builder without split information cannot be validated against.
    splits = dataset_builder.info.splits
    split_is_unknown = splits is not None and args.dataset_split not in splits
    if split_is_unknown:
        raise ValueError(
            f"The split `{args.dataset_split}` is not a valid split, please choose from {','.join(splits.keys())}"
        )

    templates = dataset_builder.info.task_templates
    if templates is not None:
        # Lazy filter: templates are still visited one by one, in order.
        matching = (tpl for tpl in templates if tpl.task == task)
        for tpl in matching:
            args.dataset_column = tpl.audio_file_path_column
            print(f"Inferred dataset_column {args.dataset_column}")

    resolved = (
        args.dataset_name,
        args.dataset_config,
        args.dataset_split,
        args.dataset_column,
    )
    return resolved