in yourbench/utils/dataset_engine.py [0:0]
def _load_hub(repo_id: str, subset: str | None, token: str | None) -> Dataset:
    """Load the ``train`` split of a dataset from the HuggingFace Hub.

    Args:
        repo_id: Hub repository identifier passed to ``load_dataset``.
        subset: Configuration name passed as ``name=``; a falsy value is
            reported as 'default' in the log line.
        token: Authentication token forwarded to ``load_dataset``.

    Returns:
        The loaded dataset, guaranteed to contain at least one row.

    Raises:
        ConfigurationError: If ``load_dataset`` raises a ``ValueError`` whose
            message mentions a missing ``BuilderConfig`` (subset not found)
            or mentions ``split`` (train split not found).
        ValueError: If the loaded dataset is empty, or for any other
            ``ValueError`` raised by ``load_dataset`` (re-raised unchanged).
    """
    logger.info(f"Loading '{subset or 'default'}' from Hub: {repo_id}")

    # Keep the try body limited to the loader call: the handler below
    # classifies errors by substring-matching the message, so it must only
    # ever see messages produced by ``load_dataset`` itself.
    try:
        dataset = load_dataset(repo_id, name=subset, split="train", token=token)
    except ValueError as e:
        message = str(e)
        if "BuilderConfig" in message and "not found" in message:
            raise ConfigurationError(f"Subset '{subset}' not found on Hub") from e
        if "split" in message:
            raise ConfigurationError("Split 'train' not found in dataset") from e
        raise

    # Checked outside the try on purpose: this message embeds repo_id and
    # subset, and a name containing e.g. "split" would otherwise be
    # misreported by the handler above as a missing-split configuration error.
    if len(dataset) == 0:
        raise ValueError(f"Dataset from Hub is empty (repo: {repo_id}, subset: {subset})")
    return dataset