in services/worker/src/worker/job_runners/dataset/modalities.py [0:0]
def compute_modalities_response(dataset: str) -> DatasetModalitiesResponse:
    """
    Get the response of 'dataset-modalities' for one specific dataset on huggingface.co.

    Each detection strategy (features, URL columns, file types) is tried in turn and
    the modalities they report are merged. Detection is best-effort: a strategy that
    fails with anything other than `PreviousStepFormatError` is logged and skipped,
    so the response may be built from only the strategies that succeeded.

    Args:
        dataset (`str`):
            A namespace (user or an organization) and a repo name separated by a `/`.

    Raises:
        [~`libcommon.exceptions.PreviousStepFormatError`]:
            If the content of the previous step has not the expected format

    Returns:
        `DatasetModalitiesResponse`: An object with the sorted list of detected modalities.
    """
    logging.info(f"compute 'dataset-modalities' for {dataset=}")

    # (label used in the failure log message, detection function), in evaluation order.
    detectors = (
        ("features", detect_modalities_from_features),
        ("url columns", detect_modalities_from_url_columns),
        ("file types", detect_modalities_from_filetypes),
    )

    modalities: set[DatasetModality] = set()
    for source, detect in detectors:
        try:
            modalities.update(detect(dataset))
        except PreviousStepFormatError:
            # A malformed previous step is a real error: let the caller see it.
            raise
        except Exception:
            # Any other failure only disables this one strategy; keep going with the rest.
            logging.info(f"failed to detect modalities from {source} of {dataset=}")

    # Sort so the response does not depend on set iteration order.
    return DatasetModalitiesResponse({"modalities": sorted(modalities)})