def compute_dataset_info_response()

in services/worker/src/worker/job_runners/dataset/info.py [0:0]


def compute_dataset_info_response(dataset: str) -> tuple[DatasetInfoResponse, float]:
    """
    Build the 'dataset-info' response of a dataset by aggregating the cached 'config-info' responses
    of all its configs.

    The list of configs is read from the 'dataset-config-names' previous step. For each config, the cached
    'config-info' response is looked up:
      - no cache entry yet: the config is reported in `pending`,
      - cache entry with a non-OK HTTP status: the config is reported in `failed`,
      - otherwise: its `dataset_info` is stored under the config name, and its `partial` flag is OR-ed
        into the response's `partial` flag.

    Args:
        dataset (`str`):
            A namespace (user or an organization) and a repo name separated by a `/`.

    Raises:
        [~`libcommon.simple_cache.CachedArtifactError`]:
            If the previous step gave an error.
        [~`libcommon.exceptions.PreviousStepFormatError`]:
            If the content of the previous step doesn't have the expected format. Any exception raised
            while iterating over the configs is also re-raised as this error.

    Returns:
        `tuple[DatasetInfoResponse, float]`: Tuple of an object with the dataset_info response and
            a progress float value from 0. to 1.: the share of configs that already have a 'config-info'
            cache entry (successful or failed), i.e. that are not pending. It is 1. when the dataset
            has no config.
    """
    logging.info(f"compute 'dataset-info' for {dataset=}")

    names_content = get_previous_step_or_raise(kind="dataset-config-names", dataset=dataset)["content"]
    if "config_names" not in names_content:
        raise PreviousStepFormatError("Previous step did not return the expected content: 'config_names'.")

    def _config_info_job(config: str) -> PreviousJob:
        # Reference to the 'config-info' job of one config, used in the pending/failed reports.
        return PreviousJob(kind="config-info", dataset=dataset, config=config, split=None)

    infos_by_config: dict[str, Any] = {}
    pending: list[PreviousJob] = []
    failed: list[PreviousJob] = []
    is_partial = False
    total = 0

    try:
        # `total` ends up as the number of configs listed by the previous step.
        for total, entry in enumerate(names_content["config_names"], start=1):
            config = entry["config"]
            try:
                config_response = get_response(kind="config-info", dataset=dataset, config=config)
            except CachedArtifactNotFoundError:
                # Not computed yet: only this case lowers the progress value.
                logging.debug(f"No response found in previous step for {dataset=} {config=}: 'config-info'.")
                pending.append(_config_info_job(config))
            else:
                if config_response["http_status"] != HTTPStatus.OK:
                    logging.debug(f"Previous step gave an error: {config_response['http_status']}")
                    failed.append(_config_info_job(config))
                else:
                    config_content = config_response["content"]
                    infos_by_config[config] = config_content["dataset_info"]
                    # Short-circuit on purpose: the flag is not read again once it is already truthy.
                    is_partial = is_partial or config_content["partial"]
    except Exception as err:
        raise PreviousStepFormatError("Previous step did not return the expected content.", err) from err

    if total:
        progress = (total - len(pending)) / total
    else:
        # No config at all: nothing is left to wait for.
        progress = 1.0

    response = DatasetInfoResponse(dataset_info=infos_by_config, pending=pending, failed=failed, partial=is_partial)
    return response, progress