def get_num_bytes_from_config_infos()

in libs/libcommon/src/libcommon/orchestrator.py [0:0]


def get_num_bytes_from_config_infos(dataset: str, config: str, split: Optional[str] = None) -> Optional[int]:
    """Look up a byte count in the config-info response for a dataset config.

    The response is fetched with ``get_response(kind=CONFIG_INFO_KIND, ...)``
    and its ``content["dataset_info"]`` mapping is inspected.

    Args:
        dataset: Dataset identifier forwarded to ``get_response``.
        config: Config identifier forwarded to ``get_response``.
        split: When ``None``, the ``"dataset_size"`` entry of the dataset info
            is read. Otherwise the ``"num_bytes"`` entry of
            ``dataset_info["splits"][split]`` is read.

    Returns:
        The value found, if it is an ``int``. ``None`` when the response is
        not available (``CachedArtifactNotFoundError``), when any expected
        level is missing or is not a ``dict``, or when the value is not an
        ``int``.
    """
    try:
        cached = get_response(kind=CONFIG_INFO_KIND, dataset=dataset, config=config)
    except CachedArtifactNotFoundError:
        return None

    content = cached["content"]
    if "dataset_info" not in content:
        return None
    info = content["dataset_info"]
    if not isinstance(info, dict):
        return None

    if split is None:
        # Whole-config size.
        size = info.get("dataset_size")
    else:
        # Per-split size: every intermediate level must be a dict.
        per_split = info.get("splits")
        if not isinstance(per_split, dict):
            return None
        entry = per_split.get(split)
        if not isinstance(entry, dict):
            return None
        size = entry.get("num_bytes")

    return size if isinstance(size, int) else None