in libs/libcommon/src/libcommon/orchestrator.py [0:0]
def get_num_bytes_from_config_infos(dataset: str, config: str, split: Optional[str] = None) -> Optional[int]:
    """Read a byte count out of the cached config-info response.

    The response is fetched with ``get_response`` using ``CONFIG_INFO_KIND``
    for the given dataset and config. Its ``content["dataset_info"]`` entry is
    then inspected:

    - when ``split`` is None, the ``"dataset_size"`` field is used;
    - otherwise, the ``"num_bytes"`` field of ``["splits"][split]`` is used.

    Args:
        dataset: Dataset identifier forwarded to ``get_response``.
        config: Config identifier forwarded to ``get_response``.
        split: Optional split name. When omitted, the config-level size is
            returned instead of a per-split size.

    Returns:
        The selected value if it is an ``int``. ``None`` if the cache lookup
        raises ``CachedArtifactNotFoundError``, or if any expected key is
        missing or holds a value of an unexpected type.
    """
    try:
        cached = get_response(kind=CONFIG_INFO_KIND, dataset=dataset, config=config)
    except CachedArtifactNotFoundError:
        # Nothing cached for this config: no size to report.
        return None

    content = cached["content"]
    if "dataset_info" not in content:
        return None
    info = content["dataset_info"]
    if not isinstance(info, dict):
        return None

    if split is None:
        # Config-level size.
        size = info.get("dataset_size")
    else:
        # Per-split size: every intermediate level must exist and be a dict.
        if "splits" not in info:
            return None
        per_split = info["splits"]
        if not isinstance(per_split, dict):
            return None
        if split not in per_split:
            return None
        entry = per_split[split]
        if not isinstance(entry, dict):
            return None
        size = entry.get("num_bytes")

    # Only integer values are reported; anything else is treated as unknown.
    return size if isinstance(size, int) else None