in paq/download.py [0:0]
def download(resource_key: str, out_dir: str = None):
    """Download the resource(s) registered under ``resource_key``.

    If ``resource_key`` is not an exact key in ``RESOURCES_MAP``, it is
    treated as a prefix and every matching resource is downloaded.

    Args:
        resource_key: Exact key, or key prefix, into ``RESOURCES_MAP``.
        out_dir: Optional root directory for downloads; when ``None`` the
            project default root (via ``_get_root_dir``) is used.

    Returns:
        List of local file paths for the downloaded data files. Empty when
        nothing matched or the resource already exists on disk.
    """
    if resource_key not in RESOURCES_MAP:
        # Prefix match: recurse into every key that starts with resource_key.
        # Fix: previously the recursive results were discarded and [] was
        # always returned here; now the downloaded paths are aggregated.
        matching_keys = [k for k in RESOURCES_MAP.keys() if k.startswith(resource_key)]
        data_files = []
        if matching_keys:
            for key in matching_keys:
                data_files.extend(download(key, out_dir))
        else:
            logger.info("no resources found for specified key")
        return data_files

    download_info = RESOURCES_MAP[resource_key]

    # Skip the download entirely if a sentinel path already exists on disk.
    if "skip_if_exists_path" in download_info:
        root_dir = _get_root_dir(out_dir)
        save_root = os.path.join(root_dir, "data", download_info['skip_if_exists_path'])
        if os.path.exists(save_root):
            logger.info(f"Resource: {resource_key} already exists here: {save_root}, "
                        f"delete this directory to force re-download")
            return []

    s3_url = download_info["s3_url"]
    save_root_dir = None
    data_files = []
    if isinstance(s3_url, list):
        # Multi-file resource: "original_ext" is either one extension shared
        # by all URLs, or a parallel list of extensions (one per URL).
        if isinstance(download_info["original_ext"], str):
            exts = [download_info["original_ext"] for _ in s3_url]
        else:
            exts = download_info['original_ext']
        for url, ext in zip(s3_url, exts):
            save_root_dir, local_file = download_resource(
                url,
                ext,
                download_info["compressed"],
                resource_key,
                out_dir,
                True
            )
            data_files.append(local_file)
    else:
        save_root_dir, local_file = download_resource(
            s3_url,
            download_info["original_ext"],
            download_info["compressed"],
            resource_key,
            out_dir,
        )
        data_files.append(local_file)

    # Best-effort: fetch LICENSE/README alongside the data when provided.
    # Guard on save_root_dir, which stays None if no file was downloaded
    # (e.g. an empty s3_url list).
    license_files = download_info.get("license_files", None)
    if license_files and save_root_dir is not None:
        download_file(license_files[0], save_root_dir, "LICENSE")
        download_file(license_files[1], save_root_dir, "README")
    return data_files