in dpr/data/download_data.py [0:0]
def download(resource_key: str, out_dir: str = None) -> list:
    """Download the resource(s) registered under ``resource_key``.

    When ``resource_key`` is not an exact key of ``RESOURCES_MAP`` it is
    treated as a prefix: every registered key that starts with it is
    downloaded in turn and the resulting files are returned together.

    Args:
        resource_key: An exact key of ``RESOURCES_MAP`` or a prefix shared by
            one or more of its keys.
        out_dir: Passed through unchanged to ``download_resource``.

    Returns:
        A list with one ``local_file`` value (as returned by
        ``download_resource``) per downloaded URL. The list is empty when
        neither an exact key nor any prefix match exists.
    """
    if resource_key not in RESOURCES_MAP:
        # match by prefix
        resources = [k for k in RESOURCES_MAP if k.startswith(resource_key)]
        logger.info("matched by prefix resources: %s", resources)
        if not resources:
            logger.info("no resources found for specified key")
            return []
        # Every matched name is an exact key, so each recursive call takes the
        # direct path below; collect its files instead of discarding them.
        matched_files = []
        for key in resources:
            matched_files.extend(download(key, out_dir))
        return matched_files

    download_info = RESOURCES_MAP[resource_key]
    s3_url = download_info["s3_url"]

    # Normalise to (url, resource name) pairs so the single-URL and multi-URL
    # cases share one download loop. Multi-part resources get an index suffix.
    if isinstance(s3_url, list):
        targets = [
            (url, "{}_{}".format(resource_key, i)) for i, url in enumerate(s3_url)
        ]
    else:
        targets = [(s3_url, resource_key)]

    save_root_dir = None
    data_files = []
    for url, target_name in targets:
        save_root_dir, local_file = download_resource(
            url,
            download_info["original_ext"],
            download_info["compressed"],
            target_name,
            out_dir,
        )
        data_files.append(local_file)

    license_files = download_info.get("license_files", None)
    if license_files:
        # First entry is saved as LICENSE, second as README, into the root
        # directory reported by the last download_resource call.
        download_file(license_files[0], save_root_dir, "LICENSE")
        download_file(license_files[1], save_root_dir, "README")
    return data_files