in scripts/download_demo_data.py [0:0]
def _download_demo_data(bucket_name: str, bucket_key_prefix: str, download_files: List[str]) -> None:
    """Ensure the demo data files exist under the given S3 prefix.

    Compares the basenames of ``download_files`` against the objects already
    present at ``s3://{bucket_name}/{bucket_key_prefix}``; any missing file is
    fetched with ``wget`` into a local ``data/`` directory and then uploaded
    to the bucket under the same prefix.

    Args:
        bucket_name: Target S3 bucket.
        bucket_key_prefix: Key prefix under which the data files live
            (concatenated directly with the file name, so it is expected to
            end with ``/`` — confirm against callers).
        download_files: Source URLs; only the basename of each URL is used
            when matching against existing S3 objects.
    """
    # Basenames are the unit of comparison between source URLs and S3 keys.
    download_file_names = [fname.split("/")[-1] for fname in download_files]

    # List what is already present under the S3 prefix.
    response = s3.list_s3_objects(bucket_name, bucket_key_prefix)
    s3_data_files = []
    if "Contents" in response:
        for obj in response["Contents"]:
            file_name = obj["Key"].split("/")[-1]
            if file_name:  # skip the prefix "directory" placeholder entry
                s3_data_files.append(file_name)
    _logger.info(f"data_files={s3_data_files}")

    # Anything required but not already in S3 must be fetched and uploaded.
    files_to_download = set(download_file_names) - set(s3_data_files)
    if files_to_download:
        downloaded_files = []
        local_dir = os.path.join(os.getcwd(), "data")
        for fp in download_files:
            file_name = fp.split("/")[-1]
            if file_name in files_to_download:
                # NOTE(review): fp is interpolated unquoted into a shell
                # command; if the URL list ever comes from untrusted input
                # this needs shlex.quote (or a list-form command).
                sh.run(f"wget {fp} -P {local_dir} -q")
                downloaded_files.append(fp)
                # Mirror the freshly downloaded file into the S3 data path.
                local_src_file = os.path.join(local_dir, file_name)
                s3_key = f"{bucket_key_prefix}{file_name}"
                _logger.info(f"uploading {local_src_file} to s3 {bucket_name}/{s3_key}")
                s3.upload_file(local_src_file, bucket_name, s3_key)
        _logger.info(f"Downloaded CSM data from {downloaded_files}")
    else:
        _logger.info("Data files are up to date. No need to download")
        # Reuse the listing fetched above instead of issuing a second,
        # identical list_s3_objects request as the original code did.
        if "Contents" in response:
            for obj in response["Contents"]:
                _logger.debug(obj["Key"])