in python/shared/research_pacs/shared/util.py [0:0]
def load_file(location, aws_region, content_type='str', s3_credentials=None):
  """
  Load and return a file either from Amazon S3 or from a local (or locally mounted) file system.

  Args:
    location (str): File location in the format `s3://bucket/key` if the file is stored in Amazon
      S3, otherwise a local file system path
    aws_region (str): AWS region where the S3 bucket resides
    content_type (str): Can be `str` to return a string, `bytes` to return encoded bytes, `json` to
      parse a JSON document and return a dict, `yaml` to parse a YAML document and return a dict
    s3_credentials (dict): Optional S3 credentials passed to the boto3 S3 client. If no
      credentials are provided, we use the EC2 role or task role

  Returns:
    The file content as `bytes`, `str`, or a parsed dict depending on `content_type`.

  Raises:
    Exception: If the file cannot be read or parsed, or if `content_type` is not one of the
      supported values. The original error is chained as the cause.
  """
  try:
    logger.debug(f'Load the file "{location}" as "{content_type}"')
    # Raw string avoids invalid escape sequences; '/' needs no escaping in a regex.
    match = re.search(r'^s3://([^/]+)/(.+)$', location)
    # Load the file from S3 if the location matches the S3 pattern
    if match is not None:
      if s3_credentials is not None:
        s3 = boto3.client('s3', region_name=aws_region, **s3_credentials)
      else:
        # No explicit credentials: boto3 falls back to the EC2 instance role or task role
        s3 = boto3.client('s3', region_name=aws_region)
      s3_response = s3.get_object(Bucket=match.group(1), Key=match.group(2))
      content_bytes = s3_response['Body'].read()
    # Otherwise, load the file from the local system, or locally-mounted file system
    else:
      with open(location, 'rb') as f:
        content_bytes = f.read()
    if content_type == 'bytes':
      return content_bytes
    elif content_type == 'str':
      return content_bytes.decode()
    elif content_type == 'json':
      return json.loads(content_bytes.decode())
    elif content_type == 'yaml':
      return yaml.safe_load(content_bytes.decode())
    else:
      # Previously an unsupported content_type silently returned None; fail loudly instead.
      raise ValueError(f'Unsupported content_type "{content_type}"')
  except Exception as e:
    msg_err = f'Failed to load the file {location} as "{content_type}" - {e}'
    logger.debug(msg_err)
    # Chain the original exception so the root-cause traceback is preserved
    raise Exception(msg_err) from e