in src/app.py [0:0]
def output(dcm):
    """Write the records in ``dcm.img_list`` to S3 as a Parquet dataset.

    The records are loaded into a pandas DataFrame, columns whose values
    are all missing are dropped, and the frame is written under
    ``s3://{S3_OUTPUT_BUCKET}/`` with snappy compression, partitioned by
    ``PARTITION_COL``. Each object is uploaded with AES256 server-side
    encryption and tagged with the source bucket and key taken from
    ``dcm``.

    Args:
        dcm: Object exposing ``img_list``, ``source_s3_bucket`` and
            ``source_s3_key``.

    Returns:
        Whatever ``wr.s3.to_parquet`` returns (for ``dataset=True`` the
        library documents a dict describing the written paths and
        partition values -- confirm against the installed version).

    Raises:
        Exception: Any error raised while building the request or writing
            the dataset is logged and then re-raised unchanged.
    """
    log.debug('Convert data structure to dataframe')
    df = pd.DataFrame.from_dict(dcm.img_list)
    # Drop columns in which every value is missing
    df.dropna(axis=1, how='all', inplace=True)
    session = boto3.session.Session(region_name=S3_OUTPUT_BUCKET_REGION)
    try:
        parquet = wr.s3.to_parquet(
            df=df,
            boto3_session=session,
            path=f's3://{S3_OUTPUT_BUCKET}/',
            compression='snappy',
            dataset=True,
            sanitize_columns=True,
            s3_additional_kwargs={
                'ServerSideEncryption': 'AES256',
                # Truncate long strings for tags: keep only the trailing
                # 128 / 256 characters (presumably to stay within S3 tag
                # length limits -- confirm).
                'Tagging': parse.urlencode({
                    "S3_BUCKET": dcm.source_s3_bucket[-128:],
                    "S3_KEY": dcm.source_s3_key[-256:],
                }),
            },
            # The API documents a list of column names here; a set only
            # worked as an implementation side effect.
            partition_cols=[PARTITION_COL],
        )
        log.info(f'Completed output, {parquet}')
        return parquet
    except Exception as e:
        log.error('Unable to convert df to parquet')
        log.error(e)
        raise