in functions/source/job-creation/job_creation.py [0:0]
def gen_data_dict(dataframe, bucket):
    """
    Generate input manifest content.

    Builds a single manifest record from ``dataframe``: the table produced
    by ``get_table``, the first-row value of every optional annotation
    column that is present, the labels of the collections flagged in the
    ``clabsi`` column, the (merged) reviewer comment, and the S3 locations
    of the timeline image and the infection-window plots.

    Args:
        dataframe: pandas DataFrame. Must contain an ``mrn`` column; the
            value at index label 0 is used as the record id. When a
            ``clabsi`` column is present a ``collection_dt_tm`` column is
            read as well.
        bucket: Name of the S3 bucket that holds the images.

    Returns:
        dict: The manifest fields for this record.
    """
    data = {}
    mrn_id = dataframe['mrn'][0]
    data['table'] = get_table(dataframe)

    # Optional annotation columns: copy the first-row value when present.
    extra_columns = [
        'comment_on_pathogen', 'BSI_type',
        'other_pathogen', 'pathogen',
        'commonnocasereason', 'IWP_comment',
        'alternate_diagnosis', 'decision']
    for column in extra_columns:
        if column not in dataframe.columns:
            continue
        value = dataframe[column][0]
        if column == 'pathogen':
            value = map_pathogen_name_to_id(value)
        data[column] = value

    if 'clabsi' in dataframe.columns:
        data['collection_class'] = []
        count = 0
        collection_date = ''
        for index in dataframe.index:
            collected_at = dataframe.loc[index, 'collection_dt_tm']
            # Number consecutive rows that share the same collection
            # timestamp; any change restarts the counter at 1.
            if collection_date == collected_at:
                count += 1
            else:
                count = 1
                collection_date = collected_at
            collection_count = '{collection_date}_{number}'.format(
                collection_date=pd.to_datetime(
                    collected_at).strftime('%Y-%m-%d'),
                number=count,
            )
            # The cell may be a numpy bool, a Python bool or a string, so
            # normalise with str() instead of calling ``.astype`` on it
            # (only numpy scalars have that method).
            clabsi_flag = str(dataframe.loc[index, 'clabsi'])
            if clabsi_flag.lower() == 'true':
                print('i passed true test!', clabsi_flag)
                data['collection_class'].append(collection_count)
        print(data['collection_class'])

    # String forms that are treated as "no comment".
    null_markers = ('None', '(Null)', 'nan', 'Non')
    if 'comment' in dataframe.columns:
        comment = str(dataframe['comment'][0])
        if comment not in null_markers:
            data['comment'] = comment
    if 'new_comment' in dataframe.columns:
        new_comment = str(dataframe['new_comment'][0])
        if new_comment not in null_markers:
            if 'comment' in data:
                data['comment'] = " ".join(
                    [data['comment'], "----", new_comment])
            else:
                # No earlier comment to append to: keep the new comment on
                # its own rather than failing on the missing key.
                data['comment'] = new_comment

    data['sourcetimelineimg'] = f's3://{bucket}/images/{mrn_id}/timeline.png'
    data['source-ref'] = str(mrn_id)

    # Listing all the plots existing under patient mrn folder in s3
    responses = boto3.client('s3').list_objects(
        Bucket=bucket,
        Delimiter=f'images/{mrn_id}/IWP/plots_',
        Prefix=f'images/{mrn_id}/IWP/plots_',
    )
    # S3 omits 'Contents' entirely when nothing matches the prefix.
    plots = sorted({item['Key'] for item in responses.get('Contents', [])})

    # Creating a list for Infection Window plots: the i-th collection time
    # (sorted) is paired with the i-th plot key (sorted).
    collection_times = sorted(data['table'].keys())
    if len(plots) != len(collection_times):
        print('IWP plot count does not match collection count:',
              len(plots), 'vs', len(collection_times))
    data['iwp_plots'] = {
        collection_time: f's3://{bucket}/{plot_key}'
        for collection_time, plot_key in zip(collection_times, plots)
    }
    print(data['iwp_plots'])
    return data