in functions/source/loop/loop_lambda.py [0:0]
def _last_selected_key(options, default=''):
    """Return the last key of *options* whose value is truthy, else *default*."""
    selected = default
    for key in options:
        if options[key]:
            selected = key
    return selected


def write_json_on_s3(bucket, object_path, data, dataframe):
    """
    Merge the reviewer's answers from ``data`` into a copy of ``dataframe``
    and hand the result to ``write_dataframe_to_csv_on_s3``.

    Args:
        bucket: forwarded as ``bucket`` to ``write_dataframe_to_csv_on_s3``.
        object_path: forwarded as ``filename`` to
            ``write_dataframe_to_csv_on_s3``.
        data: nested mapping. The keys read here are
            ``data['category']['caseInfo']`` (the answers),
            ``data['category']['workerId']`` and
            ``data['category-metadata']['creation-date']``.
        dataframe: frame to annotate. It is copied first, so the caller's
            object is not modified. Row label ``0`` is used to read ``PR``.

    Returns:
        The annotated copy of ``dataframe``.

    A ``KeyError`` raised while reading ``data`` (or a missing dataframe
    column/label) is reported with ``print`` and abandons the remainder of
    the section in which it occurred; the frame is still written.
    """
    dataframe = dataframe.copy()

    # --- section 1: decision, review counter, reviewer ids, case answers ---
    try:
        case_info = data['category']['caseInfo']

        # Default to '' when nothing is selected (same convention as
        # BSI_type / commonnocasereason below) instead of failing on an
        # unassigned local.
        decision = _last_selected_key(case_info['decision'])
        dataframe['decision'] = decision

        if 'PR' in dataframe.columns:
            # Broadcast the first row's PR to every row and normalise dtype.
            dataframe.loc[:, 'PR'] = dataframe.loc[0, 'PR']
            dataframe['PR'] = dataframe['PR'].astype('int32')

        # Per the original author's note, PR is set (increased by 1) in the
        # clabsi-production lambda; a 'notsure' review is not counted, so
        # that increase is undone here.
        if decision == 'notsure':
            print('lowering pr by 1')
            if 'PR' in dataframe.columns:
                dataframe['PR'] -= 1
                # NOTE(review): looks redundant (the value is already 0);
                # kept so the resulting column is unchanged from before.
                if dataframe.loc[0, 'PR'] == 0:
                    dataframe['PR'] = 0
                if dataframe.loc[0, 'PR'] == -1:
                    dataframe.drop(columns='PR', inplace=True)

        worker_id = data['category']['workerId']
        if 'PR' in dataframe.columns:
            # NOTE(review): the else is taken to pair with the PR >= 1
            # check (no reviewer id is recorded when PR is absent) — confirm.
            if dataframe.loc[0, 'PR'] >= 1:
                dataframe['second_reviewer_id'] = worker_id
            else:
                dataframe['first_reviewer_id'] = worker_id

        dataframe['job_creation_date'] = data[
            'category-metadata']['creation-date']

        if 'send_to_physician' in case_info:
            dataframe['send_to_physician'] = case_info['send_to_physician']

        # The presence check used to test "BSI_type" while the value was read
        # from 'bsi_type'; accept either spelling, preferring the key that
        # was actually read before.
        bsi_key = next(
            (key for key in ('bsi_type', 'BSI_type') if key in case_info),
            None,
        )
        if bsi_key is not None:
            bsi_type = _last_selected_key(case_info[bsi_key])
            dataframe['BSI_type'] = bsi_type
            # Blood Stream Infection subtype, if any specified:
            # bsi_type -> (caseInfo section, flag inside it, column value)
            subtype_lookup = {
                'LCBI 1': ('MBI_LCBI_1', 'MBI-LCBI 1', 'MBI LCBI 1'),
                'LCBI 2': ('MBI_LCBI_2', 'MBI-LCBI 2', 'MBI LCBI 2'),
                'LCBI 3': ('MBI_LCBI_3', 'MBI-LCBI 3', 'MBI LCBI 3'),
            }
            if bsi_type in subtype_lookup:
                section, flag, label = subtype_lookup[bsi_type]
                # A missing subtype section means "no subtype"; it must not
                # discard the remaining answers below.
                if (case_info.get(section) or {}).get(flag):
                    dataframe['BSI_subtype'] = label

        if 'commonnocasereason' in case_info:
            reasons = case_info['commonnocasereason']
            nocasereason = ''
            # First truthy reason wins, checked in this fixed order.
            for reason in ('RIT', 'POA', 'Common excluded pathogen'):
                if reasons[reason]:
                    nocasereason = reason
                    break
            dataframe['commonnocasereason'] = nocasereason

        if 'alternate_diagnosis' in case_info:
            dataframe['alternate_diagnosis'] = case_info['alternate_diagnosis']

        if 'comment_on_pathogen' in case_info:
            dataframe['comment_on_pathogen'] = case_info['comment_on_pathogen']
    except KeyError:
        print("output.manifest format problem: keyError problem")

    # --- section 2: pathogen, comments, per-collection classification ---
    try:
        case_info = data['category']['caseInfo']

        if 'pathogen' in case_info:
            dataframe['pathogen'] = map_pathogen_id_to_name(
                case_info['pathogen'])
        if 'other_pathogen' in case_info:
            dataframe['other_pathogen'] = case_info['other_pathogen']

        # Concatenate the comments so the previous comments are not lost.
        has_comment = 'comment' in case_info
        has_new_comment = 'new_comment' in case_info
        if has_comment and has_new_comment:
            dataframe['comment'] = " ".join(
                [case_info['comment'], "----", case_info['new_comment']])
        elif has_comment:
            dataframe['comment'] = case_info['comment']
        elif has_new_comment:
            dataframe['comment'] = case_info['new_comment']

        if 'IWP_comment' in case_info:
            dataframe['IWP_comment'] = case_info['IWP_comment']

        if 'collection_class' in case_info:
            # Format:
            # "collection_class": {
            #     "2021-02-23_1": true,
            #     "2021-02-24_1": false
            # },
            print('collection_class is found')
            collection_class = case_info['collection_class']
            # Sort by collection dates so equal values are adjacent.
            dataframe.sort_values(['collection_dt_tm'], inplace=True)
            # Walk the rows and build each row's key "<YYYY-MM-DD>_<n>",
            # where n counts consecutive rows with an equal collection value.
            count = 0
            previous_value = ''
            for index in dataframe.index:
                current_value = dataframe.loc[index, 'collection_dt_tm']
                if previous_value == current_value:
                    count += 1
                else:
                    count = 1
                    previous_value = current_value
                collection_count = '{collection_date}_{number}'.format(
                    collection_date=pd.to_datetime(
                        current_value).strftime('%Y-%m-%d'),
                    number=count,
                )
                # Keys absent from collection_class count as not selected.
                checkmark = collection_class.get(collection_count, False)
                # Record it in the dataframe
                print(collection_count, checkmark)
                dataframe.loc[index, 'clabsi'] = checkmark
    except KeyError:
        print('output.manifest format problem,'
              '["category"]["caseInfo"]["category"]'
              '["comment"] does not exists for comment')

    # writing to dataframe
    write_dataframe_to_csv_on_s3(
        dataframe, filename=object_path, bucket=bucket)
    return dataframe