in functions/source/preprocess/preprocess.py [0:0]
def plot_timeline(dataframe, patient):
    """
    Generate the timeline plot for a patient and upload it to S3 as a PNG.

    The plot has three horizontal rows of bars (patient visit, nursing unit,
    central line) and one circular marker per positive blood collection,
    coloured by organism. The x axis is expressed in days relative to the
    earliest admission date found in ``dataframe``.

    Parameters
    ==========
    dataframe : pandas DataFrame holding the records of one patient.
        NOTE: the datetime columns listed under "Datetime events" are
        converted to datetime *in place*, so the caller's frame is modified.
    patient : identifier used in the log messages and in the S3 object key.

    Returns
    =======
    None. The image is written to ``images/<patient>/timeline.png`` in the
    bucket named by the ``patient_bucket`` environment variable (a missing
    variable raises ``KeyError``).

    Columns
    =======
    Columns listed for the input data (this function itself only reads the
    datetime columns below plus 'organism' and 'nursing_unit_short_desc'):

    ['encntr_num', 'nursing_unit_short_desc',
     'beg_effective_dt_tm','end_effective_dt_tm',
     'facility_name_src', 'collection_dt_tm',
     'mrn', 'encntr_type_desc_src_at_collection',
     'admit_dt_tm', 'clinical_event_code_desc_src',
     'collection_date_id', 'loc_room_desc_src_at_collection',
     'loc_bed_desc_src_at_collection', 'disch_dt_tm',
     'disch_disp_desc_src', 'lab_result',
     'med_service_desc_src_at_collection',
     'nursing_unit_desc_at_collection',
     'nursing_unit_short_desc_at_collection',
     'organism',
     'result_interpretation_desc_src',
     'specimen_type_desc_src', 'transfer_in_to_collect',
     'transfer_out_to_collect','ce_dynamic_label_id',
     'doc_set_name_result', 'encntr_id',
     'first_activity_start_dt_tm',
     'first_catheter_type_result',
     'first_dressing_type_result',
     'first_site_result',
     'last_activity_end_dt_tm',
     'line_tube_drain_insertion_seq',
     'line_insert_to_collection',
     'line_remove_to_collect',
     'last_temperature_result_pre_collection',
     'name_last','name_first',
     'birth_date_id','gender_desc_src','bc_phn',
     'home_addr_patient_postal_code_forward_sortation_area']

    Datetime events
    ===============
    - beg_effective_dt_tm = Nursing unit (ICU) admission date
    - end_effective_dt_tm = Nursing unit (ICU) discharge date
    - collection_dt_tm = Positive blood collection date
    - admit_dt_tm = Admission date (begin of stay)
    - disch_dt_tm = Discharge date (end of stay)
    - first_activity_start_dt_tm = Catheter insertion
    - last_activity_end_dt_tm = Catheter removal
    """
    print(f'Generating timeline plot for {patient}')

    datetime_column_names = [
        'beg_effective_dt_tm',
        'end_effective_dt_tm',
        'collection_dt_tm',
        'admit_dt_tm',
        'disch_dt_tm',
        'first_activity_start_dt_tm',
        'last_activity_end_dt_tm',
    ]
    # The input dates are parsed as mm/dd/yyyy; values that do not match
    # that format are coerced to NaT instead of raising.
    for column_name in datetime_column_names:
        dataframe[column_name] = pd.to_datetime(
            dataframe[column_name], errors='coerce', format='%m/%d/%Y')

    # The figure grows taller with the number of distinct collection dates.
    fig, axis = plt.subplots(
        figsize=(12, 3 + len(dataframe['collection_dt_tm'].unique()) / 4),
        dpi=300)
    buf = io.BytesIO()
    try:
        _draw_timeline(axis, dataframe, datetime_column_names)
        fig.tight_layout()
        fig.savefig(buf, format="png")
    finally:
        # pyplot keeps a reference to every figure it creates; without an
        # explicit close they pile up across calls in a long-lived process.
        plt.close(fig)
    image = buf.getvalue()

    # Save the patient timeline plot under images/<patient>/timeline.png
    filename = f'images/{patient}/timeline.png'
    bucket = os.environ['patient_bucket']
    print(f'Timeline plot path for patient {patient}: {filename}')
    boto3.resource("s3").Object(bucket, filename).put(
        Body=image, ServerSideEncryption="aws:kms")


def _draw_timeline(axis, dataframe, datetime_column_names):
    """Draw the period bars, collection markers, ticks and legend on axis."""
    # Every x position is relative to the earliest admission. min() is
    # computed once and skips NaT, unlike sorting the column for each row.
    first_admission = dataframe['admit_dt_tm'].min()

    # One colour index per *distinct* organism (in order of appearance), so
    # the same organism is always shown in the same colour.
    organism_colour_index = {}
    for index in dataframe.index:
        organism_colour_index.setdefault(
            dataframe.loc[index, 'organism'], len(organism_colour_index))
    palette = plt.cm.tab10

    periods_drawn = {}         # bar label -> set of (start, stop) drawn
    row_labels = {}            # y row -> labels of the bars on that row
    x_tick_dates = {}          # relative day -> date shown on the x axis
    collection_days = set()    # relative days that already have a marker
    organisms_in_legend = set()

    for index in dataframe.index:
        organism = dataframe.loc[index, 'organism']
        # Relative position of each datetime of this record.
        # NOTE(review): relative_time_in_days is defined outside this block;
        # it is assumed to return a number of days - confirm.
        day = {
            key: relative_time_in_days(
                dataframe.loc[index, key], first_admission)
            for key in datetime_column_names
        }

        # 3 bar rows: patient visit, nursing unit, central line
        # (label, start column, stop column, y row, colour)
        bar_graphs = (
            ('Patient visit',
             'admit_dt_tm', 'disch_dt_tm',
             0, [0.8, 0.8, 0.8]),
            (dataframe.loc[index, 'nursing_unit_short_desc'],
             'beg_effective_dt_tm', 'end_effective_dt_tm',
             1, [0.6, 0.6, 0.6]),
            ('Central line',
             'first_activity_start_dt_tm', 'last_activity_end_dt_tm',
             2, [0.4, 0.4, 0.4]),
        )
        for label, start_column, stop_column, row, colour in bar_graphs:
            period = (
                dataframe.loc[index, start_column],
                dataframe.loc[index, stop_column],
            )
            # Do not plot the same period twice
            drawn = periods_drawn.setdefault(label, set())
            if period in drawn:
                continue
            drawn.add(period)

            start_day = day[start_column]
            stop_day = day[stop_column]
            axis.bar(
                [start_day],
                [0.8],
                width=stop_day - start_day,
                bottom=row + 0.1,
                color=colour,
                align='edge',
            )
            # Mark the start and stop dates as well, so a period with one
            # missing date can still be seen.
            axis.plot([start_day], [row + 0.5], 'k>')
            axis.plot([stop_day], [row + 0.5], 'k<')
            x_tick_dates[start_day] = period[0]
            x_tick_dates[stop_day] = period[1]

            # Labels are grouped per row: several nursing units share row 1
            # and must not each claim a y tick of their own.
            labels_on_row = row_labels.setdefault(row, [])
            if str(label) not in labels_on_row:
                labels_on_row.append(str(label))

        # Positive blood collection marker; only the first record seen for
        # a given relative day is drawn.
        collection_day = day['collection_dt_tm']
        if float(collection_day) in collection_days:
            continue
        collection_days.add(float(collection_day))

        marker_style = {
            'marker': 'o',
            'markersize': 14,
            'linestyle': '',
            'color': palette(organism_colour_index[organism] % palette.N),
        }
        # Only the first marker of an organism carries the legend entry.
        if organism not in organisms_in_legend:
            organisms_in_legend.add(organism)
            # str() keeps a missing (non-string) organism from raising.
            marker_style['label'] = str(organism).replace(', ', "\n")
        axis.plot([collection_day], [0.5], **marker_style)
        # Small light-grey dot in the centre of the organism marker.
        axis.plot(
            [collection_day],
            [0.5],
            marker='o',
            markersize=5,
            linestyle='',
            color='0.8',
        )
        x_tick_dates[collection_day] = dataframe.loc[
            index, 'collection_dt_tm']

    rows = sorted(row_labels)
    axis.set_yticks([row + 0.5 for row in rows])
    axis.set_yticklabels(["\n".join(row_labels[row]) for row in rows])
    axis.set_ylim(0, len(rows))
    axis.set_xticks(list(x_tick_dates))
    # Keep only the yyyy-mm-dd part of each date for the tick label.
    axis.set_xticklabels(
        [str(value)[:10] for value in x_tick_dates.values()], rotation=90)
    axis.set_xlabel('Date')
    axis.set_axisbelow(True)
    axis.legend(
        bbox_to_anchor=(1.04, 1), loc='upper left',
        ncol=1, title='Positive blood sample')