def plot_timeline()

in functions/source/preprocess/preprocess.py [0:0]


def plot_timeline(dataframe, patient):
    """
    Generate the timeline plot for a patient and upload it to S3.

    The plot has one horizontal row per bar category (patient visit,
    each nursing unit, central line) and one marker per distinct positive
    blood collection time, coloured by organism. The x axis is expressed
    in days relative to the earliest admission date in ``dataframe``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Records of a single patient. The datetime columns listed under
        "Datetime events" are converted to datetime **in place** (values
        that do not parse as ``mm/dd/yyyy`` become ``NaT``).
    patient : str
        Patient identifier, used in log messages and in the S3 object key
        ``images/{patient}/timeline.png``.

    Returns
    -------
    None
        The PNG is written to the bucket named by the ``patient_bucket``
        environment variable, with KMS server-side encryption.

    Raises
    ------
    KeyError
        If a required column is missing from ``dataframe`` or the
        ``patient_bucket`` environment variable is not set.

    Columns
    =======
        ['encntr_num', 'nursing_unit_short_desc',
         'beg_effective_dt_tm','end_effective_dt_tm',
         'facility_name_src', 'collection_dt_tm',
        'mrn', 'encntr_type_desc_src_at_collection',
        'admit_dt_tm', 'clinical_event_code_desc_src',
        'collection_date_id', 'loc_room_desc_src_at_collection',
        'loc_bed_desc_src_at_collection', 'disch_dt_tm',
        'disch_disp_desc_src', 'lab_result',
        'med_service_desc_src_at_collection',
        'nursing_unit_desc_at_collection',
        'nursing_unit_short_desc_at_collection',
        'organism',
        'result_interpretation_desc_src',
        'specimen_type_desc_src', 'transfer_in_to_collect',
        'transfer_out_to_collect','ce_dynamic_label_id',
        'doc_set_name_result', 'encntr_id',
        'first_activity_start_dt_tm',
        'first_catheter_type_result',
        'first_dressing_type_result',
        'first_site_result',
        'last_activity_end_dt_tm',
        'line_tube_drain_insertion_seq',
        'line_insert_to_collection',
        'line_remove_to_collect',
        'last_temperature_result_pre_collection',
        'name_last','name_first',
        'birth_date_id','gender_desc_src','bc_phn',
        'home_addr_patient_postal_code_forward_sortation_area']

    Datetime events
    ===============
    - beg_effective_dt_tm = Nursing unit (ICU) admission date
    - end_effective_dt_tm = Nursing unit (ICU) discharge date
    - collection_dt_tm = Positive blood collection date
    - admit_dt_tm = Admission date (begin of stay)
    - disch_dt_tm = Discharge date (end of stay)
    - first_activity_start_dt_tm = Catheter insertion
    - last_activity_end_dt_tm = Catheter removal
    """
    print('Generating timeline plot for {}'.format(patient))
    datetime_column_names = [
        'beg_effective_dt_tm',
        'end_effective_dt_tm',
        'collection_dt_tm',
        'admit_dt_tm',
        'disch_dt_tm',
        'first_activity_start_dt_tm',
        'last_activity_end_dt_tm',
    ]
    # Convert all dates to datetime; the input data is mm/dd/yyyy.
    # Unparseable values are coerced to NaT instead of raising.
    for column_name in datetime_column_names:
        dataframe[column_name] = pd.to_datetime(
            dataframe[column_name], errors='coerce', format='%m/%d/%Y')

    # The figure grows taller with the number of distinct collection times.
    fig, axis = plt.subplots(figsize=(
        12, 3 + len(dataframe['collection_dt_tm'].unique()) / 4), dpi=300)
    try:
        _draw_patient_timeline(axis, dataframe, datetime_column_names)
        fig.tight_layout()
        buf = io.BytesIO()
        fig.savefig(buf, format="png")
    finally:
        # pyplot keeps a reference to every figure it creates; without an
        # explicit close each call would leak one figure.
        plt.close(fig)
    image = buf.getvalue()

    s3_resource = boto3.resource("s3")
    # saving the patient total timeline
    # plots to processed/images/patient/timeline.png
    filename = f'images/{patient}/timeline.png'
    bucket = os.environ['patient_bucket']
    print('Timeline plot path for patient {}: {}'.format(patient, filename))
    s3_resource.Object(bucket, filename).put(
        Body=image, ServerSideEncryption="aws:kms")


def _draw_patient_timeline(axis, dataframe, datetime_column_names):
    """Draw the period bars, collection markers, ticks and legend on axis."""
    # Reference date of the x axis: the earliest admission. ``min`` skips
    # NaT and is computed once instead of sorting the column for every cell.
    first_admission = dataframe['admit_dt_tm'].min()

    # One colour index per distinct organism (in order of first appearance),
    # so the same organism is always drawn with the same colour.
    organism_index = {}
    for value in dataframe['organism']:
        organism_index.setdefault(str(value), len(organism_index))
    palette = plt.cm.tab10

    # (fixed label or None for the per-record nursing unit, start column,
    #  stop column, grey level of the bar)
    bar_specs = (
        ('Patient visit', 'admit_dt_tm', 'disch_dt_tm',
         (0.8, 0.8, 0.8)),
        (None, 'beg_effective_dt_tm', 'end_effective_dt_tm',
         (0.6, 0.6, 0.6)),
        ('Central line', 'first_activity_start_dt_tm',
         'last_activity_end_dt_tm', (0.4, 0.4, 0.4)),
    )
    collection_column = 'collection_dt_tm'
    collection_y = 0.5  # markers sit in the middle of the bottom row

    row_of_label = {}        # y label -> row number, in insertion order
    plotted_periods = set()  # (label, start, stop) already drawn
    collection_times = set()
    plotted_organisms = set()
    x_scale_label = {}       # x position (days) -> date shown on the tick

    def register_tick(position, timestamp):
        # A missing date has no usable x position; keep it off the tick
        # list. NOTE(review): assumes relative_time_in_days yields a
        # NaN/NaT-like value for missing dates - confirm.
        if not pd.isna(position):
            x_scale_label[position] = timestamp

    for _, record in dataframe.iterrows():
        organism = str(record['organism'])
        # Relative date from the first admission, per datetime column
        day = {
            key: relative_time_in_days(record[key], first_admission)
            for key in datetime_column_names
        }

        # Bar graphs: patient visit, nursing unit, central line
        for fixed_label, start_key, stop_key, color in bar_specs:
            if fixed_label is None:
                label = str(record['nursing_unit_short_desc'])
            else:
                label = fixed_label
            # Every distinct label gets its own row, so the y tick labels
            # always line up with the bars (also with several nursing units).
            row = row_of_label.setdefault(label, len(row_of_label))

            period = (label, record[start_key], record[stop_key])
            # Do not plot the same period twice
            if period in plotted_periods:
                continue
            plotted_periods.add(period)

            start_day = day[start_key]
            stop_day = day[stop_key]
            axis.bar(
                [start_day],
                [0.8],
                width=stop_day - start_day,
                bottom=row + 0.1,
                color=color,
                align='edge',
            )
            # Put a marker on the start and stop date, thus if one of the
            # two dates is missing the other can still be seen.
            axis.plot([start_day], [row + 0.5], 'k>')
            axis.plot([stop_day], [row + 0.5], 'k<')
            register_tick(start_day, record[start_key])
            register_tick(stop_day, record[stop_key])

        # Positive blood collection: one marker per distinct time
        collection_day = day[collection_column]
        if float(collection_day) in collection_times:
            continue
        collection_times.add(float(collection_day))

        marker_style = {
            'marker': 'o',
            'markersize': 14,
            'linestyle': '',
            # Wrap around the palette instead of clipping to its last colour
            'color': palette(organism_index[organism] % palette.N),
        }
        if organism not in plotted_organisms:
            # Only the first marker of an organism carries the legend entry
            marker_style['label'] = organism.replace(', ', "\n")
            plotted_organisms.add(organism)
        axis.plot([collection_day], [collection_y], **marker_style)
        # Small light-grey dot in the centre of the organism marker
        axis.plot(
            [collection_day],
            [collection_y],
            marker='o',
            markersize=5,
            linestyle='',
            color='0.8',
        )
        register_tick(collection_day, record[collection_column])

    y_scale_label = list(row_of_label)
    axis.set_yticks([value + 0.5 for value in range(len(y_scale_label))])
    axis.set_yticklabels(y_scale_label)
    axis.set_ylim(0, len(y_scale_label))
    axis.set_xticks(list(x_scale_label))
    # Show only the date part (yyyy-mm-dd) of each timestamp
    axis.set_xticklabels(
        [str(value)[:10] for value in x_scale_label.values()], rotation=90)
    axis.set_xlabel('Date')
    axis.set_axisbelow(True)
    axis.legend(
        bbox_to_anchor=(1.04, 1), loc='upper left',
        ncol=1, title='Positive blood sample')