def gen_data_dict()

in functions/source/job-creation/job_creation.py [0:0]


def gen_data_dict(dataframe, bucket):
    """
    Generate input manifest content.

    Builds one manifest entry from the rows in ``dataframe`` and from the
    Infection Window plot keys listed under the patient's folder in
    ``bucket``.

    Args:
        dataframe: pandas DataFrame holding the rows for one ``mrn``. The
            ``mrn`` column is required, and ``collection_dt_tm`` is required
            whenever a ``clabsi`` column is present. Every other column read
            here is optional. Single-valued fields are read with ``[0]``
            from their column.
        bucket: Name of the S3 bucket that stores the images.

    Returns:
        dict: Always contains ``table``, ``sourcetimelineimg``,
        ``source-ref`` and ``iwp_plots``. It additionally contains
        ``collection_class``, ``comment`` and any of the optional review
        columns when the corresponding data is present.

    Raises:
        IndexError: If S3 lists fewer plots than there are entries in
            ``data['table']``.
    """
    # String forms that mark a comment cell as empty.
    null_markers = ('None', '(Null)', 'nan', 'Non')

    data = {}
    mrn_id = dataframe['mrn'][0]
    data['table'] = get_table(dataframe)
    extra_columns = [
        'comment_on_pathogen', 'BSI_type',
        'other_pathogen', 'pathogen',
        'commonnocasereason', 'IWP_comment',
        'alternate_diagnosis', 'decision']

    for column in extra_columns:
        if column not in dataframe.columns:
            continue
        value = dataframe[column][0]
        if column == 'pathogen':
            value = map_pathogen_name_to_id(value)
        data[column] = value

    if 'clabsi' in dataframe.columns:
        data['collection_class'] = []
        # Position of the current row within a run of consecutive rows that
        # share the same collection timestamp.
        count = 0
        previous_dt_tm = ''
        for collected_at, clabsi_flag in zip(
                dataframe['collection_dt_tm'], dataframe['clabsi']):
            if previous_dt_tm == collected_at:
                count += 1
            else:
                count = 1
            previous_dt_tm = collected_at

            # Compare the string form so booleans and 'True'/'true' strings
            # are all accepted.
            if str(clabsi_flag).lower() == 'true':
                # NOTE(review): the counter restarts on every new timestamp
                # but the label only keeps the date, so two different times
                # on one day both yield '<date>_1' - confirm this matches
                # the keys produced by get_table.
                collection_day = pd.to_datetime(
                    collected_at).strftime('%Y-%m-%d')
                data['collection_class'].append(f'{collection_day}_{count}')

        print(data['collection_class'])

    if 'comment' in dataframe.columns:
        comment_text = str(dataframe['comment'][0])
        if comment_text not in null_markers:
            data['comment'] = comment_text

    if 'new_comment' in dataframe.columns:
        new_comment_text = str(dataframe['new_comment'][0])
        if new_comment_text not in null_markers:
            if 'comment' in data:
                data['comment'] = " ".join(
                    [data['comment'], "----", new_comment_text])
            else:
                # No usable earlier comment to append to, so the new comment
                # stands on its own.
                data['comment'] = new_comment_text

    data['sourcetimelineimg'] = f's3://{bucket}/images/{mrn_id}/timeline.png'
    data['source-ref'] = str(mrn_id)

    # Listing all the plots existing under patient mrn folder in s3
    # NOTE(review): Delimiter is the same string as Prefix, which looks
    # unintended; it is kept as-is to preserve the current listing. A single
    # list_objects call also returns at most one page of keys.
    responses = boto3.client('s3').list_objects(
        Bucket=bucket,
        Delimiter=f'images/{mrn_id}/IWP/plots_',
        Prefix=f'images/{mrn_id}/IWP/plots_',
    )

    # 'Contents' is absent from the response when no key matches the prefix.
    plots = sorted({item['Key'] for item in responses.get('Contents', [])})

    # Creating a list for Infection Window plots
    # NOTE(review): plots and collection times are paired by sorted position;
    # keys sort as strings, so e.g. 'plots_10' sorts before 'plots_2' -
    # confirm the plot naming keeps both orders aligned.
    collection_times = sorted(data['table'].keys())
    if len(plots) < len(collection_times):
        raise IndexError(
            f'found {len(plots)} IWP plot(s) in bucket {bucket} for '
            f'{len(collection_times)} table entries of mrn {mrn_id}')

    data['iwp_plots'] = {
        collection_time: f's3://{bucket}/{plot_key}'
        for collection_time, plot_key in zip(collection_times, plots)
    }

    print(data['iwp_plots'])

    return data