def write_json_on

def write_json_on_s3()

in functions/source/loop/loop_lambda.py [0:0]
120 lines of code
43 McCabe index (conditional complexity)

# Maps a selected BSI type to (caseInfo group key, flag key inside that group,
# value written to the ``BSI_subtype`` column).
_MBI_SUBTYPES = {
    'LCBI 1': ('MBI_LCBI_1', 'MBI-LCBI 1', 'MBI LCBI 1'),
    'LCBI 2': ('MBI_LCBI_2', 'MBI-LCBI 2', 'MBI LCBI 2'),
    'LCBI 3': ('MBI_LCBI_3', 'MBI-LCBI 3', 'MBI LCBI 3'),
}


def _last_selected(flags):
    """Return the last key of *flags* whose value is truthy, or None if none is."""
    selected = None
    for key in flags:
        if flags[key]:
            selected = key
    return selected


def _apply_decision_and_reviewer(dataframe, data):
    """Record the decision, adjust the ``PR`` column and set the reviewer id.

    Mutates *dataframe* in place. Raises KeyError when a required manifest key
    (or index label 0 of a dataframe that has a ``PR`` column) is missing.
    """
    decision = _last_selected(data['category']['caseInfo']['decision'])
    if decision is None:
        # No flag was set. Without a default the name would be unbound and the
        # resulting UnboundLocalError would escape the KeyError handler of the
        # caller; fall back to '' like the other "nothing selected" fields do.
        print('no decision selected in output.manifest')
        decision = ''
    dataframe['decision'] = decision

    if 'PR' in dataframe.columns:
        # Broadcast the first row's PR to every row and normalise the dtype.
        dataframe.loc[:, 'PR'] = dataframe.loc[0, 'PR']
        dataframe['PR'] = dataframe['PR'].astype('int32')

    # Per the original note, PR is increased by 1 upstream (clabsi-production
    # lambda); an undecided ('notsure') review takes that increment back.
    if decision == 'notsure':
        print('lowering pr by 1')
        if 'PR' in dataframe.columns:
            dataframe['PR'] -= 1
            if dataframe.loc[0, 'PR'] == 0:
                # Every row already holds 0 here; kept from the original.
                dataframe['PR'] = 0
            if dataframe.loc[0, 'PR'] == -1:
                dataframe.drop(columns='PR', inplace=True)

    worker_id = data['category']['workerId']
    if 'PR' in dataframe.columns:
        if dataframe.loc[0, 'PR'] >= 1:
            dataframe['second_reviewer_id'] = worker_id
        else:
            dataframe['first_reviewer_id'] = worker_id

    dataframe['job_creation_date'] = data['category-metadata']['creation-date']


def _apply_case_info_fields(dataframe, case_info):
    """Copy the optional classification fields of *case_info* into *dataframe*.

    Mutates *dataframe* in place. Raises KeyError when ``commonnocasereason``
    is present but lacks one of the reason flags that has to be inspected.
    """
    if 'send_to_physician' in case_info:
        dataframe['send_to_physician'] = case_info['send_to_physician']

    # The original guard tested "BSI_type" but then read "bsi_type", so a
    # manifest carrying only one spelling was skipped or raised KeyError.
    # Accept either; prefer "bsi_type", the key that was actually read.
    if 'bsi_type' in case_info:
        bsi_flags = case_info['bsi_type']
    elif 'BSI_type' in case_info:
        bsi_flags = case_info['BSI_type']
    else:
        bsi_flags = None

    if bsi_flags is not None:
        bsi_type = _last_selected(bsi_flags)
        if bsi_type is not None:
            dataframe['BSI_type'] = bsi_type

        # Blood Stream Infection subtype, if any specified. A missing subtype
        # group is treated as "not selected" so it cannot abort the fields
        # that follow.
        if bsi_type in _MBI_SUBTYPES:
            group_key, flag_key, subtype = _MBI_SUBTYPES[bsi_type]
            if (case_info.get(group_key) or {}).get(flag_key):
                dataframe['BSI_subtype'] = subtype

    if 'commonnocasereason' in case_info:
        reasons = case_info['commonnocasereason']
        nocasereason = ''
        # First truthy reason wins, checked in this fixed order.
        for reason in ('RIT', 'POA', 'Common excluded pathogen'):
            if reasons[reason]:
                nocasereason = reason
                break
        dataframe['commonnocasereason'] = nocasereason

    if 'alternate_diagnosis' in case_info:
        dataframe['alternate_diagnosis'] = case_info['alternate_diagnosis']

    if 'comment_on_pathogen' in case_info:
        dataframe['comment_on_pathogen'] = case_info['comment_on_pathogen']


def _apply_pathogen_and_comments(dataframe, case_info):
    """Copy pathogen and comment fields of *case_info* into *dataframe* in place."""
    if 'pathogen' in case_info:
        dataframe['pathogen'] = map_pathogen_id_to_name(case_info['pathogen'])
    if 'other_pathogen' in case_info:
        dataframe['other_pathogen'] = case_info['other_pathogen']

    # Concatenate the comments so the previous comment is not lost.
    has_comment = 'comment' in case_info
    has_new_comment = 'new_comment' in case_info
    if has_comment and has_new_comment:
        dataframe['comment'] = " ".join(
            [case_info['comment'], "----", case_info['new_comment']])
    elif has_comment:
        dataframe['comment'] = case_info['comment']
    elif has_new_comment:
        dataframe['comment'] = case_info['new_comment']

    if 'IWP_comment' in case_info:
        dataframe['IWP_comment'] = case_info['IWP_comment']


def _apply_collection_class(dataframe, collection_class):
    """Fill the ``clabsi`` column from the per-collection checkmarks.

    *collection_class* has the form
    ``{"2021-02-23_1": true, "2021-02-24_1": false}``. *dataframe* is sorted
    in place by ``collection_dt_tm``; a row whose label is absent from
    *collection_class* gets ``False``. Raises KeyError when the
    ``collection_dt_tm`` column is missing.
    """
    print('collection_class is found')

    # Sort by collection dates
    dataframe.sort_values(['collection_dt_tm'], inplace=True)

    # NOTE(review): the counter restarts whenever the full collection_dt_tm
    # value changes, while the label only contains the calendar date, so two
    # different times on one day both produce "<date>_1". Confirm this matches
    # whatever generates the manifest keys.
    count = 0
    previous = ''
    for index in dataframe.index:
        current = dataframe.loc[index, 'collection_dt_tm']
        if previous == current:
            count += 1
        else:
            count = 1
        previous = current

        collection_count = '{collection_date}_{number}'.format(
            collection_date=pd.to_datetime(current).strftime('%Y-%m-%d'),
            number=count,
        )

        # Figure out whether this collection_class was selected
        checkmark = collection_class.get(collection_count, False)

        # Record it in the dataframe
        print(collection_count, checkmark)
        dataframe.loc[index, 'clabsi'] = checkmark


def write_json_on_s3(bucket, object_path, data, dataframe):
    """
    Merge the review result in *data* into a copy of *dataframe* and store it.

    Despite the name, the result is written through
    ``write_dataframe_to_csv_on_s3``.

    Parameters:
        bucket: passed as ``bucket`` to ``write_dataframe_to_csv_on_s3``.
        object_path: passed as ``filename`` to ``write_dataframe_to_csv_on_s3``.
        data: output.manifest record; read from ``data['category']``
            (``caseInfo``, ``workerId``) and
            ``data['category-metadata']['creation-date']``.
        dataframe: pandas DataFrame to enrich; the caller's object is not
            modified because a copy is taken first.

    Returns:
        The enriched copy of *dataframe* (the same object that was written).

    The manifest is processed in two best-effort stages. A KeyError inside a
    stage prints a message and skips the remainder of that stage only; columns
    set before the error are kept and the dataframe is still written.
    """
    dataframe = dataframe.copy()

    # Stage 1: decision, review counter, reviewer and classification fields.
    try:
        _apply_decision_and_reviewer(dataframe, data)
        _apply_case_info_fields(dataframe, data['category']['caseInfo'])
    except KeyError:
        print("output.manifest format problem: keyError problem")

    # Stage 2: pathogen, comments and per-collection checkmarks.
    try:
        case_info = data["category"]['caseInfo']
        _apply_pathogen_and_comments(dataframe, case_info)
        if "collection_class" in case_info:
            _apply_collection_class(dataframe, case_info['collection_class'])
    except KeyError:
        print('output.manifest format problem,\
            ["category"]["caseInfo"]["category"]\
                ["comment"] does not exists for comment')

    # writing to dataframe
    write_dataframe_to_csv_on_s3(dataframe, filename=object_path, bucket=bucket)

    return dataframe