in functions/source/preprocess/preprocess.py [0:0]
def preprocess(obj):
    '''
    Convert an S3 object holding an Excel workbook into two dataframes.

    The workbook is read from ``obj['Body']`` and must contain the sheets
    'Sheet1' (patients), 'Sheet2' (temperature) and 'Sheet3' (organism).
    Column names of all three sheets are lower-cased. For every patient
    row, the distinct 'organism_desc_src' values of the organism rows
    sharing the same ('mrn', 'encntr_num') pair are joined with ",\\n"
    (in order of first appearance) and stored in a new 'organism' column.

    Parameters
    ----------
    obj : mapping
        S3 object as returned by boto3; only ``obj['Body'].read()`` is
        used, and it must return the bytes of an .xlsx workbook.

    Returns
    -------
    dataframe for patients (with the added 'organism' column),
    dataframe for temperature

    Raises
    ------
    KeyError
        If one of the expected sheets or key columns is missing.

    Notes
    -----
    Patients without any organism row get a missing value (NaN) in
    'organism'. Blank organism descriptions are ignored.
    '''
    # Function-scope import so the block does not depend on a file-level one.
    from io import BytesIO

    raw_bytes = obj['Body'].read()
    # read_excel deprecates raw byte strings; give it a file-like object.
    sheets = pd.read_excel(BytesIO(raw_bytes), sheet_name=None, engine='openpyxl')

    dataframe_patients = sheets['Sheet1'].rename(columns=str.lower)
    dataframe_temperature = sheets['Sheet2'].rename(columns=str.lower)
    dataframe_organism = sheets['Sheet3'].rename(columns=str.lower)

    # Blank descriptions cannot be joined as text, so drop them up front.
    organism_rows = dataframe_organism.dropna(subset=['organism_desc_src'])
    grouped_descriptions = organism_rows.groupby(
        ['mrn', 'encntr_num'], sort=False
    )['organism_desc_src']

    # One pass over the organism sheet: (mrn, encntr_num) -> joined text.
    # Grouping always yields a Series per key, so a key with a single
    # organism row is handled the same way as a key with many.
    organisms_by_encounter = {
        key: ",\n".join(map(str, descriptions.unique()))
        for key, descriptions in grouped_descriptions
    }

    # Patients with no matching organism rows keep a missing value instead
    # of aborting the whole preprocessing with a KeyError.
    missing = float('nan')
    patient_keys = zip(dataframe_patients['mrn'], dataframe_patients['encntr_num'])
    dataframe_patients['organism'] = [
        organisms_by_encounter.get(key, missing) for key in patient_keys
    ]
    return dataframe_patients, dataframe_temperature