def get_df()

in sig-contributor-experience/surveys/k8s_survey_analysis/prepare_2018.py [0:0]


def get_df(path):
    """Read the survey CSV at ``path`` and return it as a cleaned DataFrame.

    Processing happens in this order:

    1. Missing values are filled per column, chosen by column-name prefix:
       ``Useful:`` columns get 0, ``Upstream`` columns get the text
       "Didn't Answer", and the indicator-style columns listed below are
       replaced by 0 where the cell was missing and 1 otherwise.
    2. Column names are normalised (spaces become underscores, question
       marks are removed, and the ``Most_Important_Project`` /
       ``Most_Important_Prj`` spellings are unified to
       ``Most_Important_Proj``).
    3. The ``Use_freq:_discuss.kubernetes.io`` column is dropped.
    4. ``date_taken`` is derived from ``End_Date`` and ``Contributing_Length``
       is translated through ``contrib_length_2018_to_2019``.
    5. Columns are renamed via ``convert_2018_to_2019`` and several answer
       columns are translated through their lookup tables.

    Args:
        path: Location of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        The transformed ``pandas.DataFrame``.
    """
    frame = pd.read_csv(path)

    # Columns whose names start with one of these are reduced to a 0/1 flag:
    # 0 where the cell was missing, 1 where anything was entered.
    indicator_prefixes = (
        "Contribute:",
        "Check for news:",
        "Attended:",
        "Attending:",
        "Most Important Pr",
    )

    # Each replacement depends only on the column's own raw values, so they
    # can all be collected first and applied in one ``assign`` call.
    replacements = {}
    for column in frame.columns:
        if column.startswith("Useful:"):
            replacements[column] = frame[column].fillna(0)
        elif column.startswith(indicator_prefixes):
            replacements[column] = np.where(frame[column].isna(), 0, 1)
        elif column.startswith('Upstream'):
            replacements[column] = frame[column].fillna("Didn't Answer")
    frame = frame.assign(**replacements)

    def _tidy_header(name):
        # Normalise a raw CSV header into the underscore form used below.
        name = name.replace(" ", "_").replace("?", "")
        name = name.replace('Most_Important_Project', 'Most_Important_Proj')
        return name.replace('Most_Important_Prj', 'Most_Important_Proj')

    frame = frame.rename(
        columns={name: _tidy_header(name) for name in frame.columns}
    )
    frame = frame.drop(columns='Use_freq:_discuss.kubernetes.io')

    end_timestamps = pd.to_datetime(frame['End_Date'])
    frame = frame.assign(
        date_taken=end_timestamps.dt.date,
        # Plain ``.get``: answers missing from the table become None here,
        # unlike the fall-through translations further down.
        Contributing_Length=frame['Contributing_Length'].apply(
            contrib_length_2018_to_2019.get
        ),
    )

    # This rename must precede the translations below, which address columns
    # by the names they have after ``convert_2018_to_2019`` is applied.
    frame = frame.rename(columns=convert_2018_to_2019)

    def _translate_with(table):
        # Build a translator that leaves answers absent from ``table`` as-is.
        return lambda answer: table.get(answer, answer)

    answer_tables = (
        ('Level_of_Contributor', ladder_level_2018_to_2019),
        ('Upstream_supported_at_employer', employer_2018_to_2019),
        ('Interested_in_next_level', next_level_interest_2018_2019),
        ('Contribute_to_other_OSS', oss_projects_2018_to_2019),
    )
    for column, table in answer_tables:
        frame = frame.assign(
            **{column: frame[column].apply(_translate_with(table))}
        )

    # The column name contains a literal non-breaking space (\xa0); this one
    # is written back through ``.loc`` rather than ``assign``.
    help_wanted_column = 'Do_you_use_the\xa0Help_Wanted_and/or_Good_First_Issue_labels_on_issues_you_file_to_find_contributors'
    frame.loc[:, help_wanted_column] = frame[help_wanted_column].apply(
        _translate_with(help_wanted_2018_to_2019)
    )

    return frame