def make_single_bar_chart_multi_year()

in sig-contributor-experience/surveys/k8s_survey_analysis/plot_utils.py [0:0]


def make_single_bar_chart_multi_year(survey_data, column, facet, proportionally=False):
    """Make a barchart showing the number of respondents responding to a single column.
        Bars are colored by which year of the survey they correspond to. If facet
        is not empty, the resulting plot will be faceted into subplots by the variables
        given. 

    Args:
        survey_data (pandas.DataFrame): Raw data read in from Kubernetes Survey   
        column (str): Column to plot responses to
        facet (list,optional): List of columns use for grouping
        proportionally (bool, optiona ): Defaults to False. If True,
            the bars heights are determined proportionally to the 
            total number of responses in that facet. 

    Returns:
        (plotnine.ggplot): Plot object which can be displayed in a notebook or saved out to a file

    """
    cols = [column, facet]
    show_legend = False
    topic_data = survey_data[cols + ["year"]]

    topic_data_long = make_long(topic_data, facet, multi_year=True)

    if proportionally:
        proportions = (
            topic_data_long[topic_data_long.rating == 1].groupby(facet + ["year"]).sum()
            / topic_data_long.groupby(facet + ["year"]).sum()
        ).reset_index()
    else:
        proportions = (
            topic_data_long[topic_data_long.rating == 1]
            .groupby(facet + ["year"])
            .count()
            .reset_index()
        )

    x = topic_data_long.columns.tolist()
    x.remove("level_1")

    ## Uncomment to return dataframe instead of plot
    # return proportions

    return (
        p9.ggplot(proportions, p9.aes(x=facet, fill="year", y="level_1"))
        + p9.geom_bar(show_legend=show_legend, stat="identity")
        + p9.theme(
            axis_text_x=p9.element_text(angle=45, ha="right"),
            strip_text_y=p9.element_text(angle=0, ha="left"),
        )
        + p9.scale_x_discrete(
            limits=topic_data_long[facet].unique().tolist(),
            labels=[
                x.replace("_", " ") for x in topic_data_long[facet].unique().tolist()
            ],
        )
    )