def split_for_likert()

in sig-contributor-experience/surveys/k8s_survey_analysis/plot_utils.py [0:0]


def _count_ratings_by_group(topic_data_long, group_cols, keep_rows, mid_point):
    """Count responses per ``group_cols`` combination among the ``keep_rows`` rows.

    The result has one row for every combination of the values observed in
    ``topic_data_long`` (combinations with no matching rows get a count of 0),
    with the count stored as a float in the ``level_1`` column. Counts on the
    ``mid_point`` rating are halved.
    """
    # Build the full grid from the *unfiltered* data so both halves end up with
    # the same set of rows, even for combinations the filter removed entirely.
    all_combinations = pd.MultiIndex.from_product(
        [topic_data_long[col].unique().tolist() for col in group_cols],
        names=group_cols,
    )

    counts = (
        topic_data_long[keep_rows]
        .groupby(group_cols)
        .count()
        .reindex(all_combinations, fill_value=0)
        .reset_index()
    )

    # Halving an odd count yields a fraction; convert the integer counts to
    # float up front so the assignment below never writes floats into an
    # integer column (deprecated upcasting in recent pandas) and the output
    # dtype does not depend on whether any mid-point rows exist.
    counts["level_1"] = counts["level_1"].astype(float)

    # The mid point is in both the top and bottom halves, so divide by two
    is_mid_point = counts["rating"] == mid_point
    counts.loc[is_mid_point, "level_1"] = counts.loc[is_mid_point, "level_1"] / 2.0

    return counts


def split_for_likert(topic_data_long, mid_point):
    """
    Returns the aggregated counts for ratings in the top and bottom halves of
    each category, necessary for making offset bar charts

    Rows are grouped by every column except "level_1", and the non-null
    "level_1" values in each group are counted. Both returned frames contain
    one row per combination of the observed grouping values; combinations
    outside the respective half have a count of 0. Because the midpoint rating
    belongs to both halves, its count is divided by two in each frame.

    Args:
        topic_data_long (pandas.DataFrame): A pandas DataFrame storing each respondent's
        ratings for a given topic, in long format. Must contain the columns
        "rating" and "level_1"
        mid_point (int): The midpoint to use to split the data into two halves, based on ratings

    Returns:
        (tuple): Tuple containing:
            (pandas.DataFrame): Aggregated counts for ratings greater than or equal to the
            midpoint, with rows in reversed (descending index) order
            (pandas.DataFrame): Aggregated counts for ratings less than or equal to the midpoint

        In both frames the counts are stored as floats in the "level_1" column.

    Raises:
        ValueError: If topic_data_long has no "level_1" column
    """
    group_cols = topic_data_long.columns.tolist()
    group_cols.remove("level_1")

    in_top_half = topic_data_long["rating"] >= mid_point
    in_bottom_half = topic_data_long["rating"] <= mid_point

    # The top half is returned in reversed row order
    top_scores = _count_ratings_by_group(
        topic_data_long, group_cols, in_top_half, mid_point
    ).sort_index(ascending=False)

    bottom_scores = _count_ratings_by_group(
        topic_data_long, group_cols, in_bottom_half, mid_point
    )

    return top_scores, bottom_scores