in sig-contributor-experience/surveys/k8s_survey_analysis/plot_utils.py [0:0]
def _count_likert_half(topic_data_long, in_half, group_cols, mid_point):
    """
    Count responses per group for one half of the scale, halving the midpoint.

    Args:
        topic_data_long (pandas.DataFrame): Ratings in long format
        in_half (pandas.Series): Boolean mask selecting the rows in this half
        group_cols (list): Names of the columns to group by
        mid_point (int): The rating shared by both halves
    Returns:
        (pandas.DataFrame): One row per combination of observed grouping
            values, with the count stored as floats in "level_1"
    """
    # Reindex against every combination of grouping values seen in the full
    # data so combinations missing from this half still appear, with count 0.
    all_groups = pd.MultiIndex.from_product(
        [topic_data_long[col].unique().tolist() for col in group_cols],
        names=group_cols,
    )
    counts = (
        topic_data_long[in_half]
        .groupby(group_cols)
        .count()
        .reindex(all_groups, fill_value=0)
        .reset_index()
    )
    # count() yields integers, but halving can produce fractional values.
    # Cast first so the assignment below never has to upcast the column
    # implicitly (deprecated in pandas 2.1+) and the dtype is always float.
    counts["level_1"] = counts["level_1"].astype(float)
    # The mid point is in both the top and bottom halves, so divide by two
    is_mid = counts["rating"] == mid_point
    counts.loc[is_mid, "level_1"] = counts.loc[is_mid, "level_1"] / 2.0
    return counts


def split_for_likert(topic_data_long, mid_point):
    """
    Returns the aggregated counts for ratings in the top and bottom halves of
    each category, necessary for making offset bar charts.

    Every column other than "level_1" is used as a grouping key, and the
    non-null "level_1" entries are counted per group. Both results contain a
    row for every combination of the grouping values observed in the input
    (count 0 where a combination does not fall in that half). Rows whose
    rating equals the midpoint belong to both halves, so their count is split
    evenly between the two results.

    Args:
        topic_data_long (pandas.DataFrame): A pandas DataFrame storing each
            respondent's ratings for a given topic, in long format. Must
            contain "level_1" and "rating" columns
        mid_point (int): The midpoint used to split the ratings into two halves
    Returns:
        (tuple): Tuple containing:
            (pandas.DataFrame): Aggregated counts for ratings greater than or
                equal to the midpoint, with rows in descending index order
            (pandas.DataFrame): Aggregated counts for ratings less than or
                equal to the midpoint
    Raises:
        ValueError: If topic_data_long has no "level_1" column
    """
    group_cols = topic_data_long.columns.tolist()
    group_cols.remove("level_1")

    top_scores = _count_likert_half(
        topic_data_long,
        topic_data_long["rating"] >= mid_point,
        group_cols,
        mid_point,
    ).sort_index(ascending=False)
    bottom_scores = _count_likert_half(
        topic_data_long,
        topic_data_long["rating"] <= mid_point,
        group_cols,
        mid_point,
    )
    return top_scores, bottom_scores