in sig-contributor-experience/surveys/k8s_survey_analysis/plot_utils.py [0:0]
def _likert_half_shares(
    long_data,
    keep,
    group_cols,
    facet_totals,
    facet,
    mid_point,
    sign=1,
    reverse_rows=False,
):
    """Compute per-facet response shares for one side of a Likert chart.

    Args:
        long_data (pandas.DataFrame): Long-format responses containing the
            ``rating``, ``level_0`` and ``level_1`` columns.
        keep (pandas.Series): Boolean mask selecting the rows that belong
            to this side of the chart (mid-point rows included).
        group_cols (list): Columns to count by (every column of
            ``long_data`` except ``level_0`` and ``level_1``).
        facet_totals (pandas.DataFrame): Row counts of ``long_data`` per
            facet value, with the facet as a regular column.
        facet (str): Column used for grouping.
        mid_point (int): Rating treated as the neutral answer.
        sign (int): ``1`` for bars drawn above zero, ``-1`` for bars drawn
            below zero.
        reverse_rows (bool): If True, reverse the row order of the counts
            before merging.

    Returns:
        (pandas.DataFrame): Counts merged with the facet totals (so the
            overlapping columns carry ``_x``/``_y`` suffixes) plus a
            ``level_1`` column holding the signed share of responses.
    """
    counts = long_data[keep].groupby(group_cols).count().reset_index()
    if reverse_rows:
        counts = counts.sort_index(ascending=False)

    # ``count()`` yields integers. Cast before halving so the fractional
    # mid-point values are not written into an integer column, which newer
    # pandas versions warn about or reject.
    counts = counts.astype({"level_1": "float64"})

    # The neutral rating appears on both sides of the chart, so each side
    # only takes half of its respondents.
    is_mid = counts["rating"] == mid_point
    counts.loc[is_mid, "level_1"] = counts.loc[is_mid, "level_1"] / 2.0

    merged = counts.merge(facet_totals, on=facet)
    return merged.assign(level_1=sign * merged.level_1_x / merged.level_1_y)


def make_single_likert_chart(survey_data, column, facet, labels, five_is_high=False):
    """Make an offset stacked barchart showing the share of respondents at
    each rank or value for a single column in the original data. Each facet
    is shown as a tick on the x-axis.

    Ratings on the favourable side of the mid-point (3) are stacked above
    zero and the remaining ratings below it. Respondents who picked the
    mid-point are split evenly between both sides. Bar heights are
    fractions of the responses within each facet value.

    Args:
        survey_data (pandas.DataFrame): Raw data read in from Kubernetes Survey
        column (str): Name of the column holding the ratings to plot
        facet (str): Column used for grouping
        labels (list): List of strings to use as labels, corresponding
            to the numerical values given by the respondents.
        five_is_high (bool, optional): Defaults to False. If True,
            5 is considered the highest value in a ranking, otherwise
            it is taken as the lowest value.

    Returns:
        (plotnine.ggplot): Offset stacked barchart plot object which
            can be displayed in a notebook or saved out to a file
    """
    topic_data_long = make_long(survey_data[[column, facet]], facet)

    mid_point = 3
    if not five_is_high:
        # Negate the ratings (and the mid-point with them) so that 1 ends
        # up being the largest value.
        topic_data_long = topic_data_long.assign(rating=topic_data_long.rating * -1.0)
        mid_point = -mid_point

    # Count by every column except the two index-like columns that the
    # code expects in the long-format frame.
    group_cols = topic_data_long.columns.tolist()
    group_cols.remove("level_1")
    group_cols.remove("level_0")

    # Denominator for the shares: number of rows per facet value.
    facet_totals = topic_data_long.groupby(facet).count().reset_index()

    top_scores = _likert_half_shares(
        topic_data_long,
        topic_data_long["rating"] >= mid_point,
        group_cols,
        facet_totals,
        facet,
        mid_point,
        sign=1,
        reverse_rows=True,
    )
    bottom_scores = _likert_half_shares(
        topic_data_long,
        topic_data_long["rating"] <= mid_point,
        group_cols,
        facet_totals,
        facet,
        mid_point,
        sign=-1,
    )

    facet_values = topic_data_long[facet].unique().tolist()

    # ``rating_x`` is the rating column as renamed by the merge with the
    # facet totals inside the helper.
    chart = (
        p9.ggplot(
            topic_data_long,
            p9.aes(x=facet, fill="factor(rating_x)", color="factor(rating_x)"),
        )
        + p9.geom_col(
            data=top_scores,
            mapping=p9.aes(y="level_1"),
            show_legend=True,
            size=0.25,
            position=p9.position_stack(reverse=True),
        )
        + p9.geom_col(
            data=bottom_scores,
            mapping=p9.aes(y="level_1"),
            show_legend=True,
            size=0.25,
        )
        + p9.geom_hline(yintercept=0, color="white")
        + p9.theme(
            axis_text_x=p9.element_text(angle=45, ha="right"),
            strip_text_y=p9.element_text(angle=0, ha="left"),
        )
        + p9.scale_x_discrete(
            limits=facet_values,
            labels=[value.replace("_", " ") for value in facet_values],
        )
    )

    # Legend labels are wrapped to at most 15 characters per line.
    wrapped_labels = ["\n".join(wrap(label, 15)) for label in labels]
    if five_is_high:
        limits = [1, 2, 3, 4, 5]
        scale_factories = (p9.scale_color_brewer, p9.scale_fill_brewer)
    else:
        limits = [-1, -2, -3, -4, -5]
        scale_factories = (reverse_scale_fill_brewer, reverse_scale_color_brewer)

    for make_scale in scale_factories:
        # Hand every scale its own list objects, as the original code did.
        chart = chart + make_scale(
            "div",
            "RdBu",
            limits=list(limits),
            labels=list(wrapped_labels),
        )
    return chart