in src/analyze.py [0:0]
def _color_xaxis(axis, color):
    """Paint the x-axis label and the x tick labels of *axis* in *color*."""
    axis.xaxis.label.set_color(color)
    for tick_label in axis.xaxis.get_ticklabels():
        tick_label.set_color(color)


def avg_plots(df: pd.DataFrame, metric: str, plot_rank: bool = True):
    """
    Plot the mean and standard deviation of a metric for every
    classifier/oversampler combination as a horizontal error-bar chart.

    One row is drawn per (classifier, oversampler) pair, grouped by
    classifier, with an empty separator row between consecutive classifier
    groups. When ``plot_rank`` is true, the mean/std of the metric's rank is
    overlaid in red on a twin x-axis and the score axis is coloured blue.

    Parameters
    ----------
    df : pandas.DataFrame
        Summary frame that is looked up with ``df.loc[(learner, oversampler)]``
        and whose ``reset_index()`` exposes ``"learner"`` and ``"oversampler"``
        columns. Each row must provide ``(metric, "mean")`` and
        ``(metric, "std")``; ``(f"{metric}.rank", "mean")`` and
        ``(f"{metric}.rank", "std")`` are required only when ``plot_rank`` is
        true.
        NOTE(review): the previous docstring stated this frame is produced by
        ``filter_optimal_hps`` -- confirm against the caller.
    metric : str
        Metric to present. It must contain at least one ``"."``: its second
        dot-separated component is used as the key into ``METRICS`` for the
        x-axis label.
    plot_rank : bool
        Whether to overlay the rank of the metric on a secondary x-axis.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    flat = df.reset_index()
    classifiers = list(np.unique(flat["learner"]))
    oversamplers = list(np.unique(flat["oversampler"]))

    score_mean, score_std = [], []
    rank_mean, rank_std = [], []
    model_names = []
    major_ticks = []

    for group_no, classifier in enumerate(classifiers):
        if group_no:
            # Empty separator row between classifier groups. Its position
            # becomes a major tick so a horizontal grid line is drawn there.
            major_ticks.append(len(model_names))
            # Blank label whose width grows with the row count, so every
            # separator label is a distinct string.
            model_names.append(" " * len(model_names))
            score_mean.append(np.nan)
            score_std.append(np.nan)
            if plot_rank:
                rank_mean.append(np.nan)
                rank_std.append(np.nan)

        for oversampler in oversamplers:
            row = df.loc[(classifier, oversampler)]
            score_mean.append(row[(metric, "mean")])
            score_std.append(row[(metric, "std")])
            if plot_rank:
                # Rank columns are only needed when they are actually drawn.
                rank_mean.append(row[(f"{metric}.rank", "mean")])
                rank_std.append(row[(f"{metric}.rank", "std")])

            model_name = CLASSIFIERS[classifier]
            if oversampler != "none":
                model_name += "+" + OVERSAMPLERS[oversampler]
            model_names.append(model_name)

    y_positions = range(len(model_names))

    # Height scales with the number of rows (one extra per classifier group
    # for the separator).
    fig_height = 9 / (4 * 8) * (len(classifiers) * (len(oversamplers) + 1))
    plt.figure(figsize=(5, fig_height), dpi=320)
    ax = plt.axes()

    if plot_rank:
        ax2 = ax.twiny()
        ax2.errorbar(x=rank_mean, y=y_positions, xerr=rank_std, fmt="r^")
        ax2.set_xlabel("Rank")
        _color_xaxis(ax2, "red")

    ax.errorbar(x=score_mean, y=y_positions, xerr=score_std, fmt="bo")
    ax.xaxis.grid(True)

    # Major ticks sit on the separator rows and carry blank labels; the model
    # names are attached to the minor ticks.
    ax.set_yticks(major_ticks, minor=False)
    ax.set_yticklabels([""] * len(major_ticks), minor=False)
    ax.set_yticks(y_positions, minor=True)
    ax.set_yticklabels(model_names, minor=True)
    ax.yaxis.grid(True, which="major")

    ax.set_xlabel(METRICS[metric.split(".")[1]])
    if plot_rank:
        _color_xaxis(ax, "blue")
    plt.show()