in sample_info/modules/visualizations.py [0:0]
def plot_histogram_of_informativeness(informativeness_scores, groups=None, bins=50, plt=None, save_name=None,
                                      density=False, use_density_estimation=False, bandwidth=0.0002, **kwargs):
    """
    Plot the distribution of per-example informativeness scores, optionally split by group.

    One overlaid histogram is drawn per distinct group, or, when
    ``use_density_estimation`` is set, one Gaussian kernel density curve per group.

    :param informativeness_scores: scores in any form accepted by ``convert_scores_to_numpy``
    :param groups: list of strings describing the group of each sample; if None, all samples
        are plotted as a single group and no legend is drawn
    :param bins: bin specification passed to ``np.histogram_bin_edges``; the edges are computed
        once over all scores and shared by every group's histogram
    :param plt: pyplot module used for plotting; if None, it is obtained from
        ``import_matplotlib(agg=True, use_style=False)``
    :param save_name: if truthy, the figure is handed to ``savefig`` together with this value
    :param density: forwarded to ``ax.hist``; also switches the y-axis label from 'Count' to 'Density'
    :param use_density_estimation: if True, plot kernel density estimates instead of histograms
        (this forces ``density`` to True)
    :param bandwidth: bandwidth of the Gaussian kernel used for the density estimates
    :param kwargs: accepted but currently unused
    :return: tuple ``(fig, ax)`` of the created figure and axes
    """
    if plt is None:
        _, plt = import_matplotlib(agg=True, use_style=False)
    informativeness_scores = convert_scores_to_numpy(informativeness_scores)

    if use_density_estimation:
        density = True

    with_groups = True
    if groups is None:
        # A placeholder label lets the single-group case share the per-group code path.
        groups = ['dummy'] * len(informativeness_scores)
        with_groups = False
    different_groups = sorted(set(groups))
    groups = np.array(groups)

    fig, ax = plt.subplots(figsize=(7, 5))

    # Both branches are skipped when there are no groups (empty input), which
    # leaves an empty plot instead of failing on an empty score array.
    if different_groups and use_density_estimation:
        # The evaluation grid does not depend on the group, so build it once.
        xs = np.linspace(0.000, np.max(informativeness_scores), 1000)
        grid = xs.reshape((-1, 1))
        for g in different_groups:
            scores = informativeness_scores[groups == g]
            kde = KernelDensity(kernel='gaussian', bandwidth=bandwidth).fit(scores.reshape((-1, 1)))
            # score_samples returns log-densities; exponentiate to get densities.
            prob_density = np.exp(kde.score_samples(grid))
            ax.plot(xs, prob_density, label=g)
    elif different_groups:
        # The shared edges must span all scores: edges taken from a single
        # group's histogram would leave out samples of the other groups that
        # fall outside that group's range.
        bin_edges = np.histogram_bin_edges(informativeness_scores, bins=bins)
        for g in different_groups:
            ax.hist(informativeness_scores[groups == g], bins=bin_edges, alpha=0.5,
                    label=g, density=density)

    ax.ticklabel_format(axis="x", style="sci", scilimits=(0, 0))
    ax.set_xlabel('Informativeness of an example')
    if density:
        ax.ticklabel_format(axis="y", style='sci', scilimits=(0, 0))
        ax.set_ylabel('Density')
    else:
        ax.set_ylabel('Count')
    if with_groups:
        ax.legend()
    fig.tight_layout()
    if save_name:
        savefig(fig, save_name)
    return fig, ax