def plot_histogram_of_informativeness()

in sample_info/modules/visualizations.py [0:0]
38 lines of code
10 McCabe index (conditional complexity)

def plot_histogram_of_informativeness(informativeness_scores, groups=None, bins=50, plt=None, save_name=None,
                                      density=False, use_density_estimation=False, bandwidth=0.0002, **kwargs):
    """
    Plot the distribution of per-example informativeness scores, optionally split by group.

    Each group is drawn either as a semi-transparent histogram or, when ``use_density_estimation``
    is set, as a Gaussian kernel density estimate curve.

    :param informativeness_scores: scores to plot; passed through ``convert_scores_to_numpy`` and then
        indexed with a boolean mask built from ``groups``
    :param groups: list of strings describing the group of each sample. If None, all samples are
        plotted as a single unlabeled series and no legend is drawn
    :param bins: bin specification in any form accepted by ``np.histogram_bin_edges`` (count,
        strategy name or explicit edges). The edges are computed once from all scores so that
        every group is drawn on the same bins
    :param plt: pyplot-like module used to create the figure; if None it is obtained from
        ``import_matplotlib(agg=True, use_style=False)``
    :param save_name: if truthy, the figure is handed to ``savefig`` together with this name
    :param density: forwarded to ``ax.hist``; also switches the y-axis label to 'Density'.
        Forced to True when ``use_density_estimation`` is set
    :param use_density_estimation: draw kernel density estimates instead of histograms
    :param bandwidth: bandwidth of the Gaussian ``KernelDensity`` estimator
    :param kwargs: accepted for call-site compatibility; currently unused
    :return: the ``(fig, ax)`` pair of the created plot
    """
    if plt is None:
        _, plt = import_matplotlib(agg=True, use_style=False)

    informativeness_scores = convert_scores_to_numpy(informativeness_scores)

    # A KDE curve is a density by construction, so label the y-axis accordingly.
    if use_density_estimation:
        density = True

    with_groups = groups is not None
    if not with_groups:
        # Single placeholder group so that the plotting loops below handle both cases uniformly.
        groups = ['dummy'] * len(informativeness_scores)

    different_groups = sorted(set(groups))
    groups = np.array(groups)

    fig, ax = plt.subplots(figsize=(7, 5))
    if not use_density_estimation:
        # Derive the bin edges from *all* scores. Taking them from the first group only would make
        # ax.hist silently drop samples of later groups that fall outside the first group's range.
        bin_edges = np.histogram_bin_edges(informativeness_scores, bins=bins)
        for g in different_groups:
            ax.hist(informativeness_scores[groups == g], bins=bin_edges, alpha=0.5,
                    label=g, density=density)
    elif different_groups:
        # The evaluation grid is shared by all groups, so build it once. The guard keeps empty
        # input from reaching np.max, which raises on an empty array.
        xs = np.linspace(0.000, np.max(informativeness_scores), 1000)
        grid = xs.reshape((-1, 1))
        for g in different_groups:
            scores = informativeness_scores[groups == g]
            kde = KernelDensity(kernel='gaussian', bandwidth=bandwidth).fit(scores.reshape((-1, 1)))
            # score_samples returns log-densities; exponentiate to get the density itself.
            prob_density = np.exp(kde.score_samples(grid))
            ax.plot(xs, prob_density, label=g)

    ax.ticklabel_format(axis="x", style="sci", scilimits=(0, 0))
    ax.set_xlabel('Informativeness of an example')
    if density:
        ax.ticklabel_format(axis="y", style='sci', scilimits=(0, 0))
        ax.set_ylabel('Density')
    else:
        ax.set_ylabel('Count')
    if with_groups:
        ax.legend()

    fig.tight_layout()
    if save_name:
        savefig(fig, save_name)

    return fig, ax