ludwig/visualize.py [1585:1640]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        probabilities_per_model,
        ground_truth,
        labels_limit,
        model_names=None,
        output_directory=None,
        file_format='pdf',
        **kwargs
):
    """Show models accuracy and data coverage while increasing treshold

    For each model it produces a pair of lines indicating the accuracy of
    the model and the data coverage while increasing a threshold (x axis) on
    the probabilities of predictions for the specified output_feature_name.

    # Inputs

    :param probabilities_per_model: (list) List of model probabilities
    :param ground_truth: (ndarray) NumPy Array containing ground truth data
    :param labels_limit: (int) Maximum number of labels.
             Label values in the dataset higher than this number are
             collapsed into a single "rare" label.
    :param model_names: (list, default: None) List of the names of the models to use as labels.
    :param output_directory: (string, default: None) Directory where to save plots.
             If not specified, plots will be displayed in a window
    :param file_format: (string, default: 'pdf') File format of output plots - pdf or png

    # Return

    :return: (None)
    """
    if labels_limit > 0:
        ground_truth[ground_truth > labels_limit] = labels_limit
    probs = probabilities_per_model
    model_names_list = convert_to_list(model_names)
    thresholds = [t / 100 for t in range(0, 101, 5)]

    accuracies = []
    dataset_kept = []

    for i, prob in enumerate(probs):

        # collapse the probabilities of all labels above labels_limit
        # into a single "rare" label column
        if labels_limit > 0 and prob.shape[1] > labels_limit + 1:
            prob_limit = prob[:, :labels_limit + 1]
            prob_limit[:, labels_limit] = prob[:, labels_limit:].sum(1)
            prob = prob_limit

        max_prob = np.max(prob, axis=1)
        predictions = np.argmax(prob, axis=1)

        accuracies_alg = []
        dataset_kept_alg = []

        for threshold in thresholds:
            threshold = threshold if threshold < 1 else 0.999
            # keep only the samples whose top predicted probability
            # reaches the current threshold
            filtered_indices = max_prob >= threshold
            filtered_gt = ground_truth[filtered_indices]
            filtered_predictions = predictions[filtered_indices]
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
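
A minimal standalone sketch of the threshold sweep performed above, assuming
probabilities is an (n_samples, n_classes) NumPy array of model outputs and
ground_truth an (n_samples,) array of integer labels; the helper name
accuracy_and_coverage_by_threshold is hypothetical and not part of
ludwig.visualize:

import numpy as np

def accuracy_and_coverage_by_threshold(probabilities, ground_truth):
    thresholds = [t / 100 for t in range(0, 101, 5)]
    max_prob = np.max(probabilities, axis=1)
    predictions = np.argmax(probabilities, axis=1)
    accuracies, coverages = [], []
    for threshold in thresholds:
        threshold = min(threshold, 0.999)  # avoid discarding everything at 1.0
        kept = max_prob >= threshold
        # coverage: fraction of samples whose confidence clears the threshold
        coverages.append(kept.mean())
        # accuracy: computed only on the kept samples (0.0 if none remain)
        accuracies.append(
            (predictions[kept] == ground_truth[kept]).mean()
            if kept.any() else 0.0
        )
    return thresholds, accuracies, coverages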



ludwig/visualize.py [1671:1728]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        probabilities_per_model,
        ground_truth,
        labels_limit,
        model_names=None,
        output_directory=None,
        file_format='pdf',
        **kwargs
):
    """Show models comparision of confidence treshold data vs accuracy.

    For each model it produces a line indicating the accuracy of the model
    and the data coverage while increasing a threshold on the probabilities
    of predictions for the specified output_feature_name. The difference from
    confidence_thresholding is that it uses two axes instead of three: the
    threshold itself is not visualized, and data coverage is used as the
    x axis instead of the threshold.

    # Inputs

    :param probabilities_per_model: (list) List of model probabilities
    :param ground_truth: (ndarray) NumPy Array containing ground truth data
    :param labels_limit: (int) Maximum number of labels.
             Label values in the dataset higher than this number are
             collapsed into a single "rare" label.
    :param model_names: (list, default: None) List of the names of the models to use as labels.
    :param output_directory: (string, default: None) Directory where to save plots.
             If not specified, plots will be displayed in a window
    :param file_format: (string, default: 'pdf') File format of output plots - pdf or png

    # Return
    :return: (None)
    """
    if labels_limit > 0:
        ground_truth[ground_truth > labels_limit] = labels_limit
    probs = probabilities_per_model
    model_names_list = convert_to_list(model_names)
    thresholds = [t / 100 for t in range(0, 101, 5)]

    accuracies = []
    dataset_kept = []

    for i, prob in enumerate(probs):

        if labels_limit > 0 and prob.shape[1] > labels_limit + 1:
            prob_limit = prob[:, :labels_limit + 1]
            prob_limit[:, labels_limit] = prob[:, labels_limit:].sum(1)
            prob = prob_limit

        max_prob = np.max(prob, axis=1)
        predictions = np.argmax(prob, axis=1)

        accuracies_alg = []
        dataset_kept_alg = []

        for threshold in thresholds:
            threshold = threshold if threshold < 1 else 0.999
            filtered_indices = max_prob >= threshold
            filtered_gt = ground_truth[filtered_indices]
            filtered_predictions = predictions[filtered_indices]
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
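
A minimal sketch of the coverage-vs-accuracy view described in the docstring
above, reusing the hypothetical accuracy_and_coverage_by_threshold helper from
the previous sketch; the matplotlib calls are illustrative and not the actual
Ludwig plotting code:

import matplotlib.pyplot as plt

def plot_data_vs_accuracy(probabilities_per_model, ground_truth, model_names):
    for probabilities, name in zip(probabilities_per_model, model_names):
        _, accuracies, coverages = accuracy_and_coverage_by_threshold(
            probabilities, ground_truth
        )
        # coverage on the x axis, accuracy on the y axis; the threshold
        # itself is not visualized
        plt.plot(coverages, accuracies, label=name)
    plt.xlabel('data coverage')
    plt.ylabel('accuracy')
    plt.legend()
    plt.show()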



