src/autotrain/trainers/image_classification/utils.py [10:122]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
BINARY_CLASSIFICATION_EVAL_METRICS = (
    "eval_loss",
    "eval_accuracy",
    "eval_f1",
    "eval_auc",
    "eval_precision",
    "eval_recall",
)

MULTI_CLASS_CLASSIFICATION_EVAL_METRICS = (
    "eval_loss",
    "eval_accuracy",
    "eval_f1_macro",
    "eval_f1_micro",
    "eval_f1_weighted",
    "eval_precision_macro",
    "eval_precision_micro",
    "eval_precision_weighted",
    "eval_recall_macro",
    "eval_recall_micro",
    "eval_recall_weighted",
)

MODEL_CARD = """
---
tags:
- autotrain
- transformers
- image-classification{base_model}
widget:
- src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/tiger.jpg
  example_title: Tiger
- src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/teapot.jpg
  example_title: Teapot
- src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/palace.jpg
  example_title: Palace{dataset_tag}
---

# Model Trained Using AutoTrain

- Problem type: Image Classification

## Validation Metrics
{validation_metrics}
"""


def _binary_classification_metrics(pred):
    """
    Computes various binary classification metrics given the predictions and labels.

    Args:
        pred (tuple): A tuple containing raw predictions and true labels.
                      - raw_predictions (numpy.ndarray): The raw prediction scores from the model.
                      - labels (numpy.ndarray): The true labels.

    Returns:
        dict: A dictionary containing the following metrics:
            - f1 (float): The F1 score.
            - precision (float): The precision score.
            - recall (float): The recall score.
            - auc (float): The Area Under the ROC Curve (AUC) score.
            - accuracy (float): The accuracy score.
    """
    raw_predictions, labels = pred
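    # Predicted class = index of the highest raw score for each sample.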
    predictions = np.argmax(raw_predictions, axis=1)
    result = {
        "f1": metrics.f1_score(labels, predictions),
        "precision": metrics.precision_score(labels, predictions),
        "recall": metrics.recall_score(labels, predictions),
        "auc": metrics.roc_auc_score(labels, raw_predictions[:, 1]),
        "accuracy": metrics.accuracy_score(labels, predictions),
    }
    return result
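
# Illustrative only: with toy logits for two samples,
#   _binary_classification_metrics((np.array([[2.0, -1.0], [0.1, 0.3]]), np.array([0, 1])))
# argmaxes to predictions [0, 1], so every score above (f1, precision, recall,
# auc, accuracy) comes out as 1.0 for this input.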


def _multi_class_classification_metrics(pred):
    """
    Compute various classification metrics for multi-class classification.

    Args:
        pred (tuple): A tuple containing raw predictions and true labels.
                      - raw_predictions (numpy.ndarray): The raw prediction scores for each class.
                      - labels (numpy.ndarray): The true labels.

    Returns:
        dict: A dictionary containing the following metrics:
              - "f1_macro": F1 score with macro averaging.
              - "f1_micro": F1 score with micro averaging.
              - "f1_weighted": F1 score with weighted averaging.
              - "precision_macro": Precision score with macro averaging.
              - "precision_micro": Precision score with micro averaging.
              - "precision_weighted": Precision score with weighted averaging.
              - "recall_macro": Recall score with macro averaging.
              - "recall_micro": Recall score with micro averaging.
              - "recall_weighted": Recall score with weighted averaging.
              - "accuracy": Accuracy score.
    """
    raw_predictions, labels = pred
    predictions = np.argmax(raw_predictions, axis=1)
    results = {
        "f1_macro": metrics.f1_score(labels, predictions, average="macro"),
        "f1_micro": metrics.f1_score(labels, predictions, average="micro"),
        "f1_weighted": metrics.f1_score(labels, predictions, average="weighted"),
        "precision_macro": metrics.precision_score(labels, predictions, average="macro"),
        "precision_micro": metrics.precision_score(labels, predictions, average="micro"),
        "precision_weighted": metrics.precision_score(labels, predictions, average="weighted"),
        "recall_macro": metrics.recall_score(labels, predictions, average="macro"),
        "recall_micro": metrics.recall_score(labels, predictions, average="micro"),
        "recall_weighted": metrics.recall_score(labels, predictions, average="weighted"),
        "accuracy": metrics.accuracy_score(labels, predictions),
    }
    return results
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
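
Both duplicated helpers follow the same pattern: argmax over the raw class scores, then sklearn metrics against the integer labels. A minimal sketch with toy values (not taken from the repository; it assumes the numpy/sklearn imports that sit above the quoted line ranges):

    import numpy as np
    from sklearn import metrics

    logits = np.array([[2.1, 0.3, -1.0], [0.2, 1.5, 0.1], [0.0, 0.4, 2.2]])
    labels = np.array([0, 1, 2])
    preds = np.argmax(logits, axis=1)  # -> [0, 1, 2], mirroring the helpers above
    print(metrics.f1_score(labels, preds, average="macro"))  # 1.0 for this toy input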



src/autotrain/trainers/text_classification/utils.py [8:115]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
BINARY_CLASSIFICATION_EVAL_METRICS = (
    "eval_loss",
    "eval_accuracy",
    "eval_f1",
    "eval_auc",
    "eval_precision",
    "eval_recall",
)

MULTI_CLASS_CLASSIFICATION_EVAL_METRICS = (
    "eval_loss",
    "eval_accuracy",
    "eval_f1_macro",
    "eval_f1_micro",
    "eval_f1_weighted",
    "eval_precision_macro",
    "eval_precision_micro",
    "eval_precision_weighted",
    "eval_recall_macro",
    "eval_recall_micro",
    "eval_recall_weighted",
)

MODEL_CARD = """
---
library_name: transformers
tags:
- autotrain
- text-classification{base_model}
widget:
- text: "I love AutoTrain"{dataset_tag}
---

# Model Trained Using AutoTrain

- Problem type: Text Classification

## Validation Metrics
{validation_metrics}
"""


def _binary_classification_metrics(pred):
    """
    Calculate various binary classification metrics.

    Args:
        pred (tuple): A tuple containing raw predictions and true labels.
                      - raw_predictions (numpy.ndarray): The raw prediction scores from the model.
                      - labels (numpy.ndarray): The true labels.

    Returns:
        dict: A dictionary containing the following metrics:
              - "f1" (float): The F1 score.
              - "precision" (float): The precision score.
              - "recall" (float): The recall score.
              - "auc" (float): The Area Under the ROC Curve (AUC) score.
              - "accuracy" (float): The accuracy score.
    """
    raw_predictions, labels = pred
    predictions = np.argmax(raw_predictions, axis=1)
    result = {
        "f1": metrics.f1_score(labels, predictions),
        "precision": metrics.precision_score(labels, predictions),
        "recall": metrics.recall_score(labels, predictions),
        "auc": metrics.roc_auc_score(labels, raw_predictions[:, 1]),
        "accuracy": metrics.accuracy_score(labels, predictions),
    }
    return result


def _multi_class_classification_metrics(pred):
    """
    Compute various classification metrics for multi-class classification.

    Args:
        pred (tuple): A tuple containing raw predictions and true labels.
                      - raw_predictions (numpy.ndarray): The raw prediction scores for each class.
                      - labels (numpy.ndarray): The true labels.

    Returns:
        dict: A dictionary containing the following metrics:
              - "f1_macro": F1 score with macro averaging.
              - "f1_micro": F1 score with micro averaging.
              - "f1_weighted": F1 score with weighted averaging.
              - "precision_macro": Precision score with macro averaging.
              - "precision_micro": Precision score with micro averaging.
              - "precision_weighted": Precision score with weighted averaging.
              - "recall_macro": Recall score with macro averaging.
              - "recall_micro": Recall score with micro averaging.
              - "recall_weighted": Recall score with weighted averaging.
              - "accuracy": Accuracy score.
    """
    raw_predictions, labels = pred
    predictions = np.argmax(raw_predictions, axis=1)
    results = {
        "f1_macro": metrics.f1_score(labels, predictions, average="macro"),
        "f1_micro": metrics.f1_score(labels, predictions, average="micro"),
        "f1_weighted": metrics.f1_score(labels, predictions, average="weighted"),
        "precision_macro": metrics.precision_score(labels, predictions, average="macro"),
        "precision_micro": metrics.precision_score(labels, predictions, average="micro"),
        "precision_weighted": metrics.precision_score(labels, predictions, average="weighted"),
        "recall_macro": metrics.recall_score(labels, predictions, average="macro"),
        "recall_micro": metrics.recall_score(labels, predictions, average="micro"),
        "recall_weighted": metrics.recall_score(labels, predictions, average="weighted"),
        "accuracy": metrics.accuracy_score(labels, predictions),
    }
    return results
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
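
These utilities are typically consumed elsewhere in each trainer: one of the two metric functions is passed as compute_metrics, and the *_EVAL_METRICS tuples name the keys expected back from evaluation. A sketch of that wiring, under the assumption that a transformers Trainer is used (the actual call sites are outside the quoted line ranges):

    from transformers import Trainer, TrainingArguments

    num_classes = 2  # hypothetical; derived from the label list in the real config
    compute_metrics_fn = (
        _binary_classification_metrics if num_classes == 2 else _multi_class_classification_metrics
    )
    # trainer = Trainer(model=model, args=TrainingArguments(output_dir="out"),
    #                   train_dataset=train_ds, eval_dataset=valid_ds,
    #                   compute_metrics=compute_metrics_fn)
    # eval_results = trainer.evaluate()  # keys arrive prefixed with "eval_"
    # wanted = BINARY_CLASSIFICATION_EVAL_METRICS if num_classes == 2 else MULTI_CLASS_CLASSIFICATION_EVAL_METRICS
    # reported = {k: eval_results[k] for k in wanted if k in eval_results}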



