mlebench/competitions/alaska2-image-steganalysis/grade.py

import numpy as np
import pandas as pd
from sklearn import metrics

from mlebench.grade_helpers import InvalidSubmissionError


def insert_thresholds(fpr: np.ndarray, tpr: np.ndarray, tpr_thresholds: list) -> tuple:
    """
    Insert tpr_thresholds into the TPR and FPR arrays, ensuring that the thresholds are present
    in the TPR array with the corresponding FPR values linearly interpolated.

    e.g.
    > tpr = [0.0, 0.25, 0.5, 0.75, 1.0]
    > fpr = [0.0, 0.1, 0.2, 0.3, 0.4]
    > tpr_thresholds = [0.0, 0.3, 0.4, 1.0]
    > fpr, tpr = insert_thresholds(fpr, tpr, tpr_thresholds)
    > print(tpr)
    > print(fpr)
    [0.0, 0.25, 0.3, 0.4, 0.5, 0.75, 1.0]
    [0.0, 0.1, 0.12, 0.16, 0.2, 0.3, 0.4]
    """
    fpr_ = fpr.tolist().copy()  # Don't modify the input arrays
    tpr_ = tpr.tolist().copy()
    for threshold in tpr_thresholds:
        if threshold not in tpr_:
            # Find the right position within tpr to insert the threshold
            for i, tpr_val in enumerate(tpr_):
                if tpr_val > threshold:
                    # Linear interpolation of fpr
                    new_fpr = fpr_[i - 1] + (fpr_[i] - fpr_[i - 1]) * (threshold - tpr_[i - 1]) / (
                        tpr_[i] - tpr_[i - 1]
                    )
                    tpr_.insert(i, threshold)
                    fpr_.insert(i, new_fpr)
                    break
    return np.array(fpr_), np.array(tpr_)


def alaska_weighted_auc(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    """
    Instead of the standard AUC, the competition uses a weighted AUC where different regions of
    the ROC curve are weighted differently. We compute the area under the curve segment by
    segment (HORIZONTAL segments between TPR pairs on the y-axis), then take a weighted average
    of the segments.

    For more details, see:
    www.kaggle.com/competitions/alaska2-image-steganalysis/overview/evaluation

    This particular implementation is adapted from:
    https://www.kaggle.com/code/anokas/weighted-auc-metric-updated

    (Key change vs the above implementation: it uses the `tpr` and `fpr` arrays from
    metrics.roc_curve as-is, neglecting to handle the case where the `tpr` values don't line up
    with the thresholds - so some segments either belong partially to the wrong threshold or get
    skipped entirely. Our implementation fixes this by inserting the thresholds into the `tpr`
    and `fpr` arrays before computing the AUC.)
    """
    tpr_thresholds = [0.0, 0.4, 1.0]
    weights = [2, 1]

    fpr, tpr, thresholds = metrics.roc_curve(y_true, y_pred, pos_label=1)
    fpr, tpr = insert_thresholds(fpr, tpr, tpr_thresholds)

    # Heights of the horizontal slices between consecutive TPR thresholds
    areas = np.array(tpr_thresholds[1:]) - np.array(tpr_thresholds[:-1])

    competition_metric = 0
    # Compute AUC segment by segment (where each segment is a horizontal slice of the ROC curve
    # between a pair of consecutive TPR thresholds)
    for idx, weight in enumerate(weights):
        y_min = tpr_thresholds[idx]
        y_max = tpr_thresholds[idx + 1]
        # Here, we're creating new x and y arrays to calculate the AUC for this segment:
        # the segment arrays consist of the FPR and TPR values that fall inside the segment,
        mask = (y_min <= tpr) & (tpr <= y_max)
        if mask.sum() == 0:
            continue
        xs = fpr[mask]
        ys = tpr[mask]
        # plus a new point [1, y_max] which closes the shape of this segment (draws a horizontal
        # line from the highest point in this segment to the right edge of the x-axis)
        xs = np.concatenate([xs, [1]])
        ys = np.concatenate([ys, [y_max]])
        ys = ys - y_min  # normalize such that the curve starts at y=0
        score = metrics.auc(xs, ys)
        submetric = score * weight
        competition_metric += submetric

    # Normalize by the maximum achievable weighted area (areas . weights) so that the final
    # weighted AUC lies between 0 and 1.
    normalization = np.dot(areas, weights)
    return competition_metric / normalization
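

# Illustrative sanity check, not part of the original grader (the helper name
# `_demo_weighted_auc` is hypothetical): a perfect ranking should score exactly 1.0, since each
# segment's AUC saturates and the weighted sum equals the normalization term, while a random
# ranking should land near 0.5.
def _demo_weighted_auc() -> None:
    rng = np.random.default_rng(seed=0)
    y_true = rng.integers(0, 2, size=1_000)
    perfect_scores = y_true.astype(float)  # scores identical to the labels -> perfect ranking
    random_scores = rng.random(size=1_000)
    print("perfect:", alaska_weighted_auc(y_true, perfect_scores))  # 1.0
    print("random: ", alaska_weighted_auc(y_true, random_scores))  # ~0.5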


def prepare_for_metric(submission: pd.DataFrame, answers: pd.DataFrame):
    if len(submission) != len(answers):
        raise InvalidSubmissionError("Submission and answers must have the same length")
    if "Id" not in submission.columns:
        raise InvalidSubmissionError("Submission must have an 'Id' column")
    if "Label" not in submission.columns:
        raise InvalidSubmissionError("Submission must have a 'Label' column")

    submission = submission.sort_values("Id")
    answers = answers.sort_values("Id")
    if (submission["Id"].values != answers["Id"].values).any():
        raise InvalidSubmissionError("Submission and answers must have the same ids")

    # Answers and submission labels must be numbers
    try:
        submission["Label"] = submission["Label"].astype(float)
    except ValueError:
        raise InvalidSubmissionError("Labels in submission must be numbers")
    answers["Label"] = answers["Label"].astype(float)

    # Cannot contain NaNs
    assert not answers["Label"].isnull().any(), "Answers cannot contain NaNs"
    if submission["Label"].isnull().any():
        raise InvalidSubmissionError("Submission cannot contain NaNs")

    return answers["Label"], submission["Label"]


def grade(submission: pd.DataFrame, answers: pd.DataFrame) -> float:
    y_true, y_pred = prepare_for_metric(submission, answers)
    return alaska_weighted_auc(np.array(y_true), np.array(y_pred))
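

# Hedged usage sketch (this __main__ block is illustrative, not part of the original module, and
# the ids below are made up): `grade` takes a submission and an answers DataFrame sharing "Id"
# and "Label" columns, where the submission's "Label" holds predicted scores and the answers'
# "Label" holds the 0/1 ground truth.
if __name__ == "__main__":
    toy_answers = pd.DataFrame(
        {"Id": ["0001.jpg", "0002.jpg", "0003.jpg", "0004.jpg"], "Label": [0, 1, 0, 1]}
    )
    toy_submission = pd.DataFrame(
        {"Id": ["0001.jpg", "0002.jpg", "0003.jpg", "0004.jpg"], "Label": [0.1, 0.9, 0.2, 0.8]}
    )
    # Both positives are ranked above both negatives, so the weighted AUC is 1.0
    print(grade(toy_submission, toy_answers))
    _demo_weighted_auc()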