mlebench/competitions/icecube-neutrinos-in-deep-ice/grade.py

import numpy as np
import pandas as pd

from mlebench.grade_helpers import InvalidSubmissionError


def prepare_for_metric(submission: pd.DataFrame, answers: pd.DataFrame) -> dict:
    """Validate the submission and answers and align them for the metric.

    Both frames must have exactly the columns ``azimuth``, ``zenith`` and
    ``event_id``. After validation, both are sorted by ``event_id`` so that
    the returned arrays line up row by row.

    Parameters
    ----------
    submission : pd.DataFrame
        Predicted angles, one row per event.
    answers : pd.DataFrame
        Ground-truth angles, one row per event.

    Returns
    -------
    dict
        NumPy arrays under the keys ``az_true``, ``zen_true``, ``az_pred``
        and ``zen_pred``, matching the keyword arguments of
        ``angular_dist_score``.

    Raises
    ------
    InvalidSubmissionError
        If the submission has the wrong columns, non-numeric, NaN or
        infinite angles, or does not cover the same ``event_id`` values as
        the answers.
    AssertionError
        If the answers frame itself is malformed.
    """
    # --- submission ---
    if set(submission.columns) != {"azimuth", "zenith", "event_id"}:
        raise InvalidSubmissionError(
            "Submission must contain columns 'azimuth','zenith' and 'event_id'"
        )
    if not np.issubdtype(submission["azimuth"].dtype, np.number):
        raise InvalidSubmissionError("Azimuth must be a number")
    if not np.issubdtype(submission["zenith"].dtype, np.number):
        raise InvalidSubmissionError("Zenith must be a number")
    # NaN must be checked before finiteness: np.isfinite(NaN) is False, so
    # running the finiteness check first would report NaNs as "infinite" and
    # leave the NaN branches unreachable.
    if submission["azimuth"].isnull().any():
        raise InvalidSubmissionError("Azimuth must not be NaN")
    if submission["zenith"].isnull().any():
        raise InvalidSubmissionError("Zenith must not be NaN")
    if not np.all(np.isfinite(submission["azimuth"])):
        raise InvalidSubmissionError("Azimuth must not be infinite")
    if not np.all(np.isfinite(submission["zenith"])):
        raise InvalidSubmissionError("Zenith must not be infinite")

    # --- answers ---
    assert set(answers.columns) == {
        "azimuth",
        "zenith",
        "event_id",
    }, "Answers must contain columns 'azimuth','zenith' and 'event_id'"
    assert np.issubdtype(answers["azimuth"].dtype, np.number), "Azimuth must be a number"
    assert np.issubdtype(answers["zenith"].dtype, np.number), "Zenith must be a number"
    # Same ordering as above: NaN first so the failure message is accurate.
    assert not answers["azimuth"].isnull().any(), "Azimuth must not be NaN"
    assert not answers["zenith"].isnull().any(), "Zenith must not be NaN"
    assert np.all(np.isfinite(answers["azimuth"])), "Azimuth must not be infinite"
    assert np.all(np.isfinite(answers["zenith"])), "Zenith must not be infinite"

    # --- both ---
    if len(submission) != len(answers):
        raise InvalidSubmissionError("Submission and answers must have the same length")
    if set(submission["event_id"]) != set(answers["event_id"]):
        raise InvalidSubmissionError("Submission and answers must have the same event_ids")

    # Sort both frames by id so that predictions and truths are paired correctly.
    submission = submission.sort_values("event_id")
    answers = answers.sort_values("event_id")

    return {
        "az_true": answers["azimuth"].to_numpy(),
        "zen_true": answers["zenith"].to_numpy(),
        "az_pred": submission["azimuth"].to_numpy(),
        "zen_pred": submission["zenith"].to_numpy(),
    }


# courtesy of notebook from competition host: https://www.kaggle.com/code/sohier/mean-angular-error
def angular_dist_score(az_true, zen_true, az_pred, zen_pred):
    """
    Calculate the MAE of the angular distance between two directions.

    The two vectors are first converted to cartesian unit vectors, and then
    their scalar product is computed, which is equal to the cosine of the
    angle between the two vectors. The inverse cosine (arccos) thereof is
    then the angle between the two input vectors.

    Parameters:
    -----------
    az_true : float (or array thereof)
        true azimuth value(s) in radian
    zen_true : float (or array thereof)
        true zenith value(s) in radian
    az_pred : float (or array thereof)
        predicted azimuth value(s) in radian
    zen_pred : float (or array thereof)
        predicted zenith value(s) in radian

    Returns:
    --------
    dist : float
        mean over the angular distance(s) in radian
    """
    # pre-compute all sine and cosine values
    sa1 = np.sin(az_true)
    ca1 = np.cos(az_true)
    sz1 = np.sin(zen_true)
    cz1 = np.cos(zen_true)

    sa2 = np.sin(az_pred)
    ca2 = np.cos(az_pred)
    sz2 = np.sin(zen_pred)
    cz2 = np.cos(zen_pred)

    # scalar product of the two cartesian vectors (x = sz*ca, y = sz*sa, z = cz)
    scalar_prod = sz1 * sz2 * (ca1 * ca2 + sa1 * sa2) + (cz1 * cz2)

    # The scalar product of two unit vectors is always between -1 and 1;
    # clipping guards against numerical instability that might otherwise
    # occur from the finite precision of the sine and cosine functions.
    scalar_prod = np.clip(scalar_prod, -1, 1)

    # convert back to an angle (in radian)
    return np.average(np.abs(np.arccos(scalar_prod)))


def grade(submission: pd.DataFrame, answers: pd.DataFrame) -> float:
    """Score a submission as the mean angular distance to the answers.

    Validates and aligns the two frames with ``prepare_for_metric`` and then
    passes the resulting arrays to ``angular_dist_score``.

    Raises
    ------
    InvalidSubmissionError
        Propagated from ``prepare_for_metric`` when the submission is invalid.
    """
    ang_dist_score_inputs = prepare_for_metric(submission, answers)
    score = angular_dist_score(**ang_dist_score_inputs)
    return score

mlebench/competitions/icecube-neutrinos-in-deep-ice/grade.py (59 lines of code) (raw):