mlebench/competitions/predict-volcanic-eruptions-ingv-oe/grade.py:

from pandas import DataFrame
from sklearn.metrics import mean_absolute_error

from mlebench.grade_helpers import InvalidSubmissionError


def prepare_for_grading(submission: DataFrame, answers: DataFrame, target_column: str) -> DataFrame:
    """
    Merge the submission's target_column into the answers DataFrame, matching on 'segment_id'.

    target_column from the submission DataFrame will have the suffix '_pred', and
    target_column from the answers DataFrame will have the suffix '_true'.
    """
    # Answers checks
    assert (
        target_column in answers.columns
    ), f"Target column {target_column} not found in answers DataFrame."
    assert "segment_id" in answers.columns, "Segment ID column not found in answers DataFrame."

    # Submission checks
    if target_column not in submission.columns:
        raise InvalidSubmissionError(
            f"Target column {target_column} not found in submission DataFrame."
        )
    if "segment_id" not in submission.columns:
        raise InvalidSubmissionError("Segment ID column not found in submission DataFrame.")
    if not set(submission["segment_id"]) == set(answers["segment_id"]):
        raise InvalidSubmissionError(
            f"Submission is missing the following segment_ids: "
            f"{set(answers['segment_id']) - set(submission['segment_id'])}"
        )

    merged = answers.merge(
        submission[["segment_id", target_column]], on="segment_id", suffixes=("_true", "_pred")
    )
    return merged


def grade(submission: DataFrame, answers: DataFrame) -> float:
    merged = prepare_for_grading(submission, answers, "time_to_eruption")
    score = mean_absolute_error(
        y_true=merged["time_to_eruption_true"], y_pred=merged["time_to_eruption_pred"]
    )
    # The final score is the MAE rounded to the nearest whole number.
    rounded_score = int(round(score))
    return rounded_score
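

# ---------------------------------------------------------------------------
# Hypothetical usage sketch (not part of the upstream grade.py): a minimal
# demonstration of how grade() pairs predictions with answers by segment_id.
# The segment ids and time_to_eruption values below are made-up toy data,
# not real competition data.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    answers = DataFrame(
        {"segment_id": [1, 2, 3], "time_to_eruption": [1000, 2000, 3000]}
    )
    # Submission rows are deliberately out of order; the merge on segment_id
    # inside prepare_for_grading realigns them before scoring.
    submission = DataFrame(
        {"segment_id": [3, 1, 2], "time_to_eruption": [3100, 1100, 1900]}
    )
    # Per-segment absolute errors are 100, 100, 100, so MAE = 100.
    print(grade(submission, answers))  # -> 100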