mlebench/competitions/petfinder-pawpularity-score/grade.py

import pandas as pd
from sklearn.metrics import root_mean_squared_error

from mlebench.grade_helpers import InvalidSubmissionError


def prepare_for_metric(submission: pd.DataFrame, answers: pd.DataFrame) -> dict:
    """Validate the submission against the answers and return the RMSE inputs."""
    id_col = "Id"
    target_col = "Pawpularity"

    # Sanity-check the answer key itself.
    assert id_col in answers.columns, f"Missing {id_col} column in answers"
    assert target_col in answers.columns, f"Missing {target_col} column in answers"
    assert (
        answers[target_col].between(1, 100).all()
    ), "Pawpularity in answers should be between 1 and 100"

    # Problems with the submission raise InvalidSubmissionError so the caller can handle them.
    if id_col not in submission.columns:
        raise InvalidSubmissionError(f"Missing {id_col} column in submission")
    if target_col not in submission.columns:
        raise InvalidSubmissionError(f"Missing {target_col} column in submission")
    if not submission[target_col].between(1, 100).all():
        raise InvalidSubmissionError("Pawpularity in submission should be between 1 and 100")

    # Align rows by Id before comparing predictions to the ground truth.
    submission = submission.sort_values(id_col)
    answers = answers.sort_values(id_col)
    if (submission[id_col].values != answers[id_col].values).any():
        raise InvalidSubmissionError("IDs in submission do not match IDs in answers")

    return {
        "y_true": answers[target_col].to_numpy(),
        "y_pred": submission[target_col].to_numpy(),
    }


def grade(submission: pd.DataFrame, answers: pd.DataFrame) -> float:
    """Score a submission with root mean squared error (lower is better)."""
    rmse_input = prepare_for_metric(submission, answers)
    score = root_mean_squared_error(**rmse_input)
    return score
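

# Illustrative usage sketch (not part of the original grader): two tiny in-memory
# DataFrames with made-up Ids and Pawpularity values, just to show how `grade` is
# called. Real grading would load the competition's answer key and a participant's
# submission CSV instead.
if __name__ == "__main__":
    toy_answers = pd.DataFrame({"Id": ["a", "b", "c"], "Pawpularity": [30, 70, 55]})
    toy_submission = pd.DataFrame({"Id": ["c", "a", "b"], "Pawpularity": [50, 35, 60]})
    # Rows are sorted by Id inside prepare_for_metric, so submission order does not
    # matter; this prints the RMSE of the toy predictions.
    print(grade(toy_submission, toy_answers))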