in mlebench/grade_helpers.py [0:0]
def rank_score(self, score: Optional[float], leaderboard: pd.DataFrame) -> dict:
    """
    Ranks a score against the leaderboard using the medal rules from
    kaggle.com/progression.

    The leaderboard is read positionally (row 0 is treated as rank 1) and is
    not sorted here.
    NOTE(review): this presumably relies on the caller passing a leaderboard
    already ordered best-first -- confirm against the caller.

    Args:
        score: The score to rank, or None when there is no score to rank
            (all medal flags are then False).
        leaderboard: DataFrame with a `score` column, one row per team.

    Returns:
        A dictionary with the following keys:
        - gold_medal: bool
        - silver_medal: bool (only True if gold_medal is False)
        - bronze_medal: bool (only True if gold and silver are both False)
        - above_median: bool (strictly better than the leaderboard median)
        - gold_threshold: float
        - silver_threshold: float
        - bronze_threshold: float
        - median_threshold: float

    Raises:
        AssertionError: If the leaderboard has no `score` column, or if
            `score` is neither None nor a float/int.
        ValueError: If the leaderboard is empty.
        IndexError: If a medal position falls outside the leaderboard.
    """
    assert "score" in leaderboard.columns, "Leaderboard must have a `score` column."

    lower_is_better = self.is_lower_better(leaderboard)
    num_teams = len(leaderboard)
    scores = leaderboard["score"]

    def get_score_at_position(position: int) -> float:
        """
        Returns the score at the given (1-indexed) position in the leaderboard.
        Raises an IndexError if the position is out of bounds.
        """
        if position - 1 >= len(scores) or position < 1:
            raise IndexError("Position out of bounds in the leaderboard.")
        return scores.iloc[position - 1]

    def get_thresholds(num_teams: int) -> tuple[float, float, float, float]:
        """
        Returns the thresholds for medals based on kaggle.com/progression
        Returns a tuple of thresholds in the order of gold, silver, bronze, median
        """
        if 1 <= num_teams < 100:
            # Top 10% / 20% / 40%; max(1, ...) keeps tiny leaderboards in bounds.
            gold_threshold = get_score_at_position(max(1, int(num_teams * 0.1)))
            silver_threshold = get_score_at_position(max(1, int(num_teams * 0.2)))
            bronze_threshold = get_score_at_position(max(1, int(num_teams * 0.4)))
        elif 100 <= num_teams < 250:
            # Top 10 teams / top 20% / top 40%.
            gold_threshold = get_score_at_position(10)
            silver_threshold = get_score_at_position(max(1, int(num_teams * 0.2)))
            bronze_threshold = get_score_at_position(max(1, int(num_teams * 0.4)))
        elif 250 <= num_teams < 1000:
            # Top 10 + 0.2% of teams / top 50 / top 100.
            gold_threshold = get_score_at_position(10 + int(num_teams * 0.002))
            silver_threshold = get_score_at_position(50)
            bronze_threshold = get_score_at_position(100)
        elif num_teams >= 1000:
            # Top 10 + 0.2% of teams / top 5% / top 10%.
            gold_threshold = get_score_at_position(10 + int(num_teams * 0.002))
            silver_threshold = get_score_at_position(max(1, int(num_teams * 0.05)))
            bronze_threshold = get_score_at_position(max(1, int(num_teams * 0.1)))
        else:
            raise ValueError("Number of teams in leaderboard must be greater than 0.")

        median_threshold = scores.median()

        return (
            float(gold_threshold),
            float(silver_threshold),
            float(bronze_threshold),
            float(median_threshold),
        )

    gold_threshold, silver_threshold, bronze_threshold, median_threshold = get_thresholds(
        num_teams
    )

    # Start from the "no medal" result; it is returned as-is when there is no
    # score, and the flags are overwritten below otherwise.
    result = {
        "gold_medal": False,
        "silver_medal": False,
        "bronze_medal": False,
        "above_median": False,
        "gold_threshold": gold_threshold,
        "silver_threshold": silver_threshold,
        "bronze_threshold": bronze_threshold,
        "median_threshold": median_threshold,
    }

    if score is None:
        return result

    assert isinstance(
        score, (float, int)
    ), f"Expected `score` to be a `float` or `int` but got a {type(score)}."

    def meets(threshold: float) -> bool:
        """Returns True if `score` is at least as good as `threshold`."""
        # bool() matters: a NumPy float score (np.float64 subclasses float, so
        # it passes the assertion above) makes the comparison yield numpy.bool_,
        # which is not a plain bool and is not JSON serializable.
        return bool(score <= threshold if lower_is_better else score >= threshold)

    # Medals are mutually exclusive: only the best one earned is reported.
    gold_medal = meets(gold_threshold)
    silver_medal = not gold_medal and meets(silver_threshold)
    bronze_medal = not gold_medal and not silver_medal and meets(bronze_threshold)
    # Unlike medals, the median comparison is strict: tying the median is not
    # "above" it.
    above_median = bool(
        score < median_threshold if lower_is_better else score > median_threshold
    )

    result["gold_medal"] = gold_medal
    result["silver_medal"] = silver_medal
    result["bronze_medal"] = bronze_medal
    result["above_median"] = above_median
    return result