def rank_score()

in mlebench/grade_helpers.py [0:0]
72 lines of code
15 McCabe index (conditional complexity)

    def rank_score(self, score: Optional[float], leaderboard: pd.DataFrame) -> dict:
        """
        Work out which medal (if any) `score` would earn on `leaderboard`.

        The medal cut-offs are read from fixed leaderboard positions whose
        ranks depend on the number of teams (tiers per kaggle.com/progression).
        Positions are 1-based row indices into the `score` column, so the
        leaderboard is presumably ordered best-first -- this is not checked here.

        Args:
            score: The score to rank, or None when there is no score to rank.
            leaderboard: DataFrame with a `score` column, one row per team.

        Returns:
            A dictionary with the following keys:
            - gold_medal: bool
            - silver_medal: bool
            - bronze_medal: bool
            - above_median: bool
            - gold_threshold: float
            - silver_threshold: float
            - bronze_threshold: float
            - median_threshold: float
            At most one medal flag is set. All four flags are False when
            `score` is None.

        Raises:
            AssertionError: If the leaderboard lacks a `score` column, or if
                `score` is neither None nor a `float`/`int`.
            ValueError: If the leaderboard has no rows.
            IndexError: If a medal position falls outside the leaderboard.
        """
        assert "score" in leaderboard.columns, "Leaderboard must have a `score` column."

        lower_is_better = self.is_lower_better(leaderboard)

        team_count = len(leaderboard)
        board_scores = leaderboard["score"]

        def score_at(rank: int) -> float:
            """Look up the score held by the team at 1-based `rank`."""
            if rank < 1 or rank > len(board_scores):
                raise IndexError("Position out of bounds in the leaderboard.")
            return board_scores.iloc[rank - 1]

        def medal_ranks(n: int) -> tuple[int, int, int]:
            """Return the (gold, silver, bronze) cut-off ranks for `n` teams."""
            if n < 1:
                raise ValueError("Number of teams in leaderboard must be greater than 0.")
            if n < 100:
                return max(1, int(n * 0.1)), max(1, int(n * 0.2)), max(1, int(n * 0.4))
            if n < 250:
                return 10, max(1, int(n * 0.2)), max(1, int(n * 0.4))
            if n < 1000:
                return 10 + int(n * 0.002), 50, 100
            return 10 + int(n * 0.002), max(1, int(n * 0.05)), max(1, int(n * 0.1))

        # Fetch the three medal cut-offs first, then the median, and only then
        # convert everything to plain floats.
        raw_cutoffs = [score_at(rank) for rank in medal_ranks(team_count)]
        raw_cutoffs.append(board_scores.median())
        gold_cutoff, silver_cutoff, bronze_cutoff, median_cutoff = (
            float(value) for value in raw_cutoffs
        )

        thresholds = {
            "gold_threshold": gold_cutoff,
            "silver_threshold": silver_cutoff,
            "bronze_threshold": bronze_cutoff,
            "median_threshold": median_cutoff,
        }

        # Without a score there is nothing to rank; report thresholds only.
        if score is None:
            return {
                "gold_medal": False,
                "silver_medal": False,
                "bronze_medal": False,
                "above_median": False,
                **thresholds,
            }

        assert isinstance(
            score, (float, int)
        ), f"Expected `score` to be a `float` or `int` but got a {type(score)}."

        def reaches(cutoff: float):
            """Tell whether `score` is at least as good as `cutoff` (ties count)."""
            if lower_is_better:
                return score <= cutoff
            return score >= cutoff

        # Medals are exclusive: only the best tier reached is awarded.
        won_gold = reaches(gold_cutoff)
        won_silver = False if won_gold else reaches(silver_cutoff)
        won_bronze = False if (won_gold or won_silver) else reaches(bronze_cutoff)

        # Unlike the medals, beating the median requires a strict improvement.
        if lower_is_better:
            beats_median = score < median_cutoff
        else:
            beats_median = score > median_cutoff

        return {
            "gold_medal": won_gold,
            "silver_medal": won_silver,
            "bronze_medal": won_bronze,
            "above_median": beats_median,
            **thresholds,
        }