leaderboard/cat_sampling_stability.py [189:224]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
            for subject_id, theta in zip(self._subject_ids, thetas):
                subject_skills[subject_id] = theta
                # subject_skills[subject_id] = self.estimate_theta(subject_id)
            # Rank candidate items by their information summed over the
            # subjects' skill estimates, highest first.
            item_information = list(self.compute_sum_information(subject_skills).items())
            sorted_items = sorted(item_information, key=lambda kv: kv[1], reverse=True)
            item_keys = [item_id for item_id, _ in sorted_items]
            # Top up the chosen set with the most informative items still needed.
            needed_items = n_items - len(self._chosen_items)
            selected_items = item_keys[:needed_items]
            self._remaining_items = self._remaining_items - set(selected_items)
            self._chosen_items = self._chosen_items | set(selected_items)
            if len(self._chosen_items) != n_items:
                raise ValueError(
                    f"expected {n_items} chosen items, got {len(self._chosen_items)}"
                )
            return list(self._chosen_items)


class Simulation:
    def __init__(self, max_size: int = 6000, step_size: int = 25, n_trials: int = 10):
        self._step_size = step_size
        self._n_trials = n_trials
        squad = load_squad_id_to_question()

        # dev_preds = LeaderboardPredictions.parse_file(
        #    conf["squad"]["submission_predictions"]["dev"]
        # )
        test_preds = LeaderboardPredictions.parse_file(
            conf["squad"]["submission_predictions"]["test"]
        )
        test_item_ids = set()
        # Only the first scored submission is read (note the break); its
        # exact_match item ids are treated as the full test item set.
        for scored_preds in test_preds.scored_predictions.values():
            for item_id in scored_preds["exact_match"].keys():
                test_item_ids.add(item_id)
            break

        test_item_ids = list(test_item_ids)
        # squad_scores = load_squad_submissions(dev_preds)
        self._random_sampler = RandomSampler("dev")
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
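
The max-information selection block above is duplicated verbatim between cat_sampling_stability.py and sampling_stability.py (below). A minimal sketch of hoisting it into a shared helper; the function name, module placement, and standalone signature are assumptions for illustration, not the repository's actual layout:

from typing import Dict, Set, Tuple


def select_top_information_items(
    item_information: Dict[str, float],
    chosen_items: Set[str],
    remaining_items: Set[str],
    n_items: int,
) -> Tuple[Set[str], Set[str]]:
    """Greedily add the highest-information items until n_items are chosen.

    Hypothetical shared helper mirroring the duplicated block; returns the
    updated (chosen, remaining) sets.
    """
    # Rank items by summed information, highest first.
    ranked = sorted(item_information.items(), key=lambda kv: kv[1], reverse=True)
    needed = n_items - len(chosen_items)
    selected = {item_id for item_id, _ in ranked[:needed]}
    chosen = chosen_items | selected
    remaining = remaining_items - selected
    if len(chosen) != n_items:
        raise ValueError(f"expected {n_items} chosen items, got {len(chosen)}")
    return chosen, remaining

Both call sites could then delegate to this function and keep only the assignment back to self._chosen_items and self._remaining_items.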



leaderboard/sampling_stability.py [186:221]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
            for subject_id, theta in zip(self._subject_ids, thetas):
                subject_skills[subject_id] = theta
                # subject_skills[subject_id] = self.estimate_theta(subject_id)
            # Rank candidate items by their information summed over the
            # subjects' skill estimates, highest first.
            item_information = list(self.compute_sum_information(subject_skills).items())
            sorted_items = sorted(item_information, key=lambda kv: kv[1], reverse=True)
            item_keys = [item_id for item_id, _ in sorted_items]
            # Top up the chosen set with the most informative items still needed.
            needed_items = n_items - len(self._chosen_items)
            selected_items = item_keys[:needed_items]
            self._remaining_items = self._remaining_items - set(selected_items)
            self._chosen_items = self._chosen_items | set(selected_items)
            if len(self._chosen_items) != n_items:
                raise ValueError(
                    f"expected {n_items} chosen items, got {len(self._chosen_items)}"
                )
            return list(self._chosen_items)


class Simulation:
    def __init__(self, max_size: int = 6000, step_size: int = 25, n_trials: int = 10):
        self._step_size = step_size
        self._n_trials = n_trials
        squad = load_squad_id_to_question()

        # dev_preds = LeaderboardPredictions.parse_file(
        #    conf["squad"]["submission_predictions"]["dev"]
        # )
        test_preds = LeaderboardPredictions.parse_file(
            conf["squad"]["submission_predictions"]["test"]
        )
        test_item_ids = set()
        # Only the first scored submission is read (note the break); its
        # exact_match item ids are treated as the full test item set.
        for scored_preds in test_preds.scored_predictions.values():
            for item_id in scored_preds["exact_match"].keys():
                test_item_ids.add(item_id)
            break

        test_item_ids = list(test_item_ids)
        # squad_scores = load_squad_submissions(dev_preds)
        self._random_sampler = RandomSampler("dev")
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
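
The for/break pattern in both __init__ bodies reads only the first submission's exact_match scores. Written out as a standalone sketch; the nested-dict shape of scored_predictions is inferred from the loop above, not confirmed against the LeaderboardPredictions model:

from typing import Dict, List


def first_submission_item_ids(
    scored_predictions: Dict[str, Dict[str, Dict[str, float]]],
    metric: str = "exact_match",
) -> List[str]:
    """Return the item ids scored for the first submission encountered.

    Mirrors the for/break loop above: it assumes every submission is
    scored on the same item set, so one submission is representative.
    """
    first_scored = next(iter(scored_predictions.values()))
    return list(first_scored[metric].keys())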



