def aggregate()

in src/lighteval/tasks/extended/tiny_benchmarks/main.py [0:0]


    def aggregate(self, y_input):
        """Return the IRT-based performance estimates for ``self.task``.

        The result is cached on the instance: when ``self.estimates`` is already
        populated and ``y_input`` has the same length as the input of the call
        that populated it, the stored estimates are returned without any
        recomputation. Only the length is compared, not the values.

        Args:
            y_input: Indexable collection of per-example results, where
                ``y_input[i]`` is the result for the i-th seen example that is
                used (after the optional per-task slicing below).

        Returns:
            dict: ``{"irt": ..., "pirt": ..., "gpirt": ...}`` for ``self.task``.

        Raises:
            FileNotFoundError: If ``tinyBenchmarks.pkl`` cannot be found relative
                to the current working directory.
            IndexError: If ``self.task`` is not in ``self.BENCHS`` and is not a
                key of the loaded ``scenarios_position``, or if ``y_input`` is a
                sequence shorter than the list of seen examples.
        """
        if len(y_input) == self.num_samples and self.estimates is not None:
            return self.estimates[self.task]

        # We load the weights for the relevant examples.
        # NOTE(security): pickle.load can execute arbitrary code, so this file
        # must come from a trusted source. The path is resolved against the
        # current working directory.
        with open("extended_tasks/tiny_benchmarks/tinyBenchmarks.pkl", "rb") as handle:
            tinyBenchmarks = pickle.load(handle)

        scenario_data = tinyBenchmarks[self.scenario]
        seen_examples = scenario_data["seen_examples"]
        examples_weights = scenario_data["examples_weights"]
        irt_parameters = scenario_data["irt_parameters"]
        A, B = irt_parameters["A"], irt_parameters["B"]
        optimal_lambdas = scenario_data["optimal_lambdas"]
        scenarios_position = scenario_data["scenarios_position"]
        subscenarios_position = scenario_data["subscenarios_position"]

        # Total number of examples = largest stored position + 1.
        N = np.max([np.max(x) for x in scenarios_position.values()]) + 1

        # Every example of a sub-scenario gets weight N_sce / (n_sub * n_i), so
        # that each sub-scenario carries the same total weight in its scenario.
        balance_weights = np.ones(N)
        for scenario, positions in scenarios_position.items():
            sub_positions_by_name = subscenarios_position[scenario]
            N_sce = len(positions)
            n_sub = len(sub_positions_by_name)
            for sub_positions in sub_positions_by_name.values():
                balance_weights[sub_positions] = N_sce / (n_sub * len(sub_positions))

        # In case we use the big IRT model to estimate the performance of individual scenarios
        if self.task not in self.BENCHS:
            scenarios = [self.task]
            # Seen examples are stored as consecutive chunks of
            # `number_of_examples` items, one chunk per scenario in key order.
            task_ranks = [i for i, name in enumerate(scenarios_position) if name == self.task]
            ind_scenario = self.number_of_examples * task_ranks[0]
            seen_examples = seen_examples[ind_scenario : ind_scenario + self.number_of_examples]
        else:
            scenarios = list(scenarios_position)

        # Creating vector y and estimating theta
        y = np.zeros(N)
        for i, j in enumerate(seen_examples):
            y[j] = y_input[i]

        # Getting estimates
        theta = fit_theta(y, seen_examples, A, B)

        # Set lookups replace repeated linear scans; the comprehensions below
        # still iterate in the original order, so the index lists are unchanged.
        seen_lookup = set(seen_examples)
        unseen_examples = [i for i in range(N) if i not in seen_lookup]

        # These products do not depend on the scenario, so compute them once.
        # NOTE(review): hoisting assumes item_curve has no side effects — confirm.
        weighted_y = balance_weights * y
        weighted_curve = balance_weights * item_curve(theta.reshape(1, A.shape[1], 1), A, B)

        estimates = {}
        for scenario in scenarios:
            positions = scenarios_position[scenario]
            position_lookup = set(positions)
            N_sce = len(positions)
            seen_examples_sce = [s for s in seen_examples if s in position_lookup]
            unseen_examples_sce = [s for s in unseen_examples if s in position_lookup]

            # Observed part (seen examples) and model-predicted part (unseen ones).
            data_part_IRTp = weighted_y[seen_examples_sce].mean()
            irt_part = weighted_curve[0, unseen_examples_sce].mean()

            # Mix both parts according to the fraction of the scenario that was seen.
            IRTp_lambd = self.number_of_examples / N_sce
            IRT = (examples_weights[scenario] * y[seen_examples_sce]).sum()
            IRTp = IRTp_lambd * data_part_IRTp + (1 - IRTp_lambd) * irt_part
            IRTpp = optimal_lambdas[scenario] * IRT + (1 - optimal_lambdas[scenario]) * IRTp

            estimates[scenario] = {"irt": IRT, "pirt": IRTp, "gpirt": IRTpp}

        # Remember the input size and the results for the early return above.
        self.num_samples = len(y_input)
        self.estimates = estimates

        return estimates[self.task]