def _compute()

in vision/m4/evaluation/custom_metrics/unfolded_image_captioning_metrics.py [0:0]


    def _compute(self, example_ids, generated_captions, reference_captions):
        data_per_id = {}

        for ex_id, gen_cap, ref_caps in zip(example_ids, generated_captions, reference_captions):
            # The `if` condition is a dirty trick to handle the case of distributed evaluation, where some instances
            # can be repeated over a few processes to make the batches even.
            # In this case, we just verify that all processes predicted the same thing, and only take one copy of the
            # predictions in order not to mess up the metrics. Ideally this "unique" logic should be handled outside
            # of the metric, or maybe in the add_batch call...
            if ex_id not in data_per_id:
                data_per_id[ex_id] = {
                    "generated_caption": gen_cap,
                    "reference_captions": ref_caps,
                }
            else:
                if data_per_id[ex_id]["generated_caption"] != gen_cap:
                    logger.warning(
                        f"Example {ex_id} has different predictions across processes. We have: {gen_cap} and"
                        f" {data_per_id[ex_id]['generated_caption']}"
                    )
                if data_per_id[ex_id]["reference_captions"] != ref_caps:
                    logger.warning(
                        f"Example {ex_id} has different reference captions across processes. We have: {ref_caps} and"
                        f" {data_per_id[ex_id]['reference_captions']}"
                    )

        # assert list(range(len(data_per_id))) == sorted(data_per_id.keys())

        results = {}
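        # Fall back to server-style output when no reference captions are available (e.g. a held-out test split)
        # and DEFAULT_TO_SERVER_RESULTS is among the requested metrics.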
        default_to_save_generations = (
            reference_captions[0] is None or len(reference_captions[0]) == 0
        ) and ImageCaptioningMetrics.DEFAULT_TO_SERVER_RESULTS in self.metrics

        if self.save_generations or default_to_save_generations:
            # If the reference captions are None/empty, we default to the server results format
            results["server_results"] = [
                {
                    "image_id": ex_id,
                    "caption": data["generated_caption"],
                }
                for ex_id, data in data_per_id.items()
            ]

        if default_to_save_generations:
            return results

        # We put the results in the format expected by the tokenizer of pycocoevalcap
        gts = {}
        res = {}
        caption_counter = 0
        for ex_id, data_dict in data_per_id.items():
            res[ex_id] = [{"image_id": ex_id, "caption": data_dict["generated_caption"], "id": caption_counter}]
            caption_counter += 1
            gts[ex_id] = [
                {"image_id": ex_id, "caption": ref_str, "id": caption_counter + idx}
                for idx, ref_str in enumerate(data_dict["reference_captions"])
            ]
            caption_counter += len(data_dict["reference_captions"])

        if len(self.pycoco_scorers) > 0:
            tokenizer = PTBTokenizer()

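            # PTBTokenizer maps {image_id: [{"caption": ...}, ...]} to {image_id: [tokenized caption string, ...]},
            # which is the plain-string format the pycoco scorers expect.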
            gts = tokenizer.tokenize(gts)
            res = tokenizer.tokenize(res)

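            # compute_score returns a corpus-level score and per-example scores; some scorers (e.g. BLEU-1..4)
            # return lists of both, hence the two branches below.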
            for scorer, method in self.pycoco_scorers:
                score, scores = scorer.compute_score(gts, res)
                if isinstance(method, list):
                    for sc, scs, m in zip(score, scores, method):
                        results[f"{m}"] = sc
                        results[f"{m}_all"] = convert_to_list(scs)
                else:
                    results[f"{method}"] = score
                    results[f"{method}_all"] = convert_to_list(scores)

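        # Non-pycoco scorers receive the raw (untokenized) generated and reference captions as parallel lists.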
        if len(self.other_scorers) > 0:
            all_generated_captions = [data["generated_caption"] for data in data_per_id.values()]
            all_reference_captions = [data["reference_captions"] for data in data_per_id.values()]
            for scorer, method in self.other_scorers:
                score = scorer.compute_score(all_generated_captions, all_reference_captions)
                results[f"{method}"] = score

        return results
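
For reference, below is a minimal, self-contained sketch of the pycocoevalcap calling convention used above. It assumes pycocoevalcap is installed and that a Java runtime is available for PTBTokenizer; the image id and captions are invented for illustration and are not part of the metric.

from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer
from pycocoevalcap.bleu.bleu import Bleu
from pycocoevalcap.cider.cider import Cider

# One generated caption and a few reference captions per image id, in the
# record format built by the loop above (ids and captions are made up).
res = {0: [{"image_id": 0, "caption": "a dog runs on the beach", "id": 0}]}
gts = {
    0: [
        {"image_id": 0, "caption": "a dog running along the beach", "id": 1},
        {"image_id": 0, "caption": "a brown dog runs on the sand", "id": 2},
    ]
}

# PTBTokenizer turns the records into {image_id: [tokenized caption, ...]},
# the plain-string format the scorers consume. It shells out to the Stanford
# tokenizer, so a Java runtime must be on the PATH.
tokenizer = PTBTokenizer()
gts_tok = tokenizer.tokenize(gts)
res_tok = tokenizer.tokenize(res)

# Each scorer returns (corpus-level score, per-example scores); Bleu(4)
# returns lists of four of each (BLEU-1 through BLEU-4).
bleu, bleu_per_image = Bleu(4).compute_score(gts_tok, res_tok)
cider, cider_per_image = Cider().compute_score(gts_tok, res_tok)
print({"bleu": bleu, "cider": cider})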