def evaluate_example()

in datasets.py [0:0]


    def evaluate_example(self, example: InputExample, output_sentence: str, model=None, tokenizer=None,
                         eval_nll=False) -> Counter:
        """
        Evaluate an output sentence on a single example of this dataset.

        The predicted relation is read from the generated output sentence, unless NLL evaluation is requested
        (through ``eval_nll`` or ``self.eval_nll``). NLL inference is also used as a fallback when the generated
        output sentence yields no relation at all, or a relation that is not among ``self.relation_types``.

        Args:
            example: the example to evaluate; its first relation is taken as the ground truth.
            output_sentence: the generated sentence, parsed with ``self.output_format.run_inference``.
            model: forwarded to ``self.nll_inference`` (only used for NLL evaluation).
            tokenizer: forwarded to ``self.nll_inference`` (only used for NLL evaluation).
            eval_nll: force NLL evaluation for this call, regardless of ``self.eval_nll``.

        Returns:
            A Counter with the keys 'num_sentences' (always 1), 'gt_relations' and 'predicted_relations'
            (1 unless the respective relation is ``self.NO_RELATION``) and 'correct_relations' (1 only when
            the prediction equals a ground truth relation that is not ``self.NO_RELATION``).

        Raises:
            IndexError: if ``example.relations`` is empty, since there is no ground truth to compare with.
        """
        use_nll = eval_nll or self.eval_nll
        predicted_relation_str = None

        if not use_nll:
            # evaluate by generating the output sentence; the predicted entities are not needed here
            _, predicted_relations = self.output_format.run_inference(
                example,
                output_sentence,
                entity_types=self.entity_types,
                relation_types=self.relation_types,
            )

            # only the first predicted relation is considered; its first item is the relation name
            first_relation = next(iter(predicted_relations), None)
            if first_relation is not None:
                predicted_relation_str = first_relation[0]

            is_known_relation = first_relation is not None and any(
                predicted_relation_str == relation_type.natural for relation_type in self.relation_types.values()
            )

            # no relation was generated, or it is not in the list of possible relations,
            # so we use NLL evaluation instead
            use_nll = not is_known_relation

        if use_nll:
            # NLL evaluation
            predicted_relation_type = self.nll_inference(
                example=example,
                relation_types=list(self.relation_types.values()),
                model=model,
                tokenizer=tokenizer,
            )

            predicted_relation_str = predicted_relation_type.natural

        # load ground truth relation
        gt_relation_str = example.relations[0].type.natural

        # NO_RELATION counts neither as a ground truth relation nor as a predicted one,
        # but the sentence itself is always counted as evaluated
        has_gt_relation = gt_relation_str != self.NO_RELATION
        has_predicted_relation = predicted_relation_str != self.NO_RELATION

        return Counter({
            'num_sentences': 1,
            'gt_relations': int(has_gt_relation),
            'predicted_relations': int(has_predicted_relation),
            'correct_relations': int(has_gt_relation and predicted_relation_str == gt_relation_str),
        })