def run_from_preds_confs()

in expanded_checklist/checklist/eval_core.py [0:0]


    def run_from_preds_confs(
            self, preds, confs, label_vocab=None, task=None,
            data_filter=None, overwrite=False):
        """Update self.results (run tests) from list of predictions and
        confidences

        Parameters
        ----------
        preds : list
            predictions
        confs : list
            confidences
        overwrite : bool
            If False, raise exception if results already exist

        data_filter: a dictionary, e.g. {"DOMAIN": business} -- if this eval
        core has metadata that marks the properties in the dictionary then the
        evaluation will only focus on examples that match the constraints
        """
        if not task:
            raise Exception('Task has to be provided to determine the labels!')
        elif task not in self.labels_dict:
            logger.warning(
                f"Task {task} is lacking labels in this " +
                "evaluation core. This will limit the metrics that " +
                "can be used.")
            if self.data_structure == DataShape.GROUPED:
                labels = [[None] * len(x) for x in self.data]
            else:
                labels = [None] * len(self.data)
            self.labels_dict[task] = Munch(
                {"labels": labels, "n_classes": None})

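        # clear the cached per-test core records before (re)running the tests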
        self.core_record_cache = {}
        self._check_create_results(overwrite)

        # store results in self.results and label vocab in self.label_vocab
        self.update_results_from_preds(preds, confs)

        self.label_vocab = label_vocab
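        # check the newly assembled results before running the individual tests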
        self._check_results()

        # the data_filter is saved in the results so that it can be
        # retrieved/recognized in the summary() function
        self.results.data_filter = data_filter
        self.results.task = task

        for test in self.tests:
            try:
                # get the record for the test -- keeping this in the loop is
                # less efficient but safer -- each test gets its own copy of
                # a record (with accordingly processed data)
                core_record = self._get_core_record(test)

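                # skip tests whose type does not match the data shape:
                # plain classification metrics are not run on sequence
                # records, and sequence metrics are not run on
                # non-sequence records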
                if (core_record.sequence and
                        issubclass(type(test), BasicClassificationMetrics)) or \
                        (not core_record.sequence and
                            issubclass(type(test), BasicSeqMetrics)):
                    continue

                logger.info(f"Evaluating on {test.name}...")
                # this updates the self.results field with the new results
                test_res = test.compute(core_record)
                self.results[test.get_name()] = test_res
            except Exception as err:
                logger.error(f"Couldn't run the {test.name} test.")
                # unexpected/non-standard errors are subclasses of Exception,
                # not the catch-all Exception class itself; log the full
                # traceback for those and only a warning otherwise
                if type(err) is not Exception:
                    logger.exception(err)
                else:
                    logger.warning(err)
                self.results[test.get_name()] = err
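
A minimal usage sketch, assuming core is an eval core instance that already
holds data and metadata, and that flat lists of predictions and confidences
were produced elsewhere; the task name, label vocabulary and data_filter
values below are illustrative only.

    # preds: one prediction per example; confs: the matching confidence scores
    preds = [0, 1, 1]
    confs = [0.9, 0.8, 0.7]

    core.run_from_preds_confs(
        preds, confs,
        label_vocab=["negative", "positive"],  # illustrative label vocabulary
        task="sentiment",                      # task has to be provided
        data_filter={"DOMAIN": "business"},    # only evaluate matching examples
        overwrite=True)                        # do not fail on existing results

    # per-test results are now available in core.results, keyed by test name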