in expanded_checklist/checklist/eval_core.py [0:0]
def run_from_preds_confs(
self, preds, confs, label_vocab=None, task=None,
data_filter=None, overwrite=False):
"""Update self.results (run tests) from list of predictions and
confidences
Parameters
----------
preds : list
predictions
confs : list
confidences
overwrite : bool
If False, raise exception if results already exist
data_filter: a dictionary, e.g. {"DOMAIN": business} -- if this eval
core has metadata that marks the properties in the dictionary then the
evaluation will only focus on examples that match the constraints
"""
if not task:
raise Exception('Task has to be provided to determine the labels!')
elif task not in self.labels_dict:
logger.warning(
f"Task {task} has no labels in this evaluation core. "
"This will limit the metrics that can be used.")
if self.data_structure == DataShape.GROUPED:
labels = [[None] * len(x) for x in self.data]
else:
labels = [None] * len(self.data)
self.labels_dict[task] =\
Munch({"labels": labels, "n_classes": None})
self.core_record_cache = {}
self._check_create_results(overwrite)
# store results in self.results and label vocab in self.label_vocab
self.update_results_from_preds(preds, confs)
self.label_vocab = label_vocab
self._check_results()
# the data_filter is saved in results so that it can be retrieved /
# recognized by the summary() function
self.results.data_filter = data_filter
self.results.task = task
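# run every registered test; a failing test is logged and its error is
# stored in the results, but it does not stop the remaining tests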
for test in self.tests:
try:
# get the record for the test -- keeping this in the loop is less
# efficient, but safer -- each test gets its own copy of the record
# (with accordingly processed data)
core_record = self._get_core_record(test)
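# skip metrics that do not match the data shape: classification
# metrics on sequence records and sequence metrics on non-sequence
# records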
if (core_record.sequence and
isinstance(test, BasicClassificationMetrics)) or \
(not core_record.sequence and
isinstance(test, BasicSeqMetrics)):
continue
logger.info(f"Evaluating on {test.name}...")
# compute the test result and store it in self.results under the
# test's name
test_res = test.compute(core_record)
self.results[test.get_name()] = test_res
except Exception as err:
logger.error(f"Couldn't run the {test.name} test.")
# unexpected/non-standard errors are instances of Exception
# subclasses, not of the catch-all Exception class itself, so they
# get a full traceback; deliberately raised plain Exceptions are
# only logged as warnings
if type(err) is not Exception:
logger.exception(err)
else:
logger.warning(err)
self.results[test.get_name()] = err
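# Illustrative usage (a minimal sketch -- `core`, `model_preds`,
# `model_confs`, the task name, and the label vocabulary values below are
# hypothetical, not defined in this module):
#
#     core.run_from_preds_confs(
#         model_preds, model_confs,
#         label_vocab={0: "negative", 1: "positive"},
#         task="sentiment",
#         data_filter={"DOMAIN": "business"},
#         overwrite=True)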