in expanded_checklist/checklist/eval_core.py [0:0]
def _get_core_record(self, test: AbstractTest):
    """
    Build (or fetch from the cache) the CoreRecord that `test` runs on.

    The record is created from the data, meta, labels, preds and confs
    held by the evaluation core. When self.results.data_filter is set and
    meta is present with the same length as the data and the labels,
    these are first narrowed through self._filter_the_data; in every
    other case they are used unfiltered.

    Processed records are stored in self.core_record_cache under a
    CacheEntry made of the stringified filter and the processing options
    taken from `test`, so the label/pred/conf processing is done once per
    such combination. The caller always gets a deep copy, never the
    cached object itself.

    Args:
        test: the test being evaluated; its required_ds,
            probability_based and group_flatten_method attributes are
            used both for the cache key and for processing the record.

    Returns:
        A deep copy of the processed CoreRecord, or None when there is
        no data.
    """
    data_filter: Dict = self.results.data_filter
    task: str = self.results.task
    task_labels = self.labels_dict[task]
    data_nclasses: int = task_labels.n_classes
    labels: List = task_labels.labels

    self.fill_missing_attributes()

    # Filtering is only attempted when there is a filter, there is meta,
    # and data, meta and labels all have the same length.
    filter_applies = (
        data_filter
        and self.meta
        and len(self.meta) == len(self.data) == len(labels)
    )
    if filter_applies:
        data, meta, labels, preds, confs = self._filter_the_data(data_filter)
    else:
        data = self.data
        meta = self.meta
        preds = self.results.preds
        confs = self.results.confs

    if not data:
        return None

    # For NER, an example without labels is a sentence that is not
    # 'suited' for the task (e.g. it has no named entities of interest),
    # so examples without labels are dropped in evaluation.
    # NOTE: this is currently hard-coded for every task; the test's own
    # drop_none_labels setting is not consulted here.
    drop_none_labels = True

    cache_key = CacheEntry(
        str(data_filter),
        test.required_ds,
        test.probability_based,
        drop_none_labels,
        test.group_flatten_method,
    )
    if cache_key in self.core_record_cache:
        return deepcopy(self.core_record_cache[cache_key])

    # TODO: fix the run_idx to match the new filtered data
    # -- to support sampling. FOR NOW SAMPLING IS NOT SUPPORTED.
    #
    # Every piece of state is copied on its own (one deepcopy call per
    # item) so that no test can alter what is in the evaluation core.
    copies = [
        deepcopy(source)
        for source in (data, meta, labels, preds, confs, self.label_vocab)
    ]
    record = CoreRecord(
        *copies,
        self.data_structure,
        self.group_names,
        task,
        data_nclasses,
        deepcopy(self.run_idxs),
    )

    # Processing is done here, before caching, so that it runs once and
    # the processed result is what gets reused.
    record.process_labels_preds_and_confs(
        test.required_ds,
        test.probability_based,
        drop_none_labels,
        test.group_flatten_method,
    )
    self.core_record_cache[cache_key] = record
    return deepcopy(record)