def _get_core_record()

in expanded_checklist/checklist/eval_core.py [0:0]


    def _get_core_record(self, test: AbstractTest):
        """
        Build (or fetch from cache) the CoreRecord for the given test.

        Inspects ``self.results.data_filter``: when a filter is present and
        the metadata is length-consistent with the data and labels, the
        record holds the filtered data/meta/labels/preds/confs; otherwise
        it holds the originals unchanged. Returns ``None`` when no data
        remains. Results are cached per (filter, test-settings) key, and a
        deep copy is always handed out so tests cannot mutate the cache.
        """
        data_filter: Dict = self.results.data_filter
        task: str = self.results.task
        n_classes: int = self.labels_dict[task].n_classes
        labels: List = self.labels_dict[task].labels

        self.fill_missing_attributes()

        # Only apply the filter when meta exists and all three collections
        # line up in length; otherwise fall back to the unfiltered state.
        use_filter = bool(data_filter) and bool(self.meta) and \
            len(self.meta) == len(self.data) == len(labels)
        if use_filter:
            data, meta, labels, preds, confs = \
                self._filter_the_data(data_filter)
        else:
            data = self.data
            meta = self.meta
            preds = self.results.preds
            confs = self.results.confs

        if not data:
            return None

        # For NER, an example without labels simply has no named entities
        # of interest, so label-less examples are always dropped from
        # evaluation.
        # (Was: drop_none_labels = True if task == "NER" else
        # test.drop_none_labels)
        drop_none_labels = True

        cache_key = CacheEntry(
            str(data_filter), test.required_ds,
            test.probability_based, drop_none_labels,
            test.group_flatten_method)
        if cache_key in self.core_record_cache:
            return deepcopy(self.core_record_cache[cache_key])

        # TODO: adjust run_idx to match the newly filtered data so that
        # sampling can be supported. Sampling is NOT supported for now.
        record = CoreRecord(
            # deep copies keep the evaluation core immutable — no test can
            # alter the state stored here
            deepcopy(data),
            deepcopy(meta),
            deepcopy(labels),
            deepcopy(preds),
            deepcopy(confs),
            deepcopy(self.label_vocab),
            self.data_structure,
            self.group_names,
            task,
            n_classes,
            deepcopy(self.run_idxs)
        )
        # Done once here so the processing result is stored in the cache
        # for efficiency.
        record.process_labels_preds_and_confs(
            test.required_ds, test.probability_based,
            drop_none_labels, test.group_flatten_method)
        self.core_record_cache[cache_key] = record
        return deepcopy(record)