def _merge_tokens_for_identity_term()

in expanded_checklist/checklist/core_record.py


    def _merge_tokens_for_identity_term(
            self, sent_confs, sent_preds, sent_meta, gidx, vidx=None):
        """Collapse the per-token confidences/predictions that span a
        multi-token identity fill into a single averaged entry, so the
        outputs align with the template tokens rather than with the
        expanded sentence."""
        ikey = sent_meta['IDENTITY_KEY']

        if "TOKENIZED_TEMPLATE" in sent_meta:
            temp_toks = sent_meta["TOKENIZED_TEMPLATE"]
        else:
            temp_toks = tokenize(sent_meta["TEMPLATE"])

        # Resolve the fill string for this group: a "prop.term" key indexes
        # a nested entry of the sample, and vidx selects one element when
        # the sample holds a list of values for that property.
        gname = sent_meta[gidx]
        if "." in gname:
            prop, term = gname.split(".")
            if vidx is None:
                gfill = str(sent_meta['SAMPLE'][prop][term])
            else:
                gfill = str(sent_meta['SAMPLE'][prop][vidx][term])
        else:
            if vidx is None:
                gfill = str(sent_meta['SAMPLE'][gname])
            else:
                gfill = str(sent_meta['SAMPLE'][gname][vidx])

        # TODO: there can be situations where the tokenization is
        # context-dependent; for now we don't handle such cases
        fill_toks = tokenize(gfill)

        new_confs = []
        new_preds = []
        # p walks the sentence-level outputs while we walk the template
        # tokens; each identity placeholder consumes len(fill_toks)
        # sentence tokens in one step.
        p = 0
        for temp_tok in temp_toks:
            if temp_tok == f"@{ikey}@":
                newc = []
                for x in range(p, p + len(fill_toks)):
                    # make sure it's an np.ndarray, to sum correctly
                    newc.append(np.array(sent_confs[x]))

                # Average the per-token confidence distributions over the
                # fill span and re-derive the prediction from the average.
                new_conf = sum(newc) / len(newc)
                new_confs.append(new_conf)

                new_pred_idx = np.argmax(new_conf)
                new_pred = self.label_vocab[new_pred_idx]
                new_preds.append(new_pred)
                p += len(fill_toks)
            else:
                new_confs.append(sent_confs[p])
                new_preds.append(sent_preds[p])
                p += 1
        return new_confs, new_preds
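
To make the expected metadata concrete, here is a hypothetical `sent_meta` and the fill-resolution step in isolation. The key names other than the ones the method actually reads (`IDENTITY_KEY`, `TEMPLATE`, `SAMPLE`, and the entry named by `gidx`) are invented for this sketch.

    # Hypothetical metadata; "GROUP_A" stands in for whatever key gidx names.
    sent_meta = {
        "IDENTITY_KEY": "IDENTITY",
        "TEMPLATE": "I met @IDENTITY@ yesterday",
        "GROUP_A": "person.name",                 # "prop.term" style key
        "SAMPLE": {"person": {"name": "Mary Ann", "age": 34}},
    }

    gname = sent_meta["GROUP_A"]                  # i.e. gidx == "GROUP_A"
    prop, term = gname.split(".")
    gfill = str(sent_meta["SAMPLE"][prop][term])  # -> "Mary Ann"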
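
And a minimal, self-contained sketch of the merging loop itself, assuming a whitespace `tokenize` and a toy two-label vocabulary (both are stand-ins; the real module supplies its own tokenizer and `self.label_vocab`):

    import numpy as np

    label_vocab = ["NEG", "POS"]   # assumed label set for this sketch
    tokenize = str.split           # assumed whitespace tokenizer

    # The 4-token template expands to a 5-token sentence, because the
    # fill "Mary Ann" tokenizes to 2 tokens.
    temp_toks = tokenize("I met @IDENTITY@ yesterday")
    fill_toks = tokenize("Mary Ann")
    sent_confs = [np.array(c) for c in [
        [0.9, 0.1],   # "I"
        [0.8, 0.2],   # "met"
        [0.3, 0.7],   # "Mary"
        [0.5, 0.5],   # "Ann"
        [0.6, 0.4],   # "yesterday"
    ]]
    sent_preds = [label_vocab[int(np.argmax(c))] for c in sent_confs]

    new_confs, new_preds = [], []
    p = 0
    for tok in temp_toks:
        if tok == "@IDENTITY@":
            span = sent_confs[p:p + len(fill_toks)]
            merged = sum(span) / len(span)        # average over the fill
            new_confs.append(merged)
            new_preds.append(label_vocab[int(np.argmax(merged))])
            p += len(fill_toks)
        else:
            new_confs.append(sent_confs[p])
            new_preds.append(sent_preds[p])
            p += 1

    print(new_preds)  # ['NEG', 'NEG', 'POS', 'NEG'] -- one entry per
                      # template token; "Mary"/"Ann" merged into one 'POS'

Note that the method assumes the placeholder always expands to at least one token; an empty fill would make `len(newc)` zero in the averaging step.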