in expanded_checklist/checklist/core_record.py [0:0]
def _merge_tokens_for_identity_term(
        self, sent_confs, sent_preds, sent_meta, gidx, vidx=None):
    """Collapse per-token outputs so that a multi-token identity fill
    occupies a single slot aligned with its template placeholder.

    sent_confs/sent_preds are per-token confidences and predicted labels
    for the filled-in sentence; sent_meta carries the template and the
    sampled fill values; gidx names the sent_meta entry holding the
    fill's key; vidx optionally indexes into a list-valued sample.
    Requires numpy as np and the module-level tokenize() helper.
    """
    ikey = sent_meta['IDENTITY_KEY']
    # Prefer a pre-tokenized template when one was stored with the example.
    if "TOKENIZED_TEMPLATE" in sent_meta:
        temp_toks = sent_meta["TOKENIZED_TEMPLATE"]
    else:
        temp_toks = tokenize(sent_meta["TEMPLATE"])
    # Resolve the fill string: a dotted name ("prop.term") indexes into a
    # nested sample dict; otherwise the name is a direct sample key.
    gname = sent_meta[gidx]
    if "." in gname:
        prop, term = gname.split(".")
        if vidx is None:
            gfill = str(sent_meta['SAMPLE'][prop][term])
        else:
            gfill = str(sent_meta['SAMPLE'][prop][vidx][term])
    else:
        if vidx is None:
            gfill = str(sent_meta['SAMPLE'][gname])
        else:
            gfill = str(sent_meta['SAMPLE'][gname][vidx])
    # TODO: tokenization can be context-dependent; for now we don't handle
    # such cases and tokenize the fill in isolation.
    fill_toks = tokenize(gfill)
    new_confs = []
    new_preds = []
    p = 0  # cursor into the per-token outputs of the filled-in sentence
    for temp_tok in temp_toks:
        if temp_tok == f"@{ikey}@":
            # The placeholder expands to len(fill_toks) model tokens:
            # average their confidence vectors into a single slot.
            newc = []
            for x in range(p, p + len(fill_toks)):
                # coerce to np.ndarray so the element-wise sum works
                newc.append(np.array(sent_confs[x]))
            new_conf = sum(newc) / len(newc)
            new_confs.append(new_conf)
            # Re-derive the merged slot's label from the averaged vector.
            new_pred_idx = np.argmax(new_conf)
            new_pred = self.label_vocab[new_pred_idx]
            new_preds.append(new_pred)
            p += len(fill_toks)
        else:
            # Non-placeholder tokens map one-to-one; copy them through.
            new_confs.append(sent_confs[p])
            new_preds.append(sent_preds[p])
            p += 1
    return new_confs, new_preds
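
For orientation, here is a minimal standalone sketch of the merge step on toy data. The whitespace tokenize(), the two-label label_vocab, and the "Maria Lopez" fill are illustrative assumptions rather than fixtures from this repo; the sketch only reproduces the averaging that the placeholder branch above performs.

import numpy as np

def tokenize(text):
    # assumption: whitespace tokenization stands in for the module's helper
    return text.split()

label_vocab = ["NEG", "POS"]           # hypothetical two-label task

# Template "@PERSON@ is happy" filled with "Maria Lopez": the identity
# fill spans two model tokens, so it owns the first two confidence rows.
fill_toks = tokenize("Maria Lopez")
sent_confs = [np.array([0.2, 0.8]),    # "Maria"
              np.array([0.4, 0.6]),    # "Lopez"
              np.array([0.1, 0.9]),    # "is"
              np.array([0.3, 0.7])]    # "happy"

# Average the fill's confidence rows into one vector, then re-derive the
# merged slot's label from that average, as the placeholder branch does.
merged = sum(sent_confs[:len(fill_toks)]) / len(fill_toks)
print(merged)                               # [0.3 0.7]
print(label_vocab[int(np.argmax(merged))])  # POS

Averaging, rather than keeping only the first fill token's row, leaves the merged confidence a valid distribution: a mean of probability vectors still sums to one.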