in code/source/postprocessing.py [0:0]
def map_split_preds_to_idx(split_preds, split_idx):
    """
    Maps sentences that were split (due to BERT input size constraints)
    and their predictions back to the original sentence index.
    :param split_preds: list of lists of tag predictions for each word
    :param split_idx: list of original sentence indexes, duplicated for each
        chunk produced when splitting a sentence
    :return: list of grouped predictions for each initial sentence and the
        respective initial sentence ids
    """
    idx = []
    flat_preds = []
    flat = []
    for i, _id in enumerate(split_idx):
        # When the sentence id changes, flush the accumulated predictions
        # and start a new group for the next original sentence
        if i != 0 and _id != split_idx[i - 1]:
            flat_preds.append(flat)
            # copy so later `+=` does not mutate the caller's split_preds
            flat = list(split_preds[i])
            idx.append(split_idx[i - 1])
        else:
            flat += split_preds[i]
    # Flush the last accumulated group
    flat_preds.append(flat)
    idx.append(split_idx[-1])
    return flat_preds, idx
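

# Illustrative usage sketch: the tags and indexes below are hypothetical and
# not taken from the project's data. It shows how chunks that share an
# original sentence id are merged back into one prediction list per sentence.
if __name__ == "__main__":
    split_preds = [["B-PER", "O"], ["O", "O"], ["B-LOC"]]
    split_idx = [0, 0, 1]  # the first two chunks come from the same sentence
    preds, ids = map_split_preds_to_idx(split_preds, split_idx)
    # preds -> [["B-PER", "O", "O", "O"], ["B-LOC"]]
    # ids   -> [0, 1]
    print(preds, ids)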