in code/source/sentence_preprocessing.py [0:0]
def split_and_duplicate_index(sentences, max):
    """
    Split each sentence into consecutive chunks of at most ``max`` items.

    Every produced chunk is paired with the position of the sentence it came
    from, so results computed per chunk (e.g. at inference time) can be
    mapped back to the original sentence.

    Args:
        sentences: Sequence of sentences; each sentence must support ``len()``
            and slicing (the callers are expected to pass lists of tuples).
        max: Maximum number of items per chunk. Must be a positive integer.
            The name shadows the builtin inside this function but is kept so
            existing keyword callers keep working.

    Returns:
        A tuple ``(chunks, index)`` of two equally long flat lists:
        ``chunks[k]`` is a slice of ``sentences[index[k]]``. Chunks appear in
        sentence order, then in left-to-right order within a sentence.
        An empty sentence yields no chunk and therefore no index entry.

    Raises:
        ValueError: If ``max`` is zero or negative.
    """
    # A negative step would make range() empty and silently drop every
    # sentence; a zero step fails with an unrelated-looking range() error.
    # Reject both up front with a clear message.
    if max <= 0:
        raise ValueError("max must be a positive integer, got %r" % (max,))

    chunks = []
    index = []
    for i, data in enumerate(sentences):
        # Step through the sentence in strides of ``max``; the last slice is
        # simply shorter when len(data) is not a multiple of ``max``.
        for start in range(0, len(data), max):
            chunks.append(data[start:start + max])
            index.append(i)
    return chunks, index