def split_and_duplicate_index()

in code/source/sentence_preprocessing.py [0:0]


def split_and_duplicate_index(sentences, max):
    """
    Split every sentence into consecutive chunks of at most ``max`` items.

    Each chunk is paired with the position of the sentence it came from, so
    that results computed per chunk (e.g. at inference time) can be mapped
    back to the original sentence.

    Args:
        sentences: Iterable of sentences; each sentence must support ``len``
            and slicing (e.g. a list of tuples).
        max: Maximum number of items per chunk; must be at least 1. The name
            shadows the builtin but is kept so existing keyword callers
            continue to work.

    Returns:
        A ``(chunks, index)`` pair of equal-length flat lists, where
        ``index[k]`` is the position in ``sentences`` of the sentence that
        ``chunks[k]`` was cut from. An empty sentence yields no chunk and
        therefore no entry in ``index``.

    Raises:
        ValueError: If ``max`` is smaller than 1.
    """
    # A negative step would make range() empty and silently drop every
    # sentence; a zero step only fails inside range() with an unrelated
    # message. Reject both up front.
    if max < 1:
        raise ValueError("max must be a positive integer, got %r" % (max,))

    new = []
    index = []
    for i, data in enumerate(sentences):
        # Fill the flat output lists directly instead of building nested
        # lists and flattening them in a second pass.
        for start in range(0, len(data), max):
            new.append(data[start:start + max])
            index.append(i)
    return new, index