def create_sentences_out_of_dataframe()

in code/source/sentence_preprocessing.py [0:0]


def create_sentences_out_of_dataframe(data):
    """
    Group the rows of a tagged-token table into sentences.

    The rows are read in order from the columns "Sentence #", "Word" and "Tag".
    A row whose "Sentence #" value is not NaN starts a new sentence; a row whose
    "Sentence #" value is NaN continues the sentence currently being built.
    Rows that appear before the first sentence marker are collected into a
    sentence of their own.

    :param data: (pandas DataFrame, or any mapping of column name to iterable)
    providing the columns "Sentence #", "Word" and "Tag"
    :return: (list of lists of tuples) tagged sentences as list of lists of
    tuples (word, tag); an empty list when data has no rows
    """
    tagged_sentences = []
    current_sentence = []

    for marker, word, tag in zip(data['Sentence #'], data['Word'], data['Tag']):
        # NaN is the only value that compares unequal to itself, so this is
        # True exactly when the marker cell is filled in (sentence start).
        starts_sentence = marker == marker
        if starts_sentence and current_sentence:
            # Close the previous sentence before starting the new one.
            tagged_sentences.append(current_sentence)
            current_sentence = []
        current_sentence.append((word, tag))

    # Flush the sentence still being built; skip it when there were no rows
    # so that empty input does not yield a spurious empty sentence.
    if current_sentence:
        tagged_sentences.append(current_sentence)

    return tagged_sentences