in code/source/sentence_preprocessing.py [0:0]
def create_sentences_out_of_dataframe(data):
    """
    Group the rows of a tagged-token table into sentences.

    Rows are read in order. A row whose "Sentence #" value is not NaN starts a new
    sentence (e.g. a "Sentence: N" label); a row whose "Sentence #" value is NaN
    continues the sentence currently being built. Rows with a NaN marker that appear
    before any sentence start are collected into the first sentence.

    :param data: (pandas DataFrame) containing the columns "Sentence #", "Word" and "Tag";
    any object that returns equal-length iterables for these three keys also works
    :return: (list of lists of tuples) tagged sentences as a list of lists of (word, tag)
    tuples; an empty list when data has no rows
    """
    tagged_sentences = []
    current_sentence = []
    for marker, word, tag in zip(data['Sentence #'], data['Word'], data['Tag']):
        # NaN is the only value that is not equal to itself, so this comparison is
        # True exactly when the marker cell is filled in, i.e. a sentence starts here.
        starts_new_sentence = marker == marker
        if starts_new_sentence and current_sentence:
            # Close the previous sentence before opening the next one.
            tagged_sentences.append(current_sentence)
            current_sentence = []
        current_sentence.append((word, tag))
    # Flush the last sentence; skipped for empty input so no empty sentence is emitted.
    if current_sentence:
        tagged_sentences.append(current_sentence)
    return tagged_sentences