# in model_code/pos_tag.py [0:0]
def tag_and_write(tgt, src, output, dataset, dataset_name):
    """Collect target prefixes ending at verbs, nouns and adjectives and write them out.

    Each line of ``tgt`` is stripped and passed to ``nlp`` (defined outside
    this function; the use of ``token.pos_`` suggests a spaCy pipeline --
    confirm). For every token whose ``pos_`` is ``"VERB"``, ``"NOUN"`` or
    ``"ADJ"``, two things are recorded:

    * the whitespace-separated words of the target line from the start up to
      and including position ``i`` (the token's index in the doc), joined
      with single spaces;
    * the stripped entry of ``src`` at the same line index.

    The six resulting lists are handed to ``file_writer`` under the names
    ``<output>/<dataset>.<dataset_name>_<verb|noun|adj>_<source|target>``,
    all three ``_source`` files first, then all three ``_target`` files.

    Args:
        tgt: Iterable of target-side strings.
        src: Indexable collection of source-side strings, looked up by the
            line index of ``tgt``.
        output: Directory part of the output paths (no trailing slash is
            added or removed; a ``/`` is always inserted after it).
        dataset: First component of the output file name.
        dataset_name: Second component of the output file name.

    Returns:
        None. The results are only passed to ``file_writer``.

    Raises:
        IndexError: If a target line with at least one matching token has no
            entry at the same index in ``src``.
    """
    # POS tag -> label used in the output file names. Insertion order also
    # fixes the order in which the files are written (verb, noun, adj).
    pos_labels = {"VERB": "verb", "NOUN": "noun", "ADJ": "adj"}
    prefixes = {pos: [] for pos in pos_labels}
    sources = {pos: [] for pos in pos_labels}

    for num, line in enumerate(tgt):
        doc = nlp(line.strip())
        # Split once per line instead of once per matching token.
        words = line.split()
        for i, token in enumerate(doc):
            pos = token.pos_
            if pos not in pos_labels:
                continue
            # NOTE(review): ``i`` is the token's index in ``doc`` but is used
            # to slice the whitespace-split words. The two only line up when
            # ``nlp`` tokenizes purely on whitespace -- confirm against the
            # pipeline configuration.
            prefixes[pos].append(" ".join(words[: i + 1]))
            # Looked up only on a match, so a shorter ``src`` fails exactly
            # where it did before (never on lines without matching tokens).
            sources[pos].append(src[num].strip())

    base = f"{output}/{dataset}.{dataset_name}"
    for pos, label in pos_labels.items():
        file_writer(f"{base}_{label}_source", sources[pos])
    for pos, label in pos_labels.items():
        file_writer(f"{base}_{label}_target", prefixes[pos])