in src/morphological_filtering.py [0:0]
def _get_gender_per_word(self, sentence):
    """Label each distinct token of *sentence* by its final character.

    The sentence is split on whitespace, every piece is passed through
    ``self.tokenizer`` and its ``.text`` is kept. Duplicate token texts are
    collapsed, so the result holds one label per *distinct* token, ordered
    by first occurrence in the sentence.

    Args:
        sentence: Whitespace-separated text to analyse.

    Returns:
        A list with one label per distinct token: ``FEM_LABEL`` when the
        token's last character is in ``self.fem_chars``, ``MSC_LABEL`` when
        it is in ``self.msc_chars``, and ``OTHER_LABEL`` otherwise. The
        token "את" and empty token texts are always ``OTHER_LABEL``.
    """
    # De-duplicate token texts while keeping first-occurrence order; a plain
    # set would make the order of the returned labels depend on string hash
    # randomisation and therefore vary between runs.
    tokens = dict.fromkeys(
        self.tokenizer(tok).text for tok in sentence.split()
    )

    gender_per_word = []
    for word in tokens:
        # "את" is never classified by its last character.
        # NOTE(review): presumably because its final letter would otherwise
        # be read as a gender suffix - confirm.
        # An empty text has no last character, so it is treated the same way
        # instead of raising IndexError on word[-1].
        if not word or word == "את":
            gender_per_word.append(OTHER_LABEL)
            continue

        last_char = word[-1]
        if last_char in self.fem_chars:
            gender_per_word.append(FEM_LABEL)
        elif last_char in self.msc_chars:
            gender_per_word.append(MSC_LABEL)
        else:
            gender_per_word.append(OTHER_LABEL)
    return gender_per_word