in src/morphological_filtering.py [0:0]
def _get_gender_dict() -> Dict[str, str]:
    """Build the word-to-gender-label mapping for the supported DE genders.

    Each line of ``GERMAN_MORPH_DICT`` is split on whitespace; the first
    field is taken as the word and the second as a comma-separated tag.
    A line contributes only when it has at least two fields, its tag has
    more than two comma-separated parts, the first part is ``'NN'`` and
    the second part is ``'fem'`` or ``'masc'``. Words are lowercased
    before being recorded.

    Returns:
        A dict mapping each lowercased word to ``FEM_LABEL`` or
        ``MSC_LABEL``. Words seen with both genders are left out.
    """
    feminine, masculine = set(), set()
    # NOTE(review): opened with the platform default encoding -- confirm
    # this matches the encoding of the morphology dictionary file.
    with open(GERMAN_MORPH_DICT, 'r') as handle:
        for raw in handle:
            fields = raw.strip().split()
            if len(fields) <= 1:
                # blank line or a word without a tag column
                continue
            tag_parts = fields[1].split(',')
            if len(tag_parts) <= 2 or tag_parts[0] != 'NN':
                continue
            gender = tag_parts[1]
            if gender == 'fem':
                feminine.add(fields[0].lower())
            elif gender == 'masc':
                masculine.add(fields[0].lower())

    # Ambiguous words (attested with both genders) are dropped from both sides.
    gender_dict = {
        word: FEM_LABEL for word in feminine if word not in masculine
    }
    gender_dict.update(
        (word, MSC_LABEL) for word in masculine if word not in feminine
    )
    return gender_dict