in src/source_filter.py [0:0]
def _get_gender(line: str) -> str:
    """
    Classify a line as feminine, masculine, or other from its vocabulary.

    The words of the line (as produced by ``_get_words``) are intersected with
    four module-level wordlists. The rules are applied in this order:

    1. Feminine: the line shares at least one word with ``FEM_PRO`` and none
       with ``MSC_WORDS``.
    2. Masculine: the line shares at least one word with ``MSC_PRO`` and none
       with ``FEM_WORDS``.
    3. Other: everything else.

    :param line: Line for which to get the gender.
    :return: ``FEM_LABEL``, ``MSC_LABEL`` or ``OTHER_LABEL``.
    """
    tokens = _get_words(line)

    # A non-empty intersection means the line uses a word from that list.
    fem_pronoun_hits = tokens & FEM_PRO
    msc_pronoun_hits = tokens & MSC_PRO
    fem_word_hits = tokens & FEM_WORDS
    msc_word_hits = tokens & MSC_WORDS

    # The feminine rule is tested first, so it wins if both rules would match.
    if fem_pronoun_hits and not msc_word_hits:
        label = FEM_LABEL
    elif msc_pronoun_hits and not fem_word_hits:
        label = MSC_LABEL
    else:
        label = OTHER_LABEL
    return label