in src/source_filter.py [0:0]
def _get_words(line: str) -> Set[str]:
    """
    Collect the words of a line, keeping each one in its original casing and in lowercase.

    :param line: Line from which to get words.
    :return: Set holding every whitespace-separated word of the cleaned line as written,
        together with the lowercased form of each word.
    """
    # run the trimmed line through the module-level PUNCTUATION_STRIPPER translation table
    # (defined elsewhere in this file; it is expected to delete punctuation characters)
    stripped = line.strip().translate(PUNCTUATION_STRIPPER)
    # start from the words exactly as written, so case-sensitive entries such as 'Mr' still match
    words = set(stripped.split())
    # then add the lowercased variants for case-insensitive lookups
    words.update(stripped.lower().split())
    return words