def _normalize_tokens()

in utils/span_heuristic.py [0:0]


def _normalize_tokens(tokens: List[str], keep_empty_str=True) -> List[str]:
    """
    Normalize individual tokens.

    Each token is lowercased, has every character in EXCLUDED_PUNCTS
    removed, and has any ARTICLES_RE matches stripped out.

    If keep_empty_str is True, the output has exactly one entry per input
    token (a token may normalize to the empty string); otherwise tokens
    that normalize to the empty string are dropped.
    """
    def _clean(tok: str) -> str:
        # Lowercase first so punctuation/article stripping sees canonical case.
        lowered = tok.lower()
        without_punct = ''.join(c for c in lowered if c not in EXCLUDED_PUNCTS)
        return re.sub(ARTICLES_RE, '', without_punct)

    cleaned = (_clean(tok) for tok in tokens)
    if keep_empty_str:
        return list(cleaned)
    # Drop tokens that normalized away entirely.
    return [tok for tok in cleaned if tok]