in src/fmeval/eval_algorithms/util.py [0:0]
def normalize_text_quac_protocol(text: str) -> str:
    """
    Normalize a text using the SQUAD / QUAC protocol.

    Inspired by HELM: https://github.com/stanford-crfm/helm/blob/62f817eb695a31e8389e3f7be30609d3f0871837/src/helm/benchmark/metrics/basic_metrics.py
    The text is lowercased, punctuation characters are removed, the result is split into
    whitespace-separated tokens, articles are dropped, and the remaining tokens are joined
    with single spaces.
    SQUAD (https://worksheets.codalab.org/rest/bundles/0x6b567e1cf2e041ec80d7098f031c5c9e/contents/blob/) and
    QuAC benchmarks (https://s3.amazonaws.com/my89public/quac/scorer.py) use this protocol to normalize text before evaluating it.
    HELM (https://github.com/stanford-crfm/helm/blob/62f817eb695a31e8389e3f7be30609d3f0871837/src/helm/benchmark/metrics/basic_metrics.py#L116)
    and HuggingFace evaluate (https://github.com/huggingface/evaluate/blob/775555d80af30d83dc6e9f42051840d29a34f31b/metrics/squad/compute_score.py#L11)
    also use this normalization procedure.

    Any run of whitespace (spaces, tabs, newlines, ...) acts as a token separator, so leading,
    trailing and repeated whitespace never reaches the output.

    :param text: The text that needs to be normalized.
    :returns: The normalized text: lowercased tokens joined by single spaces. An empty string is
        returned when no tokens remain.
    """
    lowercased = text.lower()
    # Drop every character listed in the module-level ENGLISH_PUNCTUATIONS collection.
    without_punctuation = "".join(
        character for character in lowercased if character not in ENGLISH_PUNCTUATIONS
    )
    # str.split() without a separator splits on any whitespace run and never yields empty
    # strings. Splitting on " " alone would leave tab/newline-separated words glued together,
    # so articles adjacent to them would not be recognised and removed.
    tokens = without_punctuation.split()
    return " ".join(token for token in tokens if token not in ENGLISH_ARTICLES)