in src/lighteval/tasks/extended/ifeval/instructions_utils.py [0:0]
def split_into_sentences(text):
    """Break a block of text into its individual sentences.

    Two temporary markers drive the algorithm: ``<prd>`` stands in for a
    period that must not terminate a sentence, and ``<stop>`` marks a
    sentence boundary. Periods covered by the module-level patterns are
    first rewritten to ``<prd>``; every remaining ``.``, ``?`` and ``!``
    then gets a ``<stop>`` appended; finally the ``<prd>`` markers are
    turned back into periods and the text is cut at each ``<stop>``.

    Args:
      text: A string containing one or more sentences.

    Returns:
      A list of strings, one per sentence, each stripped of surrounding
      whitespace. A trailing empty piece is dropped.
    """

    def _mask_dot_run(match):
        # One "<prd>" per matched character, then a sentence boundary.
        return "<prd>" * len(match[0]) + "<stop>"

    # Surround with spaces so the space-anchored patterns below can match at
    # the very start/end of the input, and flatten newlines to spaces.
    marked = (" " + text + " ").replace("\n", " ")

    # NOTE: the order of the substitutions matters; later passes rely on the
    # placeholders introduced by earlier ones.
    marked = re.sub(_PREFIXES, r"\1<prd>", marked)
    marked = re.sub(_WEBSITES, r"<prd>\1", marked)
    marked = re.sub(f"{_DIGITS}[.]{_DIGITS}", r"\1<prd>\2", marked)
    marked = re.sub(_MULTIPLE_DOTS, _mask_dot_run, marked)
    marked = marked.replace("Ph.D.", "Ph<prd>D<prd>")
    marked = re.sub(rf"\s{_ALPHABETS}[.] ", r" \1<prd> ", marked)
    # An acronym followed by a sentence starter does end the sentence.
    marked = re.sub(f"{_ACRONYMS} {_STARTERS}", r"\1<stop> \2", marked)
    # Dotted three-part sequences first, then two-part ones.
    marked = re.sub(
        f"{_ALPHABETS}[.]{_ALPHABETS}[.]{_ALPHABETS}[.]",
        r"\1<prd>\2<prd>\3<prd>",
        marked,
    )
    marked = re.sub(f"{_ALPHABETS}[.]{_ALPHABETS}[.]", r"\1<prd>\2<prd>", marked)
    # A suffix followed by a sentence starter closes the sentence; any other
    # suffix period is protected.
    marked = re.sub(f" {_SUFFIXES}[.] {_STARTERS}", r" \1<stop> \2", marked)
    marked = re.sub(f" {_SUFFIXES}[.]", r" \1<prd>", marked)
    marked = re.sub(f" {_ALPHABETS}[.]", r" \1<prd>", marked)

    # Move terminal punctuation outside a closing quote so the quote stays
    # attached to the sentence it closes.
    quote_swaps = (
        (".”", "”."),
        ('."', '".'),
        ('!"', '"!'),
        ('?"', '"?'),
    )
    for inside, outside in quote_swaps:
        marked = marked.replace(inside, outside)

    # Every terminator still present is a real sentence end.
    for terminator in (".", "?", "!"):
        marked = marked.replace(terminator, terminator + "<stop>")

    # Restore the protected periods, then cut at the boundaries.
    marked = marked.replace("<prd>", ".")
    pieces = [piece.strip() for piece in marked.split("<stop>")]
    if pieces and pieces[-1] == "":
        del pieces[-1]
    return pieces