in src/lighteval/metrics/utils/extractive_match_utils.py [0:0]
def make_latex_env_pattern(prefix: str = "", context: Literal["boxed", "plain"] = "plain") -> str:
    r"""Build the source of a regex that matches LaTeX math environments.

    The returned string is an uncompiled, non-capturing alternation over the
    supported environments (``$$...$$``, ``\[...\]``, ``$...$``, ``\(...\)`` and
    a whitespace-delimited ``[...]``), optionally followed by whitespace and a
    percent marker. Every named group starts with ``prefix`` so that several
    patterns built by this function can be combined into one larger regex
    without group-name collisions.

    Args:
        prefix (str): Text prepended to every named group in the pattern.
        context (Literal["boxed", "plain"]): What an environment body must look like.
            - "boxed": the body must contain a \boxed{...} expression
            - "plain": any non-empty body; a standalone \frac{a}{b} with simple
              numeric arguments is matched as well
            Any other value produces the plain bodies without the fraction
            alternative.

    Returns:
        str: Regex source whose environment body is captured in
        ``{prefix}latex<Kind>`` and whose optional percent marker is captured
        in ``{prefix}percent``.
    """
    # One row per environment:
    # (opening delimiter, group-name suffix, single-step body atom, closing delimiter)
    environments = (
        # Display math: atoms accept newlines, so bodies may span lines.
        (r"(?<!\\)\$\$", "latexDisplayDollar", r"(?:[^$]|\$(?!\$))", r"(?<!\\)\$\$"),
        # Atom: any non-backslash, or a backslash that does not start "\]".
        (r"(?<!\\)\\\[", "latexDisplayBracket", r"(?:[^\\]|\\(?!\]))", r"(?<!\\)\\\]"),
        # Inline math: atoms reject newlines, so bodies stay on one line.
        # The opening "$" must not follow a backslash or a digit.
        (r"(?<!\\|\d)\$", "latexInlineDollar", r"(?:\\[$]|[^\n$])", r"(?<!\\)\$"),
        (r"(?<!\\)\\\(", "latexInlineParenthesis", r"(?:[^\\\n]|\\(?!\)))", r"(?<!\\)\\\)"),
        (r"\s\[", "latexInlineBracket", r"[^\n\]\[]", r"\]\s"),
    )

    def body_for(atom: str) -> str:
        """Turn a single-step atom into the full (lazy) environment body."""
        if context == "boxed":
            # Lazy filler, a mandatory \boxed{...}, then lazy filler again.
            return rf"{atom}*?\\boxed{{{atom}+?}}{atom}*?"
        return rf"{atom}+?"

    alternatives = [
        rf"{opener}(?P<{prefix}{group}>{body_for(atom)}){closer}"
        for opener, group, atom, closer in environments
    ]

    if context == "plain":
        # Bare fraction outside any math delimiters, e.g. \frac{1}{2}.
        number = r"-?\d+(?:[.,]\d+)?"
        alternatives.append(rf"(?P<{prefix}latexFraction>-?\\frac{{{number}}}{{{number}}})")

    percent = rf"(?P<{prefix}percent>(?:\\?%|[Pp]ercent|[Pp]ercentage|[Pp]ct))"
    joined = "|".join(alternatives)
    # Non-capturing wrapper: one environment, optional whitespace, optional percent marker.
    return rf"(?:(?:{joined})\s*{percent}?)"