def make_latex_env_pattern()

in src/lighteval/metrics/utils/extractive_match_utils.py [0:0]


def make_latex_env_pattern(prefix: str = "", context: Literal["boxed", "plain"] = "plain") -> str:
    r"""Build a regex matching a LaTeX math environment with an optional percent suffix.

    The returned pattern is a single non-capturing group that alternates over the
    supported delimiters (``$$...$$``, ``\[...\]``, ``$...$``, ``\(...\)`` and a
    whitespace-surrounded ``[...]``). The content of whichever environment matched
    is exposed through a named group; an optional trailing percent marker is
    exposed through the ``{prefix}percent`` group.

    Args:
        prefix (str): Prefix added to every named group so that several of these
            patterns can be combined in one regex without group-name clashes.
        context (Literal["boxed", "plain"]): Kind of content accepted inside the
            environments.
            - "boxed": the environment must contain a \boxed{...} expression
            - "plain": any non-empty LaTeX content; additionally a bare
              \frac{a}{b} of two simple numbers is accepted without delimiters

    Returns:
        str: Regex source (not compiled) for the LaTeX environment followed by
        optional whitespace and an optional percent marker.
    """
    # Longest alternative first: regex alternation is ordered, so listing
    # "percent" before "percentage" would capture only the "percent" prefix.
    percent_re_group = rf"(?P<{prefix}percent>(?:\\?%|[Pp]ercentage|[Pp]ercent|[Pp]ct))"

    # Single-token building blocks allowed inside each kind of delimiter.
    # Anything but $, or a $ that does not start a closing $$
    display_dollar_atom = r"(?:[^$]|\$(?!\$))"
    # Either \ not followed by ] or everything but \
    display_bracket_atom = r"(?:[^\\]|\\(?!\]))"
    # An escaped dollar, or anything but a newline / bare dollar
    inline_dollar_atom = r"(?:\\[$]|[^\n$])"
    # Either \ not followed by ) or everything but \ and newline
    inline_parenthesis_atom = r"(?:[^\\\n]|\\(?!\)))"
    # No newlines and no nested square brackets
    inline_bracket_atom = r"[^\n\]\[]"

    def _content(atom: str) -> str:
        """Turn a single-token pattern into the full content pattern for `context`."""
        if context == "boxed":
            # A \boxed{...} is required; arbitrary content may surround it.
            return rf"{atom}*?\\boxed{{{atom}+?}}{atom}*?"
        # Any other context: at least one token, matched lazily.
        return rf"{atom}+?"

    display_dollar_content = _content(display_dollar_atom)
    display_content_bracket = _content(display_bracket_atom)
    inline_dollar_content = _content(inline_dollar_atom)
    inline_content_parenthesis = _content(inline_parenthesis_atom)
    inline_content_bracket = _content(inline_bracket_atom)

    # Build list of regex patterns; order matters because alternation is ordered
    # ($$ must be tried before $).
    patterns = [
        # Display math environments (allow multiline)
        rf"(?<!\\)\$\$(?P<{prefix}latexDisplayDollar>{display_dollar_content})(?<!\\)\$\$",
        rf"(?<!\\)\\\[(?P<{prefix}latexDisplayBracket>{display_content_bracket})(?<!\\)\\\]",
        # Inline math environments (single line only).
        # The opening $ must not follow a backslash or a digit.
        rf"(?<!\\|\d)\$(?P<{prefix}latexInlineDollar>{inline_dollar_content})(?<!\\)\$",
        rf"(?<!\\)\\\((?P<{prefix}latexInlineParenthesis>{inline_content_parenthesis})(?<!\\)\\\)",
        rf"\s\[(?P<{prefix}latexInlineBracket>{inline_content_bracket})\]\s",
    ]
    if context == "plain":
        # Undelimited \frac{a}{b} where a and b are (optionally signed/decimal) numbers.
        simple_number = r"-?\d+(?:[.,]\d+)?"
        patterns.append(rf"(?P<{prefix}latexFraction>-?\\frac{{{simple_number}}}{{{simple_number}}})")

    # Join the alternatives with | inside a non-capturing group, then allow
    # optional whitespace and an optional percent marker after the environment.
    latex_env_re = rf"(?:(?:{'|'.join(patterns)})\s*{percent_re_group}?)"

    return latex_env_re