def normalize

def normalize_latex()

in src/latex2sympy2_extended/math_normalization.py [0:0]
40 lines of code
23 McCabe index (conditional complexity)

def _normalize_basic_latex(text: str) -> str:
    """Apply the basic latex command replacements and clean-ups to ``text``."""
    # Literal rewrites, applied in this order: upright ``T``/``d`` become
    # plain letters and a bracket-delimited ``matrix`` becomes ``bmatrix``.
    literal_rewrites = (
        (r'\mathrm{T}', 'T'),
        (r'\mathrm{d}', 'd'),
        (r'{\rm d}', 'd'),
        (r'\left[\begin{matrix}', r'\begin{bmatrix}'),
        (r'\end{matrix}\right]', r'\end{bmatrix}'),
    )
    for old, new in literal_rewrites:
        text = text.replace(old, new)

    # Keep only the first capture group of each match (presumably this drops
    # the \left / \right sizing prefixes -- confirm against the regex
    # definitions).
    text = r_left.sub(r'\1', text)
    text = r_right.sub(r'\1', text)

    # Rewrite a matched permutation (groups 1 and 2) as (g1)! / ((g1)-(g2))!
    text = permutation_regex.sub(r"\\frac{(\1)!}{((\1)-(\2))!}", text)

    # Remove useless latex commands
    text = to_remove_regex.sub("", text)
    text = replace_in_latex(text)

    # Remove new lines and simplify tabs
    text = text.replace("\n", " ").replace("\t", " ")

    # Fix doubled backslashes in commands. Skipped whenever the text mentions
    # "matrix" (presumably because `\\` is a row separator there -- confirm).
    if "matrix" not in text:
        text = command_slash_fix_regex.sub(r"\\", text)
    return text


def _keep_last_equation_part(text: str) -> str:
    """Return the last piece of a chained equation, or ``text`` unchanged."""
    # This is to ensure that a=1,b=2 is not split
    if "," in text or ";" in text:
        return text
    eq_parts = equation_split_regex.split(text)
    # Only shorten if there are more than 2 parts, otherwise keep the
    # equation as is.
    return eq_parts[-1] if len(eq_parts) > 2 else text


def _strip_units(text: str) -> str:
    """Remove units from ``text`` without ever reducing it to an empty string.

    NOTE: per the original comments, unit removal can leave an empty
    ``\\text{...}`` behind and care is needed not to remove the space this
    created; nothing in this function cleans that up.
    """
    # Remove the units and possibly the superscript
    candidate = unit_superscript_regex.sub("", text).strip()
    if candidate:  # an empty result means "keep what we had"
        text = candidate

    # Remove unit texts; two passes, each keeping capture group 1 of a match
    for _ in range(2):
        candidate = units_regex.sub(r"\1", text)
        if candidate:
            text = candidate
    return text


def _fix_nits(text: str) -> str:
    """Fix a leading decimal point and turn exactly ``0.5`` into a fraction."""
    # Fix leading decimal
    if text.startswith("."):
        text = "0" + text
    # Fix 0.5 to fraction (also reached by ".5" via the step above)
    if text == "0.5":
        text = "\\frac{1}{2}"
    return text


def normalize_latex(text: str, config: NormalizationConfig) -> str:
    """Normalize latex string according to the provided configuration.

    The stages run in a fixed order, each gated by one attribute of
    ``config``: ``boxed`` (only the values ``"all"`` and ``"last"`` trigger
    boxed-content extraction), ``basic_latex``, ``equations`` (deprecated,
    logs a warning on every call), ``units``, ``nits`` and
    ``malformed_operators``.

    Args:
        text: The latex string to normalize
        config: Configuration controlling which normalizations to apply

    Returns:
        The normalized latex string, with leading/trailing whitespace stripped
    """
    if config.boxed in ("all", "last"):
        text = extract_boxed_content(text, mode=config.boxed)

    if config.basic_latex:
        text = _normalize_basic_latex(text)

    if config.equations:
        logger.warning("equations is deprecated, as it handled by the parser now")
        text = _keep_last_equation_part(text)

    if config.units:
        text = _strip_units(text)

    if config.nits:
        text = _fix_nits(text)

    if config.malformed_operators:
        # Fix malformed operators
        text = _fix_malformed_operators(text)
        text = _fix_sqrt(text)
        text = _fix_fracs(text)
        text = _fix_a_slash_b(text)

    return text.strip()