def simple_format()

in bigquery_etl/format_sql/formatter.py [0:0]


def simple_format(tokens, indent="  "):
    """Format tokens in a single pass.

    Consumes ``tokens`` once and yields every non-whitespace token, interleaved
    with newly created ``Whitespace`` tokens. Incoming ``Whitespace`` tokens are
    dropped, except while formatting is switched off: a ``Comment`` whose
    ``format_off`` is truthy disables formatting and one whose ``format_on`` is
    truthy re-enables it. While disabled, original whitespace is passed through
    and no new whitespace is generated.

    While formatting is enabled, ``ReservedKeyword`` values are upper-cased with
    each run of whitespace collapsed to one space, and
    ``BuiltInFunctionIdentifier`` values are upper-cased.

    Args:
        tokens: iterable of token objects.
        indent: string repeated once per open indentation level after each
            generated newline.

    Yields:
        Tokens, in input order, with generated ``Whitespace`` between them.
    """
    # stack of token classes, one entry per currently open indentation level
    open_levels = []

    def close_through(marker):
        # drop levels until one that is ``marker`` has been removed
        while open_levels and open_levels.pop() is not marker:
            pass

    formatting_on = True
    at_start = True
    # what the previously emitted token implies for the current one
    newline_due = False
    space_ok = False
    space_ok_before_bracket = False
    after_block_end = False
    after_jinja = False
    after_separator = False
    after_unary = False
    # True while an operator would have no preceding argument
    unary_position = True

    for token in tokens:
        if isinstance(token, Whitespace):
            # original whitespace only survives while formatting is disabled
            if not formatting_on:
                yield token
            continue

        # ---- adjust state for the token about to be emitted ----
        if isinstance(token, Comment):
            # comments can enable or disable formatting
            if formatting_on:
                if token.format_off:
                    formatting_on = False
            elif token.format_on:
                formatting_on = True
        elif isinstance(token, ClosingBracket):
            # back out to the level of the matching OpeningBracket
            close_through(OpeningBracket)
        elif isinstance(token, TopLevelKeyword):
            # a new top level keyword replaces the previous one's indent
            if open_levels and open_levels[-1] is TopLevelKeyword:
                open_levels.pop()
        elif isinstance(token, BlockEndKeyword):
            # back out to the level of the matching BlockKeyword
            close_through(BlockKeyword)
            after_separator = False
        elif isinstance(token, JinjaBlockEnd):
            # back out to the level of the matching JinjaBlockStart
            close_through(JinjaBlockStart)
        elif isinstance(token, CaseSubclause):
            if token.value.upper() in ("WHEN", "ELSE"):
                # keep WHEN and ELSE exactly one level deeper than CASE
                while open_levels and open_levels[-1] is CaseSubclause:
                    open_levels.pop()
        elif isinstance(
            token, (AliasSeparator, ExpressionSeparator, FieldAccessOperator)
        ):
            if after_block_end or after_jinja:
                newline_due = False

        # ---- emit whitespace in front of the token ----
        # nothing is generated before the first token, before a statement
        # separator, or while formatting is disabled
        if (
            formatting_on
            and not at_start
            and not isinstance(token, StatementSeparator)
        ):
            if isinstance(token, Comment):
                # blank line before a comment that starts on its own line right
                # after a statement separator, and before #fail and #warn
                starts_on_own_line = token.value.startswith("\n")
                if (starts_on_own_line and after_separator) or re.fullmatch(
                    r"\s*#(fail|warn)", token.value
                ):
                    yield Whitespace("\n")
            elif (
                newline_due
                or after_separator
                or isinstance(
                    token,
                    (
                        NewlineKeyword,
                        ClosingBracket,
                        BlockKeyword,
                        JinjaBlockStatement,
                    ),
                )
            ):
                if after_separator:
                    # extra newline separates statements with a blank line
                    yield Whitespace("\n")
                yield Whitespace("\n" + indent * len(open_levels))
            else:
                attaches_to_previous = isinstance(
                    token, (FieldAccessOperator, ExpressionSeparator)
                )
                bracket_without_space = (
                    isinstance(token, OpeningBracket) and not space_ok_before_bracket
                )
                unary_operand = after_unary and isinstance(
                    token, (Literal, Identifier)
                )
                if space_ok and not (
                    attaches_to_previous or bracket_without_space or unary_operand
                ):
                    yield Whitespace(" ")

        # ---- normalize and emit the token itself ----
        if formatting_on:
            if isinstance(token, ReservedKeyword):
                # upper case, with inner whitespace reduced to single spaces
                normalized = re.sub(r"\s+", " ", token.value.upper())
                token = replace(token, value=normalized)
            elif isinstance(token, BuiltInFunctionIdentifier):
                token = replace(token, value=token.value.upper())

        yield token

        # ---- record what the next token needs to know ----
        after_block_end = isinstance(token, BlockEndKeyword)
        after_separator = isinstance(token, StatementSeparator)
        after_jinja = isinstance(
            token, (JinjaExpression, JinjaComment, JinjaStatement)
        )
        space_ok = not isinstance(token, FieldAccessOperator)
        space_ok_before_bracket = isinstance(
            token, (SpaceBeforeBracketKeyword, Operator)
        )
        # must read unary_position before it is refreshed just below
        after_unary = unary_position and isinstance(token, Operator)
        if not isinstance(token, Comment):
            # comments do not change whether the next operator has an argument
            unary_position = not isinstance(
                token, (Literal, Identifier, ClosingBracket)
            )
        newline_due = isinstance(
            token,
            (
                Comment,
                BlockKeyword,
                TopLevelKeyword,
                OpeningBracket,
                ExpressionSeparator,
                StatementSeparator,
                JinjaStatement,
            ),
        )

        if isinstance(token, TopLevelKeyword) and token.value == "WITH":
            # CTEs are not indented and the first one stays on the WITH line
            newline_due = False
        elif isinstance(token, BlockStartKeyword):
            # one level deeper
            open_levels.append(BlockKeyword)
        elif isinstance(token, JinjaBlockStart):
            # one level deeper
            open_levels.append(JinjaBlockStart)
        elif isinstance(token, (TopLevelKeyword, OpeningBracket, CaseSubclause)):
            # one level deeper, remembered under the token's own class
            open_levels.append(type(token))
        elif isinstance(token, StatementSeparator):
            # the statement ended, so its top level keyword indent ends too
            if open_levels and open_levels[-1] is TopLevelKeyword:
                open_levels.pop()
        at_start = False