in bigquery_etl/format_sql/formatter.py [0:0]
def simple_format(tokens, indent=" "):
    """Format tokens in a single pass.

    Generator that walks ``tokens`` once. Original ``Whitespace`` tokens are
    dropped (unless formatting is switched off) and new ``Whitespace`` tokens
    are yielded in front of each remaining token according to the state
    collected from the tokens seen so far. Reserved keywords and built-in
    function identifiers are upper-cased on the way out.

    Formatting can be toggled by ``Comment`` tokens: a comment whose
    ``format_off`` attribute is truthy disables formatting, and a later comment
    whose ``format_on`` attribute is truthy re-enables it. While disabled, every
    token (including original whitespace) is passed through unchanged, but the
    indentation stack is still maintained.

    Args:
        tokens: Iterable of token objects; they are classified with
            ``isinstance`` checks and their text is read from ``.value``.
        indent: String repeated once per open indentation level whenever a
            new line is started.

    Yields:
        The input tokens (possibly replaced by upper-cased copies) interleaved
        with newly created ``Whitespace`` tokens.
    """
    # True until the first non-whitespace token has been processed.
    first_token = True
    # State derived from the previously emitted token; recomputed at the bottom
    # of every loop iteration.
    require_newline_before_next_token = False
    allow_space_before_next_bracket = False
    allow_space_before_next_token = False
    prev_was_block_end = False
    prev_was_jinja = False
    prev_was_statement_separator = False
    prev_was_unary_operator = False
    # An operator seen before any operand (e.g. at the very start) is unary.
    next_operator_is_unary = True
    # Stack of token classes that each opened one indentation level; the
    # current indentation depth is len(indent_types).
    indent_types = []
    # False between a "format off" comment and the next "format on" comment.
    can_format = True
    for token in tokens:
        # skip original whitespace tokens, unless formatting is disabled
        if isinstance(token, Whitespace):
            if not can_format:
                yield token
            # whitespace never updates any of the state below
            continue

        # --- update state for current token (before emitting it) ---
        if isinstance(token, Comment):
            # enable or disable formatting
            if can_format and token.format_off:
                can_format = False
            elif not can_format and token.format_on:
                can_format = True
        elif isinstance(token, ClosingBracket):
            # decrease indent to match last OpeningBracket: pop entries up to
            # and including the nearest OpeningBracket (or until the stack is empty)
            while indent_types and indent_types.pop() is not OpeningBracket:
                pass
        elif isinstance(token, TopLevelKeyword):
            # decrease indent from previous TopLevelKeyword, so consecutive
            # top level keywords line up at the same depth
            if indent_types and indent_types[-1] is TopLevelKeyword:
                indent_types.pop()
        elif isinstance(token, BlockEndKeyword):
            # decrease indent to match last BlockKeyword
            while indent_types and indent_types.pop() is not BlockKeyword:
                pass
            # suppress the extra blank line that is otherwise emitted after a
            # statement separator (see the newline branch below)
            prev_was_statement_separator = False
        elif isinstance(token, JinjaBlockEnd):
            # decrease indent to match last JinjaBlockStart
            while indent_types and indent_types.pop() is not JinjaBlockStart:
                pass
        elif isinstance(token, CaseSubclause):
            if token.value.upper() in ("WHEN", "ELSE"):
                # Have WHEN and ELSE clauses indented one level more than CASE:
                # drop the indent added by any preceding case subclause first.
                while indent_types and indent_types[-1] is CaseSubclause:
                    indent_types.pop()
        elif isinstance(
            token, (AliasSeparator, ExpressionSeparator, FieldAccessOperator)
        ):
            # keep these tokens on the same line when they directly follow a
            # block end keyword or a jinja token
            if prev_was_block_end or prev_was_jinja:
                require_newline_before_next_token = False

        # --- yield whitespace to place in front of the current token ---
        if not can_format or isinstance(token, StatementSeparator) or first_token:
            # no new whitespace when formatting is disabled
            # no space before statement separator
            # no space before first token
            pass
        elif isinstance(token, Comment):
            # blank line before comments if they start on their own line
            # and come after a statement separator, and before #fail and #warn;
            # no other whitespace is generated in front of a comment here
            if token.value.startswith("\n") and prev_was_statement_separator:
                yield Whitespace("\n")
            elif re.fullmatch(r"\s*#(fail|warn)", token.value):
                yield Whitespace("\n")
        elif (
            require_newline_before_next_token
            or isinstance(
                token,
                (NewlineKeyword, ClosingBracket, BlockKeyword, JinjaBlockStatement),
            )
            or prev_was_statement_separator
        ):
            # start a new line at the current indentation depth, preceded by
            # an empty line when a new statement begins
            if prev_was_statement_separator:
                yield Whitespace("\n")
            yield Whitespace("\n" + indent * len(indent_types))
        elif (
            allow_space_before_next_token
            and (
                allow_space_before_next_bracket or not isinstance(token, OpeningBracket)
            )
            and not isinstance(token, (FieldAccessOperator, ExpressionSeparator))
            and not (
                prev_was_unary_operator and isinstance(token, (Literal, Identifier))
            )
        ):
            # single space, except: after a field access operator, before an
            # opening bracket that does not follow a keyword/operator allowing
            # it, before a field access operator or expression separator, and
            # between a unary operator and its literal/identifier operand
            yield Whitespace(" ")

        # --- emit the token itself ---
        if can_format:
            # uppercase keywords and replace contained whitespace with single spaces
            # (`replace` is not defined in this block; presumably
            # dataclasses.replace, returning a modified copy -- TODO confirm)
            if isinstance(token, ReservedKeyword):
                token = replace(token, value=re.sub(r"\s+", " ", token.value.upper()))
            # uppercase built-in function names
            elif isinstance(token, BuiltInFunctionIdentifier):
                token = replace(token, value=token.value.upper())
        yield token

        # --- update state for next token ---
        require_newline_before_next_token = isinstance(
            token,
            (
                Comment,
                BlockKeyword,
                TopLevelKeyword,
                OpeningBracket,
                ExpressionSeparator,
                StatementSeparator,
                JinjaStatement,
            ),
        )
        allow_space_before_next_token = not isinstance(token, FieldAccessOperator)
        prev_was_block_end = isinstance(token, BlockEndKeyword)
        prev_was_statement_separator = isinstance(token, StatementSeparator)
        # uses next_operator_is_unary as computed from the token *before* this
        # one; it is only refreshed further down
        prev_was_unary_operator = next_operator_is_unary and isinstance(token, Operator)
        prev_was_jinja = isinstance(
            token, (JinjaExpression, JinjaComment, JinjaStatement)
        )
        if not isinstance(token, Comment):
            # format next operator as unary if there is no preceding argument;
            # comments leave this state untouched
            next_operator_is_unary = not isinstance(
                token, (Literal, Identifier, ClosingBracket)
            )
            # NOTE(review): nesting of this assignment under the Comment guard
            # was reconstructed -- confirm against the upstream file
            allow_space_before_next_bracket = isinstance(
                token, (SpaceBeforeBracketKeyword, Operator)
            )
        if isinstance(token, TopLevelKeyword) and token.value == "WITH":
            # don't indent CTE's and don't put the first one on a new line
            # (note: no indent level is pushed for WITH)
            require_newline_before_next_token = False
        elif isinstance(token, BlockStartKeyword):
            # increase indent; recorded as BlockKeyword so that the
            # BlockEndKeyword handling above can find it
            indent_types.append(BlockKeyword)
        elif isinstance(token, JinjaBlockStart):
            # increase indent
            indent_types.append(JinjaBlockStart)
        elif isinstance(token, (TopLevelKeyword, OpeningBracket, CaseSubclause)):
            # increase indent; the concrete class of the token is recorded, and
            # the pops above compare stack entries by identity
            indent_types.append(type(token))
        elif isinstance(token, StatementSeparator):
            # decrease for previous top level keyword
            if indent_types and indent_types[-1] is TopLevelKeyword:
                indent_types.pop()
        first_token = False