in src/tokenizer.rs [938:1694]
fn next_token(
&self,
chars: &mut State,
prev_token: Option<&Token>,
) -> Result<Option<Token>, TokenizerError> {
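// Dispatch on the next character: each arm consumes exactly the
// characters that belong to its token and returns at most one token.
// `Ok(None)` signals end of input.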
match chars.peek() {
Some(&ch) => match ch {
' ' => self.consume_and_return(chars, Token::Whitespace(Whitespace::Space)),
'\t' => self.consume_and_return(chars, Token::Whitespace(Whitespace::Tab)),
'\n' => self.consume_and_return(chars, Token::Whitespace(Whitespace::Newline)),
'\r' => {
// Emit a single Whitespace::Newline token for \r and \r\n
chars.next();
if let Some('\n') = chars.peek() {
chars.next();
}
Ok(Some(Token::Whitespace(Whitespace::Newline)))
}
// BigQuery and MySQL use b or B for byte string literals; Postgres uses them for bit strings
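// e.g. B'abc' or b"abc"; BigQuery additionally allows the triple-quoted
// form B'''abc'''.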
b @ 'B' | b @ 'b' if dialect_of!(self is BigQueryDialect | PostgreSqlDialect | MySqlDialect | GenericDialect) =>
{
chars.next(); // consume
match chars.peek() {
Some('\'') => {
if self.dialect.supports_triple_quoted_string() {
return self
.tokenize_single_or_triple_quoted_string::<fn(String) -> Token>(
chars,
'\'',
false,
Token::SingleQuotedByteStringLiteral,
Token::TripleSingleQuotedByteStringLiteral,
);
}
let s = self.tokenize_single_quoted_string(chars, '\'', false)?;
Ok(Some(Token::SingleQuotedByteStringLiteral(s)))
}
Some('\"') => {
if self.dialect.supports_triple_quoted_string() {
return self
.tokenize_single_or_triple_quoted_string::<fn(String) -> Token>(
chars,
'"',
false,
Token::DoubleQuotedByteStringLiteral,
Token::TripleDoubleQuotedByteStringLiteral,
);
}
let s = self.tokenize_single_quoted_string(chars, '\"', false)?;
Ok(Some(Token::DoubleQuotedByteStringLiteral(s)))
}
_ => {
// regular identifier starting with a "b" or "B"
let s = self.tokenize_word(b, chars);
Ok(Some(Token::make_word(&s, None)))
}
}
}
// BigQuery uses r or R for raw string literal
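// e.g. r'\d+' or R"(\w)"; backslashes in raw strings are kept literally
// rather than treated as escapes.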
b @ 'R' | b @ 'r' if dialect_of!(self is BigQueryDialect | GenericDialect) => {
chars.next(); // consume
match chars.peek() {
Some('\'') => self
.tokenize_single_or_triple_quoted_string::<fn(String) -> Token>(
chars,
'\'',
false,
Token::SingleQuotedRawStringLiteral,
Token::TripleSingleQuotedRawStringLiteral,
),
Some('\"') => self
.tokenize_single_or_triple_quoted_string::<fn(String) -> Token>(
chars,
'"',
false,
Token::DoubleQuotedRawStringLiteral,
Token::TripleDoubleQuotedRawStringLiteral,
),
_ => {
// regular identifier starting with an "r" or "R"
let s = self.tokenize_word(b, chars);
Ok(Some(Token::make_word(&s, None)))
}
}
}
// The SQL standard uses an uppercase N for national string literals; Redshift also accepts a lowercase n
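// e.g. N'data' or, on Redshift, n'data'.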
n @ 'N' | n @ 'n' => {
chars.next(); // consume, to check the next char
match chars.peek() {
Some('\'') => {
// N'...' - a <national character string literal>
let backslash_escape =
self.dialect.supports_string_literal_backslash_escape();
let s =
self.tokenize_single_quoted_string(chars, '\'', backslash_escape)?;
Ok(Some(Token::NationalStringLiteral(s)))
}
_ => {
// regular identifier starting with an "N"
let s = self.tokenize_word(n, chars);
Ok(Some(Token::make_word(&s, None)))
}
}
}
// PostgreSQL accepts "escape" string constants, which are an extension to the SQL standard.
x @ 'e' | x @ 'E' if self.dialect.supports_string_escape_constant() => {
let starting_loc = chars.location();
chars.next(); // consume, to check the next char
match chars.peek() {
Some('\'') => {
let s =
self.tokenize_escaped_single_quoted_string(starting_loc, chars)?;
Ok(Some(Token::EscapedStringLiteral(s)))
}
_ => {
// regular identifier starting with an "E" or "e"
let s = self.tokenize_word(x, chars);
Ok(Some(Token::make_word(&s, None)))
}
}
}
// Unicode string literals like U&'first \000A second' are supported in some dialects, including PostgreSQL
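// In U&'...' strings, \XXXX (or \+XXXXXX) escapes name Unicode code
// points, e.g. U&'\0041' is 'A'.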
x @ 'u' | x @ 'U' if self.dialect.supports_unicode_string_literal() => {
chars.next(); // consume, to check the next char
if chars.peek() == Some(&'&') {
// we cannot advance the main iterator here: if the 'u' turns out to be an identifier, the '&' must be left to be tokenized later
let mut chars_clone = chars.peekable.clone();
chars_clone.next(); // consume the '&' in the clone
if chars_clone.peek() == Some(&'\'') {
chars.next(); // consume the '&' in the original iterator
let s = unescape_unicode_single_quoted_string(chars)?;
return Ok(Some(Token::UnicodeStringLiteral(s)));
}
}
// regular identifier starting with a "U" or "u"
let s = self.tokenize_word(x, chars);
Ok(Some(Token::make_word(&s, None)))
}
// The spec only allows an uppercase 'X' to introduce a hex
// string, but PostgreSQL, at least, allows a lowercase 'x' too.
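// e.g. X'DEADBEEF'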
x @ 'x' | x @ 'X' => {
chars.next(); // consume, to check the next char
match chars.peek() {
Some('\'') => {
// X'...' - a <binary string literal>
let s = self.tokenize_single_quoted_string(chars, '\'', true)?;
Ok(Some(Token::HexStringLiteral(s)))
}
_ => {
// regular identifier starting with an "X"
let s = self.tokenize_word(x, chars);
Ok(Some(Token::make_word(&s, None)))
}
}
}
// single quoted string
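// e.g. 'it''s' (doubled quote) always escapes a quote; 'it\'s' only
// works when the dialect enables backslash escapes.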
'\'' => {
if self.dialect.supports_triple_quoted_string() {
return self
.tokenize_single_or_triple_quoted_string::<fn(String) -> Token>(
chars,
'\'',
self.dialect.supports_string_literal_backslash_escape(),
Token::SingleQuotedString,
Token::TripleSingleQuotedString,
);
}
let s = self.tokenize_single_quoted_string(
chars,
'\'',
self.dialect.supports_string_literal_backslash_escape(),
)?;
Ok(Some(Token::SingleQuotedString(s)))
}
// double quoted string
'\"' if !self.dialect.is_delimited_identifier_start(ch)
&& !self.dialect.is_identifier_start(ch) =>
{
if self.dialect.supports_triple_quoted_string() {
return self
.tokenize_single_or_triple_quoted_string::<fn(String) -> Token>(
chars,
'"',
self.dialect.supports_string_literal_backslash_escape(),
Token::DoubleQuotedString,
Token::TripleDoubleQuotedString,
);
}
let s = self.tokenize_single_quoted_string(
chars,
'"',
self.dialect.supports_string_literal_backslash_escape(),
)?;
Ok(Some(Token::DoubleQuotedString(s)))
}
// Potentially nested delimited (quoted) identifier. This arm must come
// before the plain delimited-identifier arm below: both can match the
// same opening character, and the plain arm would otherwise shadow
// this one.
quote_start
if self
.dialect
.is_nested_delimited_identifier_start(quote_start)
&& self
.dialect
.peek_nested_delimited_identifier_quotes(chars.peekable.clone())
.is_some() =>
{
let Some((quote_start, nested_quote_start)) = self
.dialect
.peek_nested_delimited_identifier_quotes(chars.peekable.clone())
else {
return self.tokenizer_error(
chars.location(),
format!("Expected nested delimiter '{quote_start}' before EOF."),
);
};
let Some(nested_quote_start) = nested_quote_start else {
let word = self.tokenize_quoted_identifier(quote_start, chars)?;
return Ok(Some(Token::make_word(&word, Some(quote_start))));
};
let mut word = vec![];
let quote_end = Word::matching_end_quote(quote_start);
let nested_quote_end = Word::matching_end_quote(nested_quote_start);
let error_loc = chars.location();
chars.next(); // skip the first delimiter
peeking_take_while(chars, |ch| ch.is_whitespace());
if chars.peek() != Some(&nested_quote_start) {
return self.tokenizer_error(
error_loc,
format!("Expected nested delimiter '{nested_quote_start}' before EOF."),
);
}
word.push(nested_quote_start.into());
word.push(self.tokenize_quoted_identifier(nested_quote_start, chars)?);
word.push(nested_quote_end.into());
peeking_take_while(chars, |ch| ch.is_whitespace());
if chars.peek() != Some(&quote_end) {
return self.tokenizer_error(
error_loc,
format!("Expected close delimiter '{quote_end}' before EOF."),
);
}
chars.next(); // skip close delimiter
Ok(Some(Token::make_word(&word.concat(), Some(quote_start))))
}
// delimited (quoted) identifier
quote_start if self.dialect.is_delimited_identifier_start(ch) => {
let word = self.tokenize_quoted_identifier(quote_start, chars)?;
Ok(Some(Token::make_word(&word, Some(quote_start))))
}
// numbers and period
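// Handles forms such as 123, 1_000 (with separator support), 0xCAFE,
// 12.5, .5, and 1e-3; a lone '.' is emitted as Token::Period.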
'0'..='9' | '.' => {
// Some dialects support underscore as number separator
// There can only be one at a time and it must be followed by another digit
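// Note: the separator check uses is_ascii_hexdigit so the same closure
// can also validate separators inside the 0x hex-literal scan below.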
let is_number_separator = |ch: char, next_char: Option<char>| {
self.dialect.supports_numeric_literal_underscores()
&& ch == '_'
&& next_char.is_some_and(|next_ch| next_ch.is_ascii_hexdigit())
};
let mut s = peeking_next_take_while(chars, |ch, next_ch| {
ch.is_ascii_digit() || is_number_separator(ch, next_ch)
});
// match a hex literal that starts with 0x, e.g. 0xCAFE
if s == "0" && chars.peek() == Some(&'x') {
chars.next();
let s2 = peeking_next_take_while(chars, |ch, next_ch| {
ch.is_ascii_hexdigit() || is_number_separator(ch, next_ch)
});
return Ok(Some(Token::HexStringLiteral(s2)));
}
// match one period
if let Some('.') = chars.peek() {
s.push('.');
chars.next();
}
// If the dialect supports identifiers that start with a numeric prefix
// and we have now consumed a dot, check if the previous token was a Word.
// If so, what follows is definitely not part of a decimal number and
// we should yield the dot as a dedicated token so compound identifiers
// starting with digits can be parsed correctly.
if s == "." && self.dialect.supports_numeric_prefix() {
if let Some(Token::Word(_)) = prev_token {
return Ok(Some(Token::Period));
}
}
// Consume fractional digits.
s += &peeking_next_take_while(chars, |ch, next_ch| {
ch.is_ascii_digit() || is_number_separator(ch, next_ch)
});
// No fraction -> Token::Period
if s == "." {
return Ok(Some(Token::Period));
}
// Parse exponent as number
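// The lookahead runs on a clone of the iterator, so input like `1e`
// or `1e+` (no exponent digits) leaves the 'e' to be tokenized later.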
let mut exponent_part = String::new();
if chars.peek() == Some(&'e') || chars.peek() == Some(&'E') {
let mut char_clone = chars.peekable.clone();
exponent_part.push(char_clone.next().unwrap());
// Optional sign
match char_clone.peek() {
Some(&c) if matches!(c, '+' | '-') => {
exponent_part.push(c);
char_clone.next();
}
_ => (),
}
match char_clone.peek() {
// Definitely an exponent, get original iterator up to speed and use it
Some(&c) if c.is_ascii_digit() => {
for _ in 0..exponent_part.len() {
chars.next();
}
exponent_part +=
&peeking_take_while(chars, |ch| ch.is_ascii_digit());
s += exponent_part.as_str();
}
// Not an exponent, discard the work done
_ => (),
}
}
// If the dialect supports identifiers that start with a numeric prefix,
// we need to check if the value is in fact an identifier and must thus
// be tokenized as a word.
if self.dialect.supports_numeric_prefix() {
if exponent_part.is_empty() {
// If it is not a number with an exponent, it may be
// an identifier starting with digits.
let word =
peeking_take_while(chars, |ch| self.dialect.is_identifier_part(ch));
if !word.is_empty() {
s += word.as_str();
return Ok(Some(Token::make_word(s.as_str(), None)));
}
} else if prev_token == Some(&Token::Period) {
// If the previous token was a period, thus not belonging to a number,
// the value we have is part of an identifier.
return Ok(Some(Token::make_word(s.as_str(), None)));
}
}
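// An optional trailing 'L' (e.g. 100L) marks the literal as a long number.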
let long = if chars.peek() == Some(&'L') {
chars.next();
true
} else {
false
};
Ok(Some(Token::Number(s, long)))
}
// punctuation
'(' => self.consume_and_return(chars, Token::LParen),
')' => self.consume_and_return(chars, Token::RParen),
',' => self.consume_and_return(chars, Token::Comma),
// operators
'-' => {
chars.next(); // consume the '-'
match chars.peek() {
Some('-') => {
let mut is_comment = true;
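// Some dialects (MySQL, for example) treat `--` as a comment opener
// only when followed by whitespace; nth(1) peeks at the character
// after the second '-'.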
if self.dialect.requires_single_line_comment_whitespace() {
is_comment = Some(' ') == chars.peekable.clone().nth(1);
}
if is_comment {
chars.next(); // consume second '-'
let comment = self.tokenize_single_line_comment(chars);
return Ok(Some(Token::Whitespace(
Whitespace::SingleLineComment {
prefix: "--".to_owned(),
comment,
},
)));
}
self.start_binop(chars, "-", Token::Minus)
}
Some('>') => {
chars.next();
match chars.peek() {
Some('>') => self.consume_for_binop(chars, "->>", Token::LongArrow),
_ => self.start_binop(chars, "->", Token::Arrow),
}
}
// a regular '-' operator
_ => self.start_binop(chars, "-", Token::Minus),
}
}
'/' => {
chars.next(); // consume the '/'
match chars.peek() {
Some('*') => {
chars.next(); // consume the '*', starting a multi-line comment
self.tokenize_multiline_comment(chars)
}
Some('/') if dialect_of!(self is SnowflakeDialect) => {
chars.next(); // consume the second '/', starting a snowflake single-line comment
let comment = self.tokenize_single_line_comment(chars);
Ok(Some(Token::Whitespace(Whitespace::SingleLineComment {
prefix: "//".to_owned(),
comment,
})))
}
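// DuckDB integer division, e.g. 5 // 2 evaluates to 2.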
Some('/') if dialect_of!(self is DuckDbDialect | GenericDialect) => {
self.consume_and_return(chars, Token::DuckIntDiv)
}
// a regular '/' operator
_ => Ok(Some(Token::Div)),
}
}
'+' => self.consume_and_return(chars, Token::Plus),
'*' => self.consume_and_return(chars, Token::Mul),
'%' => {
chars.next(); // advance past '%'
match chars.peek() {
Some(s) if s.is_whitespace() => Ok(Some(Token::Mod)),
Some(sch) if self.dialect.is_identifier_start('%') => {
self.tokenize_identifier_or_keyword([ch, *sch], chars)
}
_ => self.start_binop(chars, "%", Token::Mod),
}
}
'|' => {
chars.next(); // consume the '|'
match chars.peek() {
Some('/') => self.consume_for_binop(chars, "|/", Token::PGSquareRoot),
Some('|') => {
chars.next(); // consume the second '|'
match chars.peek() {
Some('/') => {
self.consume_for_binop(chars, "||/", Token::PGCubeRoot)
}
_ => self.start_binop(chars, "||", Token::StringConcat),
}
}
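// PostgreSQL geometric operators such as |&> and |>>.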
Some('&') if self.dialect.supports_geometric_types() => {
chars.next(); // consume
match chars.peek() {
Some('>') => self.consume_for_binop(
chars,
"|&>",
Token::VerticalBarAmpersandRightAngleBracket,
),
_ => self.start_binop_opt(chars, "|&", None),
}
}
Some('>') if self.dialect.supports_geometric_types() => {
chars.next(); // consume
match chars.peek() {
Some('>') => self.consume_for_binop(
chars,
"|>>",
Token::VerticalBarShiftRight,
),
_ => self.start_binop_opt(chars, "|>", None),
}
}
Some('>') if self.dialect.supports_pipe_operator() => {
self.consume_for_binop(chars, "|>", Token::VerticalBarRightAngleBracket)
}
// Bitwise OR '|' operator
_ => self.start_binop(chars, "|", Token::Pipe),
}
}
'=' => {
chars.next(); // consume
match chars.peek() {
Some('>') => self.consume_and_return(chars, Token::RArrow),
Some('=') => self.consume_and_return(chars, Token::DoubleEq),
_ => Ok(Some(Token::Eq)),
}
}
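// '!' begins !=, !! and the Postgres regex-negation family !~, !~*,
// !~~, !~~*.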
'!' => {
chars.next(); // consume
match chars.peek() {
Some('=') => self.consume_and_return(chars, Token::Neq),
Some('!') => self.consume_and_return(chars, Token::DoubleExclamationMark),
Some('~') => {
chars.next();
match chars.peek() {
Some('*') => self
.consume_and_return(chars, Token::ExclamationMarkTildeAsterisk),
Some('~') => {
chars.next();
match chars.peek() {
Some('*') => self.consume_and_return(
chars,
Token::ExclamationMarkDoubleTildeAsterisk,
),
_ => Ok(Some(Token::ExclamationMarkDoubleTilde)),
}
}
_ => Ok(Some(Token::ExclamationMarkTilde)),
}
}
_ => Ok(Some(Token::ExclamationMark)),
}
}
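// '<' begins <=, <=> (MySQL's null-safe equality), <>, <<, and several
// geometric operators.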
'<' => {
chars.next(); // consume
match chars.peek() {
Some('=') => {
chars.next();
match chars.peek() {
Some('>') => self.consume_for_binop(chars, "<=>", Token::Spaceship),
_ => self.start_binop(chars, "<=", Token::LtEq),
}
}
Some('|') if self.dialect.supports_geometric_types() => {
self.consume_for_binop(chars, "<<|", Token::ShiftLeftVerticalBar)
}
Some('>') => self.consume_for_binop(chars, "<>", Token::Neq),
Some('<') if self.dialect.supports_geometric_types() => {
chars.next(); // consume
match chars.peek() {
Some('|') => self.consume_for_binop(
chars,
"<<|",
Token::ShiftLeftVerticalBar,
),
_ => self.start_binop(chars, "<<", Token::ShiftLeft),
}
}
Some('<') => self.consume_for_binop(chars, "<<", Token::ShiftLeft),
Some('-') if self.dialect.supports_geometric_types() => {
chars.next(); // consume
match chars.peek() {
Some('>') => {
self.consume_for_binop(chars, "<->", Token::TwoWayArrow)
}
_ => self.start_binop_opt(chars, "<-", None),
}
}
Some('^') if self.dialect.supports_geometric_types() => {
self.consume_for_binop(chars, "<^", Token::LeftAngleBracketCaret)
}
Some('@') => self.consume_for_binop(chars, "<@", Token::ArrowAt),
_ => self.start_binop(chars, "<", Token::Lt),
}
}
'>' => {
chars.next(); // consume
match chars.peek() {
Some('=') => self.consume_for_binop(chars, ">=", Token::GtEq),
Some('>') => self.consume_for_binop(chars, ">>", Token::ShiftRight),
Some('^') if self.dialect.supports_geometric_types() => {
self.consume_for_binop(chars, ">^", Token::RightAngleBracketCaret)
}
_ => self.start_binop(chars, ">", Token::Gt),
}
}
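// ':' begins the '::' cast operator and ':=' assignment.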
':' => {
chars.next();
match chars.peek() {
Some(':') => self.consume_and_return(chars, Token::DoubleColon),
Some('=') => self.consume_and_return(chars, Token::Assignment),
_ => Ok(Some(Token::Colon)),
}
}
';' => self.consume_and_return(chars, Token::SemiColon),
'\\' => self.consume_and_return(chars, Token::Backslash),
'[' => self.consume_and_return(chars, Token::LBracket),
']' => self.consume_and_return(chars, Token::RBracket),
'&' => {
chars.next(); // consume the '&'
match chars.peek() {
Some('>') if self.dialect.supports_geometric_types() => {
// consume_and_return consumes the '>' itself; an extra
// chars.next() here would swallow the following character
self.consume_and_return(chars, Token::AmpersandRightAngleBracket)
}
Some('<') if self.dialect.supports_geometric_types() => {
chars.next(); // consume
match chars.peek() {
Some('|') => self.consume_and_return(
chars,
Token::AmpersandLeftAngleBracketVerticalBar,
),
_ => {
self.start_binop(chars, "&<", Token::AmpersandLeftAngleBracket)
}
}
}
Some('&') => {
chars.next(); // consume the second '&'
self.start_binop(chars, "&&", Token::Overlap)
}
// Bitwise AND '&' operator
_ => self.start_binop(chars, "&", Token::Ampersand),
}
}
'^' => {
chars.next(); // consume the '^'
match chars.peek() {
Some('@') => self.consume_and_return(chars, Token::CaretAt),
_ => Ok(Some(Token::Caret)),
}
}
'{' => self.consume_and_return(chars, Token::LBrace),
'}' => self.consume_and_return(chars, Token::RBrace),
'#' if dialect_of!(self is SnowflakeDialect | BigQueryDialect | MySqlDialect | HiveDialect) =>
{
chars.next(); // consume the '#', starting a '#'-prefixed single-line comment (MySQL/Hive style)
let comment = self.tokenize_single_line_comment(chars);
Ok(Some(Token::Whitespace(Whitespace::SingleLineComment {
prefix: "#".to_owned(),
comment,
})))
}
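// '~' begins the Postgres pattern-match family: ~, ~*, ~~ (LIKE) and
// ~~* (ILIKE).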
'~' => {
chars.next(); // consume
match chars.peek() {
Some('*') => self.consume_for_binop(chars, "~*", Token::TildeAsterisk),
Some('=') if self.dialect.supports_geometric_types() => {
self.consume_for_binop(chars, "~=", Token::TildeEqual)
}
Some('~') => {
chars.next();
match chars.peek() {
Some('*') => {
self.consume_for_binop(chars, "~~*", Token::DoubleTildeAsterisk)
}
_ => self.start_binop(chars, "~~", Token::DoubleTilde),
}
}
_ => self.start_binop(chars, "~", Token::Tilde),
}
}
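// '#' begins the Postgres JSON path operators #>, #>> and #-, or a
// '#'-prefixed identifier where the dialect allows one.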
'#' => {
chars.next();
match chars.peek() {
Some('-') => self.consume_for_binop(chars, "#-", Token::HashMinus),
Some('>') => {
chars.next();
match chars.peek() {
Some('>') => {
self.consume_for_binop(chars, "#>>", Token::HashLongArrow)
}
_ => self.start_binop(chars, "#>", Token::HashArrow),
}
}
Some(' ') => Ok(Some(Token::Sharp)),
Some('#') if self.dialect.supports_geometric_types() => {
self.consume_for_binop(chars, "##", Token::DoubleSharp)
}
Some(sch) if self.dialect.is_identifier_start('#') => {
self.tokenize_identifier_or_keyword([ch, *sch], chars)
}
_ => self.start_binop(chars, "#", Token::Sharp),
}
}
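// '@' begins operators such as @> (contains) and @? as well as
// @-prefixed variables in dialects that allow them.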
'@' => {
chars.next();
match chars.peek() {
Some('@') if self.dialect.supports_geometric_types() => {
self.consume_and_return(chars, Token::AtAt)
}
Some('-') if self.dialect.supports_geometric_types() => {
chars.next();
match chars.peek() {
Some('@') => self.consume_and_return(chars, Token::AtDashAt),
_ => self.start_binop_opt(chars, "@-", None),
}
}
Some('>') => self.consume_and_return(chars, Token::AtArrow),
Some('?') => self.consume_and_return(chars, Token::AtQuestion),
Some('@') => {
chars.next();
match chars.peek() {
Some(' ') => Ok(Some(Token::AtAt)),
Some(tch) if self.dialect.is_identifier_start('@') => {
self.tokenize_identifier_or_keyword([ch, '@', *tch], chars)
}
_ => Ok(Some(Token::AtAt)),
}
}
Some(' ') => Ok(Some(Token::AtSign)),
// We break on quotes here, because no dialect allows identifiers starting
// with @ and containing quotation marks (e.g. `@'foo'`) unless they are
// quoted, which is tokenized as a quoted string, not here (e.g.
// `"@'foo'"`). Further, at least two dialects parse `@` followed by a
// quoted string as two separate tokens, which this allows. For example,
// Postgres parses `@'1'` as the absolute value of '1' which is implicitly
// cast to a numeric type. And when parsing MySQL-style grantees (e.g.
// `GRANT ALL ON *.* to 'root'@'localhost'`), we also want separate tokens
// for the user, the `@`, and the host.
Some('\'') => Ok(Some(Token::AtSign)),
Some('\"') => Ok(Some(Token::AtSign)),
Some('`') => Ok(Some(Token::AtSign)),
Some(sch) if self.dialect.is_identifier_start('@') => {
self.tokenize_identifier_or_keyword([ch, *sch], chars)
}
_ => Ok(Some(Token::AtSign)),
}
}
// Postgres uses ? for jsonb operators, not prepared statements
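// e.g. '{"a":1}'::jsonb ? 'a' tests key existence; ?| and ?& test any
// and all of a key list.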
'?' if self.dialect.supports_geometric_types() => {
chars.next(); // consume
match chars.peek() {
Some('|') => {
chars.next();
match chars.peek() {
Some('|') => self.consume_and_return(
chars,
Token::QuestionMarkDoubleVerticalBar,
),
_ => Ok(Some(Token::QuestionPipe)),
}
}
Some('&') => self.consume_and_return(chars, Token::QuestionAnd),
Some('-') => {
chars.next(); // consume
match chars.peek() {
Some('|') => self
.consume_and_return(chars, Token::QuestionMarkDashVerticalBar),
_ => Ok(Some(Token::QuestionMarkDash)),
}
}
Some('#') => self.consume_and_return(chars, Token::QuestionMarkSharp),
// the '?' itself was already consumed above; consuming another
// character here would swallow the token that follows
_ => Ok(Some(Token::Question)),
}
}
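// Prepared-statement placeholder: a bare '?' or a numbered form such as ?1.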
'?' => {
chars.next();
let s = peeking_take_while(chars, |ch| ch.is_numeric());
Ok(Some(Token::Placeholder(String::from("?") + &s)))
}
// identifier or keyword
ch if self.dialect.is_identifier_start(ch) => {
self.tokenize_identifier_or_keyword([ch], chars)
}
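// '$' introduces dollar-quoted strings ($tag$ ... $tag$) as well as
// numbered placeholders such as $1.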
'$' => Ok(Some(self.tokenize_dollar_preceded_value(chars)?)),
// whitespace check (including unicode chars) should be last as it covers some of the chars above
ch if ch.is_whitespace() => {
self.consume_and_return(chars, Token::Whitespace(Whitespace::Space))
}
other => self.consume_and_return(chars, Token::Char(other)),
},
None => Ok(None),
}
}