fn get_next_token_inner()

in rhai/src/tokenizer.rs [1536:2243]


fn get_next_token_inner(
    stream: &mut impl InputStream,
    state: &mut TokenizeState,
    pos: &mut Position,
) -> Option<(Token, Position)> {
    state.last_token.as_mut().map(SmartString::clear);

    // Still inside a comment?
    if state.comment_level > 0 {
        let start_pos = *pos;
        let mut comment = state.include_comments.then(|| String::new());

        state.comment_level =
            scan_block_comment(stream, state.comment_level, pos, comment.as_mut());

        let return_comment = state.include_comments;

        #[cfg(not(feature = "no_function"))]
        #[cfg(feature = "metadata")]
        let return_comment = return_comment || is_doc_comment(comment.as_ref().expect("`Some`"));

        if return_comment {
            return Some((Token::Comment(comment.expect("`Some`").into()), start_pos));
        }
        if state.comment_level > 0 {
            // Reached EOF without ending comment block
            return None;
        }
    }

    // Within text?
    if let Some(ch) = state.is_within_text_terminated_by.take() {
        return parse_string_literal(stream, state, pos, ch, true, false, true).map_or_else(
            |(err, err_pos)| Some((Token::LexError(err.into()), err_pos)),
            |(result, interpolated, start_pos)| {
                if interpolated {
                    Some((Token::InterpolatedString(result.into()), start_pos))
                } else {
                    Some((Token::StringConstant(result.into()), start_pos))
                }
            },
        );
    }

    let mut negated: Option<Position> = None;

    while let Some(c) = stream.get_next() {
        pos.advance();

        let start_pos = *pos;
        let cc = stream.peek_next().unwrap_or('\0');

        // Identifiers and strings that can have non-ASCII characters
        match (c, cc) {
            // \n
            ('\n', ..) => pos.new_line(),

            // digit ...
            ('0'..='9', ..) => {
                let mut result = SmartString::new_const();
                let mut radix_base: Option<u32> = None;
                let mut valid: fn(char) -> bool = is_numeric_digit;
                result.push(c);

                while let Some(next_char) = stream.peek_next() {
                    match next_char {
                        NUMBER_SEPARATOR => {
                            eat_next_and_advance(stream, pos);
                        }
                        ch if valid(ch) => {
                            result.push(next_char);
                            eat_next_and_advance(stream, pos);
                        }
                        #[cfg(any(not(feature = "no_float"), feature = "decimal"))]
                        '.' => {
                            stream.get_next().unwrap();

                            // Check if followed by digits or something that cannot start a property name
                            match stream.peek_next().unwrap_or('\0') {
                                // digits after period - accept the period
                                '0'..='9' => {
                                    result.push(next_char);
                                    pos.advance();
                                }
                                // _ - cannot follow a decimal point
                                NUMBER_SEPARATOR => {
                                    stream.unget(next_char);
                                    break;
                                }
                                // .. - reserved symbol, not a floating-point number
                                '.' => {
                                    stream.unget(next_char);
                                    break;
                                }
                                // symbol after period - probably a float
                                ch if !is_id_first_alphabetic(ch) => {
                                    result.push(next_char);
                                    pos.advance();
                                    result.push('0');
                                }
                                // Not a floating-point number
                                _ => {
                                    stream.unget(next_char);
                                    break;
                                }
                            }
                        }
                        #[cfg(not(feature = "no_float"))]
                        'e' => {
                            stream.get_next().expect("`e`");

                            // Check if followed by digits or +/-
                            match stream.peek_next().unwrap_or('\0') {
                                // digits after e - accept the e
                                '0'..='9' => {
                                    result.push(next_char);
                                    pos.advance();
                                }
                                // +/- after e - accept the e and the sign
                                '+' | '-' => {
                                    result.push(next_char);
                                    pos.advance();
                                    result.push(stream.get_next().unwrap());
                                    pos.advance();
                                }
                                // Not a floating-point number
                                _ => {
                                    stream.unget(next_char);
                                    break;
                                }
                            }
                        }
                        // 0x????, 0o????, 0b???? at beginning
                        ch @ ('x' | 'o' | 'b' | 'X' | 'O' | 'B')
                            if c == '0' && result.len() <= 1 =>
                        {
                            result.push(next_char);
                            eat_next_and_advance(stream, pos);

                            valid = match ch {
                                'x' | 'X' => is_hex_digit,
                                'o' | 'O' => is_numeric_digit,
                                'b' | 'B' => is_numeric_digit,
                                c => unreachable!("x/X or o/O or b/B expected but gets '{}'", c),
                            };

                            radix_base = Some(match ch {
                                'x' | 'X' => 16,
                                'o' | 'O' => 8,
                                'b' | 'B' => 2,
                                c => unreachable!("x/X or o/O or b/B expected but gets '{}'", c),
                            });
                        }

                        _ => break,
                    }
                }

                let num_pos = negated.map_or(start_pos, |negated_pos| {
                    result.insert(0, '-');
                    negated_pos
                });

                if let Some(ref mut last) = state.last_token {
                    *last = result.clone();
                }

                // Parse number
                let token = radix_base.map_or_else(
                    || {
                        let num = INT::from_str(&result).map(Token::IntegerConstant);

                        // If integer parsing is unnecessary, try float instead
                        #[cfg(not(feature = "no_float"))]
                        let num = num.or_else(|_| {
                            crate::types::FloatWrapper::from_str(&result).map(Token::FloatConstant)
                        });

                        // Then try decimal
                        #[cfg(feature = "decimal")]
                        let num = num.or_else(|_| {
                            rust_decimal::Decimal::from_str(&result)
                                .map(Box::new)
                                .map(Token::DecimalConstant)
                        });

                        // Then try decimal in scientific notation
                        #[cfg(feature = "decimal")]
                        let num = num.or_else(|_| {
                            rust_decimal::Decimal::from_scientific(&result)
                                .map(Box::new)
                                .map(Token::DecimalConstant)
                        });

                        num.unwrap_or_else(|_| {
                            Token::LexError(LERR::MalformedNumber(result.to_string()).into())
                        })
                    },
                    |radix| {
                        let result = &result[2..];

                        UNSIGNED_INT::from_str_radix(result, radix)
                            .map(|v| v as INT)
                            .map_or_else(
                                |_| {
                                    Token::LexError(
                                        LERR::MalformedNumber(result.to_string()).into(),
                                    )
                                },
                                Token::IntegerConstant,
                            )
                    },
                );

                return Some((token, num_pos));
            }

            // " - string literal
            ('"', ..) => {
                return parse_string_literal(stream, state, pos, c, false, true, false)
                    .map_or_else(
                        |(err, err_pos)| Some((Token::LexError(err.into()), err_pos)),
                        |(result, ..)| Some((Token::StringConstant(result.into()), start_pos)),
                    );
            }
            // ` - string literal
            ('`', ..) => {
                // Start from the next line if at the end of line
                match stream.peek_next() {
                    // `\r - start from next line
                    Some('\r') => {
                        eat_next_and_advance(stream, pos);
                        // `\r\n
                        if stream.peek_next() == Some('\n') {
                            eat_next_and_advance(stream, pos);
                        }
                        pos.new_line();
                    }
                    // `\n - start from next line
                    Some('\n') => {
                        eat_next_and_advance(stream, pos);
                        pos.new_line();
                    }
                    _ => (),
                }

                return parse_string_literal(stream, state, pos, c, true, false, true).map_or_else(
                    |(err, err_pos)| Some((Token::LexError(err.into()), err_pos)),
                    |(result, interpolated, ..)| {
                        if interpolated {
                            Some((Token::InterpolatedString(result.into()), start_pos))
                        } else {
                            Some((Token::StringConstant(result.into()), start_pos))
                        }
                    },
                );
            }

            // ' - character literal
            ('\'', '\'') => {
                return Some((
                    Token::LexError(LERR::MalformedChar(String::new()).into()),
                    start_pos,
                ))
            }
            ('\'', ..) => {
                return Some(
                    parse_string_literal(stream, state, pos, c, false, false, false).map_or_else(
                        |(err, err_pos)| (Token::LexError(err.into()), err_pos),
                        |(result, ..)| {
                            let mut chars = result.chars();
                            let first = chars.next().unwrap();

                            if chars.next().is_some() {
                                (
                                    Token::LexError(LERR::MalformedChar(result.to_string()).into()),
                                    start_pos,
                                )
                            } else {
                                (Token::CharConstant(first), start_pos)
                            }
                        },
                    ),
                )
            }

            // Braces
            ('{', ..) => return Some((Token::LeftBrace, start_pos)),
            ('}', ..) => return Some((Token::RightBrace, start_pos)),

            // Unit
            ('(', ')') => {
                eat_next_and_advance(stream, pos);
                return Some((Token::Unit, start_pos));
            }

            // Parentheses
            ('(', '*') => {
                eat_next_and_advance(stream, pos);
                return Some((Token::Reserved(Box::new("(*".into())), start_pos));
            }
            ('(', ..) => return Some((Token::LeftParen, start_pos)),
            (')', ..) => return Some((Token::RightParen, start_pos)),

            // Indexing
            ('[', ..) => return Some((Token::LeftBracket, start_pos)),
            (']', ..) => return Some((Token::RightBracket, start_pos)),

            // Map literal
            #[cfg(not(feature = "no_object"))]
            ('#', '{') => {
                eat_next_and_advance(stream, pos);
                return Some((Token::MapStart, start_pos));
            }
            // Shebang
            ('#', '!') => return Some((Token::Reserved(Box::new("#!".into())), start_pos)),

            ('#', ' ') => {
                eat_next_and_advance(stream, pos);
                let token = if stream.peek_next() == Some('{') {
                    eat_next_and_advance(stream, pos);
                    "# {"
                } else {
                    "#"
                };
                return Some((Token::Reserved(Box::new(token.into())), start_pos));
            }

            ('#', ..) => return Some((Token::Reserved(Box::new("#".into())), start_pos)),

            // Operators
            ('+', '=') => {
                eat_next_and_advance(stream, pos);
                return Some((Token::PlusAssign, start_pos));
            }
            ('+', '+') => {
                eat_next_and_advance(stream, pos);
                return Some((Token::Reserved(Box::new("++".into())), start_pos));
            }
            ('+', ..) if !state.next_token_cannot_be_unary => {
                return Some((Token::UnaryPlus, start_pos))
            }
            ('+', ..) => return Some((Token::Plus, start_pos)),

            ('-', '0'..='9') if !state.next_token_cannot_be_unary => negated = Some(start_pos),
            ('-', '0'..='9') => return Some((Token::Minus, start_pos)),
            ('-', '=') => {
                eat_next_and_advance(stream, pos);
                return Some((Token::MinusAssign, start_pos));
            }
            ('-', '>') => {
                eat_next_and_advance(stream, pos);
                return Some((Token::Reserved(Box::new("->".into())), start_pos));
            }
            ('-', '-') => {
                eat_next_and_advance(stream, pos);
                return Some((Token::Reserved(Box::new("--".into())), start_pos));
            }
            ('-', ..) if !state.next_token_cannot_be_unary => {
                return Some((Token::UnaryMinus, start_pos))
            }
            ('-', ..) => return Some((Token::Minus, start_pos)),

            ('*', ')') => {
                eat_next_and_advance(stream, pos);
                return Some((Token::Reserved(Box::new("*)".into())), start_pos));
            }
            ('*', '=') => {
                eat_next_and_advance(stream, pos);
                return Some((Token::MultiplyAssign, start_pos));
            }
            ('*', '*') => {
                eat_next_and_advance(stream, pos);

                return Some((
                    if stream.peek_next() == Some('=') {
                        eat_next_and_advance(stream, pos);
                        Token::PowerOfAssign
                    } else {
                        Token::PowerOf
                    },
                    start_pos,
                ));
            }
            ('*', ..) => return Some((Token::Multiply, start_pos)),

            // Comments
            ('/', '/') => {
                eat_next_and_advance(stream, pos);

                let mut comment: Option<String> = match stream.peek_next() {
                    #[cfg(not(feature = "no_function"))]
                    #[cfg(feature = "metadata")]
                    Some('/') => {
                        eat_next_and_advance(stream, pos);

                        // Long streams of `///...` are not doc-comments
                        match stream.peek_next() {
                            Some('/') => None,
                            _ => Some("///".into()),
                        }
                    }
                    #[cfg(feature = "metadata")]
                    Some('!') => {
                        eat_next_and_advance(stream, pos);
                        Some("//!".into())
                    }
                    _ if state.include_comments => Some("//".into()),
                    _ => None,
                };

                while let Some(c) = stream.get_next() {
                    if c == '\r' {
                        // \r\n
                        if stream.peek_next() == Some('\n') {
                            eat_next_and_advance(stream, pos);
                        }
                        pos.new_line();
                        break;
                    }
                    if c == '\n' {
                        pos.new_line();
                        break;
                    }
                    if let Some(comment) = comment.as_mut() {
                        comment.push(c);
                    }
                    pos.advance();
                }

                if let Some(comment) = comment {
                    match comment {
                        #[cfg(feature = "metadata")]
                        _ if comment.starts_with("//!") => {
                            let g = &mut state.tokenizer_control.borrow_mut().global_comments;
                            if !g.is_empty() {
                                g.push('\n');
                            }
                            g.push_str(&comment);
                        }
                        _ => return Some((Token::Comment(comment.into()), start_pos)),
                    }
                }
            }
            ('/', '*') => {
                state.comment_level = 1;
                eat_next_and_advance(stream, pos);

                let mut comment: Option<String> = match stream.peek_next() {
                    #[cfg(not(feature = "no_function"))]
                    #[cfg(feature = "metadata")]
                    Some('*') => {
                        eat_next_and_advance(stream, pos);

                        // Long streams of `/****...` are not doc-comments
                        match stream.peek_next() {
                            Some('*') => None,
                            _ => Some("/**".into()),
                        }
                    }
                    _ if state.include_comments => Some("/*".into()),
                    _ => None,
                };

                state.comment_level =
                    scan_block_comment(stream, state.comment_level, pos, comment.as_mut());

                if let Some(comment) = comment {
                    return Some((Token::Comment(comment.into()), start_pos));
                }
            }

            ('/', '=') => {
                eat_next_and_advance(stream, pos);
                return Some((Token::DivideAssign, start_pos));
            }
            ('/', ..) => return Some((Token::Divide, start_pos)),

            (';', ..) => return Some((Token::SemiColon, start_pos)),
            (',', ..) => return Some((Token::Comma, start_pos)),

            ('.', '.') => {
                eat_next_and_advance(stream, pos);
                return Some((
                    match stream.peek_next() {
                        Some('.') => {
                            eat_next_and_advance(stream, pos);
                            Token::Reserved(Box::new("...".into()))
                        }
                        Some('=') => {
                            eat_next_and_advance(stream, pos);
                            Token::InclusiveRange
                        }
                        _ => Token::ExclusiveRange,
                    },
                    start_pos,
                ));
            }
            ('.', ..) => return Some((Token::Period, start_pos)),

            ('=', '=') => {
                eat_next_and_advance(stream, pos);

                if stream.peek_next() == Some('=') {
                    eat_next_and_advance(stream, pos);
                    return Some((Token::Reserved(Box::new("===".into())), start_pos));
                }

                return Some((Token::EqualsTo, start_pos));
            }
            ('=', '>') => {
                eat_next_and_advance(stream, pos);
                return Some((Token::DoubleArrow, start_pos));
            }
            ('=', ..) => return Some((Token::Equals, start_pos)),

            #[cfg(not(feature = "no_module"))]
            (':', ':') => {
                eat_next_and_advance(stream, pos);

                if stream.peek_next() == Some('<') {
                    eat_next_and_advance(stream, pos);
                    return Some((Token::Reserved(Box::new("::<".into())), start_pos));
                }

                return Some((Token::DoubleColon, start_pos));
            }
            (':', '=') => {
                eat_next_and_advance(stream, pos);
                return Some((Token::Reserved(Box::new(":=".into())), start_pos));
            }
            (':', ';') => {
                eat_next_and_advance(stream, pos);
                return Some((Token::Reserved(Box::new(":;".into())), start_pos));
            }
            (':', ..) => return Some((Token::Colon, start_pos)),

            ('<', '=') => {
                eat_next_and_advance(stream, pos);
                return Some((Token::LessThanEqualsTo, start_pos));
            }
            ('<', '-') => {
                eat_next_and_advance(stream, pos);
                return Some((Token::Reserved(Box::new("<-".into())), start_pos));
            }
            ('<', '<') => {
                eat_next_and_advance(stream, pos);

                return Some((
                    if stream.peek_next() == Some('=') {
                        eat_next_and_advance(stream, pos);
                        Token::LeftShiftAssign
                    } else {
                        Token::LeftShift
                    },
                    start_pos,
                ));
            }
            ('<', '|') => {
                eat_next_and_advance(stream, pos);
                return Some((Token::Reserved(Box::new("<|".into())), start_pos));
            }
            ('<', ..) => return Some((Token::LessThan, start_pos)),

            ('>', '=') => {
                eat_next_and_advance(stream, pos);
                return Some((Token::GreaterThanEqualsTo, start_pos));
            }
            ('>', '>') => {
                eat_next_and_advance(stream, pos);

                return Some((
                    if stream.peek_next() == Some('=') {
                        eat_next_and_advance(stream, pos);
                        Token::RightShiftAssign
                    } else {
                        Token::RightShift
                    },
                    start_pos,
                ));
            }
            ('>', ..) => return Some((Token::GreaterThan, start_pos)),

            ('!', 'i') => {
                stream.get_next().unwrap();
                if stream.peek_next() == Some('n') {
                    stream.get_next().unwrap();
                    match stream.peek_next() {
                        Some(c) if is_id_continue(c) => {
                            stream.unget('n');
                            stream.unget('i');
                            return Some((Token::Bang, start_pos));
                        }
                        _ => {
                            pos.advance();
                            pos.advance();
                            return Some((Token::NotIn, start_pos));
                        }
                    }
                }

                stream.unget('i');
                return Some((Token::Bang, start_pos));
            }
            ('!', '=') => {
                eat_next_and_advance(stream, pos);

                if stream.peek_next() == Some('=') {
                    eat_next_and_advance(stream, pos);
                    return Some((Token::Reserved(Box::new("!==".into())), start_pos));
                }

                return Some((Token::NotEqualsTo, start_pos));
            }
            ('!', '.') => {
                eat_next_and_advance(stream, pos);
                return Some((Token::Reserved(Box::new("!.".into())), start_pos));
            }
            ('!', ..) => return Some((Token::Bang, start_pos)),

            ('|', '|') => {
                eat_next_and_advance(stream, pos);
                return Some((Token::Or, start_pos));
            }
            ('|', '=') => {
                eat_next_and_advance(stream, pos);
                return Some((Token::OrAssign, start_pos));
            }
            ('|', '>') => {
                eat_next_and_advance(stream, pos);
                return Some((Token::Reserved(Box::new("|>".into())), start_pos));
            }
            ('|', ..) => return Some((Token::Pipe, start_pos)),

            ('&', '&') => {
                eat_next_and_advance(stream, pos);
                return Some((Token::And, start_pos));
            }
            ('&', '=') => {
                eat_next_and_advance(stream, pos);
                return Some((Token::AndAssign, start_pos));
            }
            ('&', ..) => return Some((Token::Ampersand, start_pos)),

            ('^', '=') => {
                eat_next_and_advance(stream, pos);
                return Some((Token::XOrAssign, start_pos));
            }
            ('^', ..) => return Some((Token::XOr, start_pos)),

            ('~', ..) => return Some((Token::Reserved(Box::new("~".into())), start_pos)),

            ('%', '=') => {
                eat_next_and_advance(stream, pos);
                return Some((Token::ModuloAssign, start_pos));
            }
            ('%', ..) => return Some((Token::Modulo, start_pos)),

            ('@', ..) => return Some((Token::Reserved(Box::new("@".into())), start_pos)),

            ('$', ..) => return Some((Token::Reserved(Box::new("$".into())), start_pos)),

            ('?', '.') => {
                eat_next_and_advance(stream, pos);
                return Some((
                    #[cfg(not(feature = "no_object"))]
                    Token::Elvis,
                    #[cfg(feature = "no_object")]
                    Token::Reserved(Box::new("?.".into())),
                    start_pos,
                ));
            }
            ('?', '?') => {
                eat_next_and_advance(stream, pos);
                return Some((Token::DoubleQuestion, start_pos));
            }
            ('?', '[') => {
                eat_next_and_advance(stream, pos);
                return Some((
                    #[cfg(not(feature = "no_index"))]
                    Token::QuestionBracket,
                    #[cfg(feature = "no_index")]
                    Token::Reserved(Box::new("?[".into())),
                    start_pos,
                ));
            }
            ('?', ..) => return Some((Token::Reserved(Box::new("?".into())), start_pos)),

            // letter or underscore ...
            _ if is_id_first_alphabetic(c) || c == '_' => {
                return Some(parse_identifier_token(stream, state, pos, start_pos, c));
            }

            _ if c.is_whitespace() => (),

            _ => {
                return Some((
                    Token::LexError(LERR::UnexpectedInput(c.to_string()).into()),
                    start_pos,
                ))
            }
        }
    }

    pos.advance();

    Some((Token::EOF, *pos))
}