in rhai/src/tokenizer.rs [1536:2243]
fn get_next_token_inner(
stream: &mut impl InputStream,
state: &mut TokenizeState,
pos: &mut Position,
) -> Option<(Token, Position)> {
state.last_token.as_mut().map(SmartString::clear);
// Still inside a comment?
if state.comment_level > 0 {
let start_pos = *pos;
let mut comment = state.include_comments.then(|| String::new());
state.comment_level =
scan_block_comment(stream, state.comment_level, pos, comment.as_mut());
let return_comment = state.include_comments;
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
let return_comment = return_comment || is_doc_comment(comment.as_ref().expect("`Some`"));
if return_comment {
return Some((Token::Comment(comment.expect("`Some`").into()), start_pos));
}
if state.comment_level > 0 {
// Reached EOF without ending comment block
return None;
}
}
// Within text?
if let Some(ch) = state.is_within_text_terminated_by.take() {
return parse_string_literal(stream, state, pos, ch, true, false, true).map_or_else(
|(err, err_pos)| Some((Token::LexError(err.into()), err_pos)),
|(result, interpolated, start_pos)| {
if interpolated {
Some((Token::InterpolatedString(result.into()), start_pos))
} else {
Some((Token::StringConstant(result.into()), start_pos))
}
},
);
}
let mut negated: Option<Position> = None;
while let Some(c) = stream.get_next() {
pos.advance();
let start_pos = *pos;
let cc = stream.peek_next().unwrap_or('\0');
// Identifiers and strings that can have non-ASCII characters
match (c, cc) {
// \n
('\n', ..) => pos.new_line(),
// digit ...
('0'..='9', ..) => {
let mut result = SmartString::new_const();
let mut radix_base: Option<u32> = None;
let mut valid: fn(char) -> bool = is_numeric_digit;
result.push(c);
while let Some(next_char) = stream.peek_next() {
match next_char {
NUMBER_SEPARATOR => {
eat_next_and_advance(stream, pos);
}
ch if valid(ch) => {
result.push(next_char);
eat_next_and_advance(stream, pos);
}
#[cfg(any(not(feature = "no_float"), feature = "decimal"))]
'.' => {
stream.get_next().unwrap();
// Check if followed by digits or something that cannot start a property name
match stream.peek_next().unwrap_or('\0') {
// digits after period - accept the period
'0'..='9' => {
result.push(next_char);
pos.advance();
}
// _ - cannot follow a decimal point
NUMBER_SEPARATOR => {
stream.unget(next_char);
break;
}
// .. - reserved symbol, not a floating-point number
'.' => {
stream.unget(next_char);
break;
}
// symbol after period - probably a float
ch if !is_id_first_alphabetic(ch) => {
result.push(next_char);
pos.advance();
result.push('0');
}
// Not a floating-point number
_ => {
stream.unget(next_char);
break;
}
}
}
#[cfg(not(feature = "no_float"))]
'e' => {
stream.get_next().expect("`e`");
// Check if followed by digits or +/-
match stream.peek_next().unwrap_or('\0') {
// digits after e - accept the e
'0'..='9' => {
result.push(next_char);
pos.advance();
}
// +/- after e - accept the e and the sign
'+' | '-' => {
result.push(next_char);
pos.advance();
result.push(stream.get_next().unwrap());
pos.advance();
}
// Not a floating-point number
_ => {
stream.unget(next_char);
break;
}
}
}
// 0x????, 0o????, 0b???? at beginning
ch @ ('x' | 'o' | 'b' | 'X' | 'O' | 'B')
if c == '0' && result.len() <= 1 =>
{
result.push(next_char);
eat_next_and_advance(stream, pos);
valid = match ch {
'x' | 'X' => is_hex_digit,
'o' | 'O' => is_numeric_digit,
'b' | 'B' => is_numeric_digit,
c => unreachable!("x/X or o/O or b/B expected but gets '{}'", c),
};
radix_base = Some(match ch {
'x' | 'X' => 16,
'o' | 'O' => 8,
'b' | 'B' => 2,
c => unreachable!("x/X or o/O or b/B expected but gets '{}'", c),
});
}
_ => break,
}
}
let num_pos = negated.map_or(start_pos, |negated_pos| {
result.insert(0, '-');
negated_pos
});
if let Some(ref mut last) = state.last_token {
*last = result.clone();
}
// Parse number
let token = radix_base.map_or_else(
|| {
let num = INT::from_str(&result).map(Token::IntegerConstant);
// If integer parsing is unnecessary, try float instead
#[cfg(not(feature = "no_float"))]
let num = num.or_else(|_| {
crate::types::FloatWrapper::from_str(&result).map(Token::FloatConstant)
});
// Then try decimal
#[cfg(feature = "decimal")]
let num = num.or_else(|_| {
rust_decimal::Decimal::from_str(&result)
.map(Box::new)
.map(Token::DecimalConstant)
});
// Then try decimal in scientific notation
#[cfg(feature = "decimal")]
let num = num.or_else(|_| {
rust_decimal::Decimal::from_scientific(&result)
.map(Box::new)
.map(Token::DecimalConstant)
});
num.unwrap_or_else(|_| {
Token::LexError(LERR::MalformedNumber(result.to_string()).into())
})
},
|radix| {
let result = &result[2..];
UNSIGNED_INT::from_str_radix(result, radix)
.map(|v| v as INT)
.map_or_else(
|_| {
Token::LexError(
LERR::MalformedNumber(result.to_string()).into(),
)
},
Token::IntegerConstant,
)
},
);
return Some((token, num_pos));
}
// " - string literal
('"', ..) => {
return parse_string_literal(stream, state, pos, c, false, true, false)
.map_or_else(
|(err, err_pos)| Some((Token::LexError(err.into()), err_pos)),
|(result, ..)| Some((Token::StringConstant(result.into()), start_pos)),
);
}
// ` - string literal
('`', ..) => {
// Start from the next line if at the end of line
match stream.peek_next() {
// `\r - start from next line
Some('\r') => {
eat_next_and_advance(stream, pos);
// `\r\n
if stream.peek_next() == Some('\n') {
eat_next_and_advance(stream, pos);
}
pos.new_line();
}
// `\n - start from next line
Some('\n') => {
eat_next_and_advance(stream, pos);
pos.new_line();
}
_ => (),
}
return parse_string_literal(stream, state, pos, c, true, false, true).map_or_else(
|(err, err_pos)| Some((Token::LexError(err.into()), err_pos)),
|(result, interpolated, ..)| {
if interpolated {
Some((Token::InterpolatedString(result.into()), start_pos))
} else {
Some((Token::StringConstant(result.into()), start_pos))
}
},
);
}
// ' - character literal
('\'', '\'') => {
return Some((
Token::LexError(LERR::MalformedChar(String::new()).into()),
start_pos,
))
}
('\'', ..) => {
return Some(
parse_string_literal(stream, state, pos, c, false, false, false).map_or_else(
|(err, err_pos)| (Token::LexError(err.into()), err_pos),
|(result, ..)| {
let mut chars = result.chars();
let first = chars.next().unwrap();
if chars.next().is_some() {
(
Token::LexError(LERR::MalformedChar(result.to_string()).into()),
start_pos,
)
} else {
(Token::CharConstant(first), start_pos)
}
},
),
)
}
// Braces
('{', ..) => return Some((Token::LeftBrace, start_pos)),
('}', ..) => return Some((Token::RightBrace, start_pos)),
// Unit
('(', ')') => {
eat_next_and_advance(stream, pos);
return Some((Token::Unit, start_pos));
}
// Parentheses
('(', '*') => {
eat_next_and_advance(stream, pos);
return Some((Token::Reserved(Box::new("(*".into())), start_pos));
}
('(', ..) => return Some((Token::LeftParen, start_pos)),
(')', ..) => return Some((Token::RightParen, start_pos)),
// Indexing
('[', ..) => return Some((Token::LeftBracket, start_pos)),
(']', ..) => return Some((Token::RightBracket, start_pos)),
// Map literal
#[cfg(not(feature = "no_object"))]
('#', '{') => {
eat_next_and_advance(stream, pos);
return Some((Token::MapStart, start_pos));
}
// Shebang
('#', '!') => return Some((Token::Reserved(Box::new("#!".into())), start_pos)),
('#', ' ') => {
eat_next_and_advance(stream, pos);
let token = if stream.peek_next() == Some('{') {
eat_next_and_advance(stream, pos);
"# {"
} else {
"#"
};
return Some((Token::Reserved(Box::new(token.into())), start_pos));
}
('#', ..) => return Some((Token::Reserved(Box::new("#".into())), start_pos)),
// Operators
('+', '=') => {
eat_next_and_advance(stream, pos);
return Some((Token::PlusAssign, start_pos));
}
('+', '+') => {
eat_next_and_advance(stream, pos);
return Some((Token::Reserved(Box::new("++".into())), start_pos));
}
('+', ..) if !state.next_token_cannot_be_unary => {
return Some((Token::UnaryPlus, start_pos))
}
('+', ..) => return Some((Token::Plus, start_pos)),
('-', '0'..='9') if !state.next_token_cannot_be_unary => negated = Some(start_pos),
('-', '0'..='9') => return Some((Token::Minus, start_pos)),
('-', '=') => {
eat_next_and_advance(stream, pos);
return Some((Token::MinusAssign, start_pos));
}
('-', '>') => {
eat_next_and_advance(stream, pos);
return Some((Token::Reserved(Box::new("->".into())), start_pos));
}
('-', '-') => {
eat_next_and_advance(stream, pos);
return Some((Token::Reserved(Box::new("--".into())), start_pos));
}
('-', ..) if !state.next_token_cannot_be_unary => {
return Some((Token::UnaryMinus, start_pos))
}
('-', ..) => return Some((Token::Minus, start_pos)),
('*', ')') => {
eat_next_and_advance(stream, pos);
return Some((Token::Reserved(Box::new("*)".into())), start_pos));
}
('*', '=') => {
eat_next_and_advance(stream, pos);
return Some((Token::MultiplyAssign, start_pos));
}
('*', '*') => {
eat_next_and_advance(stream, pos);
return Some((
if stream.peek_next() == Some('=') {
eat_next_and_advance(stream, pos);
Token::PowerOfAssign
} else {
Token::PowerOf
},
start_pos,
));
}
('*', ..) => return Some((Token::Multiply, start_pos)),
// Comments
('/', '/') => {
eat_next_and_advance(stream, pos);
let mut comment: Option<String> = match stream.peek_next() {
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
Some('/') => {
eat_next_and_advance(stream, pos);
// Long streams of `///...` are not doc-comments
match stream.peek_next() {
Some('/') => None,
_ => Some("///".into()),
}
}
#[cfg(feature = "metadata")]
Some('!') => {
eat_next_and_advance(stream, pos);
Some("//!".into())
}
_ if state.include_comments => Some("//".into()),
_ => None,
};
while let Some(c) = stream.get_next() {
if c == '\r' {
// \r\n
if stream.peek_next() == Some('\n') {
eat_next_and_advance(stream, pos);
}
pos.new_line();
break;
}
if c == '\n' {
pos.new_line();
break;
}
if let Some(comment) = comment.as_mut() {
comment.push(c);
}
pos.advance();
}
if let Some(comment) = comment {
match comment {
#[cfg(feature = "metadata")]
_ if comment.starts_with("//!") => {
let g = &mut state.tokenizer_control.borrow_mut().global_comments;
if !g.is_empty() {
g.push('\n');
}
g.push_str(&comment);
}
_ => return Some((Token::Comment(comment.into()), start_pos)),
}
}
}
('/', '*') => {
state.comment_level = 1;
eat_next_and_advance(stream, pos);
let mut comment: Option<String> = match stream.peek_next() {
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
Some('*') => {
eat_next_and_advance(stream, pos);
// Long streams of `/****...` are not doc-comments
match stream.peek_next() {
Some('*') => None,
_ => Some("/**".into()),
}
}
_ if state.include_comments => Some("/*".into()),
_ => None,
};
state.comment_level =
scan_block_comment(stream, state.comment_level, pos, comment.as_mut());
if let Some(comment) = comment {
return Some((Token::Comment(comment.into()), start_pos));
}
}
('/', '=') => {
eat_next_and_advance(stream, pos);
return Some((Token::DivideAssign, start_pos));
}
('/', ..) => return Some((Token::Divide, start_pos)),
(';', ..) => return Some((Token::SemiColon, start_pos)),
(',', ..) => return Some((Token::Comma, start_pos)),
('.', '.') => {
eat_next_and_advance(stream, pos);
return Some((
match stream.peek_next() {
Some('.') => {
eat_next_and_advance(stream, pos);
Token::Reserved(Box::new("...".into()))
}
Some('=') => {
eat_next_and_advance(stream, pos);
Token::InclusiveRange
}
_ => Token::ExclusiveRange,
},
start_pos,
));
}
('.', ..) => return Some((Token::Period, start_pos)),
('=', '=') => {
eat_next_and_advance(stream, pos);
if stream.peek_next() == Some('=') {
eat_next_and_advance(stream, pos);
return Some((Token::Reserved(Box::new("===".into())), start_pos));
}
return Some((Token::EqualsTo, start_pos));
}
('=', '>') => {
eat_next_and_advance(stream, pos);
return Some((Token::DoubleArrow, start_pos));
}
('=', ..) => return Some((Token::Equals, start_pos)),
#[cfg(not(feature = "no_module"))]
(':', ':') => {
eat_next_and_advance(stream, pos);
if stream.peek_next() == Some('<') {
eat_next_and_advance(stream, pos);
return Some((Token::Reserved(Box::new("::<".into())), start_pos));
}
return Some((Token::DoubleColon, start_pos));
}
(':', '=') => {
eat_next_and_advance(stream, pos);
return Some((Token::Reserved(Box::new(":=".into())), start_pos));
}
(':', ';') => {
eat_next_and_advance(stream, pos);
return Some((Token::Reserved(Box::new(":;".into())), start_pos));
}
(':', ..) => return Some((Token::Colon, start_pos)),
('<', '=') => {
eat_next_and_advance(stream, pos);
return Some((Token::LessThanEqualsTo, start_pos));
}
('<', '-') => {
eat_next_and_advance(stream, pos);
return Some((Token::Reserved(Box::new("<-".into())), start_pos));
}
('<', '<') => {
eat_next_and_advance(stream, pos);
return Some((
if stream.peek_next() == Some('=') {
eat_next_and_advance(stream, pos);
Token::LeftShiftAssign
} else {
Token::LeftShift
},
start_pos,
));
}
('<', '|') => {
eat_next_and_advance(stream, pos);
return Some((Token::Reserved(Box::new("<|".into())), start_pos));
}
('<', ..) => return Some((Token::LessThan, start_pos)),
('>', '=') => {
eat_next_and_advance(stream, pos);
return Some((Token::GreaterThanEqualsTo, start_pos));
}
('>', '>') => {
eat_next_and_advance(stream, pos);
return Some((
if stream.peek_next() == Some('=') {
eat_next_and_advance(stream, pos);
Token::RightShiftAssign
} else {
Token::RightShift
},
start_pos,
));
}
('>', ..) => return Some((Token::GreaterThan, start_pos)),
('!', 'i') => {
stream.get_next().unwrap();
if stream.peek_next() == Some('n') {
stream.get_next().unwrap();
match stream.peek_next() {
Some(c) if is_id_continue(c) => {
stream.unget('n');
stream.unget('i');
return Some((Token::Bang, start_pos));
}
_ => {
pos.advance();
pos.advance();
return Some((Token::NotIn, start_pos));
}
}
}
stream.unget('i');
return Some((Token::Bang, start_pos));
}
('!', '=') => {
eat_next_and_advance(stream, pos);
if stream.peek_next() == Some('=') {
eat_next_and_advance(stream, pos);
return Some((Token::Reserved(Box::new("!==".into())), start_pos));
}
return Some((Token::NotEqualsTo, start_pos));
}
('!', '.') => {
eat_next_and_advance(stream, pos);
return Some((Token::Reserved(Box::new("!.".into())), start_pos));
}
('!', ..) => return Some((Token::Bang, start_pos)),
('|', '|') => {
eat_next_and_advance(stream, pos);
return Some((Token::Or, start_pos));
}
('|', '=') => {
eat_next_and_advance(stream, pos);
return Some((Token::OrAssign, start_pos));
}
('|', '>') => {
eat_next_and_advance(stream, pos);
return Some((Token::Reserved(Box::new("|>".into())), start_pos));
}
('|', ..) => return Some((Token::Pipe, start_pos)),
('&', '&') => {
eat_next_and_advance(stream, pos);
return Some((Token::And, start_pos));
}
('&', '=') => {
eat_next_and_advance(stream, pos);
return Some((Token::AndAssign, start_pos));
}
('&', ..) => return Some((Token::Ampersand, start_pos)),
('^', '=') => {
eat_next_and_advance(stream, pos);
return Some((Token::XOrAssign, start_pos));
}
('^', ..) => return Some((Token::XOr, start_pos)),
('~', ..) => return Some((Token::Reserved(Box::new("~".into())), start_pos)),
('%', '=') => {
eat_next_and_advance(stream, pos);
return Some((Token::ModuloAssign, start_pos));
}
('%', ..) => return Some((Token::Modulo, start_pos)),
('@', ..) => return Some((Token::Reserved(Box::new("@".into())), start_pos)),
('$', ..) => return Some((Token::Reserved(Box::new("$".into())), start_pos)),
('?', '.') => {
eat_next_and_advance(stream, pos);
return Some((
#[cfg(not(feature = "no_object"))]
Token::Elvis,
#[cfg(feature = "no_object")]
Token::Reserved(Box::new("?.".into())),
start_pos,
));
}
('?', '?') => {
eat_next_and_advance(stream, pos);
return Some((Token::DoubleQuestion, start_pos));
}
('?', '[') => {
eat_next_and_advance(stream, pos);
return Some((
#[cfg(not(feature = "no_index"))]
Token::QuestionBracket,
#[cfg(feature = "no_index")]
Token::Reserved(Box::new("?[".into())),
start_pos,
));
}
('?', ..) => return Some((Token::Reserved(Box::new("?".into())), start_pos)),
// letter or underscore ...
_ if is_id_first_alphabetic(c) || c == '_' => {
return Some(parse_identifier_token(stream, state, pos, start_pos, c));
}
_ if c.is_whitespace() => (),
_ => {
return Some((
Token::LexError(LERR::UnexpectedInput(c.to_string()).into()),
start_pos,
))
}
}
}
pos.advance();
Some((Token::EOF, *pos))
}