src/Bicep.Core/Parsing/Lexer.cs (855 lines of code) (raw):
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
using System.Collections.Immutable;
using System.Diagnostics;
using System.Globalization;
using System.Text;
using Bicep.Core.Diagnostics;
using Bicep.Core.Extensions;
using Bicep.Core.Syntax;
using Bicep.Core.Text;
namespace Bicep.Core.Parsing
{
public class Lexer
{
// maps the escape character (that follows the backslash) to its value.
// the unicode escape (\u{...}) is not listed here; it is handled separately by the string scanning code.
private static readonly ImmutableSortedDictionary<char, char> SingleCharacterEscapes = new Dictionary<char, char>
{
{'n', '\n'},
{'r', '\r'},
{'t', '\t'},
{'\\', '\\'},
{'\'', '\''},
{'$', '$'}
}.ToImmutableSortedDictionary();
// display form of every supported escape sequence: a backslash followed by each key above, plus "\u{...}".
// passed to the "unrecognized escape sequence" diagnostic raised while scanning string segments.
private static readonly ImmutableArray<string> CharacterEscapeSequences = SingleCharacterEscapes.Keys
.Select(c => $"\\{c}")
.Append("\\u{...}")
.ToImmutableArray();
// number of consecutive single quotes that open and close a multi-line string (''').
private const int MultilineStringTerminatingQuoteCount = 3;
// the rules for parsing are slightly different if we are inside an interpolated string (for example, a new line should result in a lex error).
// to handle this, we use a modal lexing pattern with a stack to ensure we're applying the correct set of rules.
// entries are either the StringLeftPiece token type (an interpolation was opened) or LeftBrace (an object brace was opened inside a hole).
private readonly Stack<TokenType> templateStack = new();
// tokens produced so far, in source order; LexToken appends to this list.
private readonly IList<Token> tokens = new List<Token>();
// receives every diagnostic raised while lexing.
private readonly IDiagnosticWriter diagnosticWriter;
// cursor over the text being lexed.
private readonly SlidingTextWindow textWindow;
/// <summary>
/// Creates a lexer that reads from the supplied text window and reports problems to the supplied diagnostic writer.
/// </summary>
/// <param name="textWindow">the window over the text to lex</param>
/// <param name="diagnosticWriter">the sink for diagnostics raised while lexing</param>
public Lexer(SlidingTextWindow textWindow, IDiagnosticWriter diagnosticWriter)
    => (this.textWindow, this.diagnosticWriter) = (textWindow, diagnosticWriter);
/// <summary>
/// Builds a diagnostic positioned at the given span and hands it to the diagnostic writer.
/// </summary>
/// <param name="span">the span the diagnostic applies to</param>
/// <param name="diagnosticFunc">creates the diagnostic from a builder bound to <paramref name="span"/></param>
private void AddDiagnostic(TextSpan span, DiagnosticBuilder.DiagnosticBuilderDelegate diagnosticFunc)
{
    var builder = DiagnosticBuilder.ForPosition(span);
    this.diagnosticWriter.Write(diagnosticFunc(builder));
}
/// <summary>
/// Builds a diagnostic positioned at the text window's current span and hands it to the diagnostic writer.
/// </summary>
/// <param name="diagnosticFunc">creates the diagnostic from a builder bound to the current span</param>
private void AddDiagnostic(DiagnosticBuilder.DiagnosticBuilderDelegate diagnosticFunc)
{
    var currentSpan = textWindow.GetSpan();
    AddDiagnostic(currentSpan, diagnosticFunc);
}
/// <summary>
/// Lexes the whole text window, appending every token to the internal token list.
/// </summary>
public void Lex()
{
    while (!textWindow.IsAtEnd())
    {
        LexToken();
    }

    // make sure the token list always finishes with an end-of-file token;
    // lexing once more at the end of the text produces it.
    var endsWithEndOfFile = tokens.Count > 0 && tokens[tokens.Count - 1].Type == TokenType.EndOfFile;
    if (!endsWithEndOfFile)
    {
        LexToken();
    }
}
/// <summary>
/// Returns an immutable snapshot of the tokens lexed so far.
/// </summary>
public ImmutableArray<Token> GetTokens()
{
    return tokens.ToImmutableArray();
}
/// <summary>
/// Converts a sequence of string literal tokens into their unescaped values, one per token.
/// Returns null as soon as any token has the wrong type or is malformed.
/// </summary>
/// <param name="stringTokens">the string tokens</param>
public static IEnumerable<string>? TryGetRawStringSegments(IReadOnlyList<Token> stringTokens)
{
    var tokenCount = stringTokens.Count;
    var rawSegments = new string[tokenCount];

    var index = 0;
    while (index < tokenCount)
    {
        if (Lexer.TryGetStringValue(stringTokens[index]) is not { } segment)
        {
            // a single bad piece invalidates the whole string
            return null;
        }

        rawSegments[index] = segment;
        index++;
    }

    return rawSegments;
}
/// <summary>
/// Extracts the value of a multi-line string token: the text between the opening and closing ''' with a single
/// leading '\r' and/or '\n' removed. Returns null if the text is not wrapped in the expected quotes.
/// </summary>
/// <param name="stringToken">the multi-line string token</param>
public static string? TryGetMultilineStringValue(Token stringToken)
{
    // true when the characters at [offset, offset + quote count) are all single quotes
    static bool IsQuoteRun(string text, int offset)
    {
        for (var k = 0; k < MultilineStringTerminatingQuoteCount; k++)
        {
            if (text[offset + k] != '\'')
            {
                return false;
            }
        }

        return true;
    }

    var raw = stringToken.Text;

    // the opening and closing quote runs must not overlap
    var isWrapped = raw.Length >= MultilineStringTerminatingQuoteCount * 2
        && IsQuoteRun(raw, 0)
        && IsQuoteRun(raw, raw.Length - MultilineStringTerminatingQuoteCount);
    if (!isWrapped)
    {
        return null;
    }

    // the first line break right after the opening quotes is not part of the value
    var contentStart = MultilineStringTerminatingQuoteCount;
    if (raw[contentStart] == '\r')
    {
        contentStart++;
    }

    if (raw[contentStart] == '\n')
    {
        contentStart++;
    }

    return raw[contentStart..^MultilineStringTerminatingQuoteCount];
}
/// <summary>
/// Unescapes the text of a string token (complete string or interpolation piece) into its value.
/// Returns null if the token is not a string token or its text is malformed (for example, because of lexer error recovery).
/// </summary>
/// <param name="stringToken">the string token</param>
public static string? TryGetStringValue(Token stringToken)
{
    // each kind of string token is wrapped in a different pair of delimiters
    var (start, end) = stringToken.Type switch
    {
        TokenType.StringComplete => (LanguageConstants.StringDelimiter, LanguageConstants.StringDelimiter),
        TokenType.StringLeftPiece => (LanguageConstants.StringDelimiter, LanguageConstants.StringHoleOpen),
        TokenType.StringMiddlePiece => (LanguageConstants.StringHoleClose, LanguageConstants.StringHoleOpen),
        TokenType.StringRightPiece => (LanguageConstants.StringHoleClose, LanguageConstants.StringDelimiter),
        _ => (null, null),
    };

    if (start is null || end is null)
    {
        return null;
    }

    var raw = stringToken.Text;
    var isDelimited = raw.Length >= start.Length + end.Length
        && raw.StartsWith(start, StringComparison.Ordinal)
        && raw.EndsWith(end, StringComparison.Ordinal);
    if (!isDelimited)
    {
        // any lexer-generated token should not hit this problem as the start & end are already verified
        return null;
    }

    var contents = raw[start.Length..^end.Length];
    var window = new SlidingTextWindow(contents);

    // escapes are longer than the characters they stand for, so the contents length is an upper bound
    var value = new StringBuilder(contents.Length);

    while (!window.IsAtEnd())
    {
        char current = window.Next();
        switch (current)
        {
            case '\'':
                // an unescaped quote cannot appear inside the contents
                return null;

            case '\\':
                // escape sequence begins
                if (window.IsAtEnd())
                {
                    return null;
                }

                char escape = window.Next();
                if (escape != 'u')
                {
                    if (!SingleCharacterEscapes.TryGetValue(escape, out char unescaped))
                    {
                        // invalid escape character
                        return null;
                    }

                    value.Append(unescaped);
                    break;
                }

                // unicode escape: \u{<hex digits>}
                if (window.Next() != '{')
                {
                    return null;
                }

                if (!TryParseCodePoint(ScanHexNumber(window), out uint codePoint))
                {
                    // invalid codepoint
                    return null;
                }

                if (window.Next() != '}')
                {
                    return null;
                }

                value.Append(CodepointToString(codePoint, out char lowSurrogate));
                if (lowSurrogate != SlidingTextWindow.InvalidCharacter)
                {
                    // the code point needed a surrogate pair; the high half was appended above
                    value.Append(lowSurrogate);
                }

                break;

            default:
                // regular string char
                value.Append(current);
                break;
        }
    }

    return value.ToString();
}
/// <summary>
/// Parses hexadecimal text (culture-invariant) as a code point. Succeeds only if the text parses and the value does not exceed 0x10FFFF.
/// </summary>
/// <param name="text">the hex digits</param>
/// <param name="codePoint">the parsed value</param>
private static bool TryParseCodePoint(string text, out uint codePoint)
{
    if (!uint.TryParse(text, NumberStyles.HexNumber, CultureInfo.InvariantCulture, out codePoint))
    {
        return false;
    }

    return codePoint <= 0x10FFFF;
}
/// <summary>
/// Checks whether a string is a valid identifier: it must be non-empty, begin with an identifier start character,
/// and consist only of identifier continuation characters after that.
/// </summary>
/// <param name="value">The value</param>
public static bool IsValidIdentifier(string value)
{
    if (value.Length <= 0 || !IsIdentifierStart(value[0]))
    {
        return false;
    }

    for (var position = 1; position < value.Length; position++)
    {
        if (!IsIdentifierContinuation(value[position]))
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Converts a code point into UTF-16 code units. Code points below 0x10000 are returned directly with
/// <paramref name="lowSurrogate"/> set to <see cref="SlidingTextWindow.InvalidCharacter"/>; larger code points
/// return the high surrogate and write the low surrogate to <paramref name="lowSurrogate"/>.
/// </summary>
private static char CodepointToString(uint codePoint, out char lowSurrogate)
{
    const uint firstSupplementaryCodePoint = 0x00010000;

    if (codePoint >= firstSupplementaryCodePoint)
    {
        Debug.Assert(codePoint > 0x0000FFFF && codePoint <= 0x0010FFFF);

        // split the 20-bit offset into two 10-bit halves
        var offset = codePoint - firstSupplementaryCodePoint;
        lowSurrogate = (char)(offset % 0x0400 + 0xDC00);
        return (char)(offset / 0x0400 + 0xD800);
    }

    lowSurrogate = SlidingTextWindow.InvalidCharacter;
    return (char)codePoint;
}
/// <summary>
/// Lazily scans the trivia that follows a token: runs of whitespace and, when <paramref name="includeComments"/> is set,
/// single-line and multi-line comments. Stops at the first character that starts neither.
/// </summary>
/// <param name="includeComments">whether comments are scanned as trailing trivia</param>
private IEnumerable<SyntaxTrivia> ScanTrailingTrivia(bool includeComments)
{
    while (true)
    {
        var current = textWindow.Peek();
        if (IsWhiteSpace(current))
        {
            yield return ScanWhitespace();
            continue;
        }

        if (!includeComments || current != '/')
        {
            yield break;
        }

        // a slash only starts a comment when followed by '/' or '*'
        switch (textWindow.Peek(1))
        {
            case '/':
                yield return ScanSingleLineComment();
                break;
            case '*':
                yield return ScanMultiLineComment();
                break;
            default:
                yield break;
        }
    }
}
/// <summary>
/// Lazily scans the trivia that precedes a token: whitespace, single-line and multi-line comments, and
/// disable-next-line directives. A directive is only recognized when the previously scanned trivia is not a comment,
/// the '#' is followed by the directive keyword, and only whitespace precedes it on the current line.
/// </summary>
private IEnumerable<SyntaxTrivia> ScanLeadingTrivia()
{
    SyntaxTrivia? previous = null;
    while (true)
    {
        var first = textWindow.Peek();

        SyntaxTrivia scanned;
        if (IsWhiteSpace(first))
        {
            scanned = ScanWhitespace();
        }
        else if (first == '/' && textWindow.Peek(1) == '/')
        {
            scanned = ScanSingleLineComment();
        }
        else if (first == '/' && textWindow.Peek(1) == '*')
        {
            scanned = ScanMultiLineComment();
        }
        else if (
            first == '#' &&
            !(previous is not null && previous.IsComment()) &&
            CheckAdjacentText(LanguageConstants.DisableNextLineDiagnosticsKeyword) &&
            string.IsNullOrWhiteSpace(textWindow.GetTextBetweenLineStartAndCurrentPosition()))
        {
            scanned = ScanDisableNextLineDiagnosticsDirective();
        }
        else
        {
            yield break;
        }

        previous = scanned;
        yield return scanned;
    }
}
/// <summary>
/// Scans a disable-next-line diagnostics directive, starting at its '#' character, and returns it as trivia.
/// Every code found on the rest of the line is captured as a token; if none is found, a
/// MissingDiagnosticCodes diagnostic is reported.
/// </summary>
private SyntaxTrivia ScanDisableNextLineDiagnosticsDirective()
{
textWindow.Reset();
textWindow.Advance(LanguageConstants.DisableNextLineDiagnosticsKeyword.Length + 1); // Length of disable next statement plus #
var span = textWindow.GetSpan();
// start/end track the extent of the whole directive; end grows as codes and separators are consumed
int start = span.Position;
int end = span.GetEndPosition();
// sb accumulates the directive text in step with 'end'
var sb = new StringBuilder();
sb.Append(textWindow.GetText());
// NOTE(review): Reset presumably starts a new window span at the current position, so that GetToken() below
// only sees the characters of the code being scanned - confirm against SlidingTextWindow
textWindow.Reset();
List<Token> codes = new();
while (!textWindow.IsAtEnd())
{
var nextChar = textWindow.Peek();
if (IsNewLine(nextChar))
{
// the directive never extends past the current line
break;
}
else if (IsIdentifierContinuation(nextChar) || nextChar == '-')
{
// nextChar is part of a code; look one character further to see whether it is the code's last character
switch (textWindow.Peek(1))
{
case ' ':
case '\t':
case '\n':
case '\r':
// NOTE(review): char.MaxValue is presumably what Peek returns past the end of the text - confirm
case char.MaxValue:
// the code ends here: consume its last character and capture everything scanned since the last reset
textWindow.Advance();
if (GetToken() is { } token)
{
codes.Add(token);
end += token.Span.Length;
sb.Append(token.Text);
continue;
}
break;
default:
// still in the middle of the code
textWindow.Advance();
break;
}
}
else if (nextChar == ' ' || nextChar == '\t')
{
// separator: it is included in the directive's text and extent, but the reset keeps it out of the next code
textWindow.Advance();
sb.Append(nextChar);
end++;
textWindow.Reset();
}
else
{
// Handle scenario where nextChar is not one of the following: identifier, '-', space, tab
// Eg: '|' in #disable-next-line BCP037|
// nextChar itself is not consumed; whatever was scanned before it becomes the last code
if (GetToken() is { } token)
{
codes.Add(token);
end += token.Span.Length;
sb.Append(token.Text);
}
break;
}
}
if (codes.Count == 0)
{
AddDiagnostic(b => b.MissingDiagnosticCodes());
}
return GetDisableNextLineDiagnosticsSyntaxTrivia(codes, start, end, sb.ToString());
}
/// <summary>
/// Checks whether the characters immediately after the current one (peek offsets 1 through text.Length) spell out <paramref name="text"/>.
/// </summary>
/// <param name="text">the text expected right after the current character</param>
private bool CheckAdjacentText(string text)
{
    for (var offset = 0; offset < text.Length; offset++)
    {
        // offset + 1 skips the character at the current position
        if (textWindow.Peek(offset + 1) != text[offset])
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Creates the trivia for a disable-next-line directive. When the scanned extent runs past the end of the last code,
/// the text window is rewound by the excess and the trivia's span and text are trimmed to end with that code.
/// </summary>
/// <param name="codes">the codes captured for the directive</param>
/// <param name="start">start position of the directive</param>
/// <param name="end">end position of everything scanned for the directive</param>
/// <param name="text">text of everything scanned for the directive</param>
private DisableNextLineDiagnosticsSyntaxTrivia GetDisableNextLineDiagnosticsSyntaxTrivia(List<Token> codes, int start, int end, string text)
{
    var triviaType = SyntaxTriviaType.DisableNextLineDiagnosticsDirective;

    if (codes.Count > 0)
    {
        var lastCodeEnd = codes[^1].Span.GetEndPosition();

        // There could be whitespace following #disable-next-line directive, in which case we need to adjust the span and text.
        // E.g. #disable-next-line BCP226 // test
        var excess = end - lastCodeEnd;
        if (excess > 0)
        {
            textWindow.Rewind(excess);
            return new DisableNextLineDiagnosticsSyntaxTrivia(triviaType, new TextSpan(start, lastCodeEnd - start), text[..^excess], codes);
        }
    }

    return new DisableNextLineDiagnosticsSyntaxTrivia(triviaType, new TextSpan(start, end - start), text, codes);
}
/// <summary>
/// Wraps the text currently held by the text window in a free-form StringComplete token without trivia.
/// Returns null when the window holds no text.
/// </summary>
private Token? GetToken()
{
    var windowText = textWindow.GetText();
    if (windowText.Length <= 0)
    {
        return null;
    }

    return new FreeformToken(TokenType.StringComplete, textWindow.GetSpan(), windowText.ToString(), [], []);
}
/// <summary>
/// Lexes one token together with its leading and trailing trivia and appends it to the token list.
/// Reports a diagnostic if the token is unrecognized.
/// </summary>
private void LexToken()
{
    textWindow.Reset();

    // the trivia scanner is a lazy iterator - materialize it right away so the scan actually runs here
    var leading = ScanLeadingTrivia().ToImmutableArray();

    textWindow.Reset();
    var type = ScanToken();
    var rawText = textWindow.GetText();
    var span = textWindow.GetSpan();

    if (TokenType.Unrecognized == type)
    {
        var unrecognizedText = rawText.ToString();
        if (unrecognizedText != "\"")
        {
            AddDiagnostic(b => b.UnrecognizedToken(unrecognizedText));
        }
        else
        {
            // double quotes get a dedicated diagnostic
            AddDiagnostic(b => b.DoubleQuoteToken(unrecognizedText));
        }
    }

    textWindow.Reset();

    // comments are only attached as trailing trivia when the token type's comment stickiness allows it
    var stickiness = SyntaxFacts.GetCommentStickiness(type);

    // same as above: materialize to force the lazy scan
    var trailing = ScanTrailingTrivia(stickiness >= CommentStickiness.Trailing).ToImmutableArray();

    Token lexed;
    if (SyntaxFacts.HasFreeFromText(type))
    {
        lexed = new FreeformToken(type, span, rawText.ToString(), leading, trailing);
    }
    else
    {
        lexed = new Token(type, span, leading, trailing);
    }

    this.tokens.Add(lexed);
}
/// <summary>
/// Scans a run of spaces and tabs from the current position and returns it as whitespace trivia.
/// </summary>
private SyntaxTrivia ScanWhitespace()
{
    textWindow.Reset();

    while (!textWindow.IsAtEnd() && IsWhiteSpace(textWindow.Peek()))
    {
        textWindow.Advance();
    }

    return new SyntaxTrivia(SyntaxTriviaType.Whitespace, textWindow.GetSpan(), textWindow.GetText().ToString());
}
/// <summary>
/// Scans a single-line comment: the two introducing characters and everything up to, but not including, the next new line character.
/// </summary>
private SyntaxTrivia ScanSingleLineComment()
{
    textWindow.Reset();

    // skip the two characters that introduce the comment
    textWindow.Advance(2);

    // the new line that ends the comment is left in place so it is not part of the comment trivia
    while (!textWindow.IsAtEnd() && !IsNewLine(textWindow.Peek()))
    {
        textWindow.Advance();
    }

    return new SyntaxTrivia(SyntaxTriviaType.SingleLineComment, textWindow.GetSpan(), textWindow.GetText().ToString());
}
/// <summary>
/// Scans a multi-line comment, from its two introducing characters through the closing "*/".
/// If the text ends first, an UnterminatedMultilineComment diagnostic is reported and the trivia covers everything scanned.
/// </summary>
private SyntaxTrivia ScanMultiLineComment()
{
    textWindow.Reset();

    // skip the two characters that introduce the comment
    textWindow.Advance(2);

    var closed = false;
    while (!closed)
    {
        if (textWindow.IsAtEnd())
        {
            AddDiagnostic(b => b.UnterminatedMultilineComment());
            break;
        }

        var current = textWindow.Peek();
        textWindow.Advance();

        if (current == '*' && textWindow.Peek() == '/')
        {
            if (textWindow.IsAtEnd())
            {
                AddDiagnostic(b => b.UnterminatedMultilineComment());
                break;
            }

            // consume the slash that closes the comment
            var closing = textWindow.Peek();
            textWindow.Advance();
            closed = closing == '/';
        }
    }

    return new SyntaxTrivia(SyntaxTriviaType.MultiLineComment, textWindow.GetSpan(), textWindow.GetText().ToString());
}
/// <summary>
/// Advances past every consecutive new line character ('\n' or '\r') at the current position.
/// </summary>
private void ScanNewLine()
{
    while (!textWindow.IsAtEnd() && IsNewLine(textWindow.Peek()))
    {
        textWindow.Advance();
    }
}
/// <summary>
/// Scans the remainder of a multi-line string whose opening "'''" has already been consumed.
/// Always returns <see cref="TokenType.MultilineString"/>; reports UnterminatedMultilineString if the text ends
/// before the closing quotes are found.
/// </summary>
private TokenType ScanMultilineString()
{
    var quoteRun = 0;

    while (!textWindow.IsAtEnd())
    {
        var current = textWindow.Peek();
        textWindow.Advance();

        if (current != '\'')
        {
            quoteRun = 0;
            continue;
        }

        quoteRun++;
        if (quoteRun != MultilineStringTerminatingQuoteCount)
        {
            continue;
        }

        // the terminating "'''" has been scanned. any further quotes are swallowed too:
        // the verbatim string's own last character may be "'", and that is accepted.
        while (textWindow.Peek() == '\'')
        {
            textWindow.Advance();
        }

        return TokenType.MultilineString;
    }

    // the end of the file was reached without terminating quotes.
    // a string token is still returned so that highlighting shows up.
    AddDiagnostic(b => b.UnterminatedMultilineString());
    return TokenType.MultilineString;
}
/// <summary>
/// Scans one segment of a (possibly interpolated) string, starting right after the character that opened the segment,
/// and returns the kind of string token the segment forms. Escape sequences are validated along the way and a
/// diagnostic is reported for each invalid one; scanning then carries on within the same segment.
/// </summary>
/// <param name="isAtStartOfString">
/// true when the segment begins the string (the result is StringComplete or StringLeftPiece);
/// false when it follows an interpolation hole (the result is StringRightPiece or StringMiddlePiece)
/// </param>
private TokenType ScanStringSegment(bool isAtStartOfString)
{
// to handle interpolation, strings are broken down into multiple segments, to detect the portions of string between the 'holes'.
// 'complete' string: a string with no holes (no interpolation), e.g. "'hello'"
// string 'left piece': the portion of an interpolated string up to the first hole, e.g. "'hello$"
// string 'middle piece': the portion of an interpolated string between two holes, e.g. "}hello${"
// string 'right piece': the portion of an interpolated string after the last hole, e.g. "}hello'"
while (true)
{
if (textWindow.IsAtEnd())
{
AddDiagnostic(b => b.UnterminatedString());
return isAtStartOfString ? TokenType.StringComplete : TokenType.StringRightPiece;
}
var nextChar = textWindow.Peek();
if (IsNewLine(nextChar))
{
// do not consume the new line character
AddDiagnostic(b => b.UnterminatedStringWithNewLine());
return isAtStartOfString ? TokenType.StringComplete : TokenType.StringRightPiece;
}
// remember where this character starts: if it begins a unicode escape, diagnostics for the escape are reported from here
int escapeBeginPosition = textWindow.GetAbsolutePosition();
textWindow.Advance();
if (nextChar == '\'')
{
// closing quote - the string ends here
return isAtStartOfString ? TokenType.StringComplete : TokenType.StringRightPiece;
}
if (nextChar == '$' && !textWindow.IsAtEnd() && textWindow.Peek() == '{')
{
// "${" opens an interpolation hole - the segment ends here
textWindow.Advance();
return isAtStartOfString ? TokenType.StringLeftPiece : TokenType.StringMiddlePiece;
}
if (nextChar != '\\')
{
continue;
}
// an escape sequence was started with \
if (textWindow.IsAtEnd())
{
// the escape was unterminated
AddDiagnostic(b => b.UnterminatedStringEscapeSequenceAtEof());
return isAtStartOfString ? TokenType.StringComplete : TokenType.StringRightPiece;
}
// the escape sequence has a char after the \
// consume it
nextChar = textWindow.Peek();
textWindow.Advance();
if (nextChar == 'u')
{
// unicode escape
if (textWindow.IsAtEnd())
{
// string was prematurely terminated
// reusing the first check in the loop body to produce the diagnostic
continue;
}
nextChar = textWindow.Peek();
if (nextChar != '{')
{
// \u must be followed by {, but it's not
AddDiagnostic(textWindow.GetSpanFromPosition(escapeBeginPosition), b => b.InvalidUnicodeEscape());
continue;
}
textWindow.Advance();
if (textWindow.IsAtEnd())
{
// string was prematurely terminated
// reusing the first check in the loop body to produce the diagnostic
continue;
}
string codePointText = ScanHexNumber(textWindow);
if (textWindow.IsAtEnd())
{
// string was prematurely terminated
// reusing the first check in the loop body to produce the diagnostic
continue;
}
if (string.IsNullOrEmpty(codePointText))
{
// we didn't get any hex digits
AddDiagnostic(textWindow.GetSpanFromPosition(escapeBeginPosition), b => b.InvalidUnicodeEscape());
continue;
}
nextChar = textWindow.Peek();
if (nextChar != '}')
{
// hex digits must be followed by }, but it's not
AddDiagnostic(textWindow.GetSpanFromPosition(escapeBeginPosition), b => b.InvalidUnicodeEscape());
continue;
}
textWindow.Advance();
// only validity matters while lexing, so the parsed value is discarded
if (!TryParseCodePoint(codePointText, out _))
{
// code point is not actually valid
AddDiagnostic(textWindow.GetSpanFromPosition(escapeBeginPosition), b => b.InvalidUnicodeEscape());
continue;
}
}
else
{
// not a unicode escape
if (SingleCharacterEscapes.ContainsKey(nextChar) == false)
{
// the span of the error is the incorrect escape sequence
AddDiagnostic(textWindow.GetLookbehindSpan(2), b => b.UnterminatedStringEscapeSequenceUnrecognized(CharacterEscapeSequences));
}
}
}
}
/// <summary>
/// Consumes consecutive hexadecimal digits from the window and returns them as a string (empty if there are none).
/// </summary>
/// <param name="window">the window to read from</param>
private static string ScanHexNumber(SlidingTextWindow window)
{
    var digits = new StringBuilder();

    while (!window.IsAtEnd())
    {
        char candidate = window.Peek();
        if (!IsHexDigit(candidate))
        {
            break;
        }

        digits.Append(candidate);
        window.Advance();
    }

    return digits.ToString();
}
/// <summary>
/// Advances past every consecutive decimal digit at the current position.
/// </summary>
private void ScanNumber()
{
    while (!textWindow.IsAtEnd() && IsDigit(textWindow.Peek()))
    {
        textWindow.Advance();
    }
}
/// <summary>
/// Classifies the text currently held by the text window: the keyword's token type if it is a non-contextual keyword,
/// otherwise <see cref="TokenType.Identifier"/>. Reports a diagnostic for identifiers longer than the allowed maximum.
/// </summary>
private TokenType GetIdentifierTokenType()
{
    var name = textWindow.GetText().ToString();

    if (!LanguageConstants.NonContextualKeywords.TryGetValue(name, out var keywordType))
    {
        if (name.Length > LanguageConstants.MaxIdentifierLength)
        {
            this.AddDiagnostic(b => b.IdentifierNameExceedsLimit());
        }

        return TokenType.Identifier;
    }

    return keywordType;
}
/// <summary>
/// Advances past the remaining identifier characters and returns the token type of the scanned text.
/// </summary>
private TokenType ScanIdentifier()
{
    while (!textWindow.IsAtEnd() && IsIdentifierContinuation(textWindow.Peek()))
    {
        textWindow.Advance();
    }

    return GetIdentifierTokenType();
}
/// <summary>
/// Scans a single token starting at the current position and returns its type. The token's text is whatever the
/// text window has advanced over when this method returns.
/// </summary>
/// <remarks>
/// String interpolation is tracked with <c>templateStack</c>: starting an interpolated string pushes
/// <see cref="TokenType.StringLeftPiece"/>, and every '{' seen while the stack is non-empty pushes
/// <see cref="TokenType.LeftBrace"/>. A '}' pops its matching <see cref="TokenType.LeftBrace"/> when it closes an
/// object; otherwise it closes the interpolation hole and string scanning resumes.
/// </remarks>
private TokenType ScanToken()
{
    if (textWindow.IsAtEnd())
    {
        return TokenType.EndOfFile;
    }

    var nextChar = textWindow.Peek();
    textWindow.Advance();
    switch (nextChar)
    {
        case '{':
            if (templateStack.Any())
            {
                // if we're inside a string interpolation hole, and we find an object open brace,
                // push it to the stack, so that we can match it up against an object close brace.
                // this allows us to determine whether we're terminating an object or closing an interpolation hole.
                templateStack.Push(TokenType.LeftBrace);
            }
            return TokenType.LeftBrace;
        case '}':
            if (templateStack.Any())
            {
                if (templateStack.Peek() == TokenType.LeftBrace)
                {
                    // this brace terminates an object that was opened inside an interpolation hole.
                    // remove the matching entry, otherwise it would stay on top of the stack and the
                    // brace that closes the hole itself would also be lexed as an object close brace.
                    templateStack.Pop();
                    return TokenType.RightBrace;
                }

                // this brace closes the interpolation hole - carry on scanning the string
                var stringToken = ScanStringSegment(false);
                if (stringToken == TokenType.StringRightPiece)
                {
                    // the string is finished, so the interpolation is no longer being tracked
                    templateStack.Pop();
                }
                return stringToken;
            }
            return TokenType.RightBrace;
        case '(':
            return TokenType.LeftParen;
        case ')':
            return TokenType.RightParen;
        case '[':
            return TokenType.LeftSquare;
        case ']':
            return TokenType.RightSquare;
        case '@':
            return TokenType.At;
        case ',':
            return TokenType.Comma;
        case '.':
            // "..." is a single token; anything shorter is a plain dot
            switch (textWindow.Peek(), textWindow.Peek(1))
            {
                case ('.', '.'):
                    textWindow.Advance(2);
                    return TokenType.Ellipsis;
            }
            return TokenType.Dot;
        case '?':
            if (!textWindow.IsAtEnd())
            {
                switch (textWindow.Peek())
                {
                    case '?':
                        textWindow.Advance();
                        return TokenType.DoubleQuestion;
                }
            }
            return TokenType.Question;
        case ':':
            if (!textWindow.IsAtEnd())
            {
                switch (textWindow.Peek())
                {
                    case ':':
                        textWindow.Advance();
                        return TokenType.DoubleColon;
                }
            }
            return TokenType.Colon;
        case ';':
            return TokenType.Semicolon;
        case '+':
            return TokenType.Plus;
        case '-':
            return TokenType.Minus;
        case '%':
            return TokenType.Modulo;
        case '*':
            return TokenType.Asterisk;
        case '/':
            return TokenType.Slash;
        case '^':
            return TokenType.Hat;
        case '!':
            if (!textWindow.IsAtEnd())
            {
                switch (textWindow.Peek())
                {
                    case '=':
                        textWindow.Advance();
                        return TokenType.NotEquals;
                    case '~':
                        textWindow.Advance();
                        return TokenType.NotEqualsInsensitive;
                }
            }
            return TokenType.Exclamation;
        case '<':
            if (!textWindow.IsAtEnd())
            {
                switch (textWindow.Peek())
                {
                    case '=':
                        textWindow.Advance();
                        return TokenType.LessThanOrEqual;
                }
            }
            return TokenType.LeftChevron;
        case '>':
            if (!textWindow.IsAtEnd())
            {
                switch (textWindow.Peek())
                {
                    case '=':
                        textWindow.Advance();
                        return TokenType.GreaterThanOrEqual;
                }
            }
            return TokenType.RightChevron;
        case '=':
            if (!textWindow.IsAtEnd())
            {
                switch (textWindow.Peek())
                {
                    case '=':
                        textWindow.Advance();
                        return TokenType.Equals;
                    case '~':
                        textWindow.Advance();
                        return TokenType.EqualsInsensitive;
                    case '>':
                        textWindow.Advance();
                        return TokenType.Arrow;
                }
            }
            return TokenType.Assignment;
        case '&':
            if (!textWindow.IsAtEnd())
            {
                switch (textWindow.Peek())
                {
                    case '&':
                        textWindow.Advance();
                        return TokenType.LogicalAnd;
                }
            }
            // a lone '&' is not a token
            return TokenType.Unrecognized;
        case '|':
            if (!textWindow.IsAtEnd())
            {
                switch (textWindow.Peek())
                {
                    case '|':
                        textWindow.Advance();
                        return TokenType.LogicalOr;
                }
            }
            return TokenType.Pipe;
        case '\'':
            // "'''" means we're starting a multiline string.
            if (textWindow.Peek(0) == '\'' && textWindow.Peek(1) == '\'')
            {
                textWindow.Advance(2);
                return ScanMultilineString();
            }
            var token = ScanStringSegment(true);
            if (token == TokenType.StringLeftPiece)
            {
                // if we're beginning a string interpolation statement, we need to keep track of it
                templateStack.Push(token);
            }
            return token;
        case '\n':
        case '\r':
            if (templateStack.Any())
            {
                // need to re-check the newline token on next pass
                textWindow.Rewind();
                // do not consume the new line character
                // TODO: figure out a way to avoid returning this multiple times for nested interpolation
                AddDiagnostic(b => b.UnterminatedStringWithNewLine());
                templateStack.Clear();
                return TokenType.StringRightPiece;
            }
            this.ScanNewLine();
            return TokenType.NewLine;
        default:
            if (IsDigit(nextChar))
            {
                this.ScanNumber();
                return TokenType.Integer;
            }
            if (IsIdentifierStart(nextChar))
            {
                return this.ScanIdentifier();
            }
            return TokenType.Unrecognized;
    }
}
// identifiers start with an ASCII letter or an underscore.
// plain range checks are used because obtaining the unicode category is expensive and should be avoided in the main cases
private static bool IsIdentifierStart(char c) => c is (>= 'a' and <= 'z') or (>= 'A' and <= 'Z') or '_';
// identifiers continue with anything that may start one, or a decimal digit.
// obtaining the unicode category is expensive and should be avoided in the main cases
private static bool IsIdentifierContinuation(char c) => IsDigit(c) || IsIdentifierStart(c);
// ASCII decimal digits only
private static bool IsDigit(char c) => c is >= '0' and <= '9';
// ASCII hexadecimal digits in either case
private static bool IsHexDigit(char c) => c is (>= '0' and <= '9') or (>= 'a' and <= 'f') or (>= 'A' and <= 'F');
// only space and tab count as whitespace; new lines are handled separately
public static bool IsWhiteSpace(char c) => c is ' ' or '\t';
private static bool IsNewLine(char c) => c is '\n' or '\r';
}
}