in src/powerquery-parser/lexer/lexer.ts [512:665]
function tokenize(
    maybeCancellationToken: ICancellationToken | undefined,
    locale: string,
    line: TLine,
    lineNumber: number,
): TLine {
    maybeCancellationToken?.throwIfCancelled();

    switch (line.kind) {
        // Cannot tokenize a line that ended with an error;
        // nothing has changed since the last tokenize attempt.
        // Update the line's text before trying again.
        case LineKind.Error:
            return line;

        case LineKind.Touched:
            // The line was already fully lexed once.
            // Without any text changes it should report an end-of-stream error
            // to help diagnose why a retokenize was requested.
            return {
                ...line,
                kind: LineKind.Error,
                error: new LexError.LexError(new LexError.EndOfStreamError(locale)),
            };

        // Cannot tokenize a line that previously ended with an error.
        // Update the line's text before trying again.
        case LineKind.TouchedWithError:
            return {
                kind: LineKind.Error,
                text: line.text,
                lineTerminator: line.lineTerminator,
                lineModeStart: line.lineModeStart,
                lineModeEnd: line.lineModeEnd,
                tokens: line.tokens,
                error: new LexError.LexError(new LexError.BadStateError(locale, line.error)),
            };

        case LineKind.Untouched:
            break;

        default:
            throw Assert.isNever(line);
    }
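
    // Only an Untouched line makes it past the switch.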
    const untouchedLine: UntouchedLine = line;
    const text: string = untouchedLine.text;
    const textLength: number = text.length;

    // If there's nothing to tokenize, set lineModeEnd to lineModeStart.
    if (textLength === 0) {
        return {
            kind: LineKind.Touched,
            text: line.text,
            lineTerminator: line.lineTerminator,
            lineModeStart: line.lineModeStart,
            lineModeEnd: line.lineModeStart,
            tokens: [],
        };
    }

    let lineMode: LineMode = line.lineModeStart;
    let currentPosition: number = 0;
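
    // Leading whitespace is only skipped in Default mode; in Comment, Text,
    // and QuotedIdentifier modes the whitespace is part of the content still being read.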
    if (lineMode === LineMode.Default) {
        currentPosition = drainWhitespace(text, currentPosition);
    }

    const newTokens: Token.LineToken[] = [];
    let continueLexing: boolean = currentPosition !== text.length;
    let maybeError: LexError.TLexError | undefined;

    // Until either the end of the line is reached or an error is encountered:
    //  * Lex according to lineMode, starting from currentPosition.
    //  * Update currentPosition and lineMode.
    //  * Drain whitespace.
    while (continueLexing) {
        maybeCancellationToken?.throwIfCancelled();

        try {
            let readOutcome: LineModeAlteringRead;

            switch (lineMode) {
                case LineMode.Comment:
                    readOutcome = tokenizeMultilineCommentContentOrEnd(line, currentPosition);
                    break;

                case LineMode.Default:
                    readOutcome = tokenizeDefault(locale, line, lineNumber, currentPosition);
                    break;

                case LineMode.QuotedIdentifier:
                    readOutcome = tokenizeQuotedIdentifierContentOrEnd(line, currentPosition);
                    break;

                case LineMode.Text:
                    readOutcome = tokenizeTextLiteralContentOrEnd(line, currentPosition);
                    break;

                default:
                    throw Assert.isNever(lineMode);
            }

            lineMode = readOutcome.lineMode;
            const token: Token.LineToken = readOutcome.token;
            newTokens.push(token);
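
            // Advance past the token just read; trailing whitespace is only
            // consumed while in Default mode.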
            if (lineMode === LineMode.Default) {
                currentPosition = drainWhitespace(text, token.positionEnd);
            } else {
                currentPosition = token.positionEnd;
            }

            if (currentPosition === textLength) {
                continueLexing = false;
            }
        } catch (e) {
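            // Normalize the thrown value: inner lexer errors are wrapped in a
            // LexError, anything else must be an Error and becomes a CommonError.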
            let error: LexError.TLexError;

            if (LexError.isTInnerLexError(e)) {
                error = new LexError.LexError(e);
            } else {
                Assert.isInstanceofError(e);
                error = CommonError.ensureCommonError(locale, e);
            }

            continueLexing = false;
            maybeError = error;
        }
    }
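
    // Package the outcome as a PartialResult:
    //  * Ok when no error was encountered,
    //  * Mixed when an error was encountered after some tokens were read,
    //  * Error when an error was encountered before any token was read.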
    let partialTokenizeResult: PartialResult<TokenizeChanges, TokenizeChanges, LexError.TLexError>;

    if (maybeError) {
        if (newTokens.length) {
            partialTokenizeResult = PartialResultUtils.createMixed(
                {
                    tokens: newTokens,
                    lineModeEnd: lineMode,
                },
                maybeError,
            );
        } else {
            partialTokenizeResult = PartialResultUtils.createError(maybeError);
        }
    } else {
        partialTokenizeResult = PartialResultUtils.createOk({
            tokens: newTokens,
            lineModeEnd: lineMode,
        });
    }

    return updateLineState(line, partialTokenizeResult);
}