in src/vs/editor/standalone/common/monarch/monarchLexer.ts [558:858]
/**
 * Tokenizes one line using this lexer's Monarch rules, starting from `lineState`.
 *
 * The rules of the current stack state are matched against the remainder of the line;
 * the matched action is then applied (token emission, state stack changes, entering or
 * leaving an embedded language) and the position is advanced. Rules are evaluated at
 * least once, even when the line is empty.
 *
 * @param lineWithoutLF The line text without a trailing line feed.
 * @param hasEOL Whether the line ends with an EOL. When `true` and the lexer has `includeLF`
 * set, a `'\n'` is appended to the text that the rules are matched against.
 * @param lineState The state (stack and embedded language data) at the start of the line.
 * @param offsetDelta Added to every token start offset passed to `tokensCollector.emit`.
 * @param tokensCollector Receives the `enterLanguage` call and the emitted tokens.
 * @returns The state at the end of the line. When an embedded language is entered and there
 * is still content on the line, this is the result of `_nestedTokenize` on that content.
 * @throws An error built with `monarchCommon.createError` when the lexer definition is
 * inconsistent (undefined state, popping an empty stack, exceeding `maxStack`, nested or
 * mismatched groups, illegal embedded language transitions, no well-defined action) or when
 * a rule makes no progress.
 */
private _myTokenize(lineWithoutLF: string, hasEOL: boolean, lineState: MonarchLineState, offsetDelta: number, tokensCollector: IMonarchTokensCollector): MonarchLineState {
	tokensCollector.enterLanguage(this._languageId);

	const lineWithoutLFLength = lineWithoutLF.length;
	const line = (hasEOL && this._lexer.includeLF ? lineWithoutLF + '\n' : lineWithoutLF);
	const lineLength = line.length;

	let embeddedLanguageData = lineState.embeddedLanguageData;
	let stack = lineState.stack;
	let pos = 0;

	// regular expression group matching
	// these never need cloning or equality since they are only used within a line match
	interface GroupMatching {
		matches: string[];
		rule: monarchCommon.IRule | null;
		groups: { action: monarchCommon.FuzzyAction; matched: string; }[];
	}
	let groupMatching: GroupMatching | null = null;

	// Computes the state to return once an embedded language is being entered.
	// Defined once, outside the loop: it only captures variables declared above and
	// reads the current values of `pos` and `stack` at the time it is called.
	const computeNewStateForEmbeddedLanguage = (enteringEmbeddedLanguage: string) => {
		// support language names, mime types, and language ids
		const languageId = (
			this._languageService.getLanguageIdByLanguageName(enteringEmbeddedLanguage)
			|| this._languageService.getLanguageIdByMimeType(enteringEmbeddedLanguage)
			|| enteringEmbeddedLanguage
		);

		const embeddedLanguageData = this._getNestedEmbeddedLanguageData(languageId);

		if (pos < lineLength) {
			// there is content from the embedded language on this line
			const restOfLine = lineWithoutLF.substr(pos);
			return this._nestedTokenize(restOfLine, hasEOL, MonarchLineStateFactory.create(stack, embeddedLanguageData), offsetDelta + pos, tokensCollector);
		} else {
			return MonarchLineStateFactory.create(stack, embeddedLanguageData);
		}
	};

	// See https://github.com/microsoft/monaco-editor/issues/1235
	// Evaluate rules at least once for an empty line
	let forceEvaluation = true;

	while (forceEvaluation || pos < lineLength) {

		// snapshot used by the progress check further down
		const pos0 = pos;
		const stackLen0 = stack.depth;
		const groupLen0 = groupMatching ? groupMatching.groups.length : 0;
		const state = stack.state;

		let matches: string[] | null = null;
		let matched: string | null = null;
		let action: monarchCommon.FuzzyAction | monarchCommon.FuzzyAction[] | null = null;
		let rule: monarchCommon.IRule | null = null;

		let enteringEmbeddedLanguage: string | null = null;

		// check if we need to process group matches first
		if (groupMatching) {
			matches = groupMatching.matches;
			const groupEntry = groupMatching.groups.shift()!;
			matched = groupEntry.matched;
			action = groupEntry.action;
			rule = groupMatching.rule;

			// cleanup if necessary
			if (groupMatching.groups.length === 0) {
				groupMatching = null;
			}
		} else {
			// otherwise we match on the token stream

			if (!forceEvaluation && pos >= lineLength) {
				// nothing to do
				break;
			}

			forceEvaluation = false;

			// get the rules for this state
			let rules: monarchCommon.IRule[] | null = this._lexer.tokenizer[state];
			if (!rules) {
				rules = monarchCommon.findRules(this._lexer, state); // do parent matching
				if (!rules) {
					throw monarchCommon.createError(this._lexer, 'tokenizer state is not defined: ' + state);
				}
			}

			// try each rule until we match
			const restOfLine = line.substr(pos);
			for (const candidateRule of rules) {
				if (pos === 0 || !candidateRule.matchOnlyAtLineStart) {
					matches = restOfLine.match(candidateRule.regex);
					if (matches) {
						matched = matches[0];
						action = candidateRule.action;
						// remember the matched rule so that error messages (and the
						// pending group entries) can name it
						rule = candidateRule;
						break;
					}
				}
			}
		}

		// We matched 'rule' with 'matches' and 'action'
		if (!matches) {
			matches = [''];
			matched = '';
		}

		if (!action) {
			// bad: we didn't match anything, and there is no action to take
			// we need to advance the stream or we get progress trouble
			if (pos < lineLength) {
				matches = [line.charAt(pos)];
				matched = matches[0];
			}
			action = this._lexer.defaultToken;
		}

		if (matched === null) {
			// should never happen, needed for strict null checking
			break;
		}

		// advance stream
		pos += matched.length;

		// maybe call action function (used for 'cases')
		while (monarchCommon.isFuzzyAction(action) && monarchCommon.isIAction(action) && action.test) {
			action = action.test(matched, matches, state, pos === lineLength);
		}

		let result: monarchCommon.FuzzyAction | monarchCommon.FuzzyAction[] | null = null;
		// set the result: either a string or an array of actions
		if (typeof action === 'string' || Array.isArray(action)) {
			result = action;
		} else if (action.group) {
			result = action.group;
		} else if (action.token !== null && action.token !== undefined) {

			// do $n replacements?
			if (action.tokenSubst) {
				result = monarchCommon.substituteMatches(this._lexer, action.token, matched, matches, state);
			} else {
				result = action.token;
			}

			// enter embedded language?
			if (action.nextEmbedded) {
				if (action.nextEmbedded === '@pop') {
					if (!embeddedLanguageData) {
						throw monarchCommon.createError(this._lexer, 'cannot pop embedded language if not inside one');
					}
					embeddedLanguageData = null;
				} else if (embeddedLanguageData) {
					throw monarchCommon.createError(this._lexer, 'cannot enter embedded language from within an embedded language');
				} else {
					enteringEmbeddedLanguage = monarchCommon.substituteMatches(this._lexer, action.nextEmbedded, matched, matches, state);
				}
			}

			// state transformations
			if (action.goBack) { // back up the stream..
				pos = Math.max(0, pos - action.goBack);
			}

			if (action.switchTo && typeof action.switchTo === 'string') {
				let nextState = monarchCommon.substituteMatches(this._lexer, action.switchTo, matched, matches, state); // switch state without a push...
				if (nextState[0] === '@') {
					nextState = nextState.substr(1); // peel off starting '@'
				}
				if (!monarchCommon.findRules(this._lexer, nextState)) {
					throw monarchCommon.createError(this._lexer, 'trying to switch to a state \'' + nextState + '\' that is undefined in rule: ' + this._safeRuleName(rule));
				} else {
					stack = stack.switchTo(nextState);
				}
			} else if (action.transform && typeof action.transform === 'function') {
				throw monarchCommon.createError(this._lexer, 'action.transform not supported');
			} else if (action.next) {
				if (action.next === '@push') {
					if (stack.depth >= this._lexer.maxStack) {
						throw monarchCommon.createError(this._lexer, 'maximum tokenizer stack size reached: [' +
							stack.state + ',' + stack.parent!.state + ',...]');
					} else {
						stack = stack.push(state);
					}
				} else if (action.next === '@pop') {
					if (stack.depth <= 1) {
						throw monarchCommon.createError(this._lexer, 'trying to pop an empty stack in rule: ' + this._safeRuleName(rule));
					} else {
						stack = stack.pop()!;
					}
				} else if (action.next === '@popall') {
					stack = stack.popall();
				} else {
					let nextState = monarchCommon.substituteMatches(this._lexer, action.next, matched, matches, state);
					if (nextState[0] === '@') {
						nextState = nextState.substr(1); // peel off starting '@'
					}

					if (!monarchCommon.findRules(this._lexer, nextState)) {
						throw monarchCommon.createError(this._lexer, 'trying to set a next state \'' + nextState + '\' that is undefined in rule: ' + this._safeRuleName(rule));
					} else {
						stack = stack.push(nextState);
					}
				}
			}

			if (action.log && typeof (action.log) === 'string') {
				monarchCommon.log(this._lexer, this._lexer.languageId + ': ' + monarchCommon.substituteMatches(this._lexer, action.log, matched, matches, state));
			}
		}

		// check result
		if (result === null) {
			throw monarchCommon.createError(this._lexer, 'lexer rule has no well-defined action in rule: ' + this._safeRuleName(rule));
		}

		// is the result a group match?
		if (Array.isArray(result)) {
			if (groupMatching && groupMatching.groups.length > 0) {
				throw monarchCommon.createError(this._lexer, 'groups cannot be nested: ' + this._safeRuleName(rule));
			}
			if (matches.length !== result.length + 1) {
				throw monarchCommon.createError(this._lexer, 'matched number of groups does not match the number of actions in rule: ' + this._safeRuleName(rule));
			}
			let totalLen = 0;
			for (let i = 1; i < matches.length; i++) {
				totalLen += matches[i].length;
			}
			if (totalLen !== matched.length) {
				throw monarchCommon.createError(this._lexer, 'with groups, all characters should be matched in consecutive groups in rule: ' + this._safeRuleName(rule));
			}

			groupMatching = {
				rule: rule,
				matches: matches,
				groups: []
			};
			for (let i = 0; i < result.length; i++) {
				groupMatching.groups[i] = {
					action: result[i],
					matched: matches[i + 1]
				};
			}

			// undo the advance: the following iterations consume the groups one by one
			pos -= matched.length;
			// call recursively to initiate first result match
			continue;
		} else {
			// regular result

			// check for '@rematch'
			if (result === '@rematch') {
				pos -= matched.length;
				matched = '';  // better set the next state too..
				matches = null;
				result = '';

				// Even though `@rematch` was specified, if `nextEmbedded` also specified,
				// a state transition should occur.
				if (enteringEmbeddedLanguage !== null) {
					return computeNewStateForEmbeddedLanguage(enteringEmbeddedLanguage);
				}
			}

			// check progress
			if (matched.length === 0) {
				// an empty match is only acceptable when something else changed
				// (empty line, stack depth, state, or number of pending groups)
				if (lineLength === 0 || stackLen0 !== stack.depth || state !== stack.state || (!groupMatching ? 0 : groupMatching.groups.length) !== groupLen0) {
					continue;
				} else {
					throw monarchCommon.createError(this._lexer, 'no progress in tokenizer in rule: ' + this._safeRuleName(rule));
				}
			}

			// return the result (and check for brace matching)
			// todo: for efficiency we could pre-sanitize tokenPostfix and substitutions
			let tokenType: string | null = null;
			if (monarchCommon.isString(result) && result.indexOf('@brackets') === 0) {
				const rest = result.substr('@brackets'.length);
				const bracket = findBracket(this._lexer, matched);
				if (!bracket) {
					throw monarchCommon.createError(this._lexer, '@brackets token returned but no bracket defined as: ' + matched);
				}
				tokenType = monarchCommon.sanitize(bracket.token + rest);
			} else {
				const token = (result === '' ? '' : result + this._lexer.tokenPostfix);
				tokenType = monarchCommon.sanitize(token);
			}

			// do not emit a token that starts in the appended line feed
			if (pos0 < lineWithoutLFLength) {
				tokensCollector.emit(pos0 + offsetDelta, tokenType);
			}
		}

		if (enteringEmbeddedLanguage !== null) {
			return computeNewStateForEmbeddedLanguage(enteringEmbeddedLanguage);
		}
	}

	return MonarchLineStateFactory.create(stack, embeddedLanguageData);
}