in src/parser/htmlScanner.ts [202:419]
function internalScan(): TokenType {
const offset = stream.pos();
if (stream.eos()) {
return finishToken(offset, TokenType.EOS);
}
let errorMessage;
switch (state) {
case ScannerState.WithinComment:
if (stream.advanceIfChars([_MIN, _MIN, _RAN])) { // -->
state = ScannerState.WithinContent;
return finishToken(offset, TokenType.EndCommentTag);
}
stream.advanceUntilChars([_MIN, _MIN, _RAN]); // -->
return finishToken(offset, TokenType.Comment);
case ScannerState.WithinDoctype:
if (stream.advanceIfChar(_RAN)) {
state = ScannerState.WithinContent;
return finishToken(offset, TokenType.EndDoctypeTag);
}
stream.advanceUntilChar(_RAN); // >
return finishToken(offset, TokenType.Doctype);
case ScannerState.WithinContent:
if (stream.advanceIfChar(_LAN)) { // <
if (!stream.eos() && stream.peekChar() === _BNG) { // !
if (stream.advanceIfChars([_BNG, _MIN, _MIN])) { // <!--
state = ScannerState.WithinComment;
return finishToken(offset, TokenType.StartCommentTag);
}
if (stream.advanceIfRegExp(/^!doctype/i)) {
state = ScannerState.WithinDoctype;
return finishToken(offset, TokenType.StartDoctypeTag);
}
}
if (stream.advanceIfChar(_FSL)) { // /
state = ScannerState.AfterOpeningEndTag;
return finishToken(offset, TokenType.EndTagOpen);
}
state = ScannerState.AfterOpeningStartTag;
return finishToken(offset, TokenType.StartTagOpen);
}
stream.advanceUntilChar(_LAN);
return finishToken(offset, TokenType.Content);
case ScannerState.AfterOpeningEndTag:
const tagName = nextElementName();
if (tagName.length > 0) {
state = ScannerState.WithinEndTag;
return finishToken(offset, TokenType.EndTag);
}
if (stream.skipWhitespace()) { // white space is not valid here
return finishToken(offset, TokenType.Whitespace, localize('error.unexpectedWhitespace', 'Tag name must directly follow the open bracket.'));
}
state = ScannerState.WithinEndTag;
stream.advanceUntilChar(_RAN);
if (offset < stream.pos()) {
return finishToken(offset, TokenType.Unknown, localize('error.endTagNameExpected', 'End tag name expected.'));
}
return internalScan();
case ScannerState.WithinEndTag:
if (stream.skipWhitespace()) { // white space is valid here
return finishToken(offset, TokenType.Whitespace);
}
if (stream.advanceIfChar(_RAN)) { // >
state = ScannerState.WithinContent;
return finishToken(offset, TokenType.EndTagClose);
}
if (emitPseudoCloseTags && stream.peekChar() === _LAN) { // <
state = ScannerState.WithinContent;
return finishToken(offset, TokenType.EndTagClose, localize('error.closingBracketMissing', 'Closing bracket missing.'));
}
errorMessage = localize('error.closingBracketExpected', 'Closing bracket expected.');
break;
case ScannerState.AfterOpeningStartTag:
lastTag = nextElementName();
lastTypeValue = void 0;
lastAttributeName = void 0;
if (lastTag.length > 0) {
hasSpaceAfterTag = false;
state = ScannerState.WithinTag;
return finishToken(offset, TokenType.StartTag);
}
if (stream.skipWhitespace()) { // white space is not valid here
return finishToken(offset, TokenType.Whitespace, localize('error.unexpectedWhitespace', 'Tag name must directly follow the open bracket.'));
}
state = ScannerState.WithinTag;
stream.advanceUntilChar(_RAN);
if (offset < stream.pos()) {
return finishToken(offset, TokenType.Unknown, localize('error.startTagNameExpected', 'Start tag name expected.'));
}
return internalScan();
case ScannerState.WithinTag:
if (stream.skipWhitespace()) {
hasSpaceAfterTag = true; // remember that we have seen a whitespace
return finishToken(offset, TokenType.Whitespace);
}
if (hasSpaceAfterTag) {
lastAttributeName = nextAttributeName();
if (lastAttributeName.length > 0) {
state = ScannerState.AfterAttributeName;
hasSpaceAfterTag = false;
return finishToken(offset, TokenType.AttributeName);
}
}
if (stream.advanceIfChars([_FSL, _RAN])) { // />
state = ScannerState.WithinContent;
return finishToken(offset, TokenType.StartTagSelfClose);
}
if (stream.advanceIfChar(_RAN)) { // >
if (lastTag === 'script') {
if (lastTypeValue && htmlScriptContents[lastTypeValue]) {
// stay in html
state = ScannerState.WithinContent;
} else {
state = ScannerState.WithinScriptContent;
}
} else if (lastTag === 'style') {
state = ScannerState.WithinStyleContent;
} else {
state = ScannerState.WithinContent;
}
return finishToken(offset, TokenType.StartTagClose);
}
if (emitPseudoCloseTags && stream.peekChar() === _LAN) { // <
state = ScannerState.WithinContent;
return finishToken(offset, TokenType.StartTagClose, localize('error.closingBracketMissing', 'Closing bracket missing.'));
}
stream.advance(1);
return finishToken(offset, TokenType.Unknown, localize('error.unexpectedCharacterInTag', 'Unexpected character in tag.'));
case ScannerState.AfterAttributeName:
if (stream.skipWhitespace()) {
hasSpaceAfterTag = true;
return finishToken(offset, TokenType.Whitespace);
}
if (stream.advanceIfChar(_EQS)) {
state = ScannerState.BeforeAttributeValue;
return finishToken(offset, TokenType.DelimiterAssign);
}
state = ScannerState.WithinTag;
return internalScan(); // no advance yet - jump to WithinTag
case ScannerState.BeforeAttributeValue:
if (stream.skipWhitespace()) {
return finishToken(offset, TokenType.Whitespace);
}
let attributeValue = stream.advanceIfRegExp(/^[^\s"'`=<>]+/);
if (attributeValue.length > 0) {
if (stream.peekChar() === _RAN && stream.peekChar(-1) === _FSL) { // <foo bar=http://foo/>
stream.goBack(1);
attributeValue = attributeValue.substr(0, attributeValue.length - 1);
}
if (lastAttributeName === 'type') {
lastTypeValue = attributeValue;
}
state = ScannerState.WithinTag;
hasSpaceAfterTag = false;
return finishToken(offset, TokenType.AttributeValue);
}
const ch = stream.peekChar();
if (ch === _SQO || ch === _DQO) {
stream.advance(1); // consume quote
if (stream.advanceUntilChar(ch)) {
stream.advance(1); // consume quote
}
if (lastAttributeName === 'type') {
lastTypeValue = stream.getSource().substring(offset + 1, stream.pos() - 1);
}
state = ScannerState.WithinTag;
hasSpaceAfterTag = false;
return finishToken(offset, TokenType.AttributeValue);
}
state = ScannerState.WithinTag;
hasSpaceAfterTag = false;
return internalScan(); // no advance yet - jump to WithinTag
case ScannerState.WithinScriptContent:
// see http://stackoverflow.com/questions/14574471/how-do-browsers-parse-a-script-tag-exactly
let sciptState = 1;
while (!stream.eos()) {
const match = stream.advanceIfRegExp(/<!--|-->|<\/?script\s*\/?>?/i);
if (match.length === 0) {
stream.goToEnd();
return finishToken(offset, TokenType.Script);
} else if (match === '<!--') {
if (sciptState === 1) {
sciptState = 2;
}
} else if (match === '-->') {
sciptState = 1;
} else if (match[1] !== '/') { // <script
if (sciptState === 2) {
sciptState = 3;
}
} else { // </script
if (sciptState === 3) {
sciptState = 2;
} else {
stream.goBack(match.length); // to the beginning of the closing tag
break;
}
}
}
state = ScannerState.WithinContent;
if (offset < stream.pos()) {
return finishToken(offset, TokenType.Script);
}
return internalScan(); // no advance yet - jump to content
case ScannerState.WithinStyleContent:
stream.advanceUntilRegExp(/<\/style/i);
state = ScannerState.WithinContent;
if (offset < stream.pos()) {
return finishToken(offset, TokenType.Styles);
}
return internalScan(); // no advance yet - jump to content
}
stream.advance(1);
state = ScannerState.WithinContent;
return finishToken(offset, TokenType.Unknown, errorMessage);
}