in experiments/Lexer.php [56:231]
/**
 * Scans forward from the current position and returns the next token.
 *
 * Whitespace and comments encountered before the token are skipped but stay
 * part of the token's span: every Token is built from the position where
 * this call began ($fullStart), the position where the token text itself
 * begins ($start), and the length measured from $fullStart.
 *
 * While outside a script section, text is consumed up to the next script
 * start tag (or end of file) and returned as a single InlineHtml token;
 * zero-length InlineHtml tokens are never returned.
 *
 * $this->pos is advanced past the returned token, and
 * $this->inScriptSection is updated when a script section starts or ends.
 *
 * @return Token the next token in the file contents
 * @throws \Exception when a character that starts an operator/punctuator
 *                    does not match any known operator or punctuator
 */
private function scan() : Token {
    // References, so that helper calls and local increments update lexer state directly.
    $pos = & $this->pos;
    $endOfFilePos = & $this->endOfFilePos;
    $text = & $this->fileContents;
    $fullStart = $pos;

    while (true) {
        // Re-captured on every iteration so that skipped trivia is excluded from the token start.
        $start = $pos;

        if ($pos >= $endOfFilePos) {
            // TODO manage lookaheads w/ script section state
            $token = $this->inScriptSection
                ? new Token(TokenKind::EndOfFileToken, $fullStart, $start, $pos-$fullStart)
                : new Token(TokenKind::InlineHtml, $fullStart, $fullStart, $pos-$fullStart);
            $this->inScriptSection = true;
            // TODO WAT
            // An empty InlineHtml token is suppressed: inScriptSection is now true,
            // so the next iteration yields EndOfFileToken instead.
            if ($token->kind === TokenKind::InlineHtml && $pos-$fullStart === 0) {
                continue;
            }
            return $token;
        }

        if (!$this->inScriptSection) {
            // Keep scanning until we hit a script section start tag
            if (!$this->isScriptStartTag($text, $pos, $endOfFilePos)) {
                $pos++;
                continue;
            }
            // Mark that a script section has begun, and return the scanned text as InlineHtml
            $this->inScriptSection = true;
            if ($pos-$fullStart === 0) {
                // Nothing precedes the start tag; go on and scan it as a regular token.
                continue;
            }
            return new Token(TokenKind::InlineHtml, $fullStart, $fullStart, $pos-$fullStart);
        }

        $charCode = ord($text[$pos]);

        // NOTE: PHP treats `switch` as a looping structure for `continue`, so a bare
        // `continue` in a case acts like `break` and raises E_WARNING since PHP 7.3.
        // `continue 2` explicitly targets the enclosing while loop.
        switch ($charCode) {
            case CharacterCodes::_hash:
                // Trivia (like comments) prepends a scanned Token
                $this->scanSingleLineComment($text, $pos, $endOfFilePos);
                continue 2;

            case CharacterCodes::_space:
            case CharacterCodes::_tab:
            case CharacterCodes::_return:
            case CharacterCodes::_newline:
                $pos++;
                continue 2;

            // Potential 3-char compound
            case CharacterCodes::_dot: // ..., .=, . // TODO also applies to floating point literals
                if (isset($text[$pos+1]) && $this->isDigitChar(ord($text[$pos+1]))) {
                    $kind = $this->scanNumericLiteral($text, $pos, $endOfFilePos);
                    return new Token($kind, $fullStart, $start, $pos-$fullStart);
                }
                // Otherwise fall through to compounds
            case CharacterCodes::_lessThan: // <=>, <=, <<=, <<, < // TODO heredoc and nowdoc
            case CharacterCodes::_equals: // ===, ==, =
            case CharacterCodes::_greaterThan: // >>=, >>, >=, >
            case CharacterCodes::_asterisk: // **=, **, *=, *
            case CharacterCodes::_exclamation: // !==, !=, !
            // Potential 2-char compound
            case CharacterCodes::_plus: // +=, ++, +
            case CharacterCodes::_minus: // -= , --, ->, -
            case CharacterCodes::_percent: // %=, %
            case CharacterCodes::_caret: // ^=, ^
            case CharacterCodes::_bar: // |=, ||, |
            case CharacterCodes::_ampersand: // &=, &&, &
            case CharacterCodes::_question: // ??, ?, end-tag
            case CharacterCodes::_colon: // : (TODO should this actually be treated as compound?)
            case CharacterCodes::_comma: // , (TODO should this actually be treated as compound?)
            // Non-compound
            case CharacterCodes::_at: // @
            case CharacterCodes::_openBracket:
            case CharacterCodes::_closeBracket:
            case CharacterCodes::_openParen:
            case CharacterCodes::_closeParen:
            case CharacterCodes::_openBrace:
            case CharacterCodes::_closeBrace:
            case CharacterCodes::_semicolon:
            case CharacterCodes::_tilde:
            case CharacterCodes::_backslash:
                // TODO this can be made more performant, but we're going for simple/correct first.
                // TODO
                // Longest match first: try candidate substrings of 7 characters down to 1.
                for ($tokenEnd = 6; $tokenEnd >= 0; $tokenEnd--) {
                    if ($pos + $tokenEnd >= $endOfFilePos) {
                        // Candidate would extend past the end of the file; try a shorter one.
                        continue;
                    }
                    // TODO get rid of strtolower for perf reasons
                    $textSubstring = strtolower(substr($text, $pos, $tokenEnd + 1));
                    if ($this->isOperatorOrPunctuator($textSubstring)) {
                        $tokenKind = TokenStringMaps::OPERATORS_AND_PUNCTUATORS[$textSubstring];
                        $pos += $tokenEnd + 1;
                        if ($tokenKind === TokenKind::ScriptSectionEndTag) {
                            // Subsequent text is scanned as inline HTML until the next start tag.
                            $this->inScriptSection = false;
                        }
                        return new Token($tokenKind, $fullStart, $start, $pos - $fullStart);
                    }
                }
                throw new \Exception("Unknown token kind");

            case CharacterCodes::_slash:
                if ($this->isSingleLineCommentStart($text, $pos, $endOfFilePos)) {
                    $this->scanSingleLineComment($text, $pos, $endOfFilePos);
                    continue 2;
                } elseif ($this->isDelimitedCommentStart($text, $pos, $endOfFilePos)) {
                    $this->scanDelimitedComment($text, $pos, $endOfFilePos);
                    continue 2;
                } elseif (isset($text[$pos+1]) && $text[$pos+1] === "=") {
                    $pos+=2;
                    return new Token(TokenKind::SlashEqualsToken, $fullStart, $start, $pos - $fullStart);
                }
                $pos++;
                return new Token(TokenKind::SlashToken, $fullStart, $start, $pos - $fullStart);

            case CharacterCodes::_dollar:
                $pos++;
                if ($this->isNameStart($text, $pos, $endOfFilePos)) {
                    $this->scanName($text, $pos, $endOfFilePos);
                    return new Token(TokenKind::VariableName, $fullStart, $start, $pos - $fullStart);
                }
                return new Token(TokenKind::DollarToken, $fullStart, $start, $pos - $fullStart);

            case CharacterCodes::_doubleQuote:
            case CharacterCodes::_singleQuote:
                // Records that the literal starts directly at a quote (no b/B prefix).
                $quoteStart = true;
                // Flow through to b/B
            case CharacterCodes::b:
            case CharacterCodes::B:
                if ($text[$pos] === "'" || $text[$pos] === "\"" || (isset($text[$pos+1]) && ($text[$pos+1] === "'" || $text[$pos+1] === "\""))) {
                    // Step over the b/B prefix when there is one, so $pos sits on the opening quote.
                    $pos += isset($quoteStart) ? 0 : 1;
                    if ($text[$pos] === "\"") {
                        $kind = $this->scanTemplateAndSetTokenValue($text, $pos, $endOfFilePos, false);
                        return new Token($kind, $fullStart, $start, $pos - $fullStart);
                    }
                    // Single-quoted literal: move past the opening quote before scanning.
                    $pos++;
                    if ($this->scanStringLiteral($text, $pos, $endOfFilePos)) {
                        return new Token(TokenKind::StringLiteralToken, $fullStart, $start, $pos-$fullStart);
                    }
                    return new Token(TokenKind::UnterminatedStringLiteralToken, $fullStart, $start, $pos-$fullStart);
                }
                // Flow through to default case
            default:
                if ($this->isNameStart($text, $pos, $endOfFilePos)) {
                    $this->scanName($text, $pos, $endOfFilePos);
                    $token = new Token(TokenKind::Name, $fullStart, $start, $pos - $fullStart);
                    $tokenText = $token->getText($text);
                    $lowerText = strtolower($tokenText);
                    if ($this->isKeywordOrReservedWordStart($lowerText)) {
                        $token = $this->getKeywordOrReservedWordTokenFromNameToken($token, $lowerText, $text, $pos, $endOfFilePos);
                    }
                    return $token;
                } elseif ($this->isDigitChar(ord($text[$pos]))) {
                    $kind = $this->scanNumericLiteral($text, $pos, $endOfFilePos);
                    return new Token($kind, $fullStart, $start, $pos - $fullStart);
                }
                // Unrecognized character: consume it and return it as an Unknown token.
                $pos++;
                return new Token(TokenKind::Unknown, $fullStart, $start, $pos - $fullStart);
        }
    }
}