private function scan()

in experiments/Lexer.php [56:231]


    /**
     * Scans forward from the current lexer position and returns the next token.
     *
     * While not in a script section, characters are consumed until
     * isScriptStartTag() matches (or the end of the input is reached) and the
     * consumed text is returned as a single InlineHtml token; a zero-length
     * InlineHtml span is never returned. Inside a script section, whitespace
     * and comments are skipped first, so they end up between the token's
     * $fullStart and $start offsets, and then exactly one token is produced.
     *
     * Side effects: advances $this->pos past the returned token and updates
     * $this->inScriptSection when a script section starts or ends.
     *
     * @return Token The scanned token. Its constructor receives the kind, the
     *               offset where scanning began ($fullStart), the offset after
     *               leading whitespace/comments ($start), and the distance from
     *               $fullStart to the final position.
     *
     * @throws \Exception If an operator/punctuator start character does not
     *                    begin any entry known to isOperatorOrPunctuator().
     */
    private function scan() : Token {
        // These are references: advancing $pos advances $this->pos as well.
        $pos = & $this->pos;
        $endOfFilePos = & $this->endOfFilePos;
        $text = & $this->fileContents;
        $fullStart = $pos;

        while (true) {
            // Re-captured on every iteration so that it ends up pointing just past any skipped trivia.
            $start = $pos;
            if ($pos >= $endOfFilePos) {
                // TODO manage lookaheads w/ script section state
                // At the end of the input, either report end-of-file (script section) or flush the
                // remaining text as InlineHtml.
                $token = $this->inScriptSection
                    ? new Token(TokenKind::EndOfFileToken, $fullStart, $start, $pos-$fullStart)
                    : new Token(TokenKind::InlineHtml, $fullStart, $fullStart, $pos-$fullStart);
                $this->inScriptSection = true;
                // TODO WAT
                // An empty InlineHtml span is skipped; with inScriptSection now set, the next
                // iteration produces the EndOfFileToken instead.
                if ($token->kind === TokenKind::InlineHtml && $pos-$fullStart === 0) {
                    continue;
                }
                return $token;
            }

            if (!$this->inScriptSection) {
                // Keep scanning until we hit a script section start tag
                if (!$this->isScriptStartTag($text, $pos, $endOfFilePos)) {
                    $pos++;
                    continue;
                }

                // Mark that a script section has begun, and return the scanned text as InlineHtml
                $this->inScriptSection = true;
                if ($pos-$fullStart === 0) {
                    // Nothing precedes the start tag, so there is no InlineHtml to report.
                    continue;
                }

                return new Token(TokenKind::InlineHtml, $fullStart, $fullStart, $pos-$fullStart);
            }

            $charCode = ord($text[$pos]);

            // NOTE: PHP treats `switch` as a looping structure for `continue`, so restarting the
            // enclosing `while` from inside a case requires `continue 2`. A bare `continue` would
            // act like `break` and raises a warning on PHP 7.3+.
            switch ($charCode) {
                case CharacterCodes::_hash:
                    // Trivia (like comments) prepends a scanned Token
                    $this->scanSingleLineComment($text, $pos, $endOfFilePos);
                    continue 2;

                case CharacterCodes::_space:
                case CharacterCodes::_tab:
                case CharacterCodes::_return:
                case CharacterCodes::_newline:
                    // Whitespace is skipped; the next iteration moves $start past it.
                    $pos++;
                    continue 2;

                // Potential 3-char compound
                case CharacterCodes::_dot: // ..., .=, . // TODO also applies to floating point literals
                    if (isset($text[$pos+1]) && $this->isDigitChar(ord($text[$pos+1]))) {
                        $kind = $this->scanNumericLiteral($text, $pos, $endOfFilePos);
                        return new Token($kind, $fullStart, $start, $pos-$fullStart);
                    }
                    // Otherwise fall through to compounds

                case CharacterCodes::_lessThan: // <=>, <=, <<=, <<, < // TODO heredoc and nowdoc
                case CharacterCodes::_equals: // ===, ==, =
                case CharacterCodes::_greaterThan: // >>=, >>, >=, >
                case CharacterCodes::_asterisk: // **=, **, *=, *
                case CharacterCodes::_exclamation: // !==, !=, !

                // Potential 2-char compound
                case CharacterCodes::_plus: // +=, ++, +
                case CharacterCodes::_minus: // -= , --, ->, -
                case CharacterCodes::_percent: // %=, %
                case CharacterCodes::_caret: // ^=, ^
                case CharacterCodes::_bar: // |=, ||, |
                case CharacterCodes::_ampersand: // &=, &&, &
                case CharacterCodes::_question: // ??, ?, end-tag

                case CharacterCodes::_colon: // : (TODO should this actually be treated as compound?)
                case CharacterCodes::_comma: // , (TODO should this actually be treated as compound?)

                // Non-compound
                case CharacterCodes::_at: // @
                case CharacterCodes::_openBracket:
                case CharacterCodes::_closeBracket:
                case CharacterCodes::_openParen:
                case CharacterCodes::_closeParen:
                case CharacterCodes::_openBrace:
                case CharacterCodes::_closeBrace:
                case CharacterCodes::_semicolon:
                case CharacterCodes::_tilde:
                case CharacterCodes::_backslash:
                    // TODO this can be made more performant, but we're going for simple/correct first.
                    // TODO
                    // Longest match first: try substrings of 7 characters down to 1 and take the
                    // first one that is a known operator or punctuator.
                    for ($tokenEnd = 6; $tokenEnd >= 0; $tokenEnd--) {
                        if ($pos + $tokenEnd >= $endOfFilePos) {
                            // The candidate would extend past the end of the input; try a shorter one.
                            continue;
                        }

                        // TODO get rid of strtolower for perf reasons
                        $textSubstring = strtolower(substr($text, $pos, $tokenEnd + 1));
                        if ($this->isOperatorOrPunctuator($textSubstring)) {
                            $tokenKind = TokenStringMaps::OPERATORS_AND_PUNCTUATORS[$textSubstring];
                            $pos += $tokenEnd + 1;

                            if ($tokenKind === TokenKind::ScriptSectionEndTag) {
                                // Subsequent scanning treats the text as inline HTML again.
                                $this->inScriptSection = false;
                            }

                            return new Token($tokenKind, $fullStart, $start, $pos - $fullStart);
                        }
                    }

                    throw new \Exception("Unknown token kind");

                case CharacterCodes::_slash:
                    if ($this->isSingleLineCommentStart($text, $pos, $endOfFilePos)) {
                        $this->scanSingleLineComment($text, $pos, $endOfFilePos);
                        continue 2;
                    } elseif ($this->isDelimitedCommentStart($text, $pos, $endOfFilePos)) {
                        $this->scanDelimitedComment($text, $pos, $endOfFilePos);
                        continue 2;
                    } elseif (isset($text[$pos+1]) && $text[$pos+1] === "=") {
                        $pos+=2;
                        return new Token(TokenKind::SlashEqualsToken, $fullStart, $start, $pos - $fullStart);
                    }
                    $pos++;
                    return new Token(TokenKind::SlashToken, $fullStart, $start, $pos - $fullStart);

                case CharacterCodes::_dollar:
                    $pos++;
                    if ($this->isNameStart($text, $pos, $endOfFilePos)) {
                        $this->scanName($text, $pos, $endOfFilePos);
                        return new Token(TokenKind::VariableName, $fullStart, $start, $pos - $fullStart);
                    }
                    // A '$' that is not followed by a name start is its own token.
                    return new Token(TokenKind::DollarToken, $fullStart, $start, $pos - $fullStart);

                case CharacterCodes::_doubleQuote:
                case CharacterCodes::_singleQuote:
                    // Records that the literal starts directly at the quote (no b/B prefix to skip).
                    $quoteStart = true;
                    // Flow through to b/B
                case CharacterCodes::b:
                case CharacterCodes::B:
                    // Either the current character is a quote, or it is b/B immediately followed by one.
                    if ($text[$pos] === "'" || $text[$pos] === "\"" || (isset($text[$pos+1]) && ($text[$pos+1] === "'" || $text[$pos+1] === "\""))) {
                        // Step over the b/B prefix when there is one.
                        $pos += isset($quoteStart) ? 0 : 1;
                        if ($text[$pos] === "\"") {
                            $kind = $this->scanTemplateAndSetTokenValue($text, $pos, $endOfFilePos, false);
                            return new Token($kind, $fullStart, $start, $pos - $fullStart);
                        }

                        // Single-quoted: step over the opening quote before scanning the literal.
                        $pos++;
                        if ($this->scanStringLiteral($text, $pos, $endOfFilePos)) {
                            return new Token(TokenKind::StringLiteralToken, $fullStart, $start, $pos-$fullStart);
                        }
                        return new Token(TokenKind::UnterminatedStringLiteralToken, $fullStart, $start, $pos-$fullStart);
                    }

                    // Flow through to default case

                default:
                    if ($this->isNameStart($text, $pos, $endOfFilePos)) {
                        $this->scanName($text, $pos, $endOfFilePos);
                        $token = new Token(TokenKind::Name, $fullStart, $start, $pos - $fullStart);
                        $tokenText = $token->getText($text);
                        $lowerText = strtolower($tokenText);
                        // The lookup is done on the lowercased text of the scanned name.
                        if ($this->isKeywordOrReservedWordStart($lowerText)) {
                            $token = $this->getKeywordOrReservedWordTokenFromNameToken($token, $lowerText, $text, $pos, $endOfFilePos);
                        }
                        return $token;
                    } elseif ($this->isDigitChar(ord($text[$pos]))) {
                        $kind = $this->scanNumericLiteral($text, $pos, $endOfFilePos);
                        return new Token($kind, $fullStart, $start, $pos - $fullStart);
                    }
                    // Unrecognised character: consume it so scanning always makes progress.
                    $pos++;
                    return new Token(TokenKind::Unknown, $fullStart, $start, $pos - $fullStart);
            }
        }
    }