public static function getTokensArrayFromContent()

in src/PhpTokenizer.php [77:208]


    /**
     * Tokenizes $content with static::tokenGetAll() and converts the result into a list of Token instances.
     *
     * Whitespace (and comments, when $treatCommentsAsTrivia is true) do not produce tokens of their own.
     * Instead they become leading trivia of the next emitted token: that token's full start stays at the
     * beginning of the trivia while its start is moved past it. Each Token is constructed with
     * (kind, fullStart, start, length), where the length is measured from fullStart.
     * The returned list always ends with a TokenKind::EndOfFileToken that carries any trailing trivia.
     *
     * On PHP 8.0+, T_NAME_QUALIFIED, T_NAME_FULLY_QUALIFIED and T_NAME_RELATIVE tokens are split into
     * separate name / backslash (/ namespace keyword) tokens to emulate the PHP 7 token stream.
     *
     * @param string $content the text to tokenize
     * @param mixed $parseContext null to tokenize $content as is; otherwise a key of
     *                            self::PARSE_CONTEXT_TO_PREFIX whose prefix is prepended to $content before
     *                            tokenizing. The tokens generated for that prefix are discarded again.
     * @param int $initialPos the offset assigned to the first byte of $content
     * @param bool $treatCommentsAsTrivia if true, comments are treated as trivia like whitespace;
     *                                    if false, they are emitted as tokens via self::TOKEN_MAP
     * @return Token[]
     */
    public static function getTokensArrayFromContent(
        $content, $parseContext = null, $initialPos = 0, $treatCommentsAsTrivia = true
    ) : array {
        if ($parseContext !== null) {
            // If needed, add a prefix so that token_get_all will tokenize the remaining $contents
            $prefix = self::PARSE_CONTEXT_TO_PREFIX[$parseContext];
            $content = $prefix . $content;
        }

        $tokens = static::tokenGetAll($content, $parseContext);

        $arr = [];
        $fullStart = $start = $pos = $initialPos;
        if ($parseContext !== null) {
            // If needed, remove the tokens that were generated for the prefix we added for token_get_all.
            // This was moved out of the below main loop as an optimization.
            // (the common case of parsing an entire file uses a null parseContext)
            //
            // The number of prefix bytes still to be discarded is tracked independently of $initialPos.
            // Measuring progress with $pos (which starts at $initialPos) compared an absolute offset
            // against the prefix length: with $initialPos === 0 that swallowed the first real token,
            // and with a large $initialPos it could stop before a multi-token prefix was fully removed.
            $remainingPrefixLength = \strlen($prefix);
            if ($remainingPrefixLength > 0) {
                foreach ($tokens as $i => $token) {
                    unset($tokens[$i]);
                    $remainingPrefixLength -= \strlen(\is_array($token) ? $token[1] : $token);
                    if ($remainingPrefixLength <= 0) {
                        // Everything belonging to the prefix is gone.
                        // $fullStart, $start and $pos were never advanced, so they still equal $initialPos.
                        break;
                    }
                }
            }
        }

        // Convert tokens from token_get_all to Token instances,
        // skipping whitespace and (when $treatCommentsAsTrivia is true) comments.
        foreach ($tokens as $token) {
            if (\is_array($token)) {
                $tokenKind = $token[0];
                $strlen = \strlen($token[1]);
            } else {
                // Non-array tokens are plain strings; the string itself is the key into TOKEN_MAP.
                $pos += \strlen($token);
                $newTokenKind = self::TOKEN_MAP[$token] ?? TokenKind::Unknown;
                $arr[] = new Token($newTokenKind, $fullStart, $start, $pos - $fullStart);
                $start = $fullStart = $pos;
                continue;
            }

            $pos += $strlen;

            // Optimization note: In PHP < 7.2, the switch statement would check case by case,
            // so putting the most common cases first is slightly faster
            switch ($tokenKind) {
                case \T_WHITESPACE:
                    // Trivia: only $start advances, so the next token's full span includes this whitespace.
                    $start += $strlen;
                    break;
                case \T_STRING:
                    // Reserved words are looked up case-insensitively; anything else is a plain name.
                    $name = \strtolower($token[1]);
                    $newTokenKind = TokenStringMaps::RESERVED_WORDS[$name] ?? TokenKind::Name;
                    $arr[] = new Token($newTokenKind, $fullStart, $start, $pos - $fullStart);
                    $start = $fullStart = $pos;
                    break;
                case \T_OPEN_TAG:
                    $arr[] = new Token(TokenKind::ScriptSectionStartTag, $fullStart, $start, $pos-$fullStart);
                    $start = $fullStart = $pos;
                    break;
                case \PHP_VERSION_ID >= 80000 ? \T_NAME_QUALIFIED : -1000:
                case \PHP_VERSION_ID >= 80000 ? \T_NAME_FULLY_QUALIFIED : -1001:
                    // NOTE: This switch is called on every token of every file being parsed, so this traded performance for readability.
                    //
                    // PHP's Opcache is able to optimize switches that are exclusively known longs,
                    // but not switches that mix strings and longs or have unknown longs.
                    // Longs are only known if they're declared within the same *class* or an internal constant (tokenizer).
                    //
                    // For some reason, the SWITCH_LONG opcode was not generated when the expression was part of a class constant.
                    // (seen with php -d opcache.opt_debug_level=0x20000)
                    //
                    // Use negative values because that's not expected to overlap with token kinds that token_get_all() will return.
                    //
                    // T_NAME_* was added in php 8.0 to forbid whitespace between parts of names.
                    // Here, emulate the tokenization of php 7 by splitting it up into 1 or more tokens.
                    foreach (\explode('\\', $token[1]) as $i => $name) {
                        if ($i) {
                            // Every part after the first is preceded by exactly one backslash.
                            $arr[] = new Token(TokenKind::BackslashToken, $fullStart, $start, 1 + $start - $fullStart);
                            $start++;
                            $fullStart = $start;
                        }
                        if ($name === '') {
                            // Empty part, e.g. the one before the leading backslash of a fully qualified name.
                            continue;
                        }
                        // TODO: TokenStringMaps::RESERVED_WORDS[$name] ?? TokenKind::Name for compatibility?
                        $len = \strlen($name);
                        $arr[] = new Token(TokenKind::Name, $fullStart, $start, $len + $start - $fullStart);
                        $start += $len;
                        $fullStart = $start;
                    }
                    break;
                case \PHP_VERSION_ID >= 80000 ? \T_NAME_RELATIVE : -1002:
                    // This is a namespace-relative name: namespace\...
                    foreach (\explode('\\', $token[1]) as $i => $name) {
                        $len = \strlen($name);
                        if (!$i) {
                            // The first part is the `namespace` keyword; it carries any leading trivia.
                            $arr[] = new Token(TokenKind::NamespaceKeyword, $fullStart, $start, $len + $start - $fullStart);
                            $start += $len;
                            $fullStart = $start;
                            continue;
                        }
                        // From here on $fullStart === $start, so the backslash has no trivia and length 1.
                        $arr[] = new Token(TokenKind::BackslashToken, $fullStart, $start, 1);
                        $start++;

                        // TODO: TokenStringMaps::RESERVED_WORDS[$name] ?? TokenKind::Name for compatibility?
                        $arr[] = new Token(TokenKind::Name, $start, $start, $len);

                        $start += $len;
                        $fullStart = $start;
                    }
                    break;
                case \T_COMMENT:
                case \T_DOC_COMMENT:
                    if ($treatCommentsAsTrivia) {
                        // Same handling as whitespace: the comment becomes leading trivia of the next token.
                        $start += $strlen;
                        break;
                    }
                    // fall through
                default:
                    $newTokenKind = self::TOKEN_MAP[$tokenKind] ?? TokenKind::Unknown;
                    $arr[] = new Token($newTokenKind, $fullStart, $start, $pos - $fullStart);
                    $start = $fullStart = $pos;
                    break;
            }
        }

        // Always terminate the list with an end-of-file token; it spans any trailing trivia.
        $arr[] = new Token(TokenKind::EndOfFileToken, $fullStart, $start, $pos - $fullStart);
        return $arr;
    }