in src/PhpTokenizer.php [77:208]
public static function getTokensArrayFromContent(
$content, $parseContext = null, $initialPos = 0, $treatCommentsAsTrivia = true
) : array {
if ($parseContext !== null) {
// If needed, add a prefix so that token_get_all will tokenize the remaining $contents
$prefix = self::PARSE_CONTEXT_TO_PREFIX[$parseContext];
$content = $prefix . $content;
}
$tokens = static::tokenGetAll($content, $parseContext);
$arr = [];
$fullStart = $start = $pos = $initialPos;
if ($parseContext !== null) {
// If needed, skip over the prefix we added for token_get_all and remove those tokens.
// This was moved out of the below main loop as an optimization.
// (the common case of parsing an entire file uses a null parseContext)
foreach ($tokens as $i => $token) {
unset($tokens[$i]);
if (\is_array($token)) {
$pos += \strlen($token[1]);
} else {
$pos += \strlen($token);
}
if (\strlen($prefix) < $pos) {
$fullStart = $start = $pos = $initialPos;
break;
}
}
}
// Convert tokens from token_get_all to Token instances,
// skipping whitespace and (usually, when parseContext is null) comments.
foreach ($tokens as $token) {
if (\is_array($token)) {
$tokenKind = $token[0];
$strlen = \strlen($token[1]);
} else {
$pos += \strlen($token);
$newTokenKind = self::TOKEN_MAP[$token] ?? TokenKind::Unknown;
$arr[] = new Token($newTokenKind, $fullStart, $start, $pos - $fullStart);
$start = $fullStart = $pos;
continue;
}
$pos += $strlen;
// Optimization note: In PHP < 7.2, the switch statement would check case by case,
// so putting the most common cases first is slightly faster
switch ($tokenKind) {
case \T_WHITESPACE:
$start += $strlen;
break;
case \T_STRING:
$name = \strtolower($token[1]);
$newTokenKind = TokenStringMaps::RESERVED_WORDS[$name] ?? TokenKind::Name;
$arr[] = new Token($newTokenKind, $fullStart, $start, $pos - $fullStart);
$start = $fullStart = $pos;
break;
case \T_OPEN_TAG:
$arr[] = new Token(TokenKind::ScriptSectionStartTag, $fullStart, $start, $pos-$fullStart);
$start = $fullStart = $pos;
break;
case \PHP_VERSION_ID >= 80000 ? \T_NAME_QUALIFIED : -1000:
case \PHP_VERSION_ID >= 80000 ? \T_NAME_FULLY_QUALIFIED : -1001:
// NOTE: This switch is called on every token of every file being parsed, so this traded performance for readability.
//
// PHP's Opcache is able to optimize switches that are exclusively known longs,
// but not switches that mix strings and longs or have unknown longs.
// Longs are only known if they're declared within the same *class* or an internal constant (tokenizer).
//
// For some reason, the SWITCH_LONG opcode was not generated when the expression was part of a class constant.
// (seen with php -d opcache.opt_debug_level=0x20000)
//
// Use negative values because that's not expected to overlap with token kinds that token_get_all() will return.
//
// T_NAME_* was added in php 8.0 to forbid whitespace between parts of names.
// Here, emulate the tokenization of php 7 by splitting it up into 1 or more tokens.
foreach (\explode('\\', $token[1]) as $i => $name) {
if ($i) {
$arr[] = new Token(TokenKind::BackslashToken, $fullStart, $start, 1 + $start - $fullStart);
$start++;
$fullStart = $start;
}
if ($name === '') {
continue;
}
// TODO: TokenStringMaps::RESERVED_WORDS[$name] ?? TokenKind::Name for compatibility?
$len = \strlen($name);
$arr[] = new Token(TokenKind::Name, $fullStart, $start, $len + $start - $fullStart);
$start += $len;
$fullStart = $start;
}
break;
case \PHP_VERSION_ID >= 80000 ? \T_NAME_RELATIVE : -1002:
// This is a namespace-relative name: namespace\...
foreach (\explode('\\', $token[1]) as $i => $name) {
$len = \strlen($name);
if (!$i) {
$arr[] = new Token(TokenKind::NamespaceKeyword, $fullStart, $start, $len + $start - $fullStart);
$start += $len;
$fullStart = $start;
continue;
}
$arr[] = new Token(TokenKind::BackslashToken, $fullStart, $start, 1);
$start++;
// TODO: TokenStringMaps::RESERVED_WORDS[$name] ?? TokenKind::Name for compatibility?
$arr[] = new Token(TokenKind::Name, $start, $start, $len);
$start += $len;
$fullStart = $start;
}
break;
case \T_COMMENT:
case \T_DOC_COMMENT:
if ($treatCommentsAsTrivia) {
$start += $strlen;
break;
}
// fall through
default:
$newTokenKind = self::TOKEN_MAP[$tokenKind] ?? TokenKind::Unknown;
$arr[] = new Token($newTokenKind, $fullStart, $start, $pos - $fullStart);
$start = $fullStart = $pos;
break;
}
}
$arr[] = new Token(TokenKind::EndOfFileToken, $fullStart, $start, $pos - $fullStart);
return $arr;
}