in lib/Parser/JSLexer.cpp [1359:1620]
// Scan a numeric literal beginning at curCharPtr_ and record it in token_.
//
// On return curCharPtr_ points just past the literal (including any identifier
// characters that directly follow it). The token is set to one of:
//   - a BigInt literal, when a valid integer literal is directly followed by
//     the identifier "n";
//   - a numeric literal holding the parsed value, or NaN when an error was
//     reported.
//
// \param grammarContext when it is GrammarContext::Type, legacy octal literals
//   and decimals with a leading zero are rejected exactly as in strict mode.
//
// Throughout the function, "(c | 32)" folds an ASCII upper-case letter to
// lower case, so a single comparison handles both cases.
void JSLexer::scanNumber(GrammarContext grammarContext) {
  // A somewhat ugly state machine for scanning a number

  unsigned radix = 10;
  // True once a decimal dot or an exponent has been consumed.
  bool real = false;
  // Cleared when the literal is found to be malformed.
  bool ok = true;
  // Start of the literal including any radix prefix; used for the raw BigInt
  // text.
  const char *rawStart = curCharPtr_;
  // Start of the digits, i.e. past a "0x"/"0o"/"0b" prefix if there is one.
  const char *start = curCharPtr_;

  // True when we encounter the numeric literal separator: '_'.
  bool seenSeparator = false;

  // True when we encounter a legacy octal number (starts with '0').
  bool legacyOctal = false;

  // Detect the radix
  if (*curCharPtr_ == '0') {
    if ((curCharPtr_[1] | 32) == 'x') {
      radix = 16;
      curCharPtr_ += 2;
      start += 2;
    } else if ((curCharPtr_[1] | 32) == 'o') {
      radix = 8;
      curCharPtr_ += 2;
      start += 2;
    } else if ((curCharPtr_[1] | 32) == 'b') {
      radix = 2;
      curCharPtr_ += 2;
      start += 2;
    } else if (curCharPtr_[1] == '.') {
      // "0." is an ordinary decimal, not a legacy octal.
      curCharPtr_ += 2;
      goto fraction;
    } else if ((curCharPtr_[1] | 32) == 'e') {
      // "0e" is an ordinary decimal, not a legacy octal.
      curCharPtr_ += 2;
      goto exponent;
    } else {
      // Tentatively octal; updateLegacyOctalRadix() may later switch the radix
      // to 10.
      radix = 8;
      legacyOctal = true;
      ++curCharPtr_;
    }
  }

  // Consume the integer part. All decimal digits are accepted here regardless
  // of the radix; digits invalid for the radix are rejected when the value is
  // parsed further down.
  while (isdigit(*curCharPtr_) ||
         (radix == 16 && (*curCharPtr_ | 32) >= 'a' &&
          (*curCharPtr_ | 32) <= 'f') ||
         (*curCharPtr_ == '_')) {
    seenSeparator |= *curCharPtr_ == '_';
    ++curCharPtr_;
  }

  if (radix == 10 || legacyOctal) {
    // It is not necessarily an integer.
    // We could have interpreted as legacyOctal initially but will have to
    // change to decimal later.
    if (*curCharPtr_ == '.') {
      ++curCharPtr_;
      goto fraction;
    }

    if ((*curCharPtr_ | 32) == 'e') {
      ++curCharPtr_;
      goto exponent;
    }
  }

  goto end;

fraction:
  // We arrive here after we have consumed the decimal dot ".".
  //
  real = true;
  while (isdigit(*curCharPtr_) || *curCharPtr_ == '_') {
    seenSeparator |= *curCharPtr_ == '_';
    ++curCharPtr_;
  }

  if ((*curCharPtr_ | 32) == 'e') {
    ++curCharPtr_;
    goto exponent;
  } else {
    goto end;
  }

exponent:
  // We arrive here after we have consumed the exponent character 'e' or 'E'.
  //
  real = true;
  if (*curCharPtr_ == '+' || *curCharPtr_ == '-')
    ++curCharPtr_;
  // The exponent must start with a digit (not a separator).
  if (isdigit(*curCharPtr_)) {
    do {
      seenSeparator |= *curCharPtr_ == '_';
      ++curCharPtr_;
    } while (isdigit(*curCharPtr_) || *curCharPtr_ == '_');
  } else {
    ok = false;
  }

end:
  // We arrive here after we have consumed all we can from the number. Now,
  // as per the spec, we consume a sequence of identifier characters if they
  // follow directly, which means the number is invalid if it's not BigInt.
  if (consumeIdentifierStart()) {
    consumeIdentifierParts<IdentifierMode::JS>();

    llvh::StringRef raw{rawStart, (size_t)(curCharPtr_ - rawStart)};
    // A BigInt must be a well-formed integer followed by exactly "n". Of the
    // literals with a leading zero, only "0n" itself is accepted.
    if (ok && !real && (!legacyOctal || raw == "0n") && tmpStorage_ == "n") {
      // This is a BigInt.
      rawStorage_.clear();
      rawStorage_.append(raw);
      token_.setBigIntLiteral(getStringLiteral(rawStorage_));
      return;
    }

    ok = false;
  }

  double val;

  /// ES6.0 B.1.1
  /// If we encounter a "legacy" octal number (starting with a '0') but if
  /// the integer contains '8' or '9' we interpret it as decimal.
  /// Only the integer part of the literal is examined.
  const auto updateLegacyOctalRadix =
      [this, &radix, start, &legacyOctal]() -> void {
    assert(
        legacyOctal &&
        "updateLegacyOctalRadix can only be called in legacyOctal mode");
    (void)legacyOctal;
    for (auto *scanPtr = start; scanPtr != curCharPtr_; ++scanPtr) {
      // The integer part ends at the decimal dot or at the exponent marker.
      // The marker is matched case-insensitively: an upper-case 'E' compares
      // greater than '8' and would otherwise be mistaken for an 8/9 digit by
      // the test below.
      if (*scanPtr == '.' || (*scanPtr | 32) == 'e') {
        break;
      }
      // The only non-digit that can occur in the integer part is '_', which
      // also compares greater than '8' and so has to be excluded explicitly.
      if (LLVM_UNLIKELY(*scanPtr >= '8') && LLVM_LIKELY(*scanPtr != '_')) {
        sm_.warning(
            SMRange(token_.getStartLoc(), SMLoc::getFromPointer(curCharPtr_)),
            "Numeric literal starts with 0 but contains an 8 or 9 digit. "
            "Interpreting as decimal (not octal).");
        radix = 10;
        break;
      }
    }
  };

  if (!ok) {
    errorRange(token_.getStartLoc(), "invalid numeric literal");
    val = std::numeric_limits<double>::quiet_NaN();
  } else if (
      !real && radix == 10 && curCharPtr_ - start <= 9 &&
      LLVM_LIKELY(!seenSeparator)) {
    // If this is a decimal integer of at most 9 digits (log10(2**31-1)), it
    // can fit in a 32-bit integer. Use a faster conversion.
    int32_t ival = *start - '0';
    while (++start != curCharPtr_)
      ival = ival * 10 + (*start - '0');
    val = ival;
  } else if (real || radix == 10) {
    // A real number, or a decimal integer too long (or containing separators)
    // for the fast path above.
    if (legacyOctal) {
      // legacyOctal implies radix was set to 8, so we can only get here with
      // a fraction or an exponent after a leading zero.
      if (strictMode_ || grammarContext == GrammarContext::Type) {
        // When errorRange() returns false we stop processing the literal.
        if (!errorRange(
                token_.getStartLoc(),
                "Decimals with leading zeros are not allowed in strict mode")) {
          val = std::numeric_limits<double>::quiet_NaN();
          goto done;
        }
      } else {
        // Check to see if we can actually scan this as radix 10.
        // Non-integer numbers must be in base 10, otherwise we error.
        updateLegacyOctalRadix();
        if (LLVM_LIKELY(radix != 10)) {
          if (!errorRange(
                  token_.getStartLoc(),
                  "Octal numeric literals must be integers")) {
            val = std::numeric_limits<double>::quiet_NaN();
            goto done;
          }
        }
      }
    }

    // We need a zero-terminated buffer for hermes_g_strtod().
    llvh::SmallString<32> buf;
    buf.reserve(curCharPtr_ - start + 1);
    if (LLVM_UNLIKELY(seenSeparator)) {
      // Copy the literal without the separators, validating each separator's
      // position on the way.
      for (const char *it = start; it != curCharPtr_; ++it) {
        if (LLVM_LIKELY(*it != '_')) {
          buf.push_back(*it);
        } else {
          // Check to ensure that '_' is surrounded by digits.
          // This is safe because the source buffer is zero-terminated and
          // we know that the numeric literal didn't start with '_'.
          // Note that we could have a 0b_11 literal, but we'd still fail
          // properly because of the radix==16 check.
          char prev = *(it - 1);
          char next = *(it + 1);
          if (!isdigit(prev) &&
              !(radix == 16 && 'a' <= (prev | 32) && (prev | 32) <= 'f')) {
            errorRange(
                token_.getStartLoc(),
                "numeric separator must come after a digit");
          } else if (
              !isdigit(next) &&
              !(radix == 16 && 'a' <= (next | 32) && (next | 32) <= 'f')) {
            errorRange(
                token_.getStartLoc(),
                "numeric separator must come before a digit");
          }
        }
      }
    } else {
      buf.append(start, curCharPtr_);
    }
    buf.push_back(0);
    char *endPtr;
    val = ::hermes_g_strtod(buf.data(), &endPtr);
    // The conversion must have consumed everything up to the terminating
    // zero.
    if (endPtr != &buf.back()) {
      errorRange(token_.getStartLoc(), "invalid numeric literal");
      val = std::numeric_limits<double>::quiet_NaN();
    }
  } else {
    // An integer in radix 16, 8 or 2, or a legacy octal integer.
    // A lone "0" is also scanned as legacy octal, hence the length check.
    if (legacyOctal &&
        (strictMode_ || grammarContext == GrammarContext::Type) &&
        curCharPtr_ - start > 1) {
      if (!errorRange(
              token_.getStartLoc(),
              "Octal literals must use '0o' in strict mode")) {
        val = std::numeric_limits<double>::quiet_NaN();
        goto done;
      }
    }

    // Handle the case of a radix prefix with no digits after it. This can
    // only happen after an explicit prefix (0x, 0o or 0b), because otherwise
    // start wouldn't have been changed. The two characters before start are
    // that prefix.
    if (curCharPtr_ == start) {
      errorRange(
          token_.getStartLoc(),
          llvh::Twine("No digits after ") + StringRef(start - 2, 2));
      val = std::numeric_limits<double>::quiet_NaN();
    } else {
      // Parse the rest of the number:
      if (legacyOctal) {
        updateLegacyOctalRadix();

        // LegacyOctalLikeDecimalIntegerLiteral cannot contain separators.
        if (LLVM_UNLIKELY(seenSeparator)) {
          errorRange(
              token_.getStartLoc(),
              "Numeric separator cannot be used in literal after leading 0");
        }
      }

      auto parsedInt = parseIntWithRadix</* AllowNumericSeparator */ true>(
          llvh::ArrayRef<char>{start, (size_t)(curCharPtr_ - start)}, radix);
      if (!parsedInt) {
        errorRange(token_.getStartLoc(), "invalid integer literal");
        val = std::numeric_limits<double>::quiet_NaN();
      } else {
        val = parsedInt.getValue();
      }
    }
  }

done:
  token_.setNumericLiteral(val);
}