void JSLexer::scanNumber()

in lib/Parser/JSLexer.cpp [1359:1620]


/// Scan a numeric literal beginning at curCharPtr_ and store the result in
/// token_, either as a numeric literal (a double) or as a BigInt literal.
///
/// Handles the "0x"/"0o"/"0b" radix prefixes (case-insensitive), legacy octal
/// literals (a leading '0' followed by further characters), decimal literals
/// with an optional fraction and exponent, '_' numeric separators and the
/// BigInt suffix "n". Problems are reported through errorRange(); most error
/// paths produce a NaN token value.
///
/// \param grammarContext when equal to GrammarContext::Type, legacy octal
///   literals are rejected the same way they are in strict mode.
void JSLexer::scanNumber(GrammarContext grammarContext) {
  // A somewhat ugly state machine for scanning a number

  unsigned radix = 10;
  bool real = false;
  bool ok = true;
  // rawStart always points at the first character of the literal; start is
  // advanced past a "0x"/"0o"/"0b" prefix so that it points at the digits.
  const char *rawStart = curCharPtr_;
  const char *start = curCharPtr_;

  // True when we encounter the numeric literal separator: '_'.
  bool seenSeparator = false;

  // True when we encounter a legacy octal number (starts with '0').
  bool legacyOctal = false;

  // Detect the radix.
  // (c | 32) folds ASCII upper-case letters to lower-case, so each test below
  // matches both cases of the prefix/exponent letter.
  if (*curCharPtr_ == '0') {
    if ((curCharPtr_[1] | 32) == 'x') {
      radix = 16;
      curCharPtr_ += 2;
      start += 2;
    } else if ((curCharPtr_[1] | 32) == 'o') {
      radix = 8;
      curCharPtr_ += 2;
      start += 2;
    } else if ((curCharPtr_[1] | 32) == 'b') {
      radix = 2;
      curCharPtr_ += 2;
      start += 2;
    } else if (curCharPtr_[1] == '.') {
      curCharPtr_ += 2;
      goto fraction;
    } else if ((curCharPtr_[1] | 32) == 'e') {
      curCharPtr_ += 2;
      goto exponent;
    } else {
      // A '0' followed by anything else (including a lone "0") is
      // provisionally treated as legacy octal; the radix may be switched back
      // to 10 later by updateLegacyOctalRadix().
      radix = 8;
      legacyOctal = true;
      ++curCharPtr_;
    }
  }

  // Consume digits, hex letters (radix 16 only) and separators. Whether each
  // digit is valid for the detected radix is not checked here.
  while (isdigit(*curCharPtr_) ||
         (radix == 16 && (*curCharPtr_ | 32) >= 'a' &&
          (*curCharPtr_ | 32) <= 'f') ||
         (*curCharPtr_ == '_')) {
    seenSeparator |= *curCharPtr_ == '_';
    ++curCharPtr_;
  }

  if (radix == 10 || legacyOctal) {
    // It is not necessarily an integer.
    // We could have interpreted as legacyOctal initially but will have to
    // change to decimal later.
    if (*curCharPtr_ == '.') {
      ++curCharPtr_;
      goto fraction;
    }

    if ((*curCharPtr_ | 32) == 'e') {
      ++curCharPtr_;
      goto exponent;
    }
  }

  goto end;

fraction:
  // We arrive here after we have consumed the decimal dot ".".
  //
  real = true;
  while (isdigit(*curCharPtr_) || *curCharPtr_ == '_') {
    seenSeparator |= *curCharPtr_ == '_';
    ++curCharPtr_;
  }

  if ((*curCharPtr_ | 32) == 'e') {
    ++curCharPtr_;
    goto exponent;
  } else {
    goto end;
  }

exponent:
  // We arrive here after we have consumed the exponent character 'e' or 'E'.
  //
  real = true;
  if (*curCharPtr_ == '+' || *curCharPtr_ == '-')
    ++curCharPtr_;
  // The exponent must begin with a digit (a leading '_' is not accepted);
  // separators are only consumed after that first digit.
  if (isdigit(*curCharPtr_)) {
    do {
      seenSeparator |= *curCharPtr_ == '_';
      ++curCharPtr_;
    } while (isdigit(*curCharPtr_) || *curCharPtr_ == '_');
  } else {
    ok = false;
  }

end:
  // We arrive here after we have consumed all we can from the number. Now,
  // as per the spec, we consume a sequence of identifier characters if they
  // follow directly, which means the number is invalid if it's not BigInt.
  if (consumeIdentifierStart()) {
    consumeIdentifierParts<IdentifierMode::JS>();

    llvh::StringRef raw{rawStart, (size_t)(curCharPtr_ - rawStart)};
    // A BigInt is an otherwise valid non-real literal whose trailing
    // identifier is exactly "n". Legacy octal forms are excluded, except for
    // the literal "0n" itself (a lone '0' is flagged as legacyOctal above).
    if (ok && !real && (!legacyOctal || raw == "0n") && tmpStorage_ == "n") {
      // This is a BigInt.
      rawStorage_.clear();
      rawStorage_.append(raw);
      token_.setBigIntLiteral(getStringLiteral(rawStorage_));
      return;
    }

    ok = false;
  }

  double val;

  /// ES6.0 B.1.1
  /// If we encounter a "legacy" octal number (starting with a '0') but if
  /// the integer contains '8' or '9' we interpret it as decimal.
  /// Only the integer part is inspected: scanning stops at the decimal dot or
  /// at the exponent indicator.
  const auto updateLegacyOctalRadix =
      [this, &radix, start, &legacyOctal]() -> void {
    assert(
        legacyOctal &&
        "updateLegacyOctalRadix can only be called in legacyOctal mode");
    (void)legacyOctal;
    for (auto *scanPtr = start; scanPtr != curCharPtr_; ++scanPtr) {
      // The exponent indicator may be either 'e' or 'E'. It must be matched
      // case-insensitively: 'E' compares >= '8' and would otherwise be
      // mistaken for an 8/9 digit, wrongly switching e.g. "01E5" to decimal.
      if (*scanPtr == '.' || (*scanPtr | 32) == 'e') {
        break;
      }
      if (LLVM_UNLIKELY(*scanPtr >= '8') && LLVM_LIKELY(*scanPtr != '_')) {
        sm_.warning(
            SMRange(token_.getStartLoc(), SMLoc::getFromPointer(curCharPtr_)),
            "Numeric literal starts with 0 but contains an 8 or 9 digit. "
            "Interpreting as decimal (not octal).");
        radix = 10;
        break;
      }
    }
  };

  // NOTE(review): errorRange() appears to return false when scanning should
  // stop; the callers below bail out with NaN in that case — confirm against
  // its declaration.
  if (!ok) {
    errorRange(token_.getStartLoc(), "invalid numeric literal");
    val = std::numeric_limits<double>::quiet_NaN();
  } else if (
      !real && radix == 10 && curCharPtr_ - start <= 9 &&
      LLVM_LIKELY(!seenSeparator)) {
    // If this is a decimal integer of at most 9 digits (log10(2**31-1)), it
    // can fit in a 32-bit integer. Use a faster conversion.
    int32_t ival = *start - '0';
    while (++start != curCharPtr_)
      ival = ival * 10 + (*start - '0');
    val = ival;
  } else if (real || radix == 10) {
    // legacyOctal implies radix == 8 at this point, so reaching this branch
    // with legacyOctal set means the literal has a fraction and/or exponent.
    if (legacyOctal) {
      if (strictMode_ || grammarContext == GrammarContext::Type) {
        if (!errorRange(
                token_.getStartLoc(),
                "Decimals with leading zeros are not allowed in strict mode")) {
          val = std::numeric_limits<double>::quiet_NaN();
          goto done;
        }
      } else {
        // Check to see if we can actually scan this as radix 10.
        // Non-integer numbers must be in base 10, otherwise we error.
        updateLegacyOctalRadix();
        if (LLVM_LIKELY(radix != 10)) {
          if (!errorRange(
                  token_.getStartLoc(),
                  "Octal numeric literals must be integers")) {
            val = std::numeric_limits<double>::quiet_NaN();
            goto done;
          }
        }
      }
    }

    // We need a zero-terminated buffer for hermes_g_strtod().
    llvh::SmallString<32> buf;
    buf.reserve(curCharPtr_ - start + 1);
    if (LLVM_UNLIKELY(seenSeparator)) {
      // Copy the literal without its separators, validating each one.
      for (const char *it = start; it != curCharPtr_; ++it) {
        if (LLVM_LIKELY(*it != '_')) {
          buf.push_back(*it);
        } else {
          // Check to ensure that '_' is surrounded by digits.
          // This is safe because the source buffer is zero-terminated and
          // we know that the numeric literal didn't start with '_'.
          // NOTE(review): prefixed literals (0x/0o/0b) are never "real" and
          // never have radix 10, so they do not reach this branch and the
          // radix == 16 tests below look unreachable here — confirm.
          char prev = *(it - 1);
          char next = *(it + 1);
          if (!isdigit(prev) &&
              !(radix == 16 && 'a' <= (prev | 32) && (prev | 32) <= 'f')) {
            errorRange(
                token_.getStartLoc(),
                "numeric separator must come after a digit");
          } else if (
              !isdigit(next) &&
              !(radix == 16 && 'a' <= (next | 32) && (next | 32) <= 'f')) {
            errorRange(
                token_.getStartLoc(),
                "numeric separator must come before a digit");
          }
        }
      }
    } else {
      buf.append(start, curCharPtr_);
    }
    buf.push_back(0);
    char *endPtr;
    val = ::hermes_g_strtod(buf.data(), &endPtr);
    // The conversion must have consumed everything up to the terminating
    // zero we appended; otherwise the literal is malformed.
    if (endPtr != &buf.back()) {
      errorRange(token_.getStartLoc(), "invalid numeric literal");
      val = std::numeric_limits<double>::quiet_NaN();
    }
  } else {
    // Integer literal in a non-decimal radix (prefixed or legacy octal).
    // A lone "0" is flagged as legacyOctal too, hence the length check.
    if (legacyOctal &&
        (strictMode_ || grammarContext == GrammarContext::Type) &&
        curCharPtr_ - start > 1) {
      if (!errorRange(
              token_.getStartLoc(),
              "Octal literals must use '0o' in strict mode")) {
        val = std::numeric_limits<double>::quiet_NaN();
        goto done;
      }
    }

    // Handle the case of a radix prefix with no digits after it. This can
    // only happen with the 0x/0o/0b prefixes, because otherwise start
    // wouldn't have been changed; start - 2 is therefore the prefix itself.
    if (curCharPtr_ == start) {
      errorRange(
          token_.getStartLoc(),
          llvh::Twine("No digits after ") + StringRef(start - 2, 2));
      val = std::numeric_limits<double>::quiet_NaN();
    } else {
      // Parse the rest of the number:
      if (legacyOctal) {
        updateLegacyOctalRadix();
        // LegacyOctalLikeDecimalIntegerLiteral cannot contain separators.
        if (LLVM_UNLIKELY(seenSeparator)) {
          errorRange(
              token_.getStartLoc(),
              "Numeric separator cannot be used in literal after leading 0");
        }
      }
      auto parsedInt = parseIntWithRadix</* AllowNumericSeparator */ true>(
          llvh::ArrayRef<char>{start, (size_t)(curCharPtr_ - start)}, radix);
      if (!parsedInt) {
        errorRange(token_.getStartLoc(), "invalid integer literal");
        val = std::numeric_limits<double>::quiet_NaN();
      } else {
        val = parsedInt.getValue();
      }
    }
  }

done:
  token_.setNumericLiteral(val);
}