OptValue parseIntWithRadix()

in include/hermes/Support/Conversions.h [133:276]


/// Parse the characters of \p str as an integer written in base \p radix.
///
/// Preconditions (checked only by assert): \p radix is in [2, 36] and \p str
/// is not empty.
///
/// Digits above 9 are letters, recognized after the character has been passed
/// through charLetterToLower(). When AllowNumericSeparator is set (presumably
/// a template parameter declared outside this view), a '_' placed between two
/// digits is accepted and contributes nothing to the value.
///
/// \return the parsed value, or llvh::None if \p str contains a character that
///   is not a digit in \p radix, or a '_' that is leading, trailing, doubled,
///   or not permitted at all.
OptValue<double> parseIntWithRadix(Iterable str, int radix) {
  assert(
      radix >= 2 && radix <= 36 && "Invalid radix passed to parseIntWithRadix");
  assert(str.begin() != str.end() && "Empty string");

  // First pass: validate every character while accumulating the value with
  // plain floating point multiply/add.
  double result = 0;
  for (auto cur = str.begin(); cur != str.end(); ++cur) {
    auto ch = *cur;
    auto lowered = charLetterToLower(ch);

    // Decimal digit that is small enough for this radix.
    if ('0' <= ch && ch <= '9' && ch < '0' + radix) {
      result *= radix;
      result += ch - '0';
      continue;
    }

    // Letter digit: 'a' stands for 10, 'b' for 11, and so on.
    if ('a' <= lowered && lowered < 'a' + radix - 10) {
      result *= radix;
      result += lowered - 'a' + 0xa;
      continue;
    }

    // The only other acceptable character is a numeric separator.
    if (!(AllowNumericSeparator && LLVM_UNLIKELY(ch == '_'))) {
      return llvh::None;
    }

    // A separator has to sit strictly between two digits, so it can be
    // neither the first nor the last character.
    if (cur == str.begin() || cur == str.end() - 1) {
      return llvh::None;
    }

    // A separator preceded by another separator was already rejected when the
    // earlier one was examined, so only the following character is checked.
    char following = *(cur + 1);
    if (following == '_') {
      return llvh::None;
    }
  }

  // 2**53, the bound of the 53-bit mantissa. Results at or above it are
  // rebuilt below, but only for the power-of-two radixes (2, 4, 8, 16, 32),
  // whose digits map directly onto bits.
  const double exactLimit = 9007199254740992.0;
  if (result < exactLimit || !llvh::isPowerOf2_32(radix)) {
    return result;
  }

  // Second pass: walk the input bit by bit, keep the first 53 significant
  // bits exactly and round based on the bits that get dropped.
  enum class Phase {
    SkipZeros, // No set bit has been seen yet.
    Significand, // Collecting the bits that are kept exactly.
    RoundBit, // The next bit is the first one that is dropped.
    TailZeros, // Only zero bits were seen after the first dropped bit.
    TailNonZero, // A set bit was seen after the first dropped bit.
  };

  Phase phase = Phase::SkipZeros;
  size_t significandBitsLeft = 53;
  // Becomes 2**(number of dropped bits) once bits start being dropped.
  double scale = 0.0;
  bool lowestKeptBit = false;
  bool firstDroppedBit = false;

  result = 0;
  const auto end = str.end();
  for (auto cur = str.begin(); cur != end; ++cur) {
    // The first pass accepted every character, so it is known to fit in a
    // char here.
    char ch = static_cast<char>(*cur);
    if (AllowNumericSeparator && LLVM_UNLIKELY(ch == '_')) {
      continue;
    }

    auto lowered = charLetterToLower(ch);
    size_t digit = 0;
    if ('0' <= ch && ch <= '9') {
      digit = ch - '0';
    } else {
      // Anything else must be a letter digit; the first pass would have
      // bailed out otherwise.
      assert('a' <= lowered && lowered < 'a' + radix - 10);
      digit = lowered - 'a' + 0xa;
    }

    // Feed the log2(radix) bits of this digit, most significant bit first.
    for (size_t mask = radix >> 1; mask != 0; mask >>= 1) {
      const bool bit = (digit & mask) != 0;

      switch (phase) {
        case Phase::SkipZeros:
          // Ignore leading zero bits; the first set bit starts the value.
          if (bit) {
            result = 1;
            --significandBitsLeft;
            phase = Phase::Significand;
          }
          break;
        case Phase::Significand:
          // Shift the next bit into the exactly represented part.
          result *= 2;
          result += bit;
          if (--significandBitsLeft == 0) {
            // All 53 bits are in; remember the lowest one for tie breaking.
            lowestKeptBit = bit;
            phase = Phase::RoundBit;
          }
          break;
        case Phase::RoundBit:
          firstDroppedBit = bit;
          scale = 2.0;
          phase = Phase::TailZeros;
          break;
        case Phase::TailZeros:
          scale *= 2.0;
          if (bit) {
            phase = Phase::TailNonZero;
          }
          break;
        case Phase::TailNonZero:
          scale *= 2.0;
          break;
      }
    }
  }

  // If no bit was dropped, the value read so far needs no adjustment.
  if (phase == Phase::TailZeros) {
    // The dropped bits are the first dropped bit followed only by zeros: when
    // that bit is set this is an exact tie, rounded up only if the lowest
    // kept bit is set.
    result += firstDroppedBit && lowestKeptBit;
    result *= scale;
  } else if (phase == Phase::TailNonZero) {
    // Some later dropped bit is set, so the first dropped bit alone decides
    // whether to round up.
    result += firstDroppedBit;
    result *= scale;
  }
  return result;
}