fastfloat_really_inline parsed_number_string parse_number

fastfloat_really_inline parsed_number_string parse_number_string()

in include/ylt/standalone/iguana/detail/fast_float.h [2440:2600]
136 lines of code
44 McCabe index (conditional complexity)

fastfloat_really_inline parsed_number_string parse_number_string(
    const char* p, const char* pend, parse_options options) noexcept {
  const chars_format fmt = options.format;
  const char decimal_point = options.decimal_point;

  parsed_number_string answer;
  answer.valid = false;
  answer.too_many_digits = false;
  answer.negative = (*p == '-');
  if (*p == '-') {  // C++17 20.19.3.(7.1) explicitly forbids '+' sign here
    ++p;
    if (p == pend) {
      return answer;
    }
    if (!is_integer(*p) &&
        (*p !=
         decimal_point)) {  // a sign must be followed by an integer or the dot
      return answer;
    }
  }
  const char* const start_digits = p;

  uint64_t i = 0;  // an unsigned int avoids signed overflows (which are bad)

  while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) {
    i = i * 100000000 +
        parse_eight_digits_unrolled(
            p);  // in rare cases, this will overflow, but that's ok
    p += 8;
  }
  while ((p != pend) && is_integer(*p)) {
    // a multiplication by 10 is cheaper than an arbitrary integer
    // multiplication
    i = 10 * i +
        uint64_t(*p -
                 '0');  // might overflow, we will handle the overflow later
    ++p;
  }
  const char* const end_of_integer_part = p;
  int64_t digit_count = int64_t(end_of_integer_part - start_digits);
  answer.integer = byte_span(start_digits, size_t(digit_count));
  int64_t exponent = 0;
  if ((p != pend) && (*p == decimal_point)) {
    ++p;
    const char* before = p;
    // can occur at most twice without overflowing, but let it occur more, since
    // for integers with many digits, digit parsing is the primary bottleneck.
    while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) {
      i = i * 100000000 +
          parse_eight_digits_unrolled(
              p);  // in rare cases, this will overflow, but that's ok
      p += 8;
    }
    while ((p != pend) && is_integer(*p)) {
      uint8_t digit = uint8_t(*p - '0');
      ++p;
      i = i * 10 + digit;  // in rare cases, this will overflow, but that's ok
    }
    exponent = before - p;
    answer.fraction = byte_span(before, size_t(p - before));
    digit_count -= exponent;
  }
  // we must have encountered at least one integer!
  if (digit_count == 0) {
    return answer;
  }
  int64_t exp_number = 0;  // explicit exponential part
  if ((fmt & chars_format::scientific) && (p != pend) &&
      (('e' == *p) || ('E' == *p))) {
    const char* location_of_e = p;
    ++p;
    bool neg_exp = false;
    if ((p != pend) && ('-' == *p)) {
      neg_exp = true;
      ++p;
    }
    else if ((p != pend) &&
             ('+' ==
              *p)) {  // '+' on exponent is allowed by C++17 20.19.3.(7.1)
      ++p;
    }
    if ((p == pend) || !is_integer(*p)) {
      if (!(fmt & chars_format::fixed)) {
        // We are in error.
        return answer;
      }
      // Otherwise, we will be ignoring the 'e'.
      p = location_of_e;
    }
    else {
      while ((p != pend) && is_integer(*p)) {
        uint8_t digit = uint8_t(*p - '0');
        if (exp_number < 0x10000000) {
          exp_number = 10 * exp_number + digit;
        }
        ++p;
      }
      if (neg_exp) {
        exp_number = -exp_number;
      }
      exponent += exp_number;
    }
  }
  else {
    // If it scientific and not fixed, we have to bail out.
    if ((fmt & chars_format::scientific) && !(fmt & chars_format::fixed)) {
      return answer;
    }
  }
  answer.lastmatch = p;
  answer.valid = true;

  // If we frequently had to deal with long strings of digits,
  // we could extend our code by using a 128-bit integer instead
  // of a 64-bit integer. However, this is uncommon.
  //
  // We can deal with up to 19 digits.
  if (digit_count > 19) {  // this is uncommon
    // It is possible that the integer had an overflow.
    // We have to handle the case where we have 0.0000somenumber.
    // We need to be mindful of the case where we only have zeroes...
    // E.g., 0.000000000...000.
    const char* start = start_digits;
    while ((start != pend) && (*start == '0' || *start == decimal_point)) {
      if (*start == '0') {
        digit_count--;
      }
      start++;
    }
    if (digit_count > 19) {
      answer.too_many_digits = true;
      // Let us start again, this time, avoiding overflows.
      // We don't need to check if is_integer, since we use the
      // pre-tokenized spans from above.
      i = 0;
      p = answer.integer.ptr;
      const char* int_end = p + answer.integer.len();
      const uint64_t minimal_nineteen_digit_integer{1000000000000000000};
      while ((i < minimal_nineteen_digit_integer) && (p != int_end)) {
        i = i * 10 + uint64_t(*p - '0');
        ++p;
      }
      if (i >= minimal_nineteen_digit_integer) {  // We have a big integers
        exponent = end_of_integer_part - p + exp_number;
      }
      else {  // We have a value with a fractional component.
        p = answer.fraction.ptr;
        const char* frac_end = p + answer.fraction.len();
        while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
          i = i * 10 + uint64_t(*p - '0');
          ++p;
        }
        exponent = answer.fraction.ptr - p + exp_number;
      }
      // We have now corrected both exponent and i, to a truncated value
    }
  }
  answer.exponent = exponent;
  answer.mantissa = i;
  return answer;
}