static inline DecimalValue StringToDecimal()

in be/src/util/string-parser.h [135:285]


  static inline DecimalValue<T> StringToDecimal(const char* s, int len,
      int type_precision, int type_scale, bool round, StringParser::ParseResult* result) {
    // Parses the 'len' characters starting at 's' as a decimal number destined for a
    // decimal type with 'type_precision' total digits and 'type_scale' digits after the
    // dot. Accepted input is: optional surrounding whitespace, an optional '+' or '-',
    // digits with at most one '.', and an optional 'e'/'E' followed by an exponent that
    // is parsed with StringToIntInternal<int8_t>.
    //
    // The outcome is written to '*result':
    //   PARSE_SUCCESS   - the value fits; it is returned scaled to 'type_scale'.
    //   PARSE_UNDERFLOW - the input has more digits after the dot than 'type_scale'. The
    //                     value is still returned, scaled down to 'type_scale' (rounded
    //                     if 'round' is true). Also reported when the exponent parse
    //                     overflows and the returned exponent is negative.
    //   PARSE_OVERFLOW  - the input has more digits to the left of the dot than the type
    //                     allows, or rounding carried into an extra digit.
    //   PARSE_FAILURE   - an unexpected character was seen, or neither a digit nor a dot
    //                     was found.
    // When parsing stops early (bad character or bad exponent) a zero DecimalValue is
    // returned. NOTE(review): on the other non-success paths the returned value is not
    // necessarily meaningful; callers presumably check '*result' first - confirm.
    //
    // Special cases:
    //   1) '' == Fail, an empty string fails to parse.
    //   2) '   #   ' == #, leading and trailing white space is ignored.
    //   3) '.' == 0, a single dot parses as zero (for consistency with other types).
    //   4) '#.' == '#', a trailing dot is ignored.

    // Ignore leading and trailing spaces.
    while (len > 0 && IsWhitespace(*s)) {
      ++s;
      --len;
    }
    while (len > 0 && IsWhitespace(s[len - 1])) {
      --len;
    }

    // Consume an optional sign. Only the magnitude is accumulated in 'value' below; the
    // sign is applied once, at the return statement. Any other first character is left
    // in place for the scanning code that follows.
    bool is_negative = false;
    if (len > 0) {
      switch (*s) {
        case '-':
          is_negative = true;
          [[fallthrough]];
        case '+':
          ++s;
          --len;
      }
    }

    // Ignore leading zeros. They still count as having seen a value, so that inputs
    // such as '0' are not rejected later for lacking digits.
    bool found_value = false;
    while (len > 0 && UNLIKELY(*s == '0')) {
      found_value = true;
      ++s;
      --len;
    }

    // Ignore leading zeros even after a dot. This allows for differentiating between
    // cases like 0.01e2, which would fit in a DECIMAL(1, 0), and 0.10e2, which would
    // overflow.
    // These zeros are counted in 'digits_after_dot_count' (they affect the scale) but
    // not in 'total_digits_count' (they do not consume precision).
    int digits_after_dot_count = 0;
    // Kept as an int (0 or 1) rather than a bool so that it can be added directly to
    // 'digits_after_dot_count' in the digit loop.
    int found_dot = 0;
    if (len > 0 && *s == '.') {
      found_dot = 1;
      ++s;
      --len;
      while (len > 0 && UNLIKELY(*s == '0')) {
        found_value = true;
        ++digits_after_dot_count;
        ++s;
        --len;
      }
    }

    // Number of digits seen by the loop below, including those that were not
    // accumulated into 'value' because the precision limit had been reached.
    int total_digits_count = 0;
    bool found_exponent = false;
    int8_t exponent = 0;
    // The first digit that did not fit in 'type_precision' digits; only recorded when
    // 'round' is true, otherwise it stays 0.
    int first_truncated_digit = 0;
    T value = 0;
    for (int i = 0; i < len; ++i) {
      const char c = s[i];
      if (LIKELY('0' <= c && c <= '9')) {
        found_value = true;
        // Ignore digits once the type's precision limit is reached. This avoids
        // overflowing the underlying storage while handling a string like
        // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and
        // an exponent will be made later.
        if (LIKELY(total_digits_count < type_precision)) {
          // Benchmarks are faster with parentheses.
          T new_value = (value * 10) + (c - '0');
          DCHECK(new_value >= value);
          value = new_value;
        } else if (UNLIKELY(round && total_digits_count == type_precision)) {
          first_truncated_digit = c - '0';
        }
        DCHECK(value >= 0); // DCHECK_GE does not work with int128_t
        ++total_digits_count;
        // Adds 1 only once a dot has been seen.
        digits_after_dot_count += found_dot;
      } else if (c == '.' && LIKELY(!found_dot)) {
        // A second dot does not match this branch and ends up in the failure branch.
        found_dot = 1;
      } else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) {
        // NOTE(review): 'found_exponent' is only set here, and this branch always leaves
        // the loop (return or break), so the '!found_exponent' test is always true when
        // it is evaluated.
        found_exponent = true;
        // Everything after the 'e'/'E' is handed to the integer parser as the exponent.
        exponent = StringToIntInternal<int8_t>(s + i + 1, len - i - 1, result);
        if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) {
          // An exponent overflow with a negative returned exponent is reported as an
          // underflow of the decimal; other exponent errors are passed through as is.
          if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) {
            *result = StringParser::PARSE_UNDERFLOW;
          }
          return DecimalValue<T>(0);
        }
        // The remainder of the string was consumed by the exponent parse.
        break;
      } else {
        // Any other character is invalid.
        *result = StringParser::PARSE_FAILURE;
        return DecimalValue<T>(0);
      }
    }

    // Find the number of truncated digits before adjusting the precision for an exponent.
    int truncated_digit_count = std::max(total_digits_count - type_precision, 0);
    // 'scale' and 'precision' refer to the scale and precision of the number that
    // is contained in the string that we are parsing. The scale of 'value' may be
    // different because some digits may have been truncated.
    int scale, precision;
    ApplyExponent(total_digits_count, digits_after_dot_count,
        exponent, &value, &precision, &scale);

    // Microbenchmarks show that beyond this point, returning on parse failure is slower
    // than just letting the function run out.
    *result = StringParser::PARSE_SUCCESS;
    if (UNLIKELY(precision - scale > type_precision - type_scale)) {
      // The number in the string has too many digits to the left of the dot,
      // so we overflow.
      *result = StringParser::PARSE_OVERFLOW;
    } else if (UNLIKELY(scale > type_scale)) {
      // There are too many digits to the right of the dot in the string we are parsing.
      *result = StringParser::PARSE_UNDERFLOW;
      // The scale of 'value'. Truncated digits were never accumulated into 'value', so
      // they do not contribute to its scale.
      int value_scale = scale - truncated_digit_count;
      // Number of fractional digits that still have to be dropped from 'value'.
      int shift = value_scale - type_scale;
      if (shift > 0) {
        // There are fewer than the maximum number of digits to the left of the dot.
        value = DecimalUtil::ScaleDownAndRound<T>(value, shift, round);
        DCHECK(value >= 0);
        DCHECK(value < DecimalUtil::GetScaleMultiplier<int128_t>(type_precision));
      } else {
        // There are a maximum number of digits to the left of the dot. We round by
        // looking at the first truncated digit.
        DCHECK_EQ(shift, 0);
        DCHECK(0 <= first_truncated_digit && first_truncated_digit <= 9);
        DCHECK(first_truncated_digit == 0 || truncated_digit_count != 0);
        DCHECK(first_truncated_digit == 0 || round);
        // Apply the rounding. The comparison yields 0 or 1, so the magnitude is bumped
        // by one when the first dropped digit is 5 or more. When 'round' is false the
        // digit is still 0 and nothing is added.
        value += (first_truncated_digit >= 5);
        DCHECK(value >= 0);
        DCHECK(value <= DecimalUtil::GetScaleMultiplier<int128_t>(type_precision));
        if (UNLIKELY(value == DecimalUtil::GetScaleMultiplier<T>(type_precision))) {
          // Overflow due to rounding.
          *result = StringParser::PARSE_OVERFLOW;
        }
      }
    } else if (UNLIKELY(!found_value && !found_dot)) {
      // Neither a digit nor a dot was seen (e.g. an empty string or a lone sign).
      *result = StringParser::PARSE_FAILURE;
    } else if (type_scale > scale) {
      // There were not enough digits after the dot, so we have to scale up the value.
      DCHECK_EQ(truncated_digit_count, 0);
      value *= DecimalUtil::GetScaleMultiplier<T>(type_scale - scale);
      // Overflow should be impossible.
      DCHECK(value < DecimalUtil::GetScaleMultiplier<int128_t>(type_precision));
    }

    // 'value' holds the magnitude only; apply the sign that was consumed at the start.
    return DecimalValue<T>(is_negative ? -value : value);
  }