in be/src/util/string-parser.h [135:285]
static inline DecimalValue<T> StringToDecimal(const char* s, int len,
int type_precision, int type_scale, bool round, StringParser::ParseResult* result) {
// Parses the 'len' characters starting at 's' as a decimal number and returns it as a
// DecimalValue<T> scaled to 'type_scale'. 'type_precision' bounds the number of digits
// that are accumulated and 'type_scale' is the number of digits kept after the dot.
//
// Accepted input: optional surrounding whitespace, an optional '+' or '-' sign, digits
// with at most one '.', and an optional exponent introduced by 'e' or 'E'. Any other
// character (including a second '.') is a parse failure.
//
// 'round' controls what happens to digits that do not fit: when true, the first
// dropped digit is used to round the result; it is also forwarded to
// DecimalUtil::ScaleDownAndRound().
//
// '*result' is always written (it must not be null) and is set as follows:
// - PARSE_SUCCESS: the input parsed and fit in the type.
// - PARSE_FAILURE: an unexpected character was found, or there was neither a digit
// nor a dot. The exponent parser's failure is passed through as-is.
// - PARSE_OVERFLOW: too many digits to the left of the dot, rounding carried into an
// extra digit, or the exponent overflowed with a non-negative value.
// - PARSE_UNDERFLOW: too many digits to the right of the dot (the value is still
// scaled down and returned), or the exponent overflowed with a negative value.
//
// On an unexpected character or an unsuccessful exponent parse the function returns
// DecimalValue<T>(0) immediately. In every other case, including overflow and the
// "nothing found" failure, control falls through to the final return and the
// accumulated value is returned, so callers must check '*result'.
//
// Special cases:
// 1) '' == Fail, an empty string fails to parse.
// 2) ' # ' == #, leading and trailing white space is ignored.
// 3) '.' == 0, a single dot parses as zero (for consistency with other types).
// 4) '#.' == '#', a trailing dot is ignored.
// Ignore leading and trailing spaces.
while (len > 0 && IsWhitespace(*s)) {
++s;
--len;
}
while (len > 0 && IsWhitespace(s[len - 1])) {
--len;
}
// Consume an optional sign. '-' records the sign and then deliberately falls through
// to the '+' case, which skips the sign character.
bool is_negative = false;
if (len > 0) {
switch (*s) {
case '-':
is_negative = true;
[[fallthrough]];
case '+':
++s;
--len;
}
}
// Ignore leading zeros.
// 'found_value' becomes true as soon as any digit is seen, including a skipped zero.
// Zeros skipped here are not counted in any of the digit counters below.
bool found_value = false;
while (len > 0 && UNLIKELY(*s == '0')) {
found_value = true;
++s;
--len;
}
// Ignore leading zeros even after a dot. This allows for differentiating between
// cases like 0.01e2, which would fit in a DECIMAL(1, 0), and 0.10e2, which would
// overflow.
// Zeros skipped after the dot count towards 'digits_after_dot_count' but not towards
// 'total_digits_count'. 'found_dot' is an int (0 or 1) rather than a bool so that it
// can be added directly to 'digits_after_dot_count' in the main loop.
int digits_after_dot_count = 0;
int found_dot = 0;
if (len > 0 && *s == '.') {
found_dot = 1;
++s;
--len;
while (len > 0 && UNLIKELY(*s == '0')) {
found_value = true;
++digits_after_dot_count;
++s;
--len;
}
}
// State for the main scan:
// - total_digits_count: digits seen by the loop below (skipped zeros excluded).
// - exponent: value after 'e'/'E', 0 if there is none.
// - first_truncated_digit: first digit past the precision limit; only recorded when
// 'round' is true, otherwise it stays 0.
// - value: unscaled, non-negative accumulation of the accepted digits.
int total_digits_count = 0;
bool found_exponent = false;
int8_t exponent = 0;
int first_truncated_digit = 0;
T value = 0;
for (int i = 0; i < len; ++i) {
const char c = s[i];
if (LIKELY('0' <= c && c <= '9')) {
found_value = true;
// Ignore digits once the type's precision limit is reached. This avoids
// overflowing the underlying storage while handling a string like
// 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and
// an exponent will be made later.
if (LIKELY(total_digits_count < type_precision)) {
// Benchmarks are faster with parenthesis.
T new_value = (value * 10) + (c - '0');
// Sanity check that appending a digit never makes the value smaller.
DCHECK(new_value >= value);
value = new_value;
} else if (UNLIKELY(round && total_digits_count == type_precision)) {
// This is exactly the first digit that did not fit; remember it for rounding.
first_truncated_digit = c - '0';
}
DCHECK(value >= 0); // DCHECK_GE does not work with int128_t
// Dropped digits are still counted so the truncation can be accounted for later.
++total_digits_count;
// Adds 1 only once the dot has been seen.
digits_after_dot_count += found_dot;
} else if (c == '.' && LIKELY(!found_dot)) {
found_dot = 1;
} else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) {
// NOTE(review): 'found_exponent' can never be true when this condition is
// evaluated, because every path below leaves the loop (return or break).
found_exponent = true;
// Everything after the 'e'/'E' is handed to the integer parser, which also sets
// '*result'.
exponent = StringToIntInternal<int8_t>(s + i + 1, len - i - 1, result);
if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) {
// An overflowing exponent that comes back negative is reported as underflow.
// Presumably StringToIntInternal returns a value carrying the sign of the input
// on overflow -- confirm against its definition.
if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) {
*result = StringParser::PARSE_UNDERFLOW;
}
return DecimalValue<T>(0);
}
// The exponent consumed the rest of the input.
break;
} else {
// Unexpected character, a second dot, or (in principle) a second exponent.
*result = StringParser::PARSE_FAILURE;
return DecimalValue<T>(0);
}
}
// Find the number of truncated digits before adjusting the precision for an exponent.
int truncated_digit_count = std::max(total_digits_count - type_precision, 0);
// 'scale' and 'precision' refer to the scale and precision of the number that
// is contained in the string that we are parsing. The scale of 'value' may be
// different because some digits may have been truncated.
// ApplyExponent() is defined elsewhere; it receives the digit counts and the exponent
// and writes 'precision' and 'scale'. It is also given a pointer to 'value', so it
// may presumably adjust it -- confirm against its definition.
int scale, precision;
ApplyExponent(total_digits_count, digits_after_dot_count,
exponent, &value, &precision, &scale);
// Microbenchmarks show that beyond this point, returning on parse failure is slower
// than just letting the function run out.
// Start from success; the first matching branch below overrides it.
*result = StringParser::PARSE_SUCCESS;
if (UNLIKELY(precision - scale > type_precision - type_scale)) {
// The number in the string has too many digits to the left of the dot,
// so we overflow.
*result = StringParser::PARSE_OVERFLOW;
} else if (UNLIKELY(scale > type_scale)) {
// There are too many digits to the right of the dot in the string we are parsing.
*result = StringParser::PARSE_UNDERFLOW;
// The scale of 'value'.
// Digits dropped during the scan never made it into 'value', so they are removed
// from the string's scale to get the scale 'value' actually has.
int value_scale = scale - truncated_digit_count;
// Number of extra fractional digits that 'value' still carries beyond 'type_scale'.
int shift = value_scale - type_scale;
if (shift > 0) {
// There are fewer than the maximum number of digits to the left of the dot.
value = DecimalUtil::ScaleDownAndRound<T>(value, shift, round);
DCHECK(value >= 0);
DCHECK(value < DecimalUtil::GetScaleMultiplier<int128_t>(type_precision));
} else {
// There are a maximum number of digits to the left of the dot. We round by
// looking at the first truncated digit.
DCHECK_EQ(shift, 0);
DCHECK(0 <= first_truncated_digit && first_truncated_digit <= 9);
DCHECK(first_truncated_digit == 0 || truncated_digit_count != 0);
DCHECK(first_truncated_digit == 0 || round);
// Apply the rounding.
// Adds 1 when the first dropped digit is 5 or more. Without 'round' the digit is
// always 0, so this is a no-op.
value += (first_truncated_digit >= 5);
DCHECK(value >= 0);
DCHECK(value <= DecimalUtil::GetScaleMultiplier<int128_t>(type_precision));
// GetScaleMultiplier<T>(p) is presumably 10^p, i.e. the smallest value that needs
// more than 'type_precision' digits -- confirm against DecimalUtil.
if (UNLIKELY(value == DecimalUtil::GetScaleMultiplier<T>(type_precision))) {
// Overflow due to rounding.
*result = StringParser::PARSE_OVERFLOW;
}
}
} else if (UNLIKELY(!found_value && !found_dot)) {
// Nothing parseable was found: no digit and no dot.
*result = StringParser::PARSE_FAILURE;
} else if (type_scale > scale) {
// There were not enough digits after the dot, so we have to scale up the value.
DCHECK_EQ(truncated_digit_count, 0);
value *= DecimalUtil::GetScaleMultiplier<T>(type_scale - scale);
// Overflow should be impossible.
DCHECK(value < DecimalUtil::GetScaleMultiplier<int128_t>(type_precision));
}
// 'value' has been kept non-negative throughout; the sign is applied only here.
return DecimalValue<T>(is_negative ? -value : value);
}