in cpp-ch/local-engine/Storages/Serializations/ExcelNumberReader.h [335:507]
bool readExcelIntTextImpl(T & x, DB::ReadBuffer & buf, bool has_quote, const DB::FormatSettings & settings)
{
    /// Parses an integer of type T from Excel/CSV-like text at the current position of `buf`.
    ///
    /// Accepted input, as implemented below:
    ///  - an optional single leading '+' or '-' ('-' is rejected for unsigned T);
    ///  - decimal digits, with ',' skipped whenever checkNumberComma() approves it;
    ///  - a '.' after at least one digit: the fractional digits are consumed and discarded;
    ///  - a leading money symbol recognised by checkMoneySymbol();
    ///  - in lenient mode (settings.try_infer_integers == 1) up to MAX_HEAD_SKIP characters
    ///    before the number and up to MAX_TAIL_SKIP characters after it are skipped.
    ///
    /// @param x         Receives the parsed value; written only when the function returns true.
    /// @param buf       Input buffer; its position is advanced while parsing, also on failure.
    /// @param has_quote Forwarded to checkNumberComma().
    /// @param settings  Supplies try_infer_integers and csv.delimiter.
    /// @return true if at least one digit was read and the value fits in T, false otherwise.
    const bool number_force = settings.try_infer_integers == 1;
    constexpr UInt8 MAX_HEAD_SKIP = 2;
    constexpr UInt8 MAX_TAIL_SKIP = 2;
    UInt8 head_skip = 0;
    UInt8 tail_skip = 0;

    using UnsignedT = make_unsigned_t<T>;
    bool negative = false;
    /// Magnitude of the number read so far; the sign is applied at the very end.
    UnsignedT res{};

    if (buf.eof())
        return false;

    const auto is_digit = [](char c) { return c >= '0' && c <= '9'; };
    /// True for bytes of the form 10xxxxxx, i.e. UTF-8 continuation bytes (same test as in UTF8Helpers.h).
    const auto is_utf8_continuation = [](char c) { return (static_cast<UInt8>(c) & 0b11000000u) == 0b10000000u; };

    /// '+' or '-'
    bool has_sign = false;
    bool has_number = false;
    /// Number of digit characters consumed so far (leading zeros included).
    UInt32 length = 0;

    while (!buf.eof())
    {
        if (*buf.position() == '+')
        {
            /// 123+ or +123+, just stop after 123 or +123.
            if (has_number)
                break;
            /// No digits read yet, but we already read sign, like ++, -+.
            if (has_sign)
                return false;
            has_sign = true;
            ++buf.position();
        }
        else if (*buf.position() == '-')
        {
            /// Same rules as for '+': a sign after digits ends the number, a second sign is an error.
            if (has_number)
                break;
            if (has_sign)
                return false;
            if constexpr (is_signed_v<T>)
                negative = true;
            else
                return false;
            has_sign = true;
            ++buf.position();
        }
        else if (*buf.position() == ',')
        {
            /// Skip the comma only if checkNumberComma() accepts it (e.g. it rejects 1,00010);
            /// otherwise the comma terminates the number.
            if (checkNumberComma(buf, has_quote, settings))
            {
                ++buf.position();
                continue;
            }
            else
                break;
        }
        else if (*buf.position() == '.')
        {
            ++buf.position();
            /// A '.' before any digit is not a valid integer.
            if (!has_number)
                return false;

            /// Consume and drop the fractional digits. A non-digit afterwards is an error in
            /// strict mode; in lenient mode it is left for the outer loop to handle.
            while (!buf.eof())
            {
                if (!is_digit(*buf.position()))
                {
                    if (number_force)
                        break;
                    else
                        return false;
                }
                else
                {
                    ++buf.position();
                }
            }
        }
        else if (is_digit(*buf.position()))
        {
            has_number = true;
            ++length;
            /// digits10 is the number of decimal digits that always fit in T, so overflow checks
            /// are needed only from that digit onward. (max_digits10 must not be used here: it is
            /// 0 for integer types, which would force the checked path for every digit.)
            if (length >= std::numeric_limits<T>::digits10)
            {
                if (negative)
                {
                    /// Accumulate in the negative domain so that the minimum value of T is representable.
                    T signed_res = -res;
                    if (common::mulOverflow<T>(signed_res, 10, signed_res)
                        || common::subOverflow<T>(signed_res, (*buf.position() - '0'), signed_res))
                        return false;
                    res = -static_cast<UnsignedT>(signed_res);
                }
                else
                {
                    T signed_res = res;
                    if (common::mulOverflow<T>(signed_res, 10, signed_res)
                        || common::addOverflow<T>(signed_res, (*buf.position() - '0'), signed_res))
                        return false;
                    res = signed_res;
                }
            }
            else
            {
                res *= 10;
                res += *buf.position() - '0';
            }
            ++buf.position();
        }
        else if (!has_number && !has_sign && checkMoneySymbol(buf))
        {
            /// NOTE(review): presumably checkMoneySymbol() advances `buf` past the symbol when it
            /// returns true; otherwise this `continue` would not make progress. Confirm.
            continue;
        }
        else if (has_number && !is_digit(*buf.position()) && number_force) // process suffix
        {
            /// Skip trailing characters up to the CSV delimiter, a quote or a line break.
            /// Every byte that is not a UTF-8 continuation byte counts towards the limit.
            while (!buf.eof())
            {
                if (*buf.position() == settings.csv.delimiter || *buf.position() == '\'' || *buf.position() == '\"'
                    || *buf.position() == '\n' || *buf.position() == '\r')
                {
                    break;
                }
                if (!is_utf8_continuation(*buf.position()))
                {
                    ++tail_skip;
                    if (tail_skip > MAX_TAIL_SKIP)
                        return false;
                }
                ++buf.position();
            }
            break;
        }
        else if (!has_number && !is_digit(*buf.position()) && number_force) // process prefix
        {
            /// Reaching the end of the field before any digit: stop (reported as failure below).
            if (*buf.position() == settings.csv.delimiter || *buf.position() == '\n' || *buf.position() == '\r')
            {
                break;
            }
            /// Skip one leading byte; bytes that are not UTF-8 continuation bytes count towards the limit.
            if (!is_utf8_continuation(*buf.position()))
            {
                ++head_skip;
                if (head_skip > MAX_HEAD_SKIP)
                    return false;
            }
            ++buf.position();
        }
        else
            break;
    }

    if (!has_number)
        return false;

    x = res;
    if constexpr (is_signed_v<T>)
    {
        if (negative)
        {
            /// Negate in the unsigned domain, then convert; this also yields the minimum value of T correctly.
            x = -res;
        }
    }
    return true;
}