bool readExcelIntTextImpl()

in cpp-ch/local-engine/Storages/Serializations/ExcelNumberReader.h [335:507]


/// Parses a decimal integer written in a tolerant, spreadsheet-like text form from `buf` into `x`.
///
/// Accepted input, in the order the characters are examined:
///  - an optional single leading '+' or '-' ('-' is rejected for unsigned T);
///  - decimal digits, accumulated with overflow detection;
///  - ',' between digits, skipped when checkNumberComma() approves it;
///  - a '.' after at least one digit, whose following digits are discarded;
///  - a leading money symbol recognised by checkMoneySymbol();
///  - in lenient mode only (settings.try_infer_integers == 1): up to MAX_HEAD_SKIP
///    non-numeric characters before the digits and up to MAX_TAIL_SKIP after them.
///
/// @param x          Receives the parsed value. Written only when the function returns true.
/// @param buf        Input buffer; its position is advanced as characters are consumed,
///                   including on the failure paths that have already consumed input.
/// @param has_quote  Forwarded unchanged to checkNumberComma(); not interpreted here.
/// @param settings   Supplies try_infer_integers (lenient mode) and csv.delimiter.
/// @return true if at least one digit was read and the value fits into T, false otherwise.
///
/// NOTE(review): T is expected to be a template parameter declared just above this
/// definition; that declaration is outside the visible part of the file.
bool readExcelIntTextImpl(T & x, DB::ReadBuffer & buf, bool has_quote, const DB::FormatSettings & settings)
{
    /// Lenient mode: tolerate a few junk characters around the number and a fractional tail.
    const bool number_force = settings.try_infer_integers == 1;
    constexpr UInt8 MAX_HEAD_SKIP = 2;
    constexpr UInt8 MAX_TAIL_SKIP = 2;
    UInt8 head_skip = 0;
    UInt8 tail_skip = 0;

    using UnsignedT = make_unsigned_t<T>;

    /// Every number with at most `digits10` decimal digits is representable in T, so the
    /// comparatively slow overflow-checked arithmetic is needed only beyond that length.
    /// (max_digits10 must not be used here: it is 0 for standard integer types, which
    /// would make the unchecked fast path unreachable.)
    constexpr auto safe_digits = static_cast<UInt32>(std::numeric_limits<T>::digits10);

    const auto is_digit = [](char c) { return c >= '0' && c <= '9'; };
    /// True for a byte that starts a UTF-8 code point (i.e. is not a 10xxxxxx continuation byte),
    /// so skipped characters are counted per code point rather than per byte.
    const auto starts_code_point = [](char c) { return (static_cast<UInt8>(c) & 0b11000000u) != 0b10000000u; };

    bool negative = false;
    /// Magnitude of the number read so far; the sign is applied at the very end.
    UnsignedT res{};
    if (buf.eof())
        return false;

    /// '+' or '-'
    bool has_sign = false;
    bool has_number = false;
    /// Number of digits consumed so far (signs, commas and skipped characters are not counted).
    UInt32 length = 0;
    while (!buf.eof())
    {
        const char c = *buf.position();

        if (c == '+')
        {
            /// 123+ or +123+, just stop after 123 or +123.
            if (has_number)
                break;

            /// No digits read yet, but we already read sign, like ++, -+.
            if (has_sign)
                return false;

            has_sign = true;
            ++buf.position();
        }
        else if (c == '-')
        {
            /// 123- : stop after the number, leaving '-' unread.
            if (has_number)
                break;

            /// A second sign before any digit, like +- or --.
            if (has_sign)
                return false;

            if constexpr (is_signed_v<T>)
                negative = true;
            else
                return false;

            has_sign = true;
            ++buf.position();
        }
        else if (c == ',')
        {
            /// Presumably checkNumberComma() tells a thousands separator apart from a field
            /// delimiter (e.g. rejecting 1,00010) - its definition is not visible here.
            if (checkNumberComma(buf, has_quote, settings))
            {
                ++buf.position();
                continue;
            }
            else
                break;
        }
        else if (c == '.')
        {
            ++buf.position();

            /// A '.' is only acceptable after at least one digit.
            if (!has_number)
                return false;

            /// Discard the fractional digits. Outside lenient mode anything other than a digit
            /// before the end of the buffer is an error; in lenient mode the outer loop
            /// re-examines the offending character.
            while (!buf.eof())
            {
                if (is_digit(*buf.position()))
                    ++buf.position();
                else if (number_force)
                    break;
                else
                    return false;
            }
        }
        else if (is_digit(c))
        {
            has_number = true;
            ++length;
            if (length > safe_digits)
            {
                /// Close to the limit of T: append the digit with overflow detection,
                /// working on the signed value so that the minimum of T is still accepted.
                if (negative)
                {
                    T signed_res = -res;
                    if (common::mulOverflow<T>(signed_res, 10, signed_res)
                        || common::subOverflow<T>(signed_res, (c - '0'), signed_res))
                        return false;

                    res = -static_cast<UnsignedT>(signed_res);
                }
                else
                {
                    T signed_res = res;
                    if (common::mulOverflow<T>(signed_res, 10, signed_res)
                        || common::addOverflow<T>(signed_res, (c - '0'), signed_res))
                        return false;

                    res = signed_res;
                }
            }
            else
            {
                /// Cannot overflow: the value has at most `digits10` digits.
                res *= 10;
                res += c - '0';
            }

            ++buf.position();
        }
        else if (!has_number && !has_sign && checkMoneySymbol(buf))
        {
            /// Presumably checkMoneySymbol() consumes the symbol when it returns true - TODO confirm;
            /// the buffer is not advanced here.
            continue;
        }
        else if (has_number && number_force) // process suffix
        {
            /// Skip trailing junk up to the end of the field, allowing at most MAX_TAIL_SKIP characters.
            while (!buf.eof())
            {
                const char tail = *buf.position();
                if (tail == settings.csv.delimiter || tail == '\'' || tail == '\"' || tail == '\n' || tail == '\r')
                    break;

                if (starts_code_point(tail))
                {
                    tail_skip++;
                    if (tail_skip > MAX_TAIL_SKIP)
                        return false;
                }
                ++buf.position();
            }
            break;
        }
        else if (!has_number && number_force) // process prefix
        {
            /// The field ended before any digit was seen.
            if (c == settings.csv.delimiter || c == '\n' || c == '\r')
                break;

            /// Skip leading junk, allowing at most MAX_HEAD_SKIP characters.
            if (starts_code_point(c))
            {
                head_skip++;
                if (head_skip > MAX_HEAD_SKIP)
                    return false;
            }
            ++buf.position();
        }
        else
            break;
    }

    if (!has_number)
        return false;

    x = res;
    if constexpr (is_signed_v<T>)
    {
        if (negative)
        {
            x = -res;
        }
    }

    return true;
}