/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#pragma once

#include <IO/readFloatText.h>

namespace DB
{
namespace ErrorCodes
{
    extern const int CANNOT_PARSE_NUMBER;
}
}

namespace local_engine
{

static String zh_cn_symbol = "¥"; // std::use_facet<std::moneypunct<char>>(std::locale("zh_cn.utf8")).curr_symbol();
static String en_us_symbol = "$"; // std::use_facet<std::moneypunct<char>>(std::locale("en_US.utf8")).curr_symbol();


inline bool checkMoneySymbol(DB::ReadBuffer & buf, String & symbol)
{
    auto pr = static_cast<DB::PeekableReadBuffer>(buf);
    DB::PeekableReadBufferCheckpoint checkpoint{pr, false};

    std::string::iterator it = symbol.begin();
    while (it != symbol.end())
    {
        if (pr.eof() || *pr.position() != *it)
        {
            pr.rollbackToCheckpoint();
            return false;
        }

        pr.position()++;
        it++;
    }

    return true;
}

inline bool checkMoneySymbol(DB::ReadBuffer & buf)
{
    return checkMoneySymbol(buf, zh_cn_symbol) || checkMoneySymbol(buf, en_us_symbol);
}

inline bool checkNumberComma(DB::ReadBuffer & buf, bool has_quote, const DB::FormatSettings & settings)
{
    if (!has_quote && settings.csv.delimiter == ',')
        return false;

    /// if has ',', next ',' must has 3 digits, eg:  1,000 / 11,000,000
    /// error is 11,00 / 1,0
    if ((buf.position() + 4 <= buf.buffer().end() && isNumericASCII(buf.position()[1]) && isNumericASCII(buf.position()[2])
         && isNumericASCII(buf.position()[3]) && !isNumericASCII(buf.position()[4]))
        || (buf.position() + 3 == buf.buffer().end() && isNumericASCII(buf.position()[1]) && isNumericASCII(buf.position()[2])
            && isNumericASCII(buf.position()[3])))
    {
        return true;
    }
    else
        return false;
}

template <size_t N, bool before_point = false, typename T>
static inline bool readUIntTextUpToNSignificantDigits(T & x, DB::ReadBuffer & buf, bool has_quote, const DB::FormatSettings & settings)
{
    bool has_values = false;
    /// In optimistic case we can skip bound checking for first loop.
    if (buf.position() + N <= buf.buffer().end())
    {
        for (size_t i = 0; i < N; ++i)
        {
            if (isNumericASCII(*buf.position()))
            {
                x *= 10;
                x += *buf.position() & 0x0F;
                ++buf.position();
                has_values = true;
            }
            else if constexpr (before_point) // 10,000,000
            {
                if (!has_values)
                    return false;

                if (*buf.position() == ',' && checkNumberComma(buf, has_quote, settings))
                {
                    ++buf.position();
                    continue;
                }
                else
                    return true;
            }
            else
                return true;
        }
    }
    else
    {
        for (size_t i = 0; i < N; ++i)
        {
            if (!buf.eof() && isNumericASCII(*buf.position()))
            {
                x *= 10;
                x += *buf.position() & 0x0F;
                ++buf.position();
                has_values = true;
            }
            else if constexpr (before_point) // 10,000,000
            {
                if (!has_values)
                    return false;

                if (*buf.position() == ',' && checkNumberComma(buf, has_quote, settings))
                {
                    ++buf.position();
                    continue;
                }
                else
                    return true;
            }
            else
                return true;
        }
    }

    while (!buf.eof() && (buf.position() + 8 <= buf.buffer().end()) && DB::is_made_of_eight_digits_fast(buf.position()))
    {
        buf.position() += 8;
    }

    while (!buf.eof() && isNumericASCII(*buf.position()))
        ++buf.position();

    return true;
}


template <typename T>
inline bool readExcelFloatTextFastImpl(T & x, DB::ReadBuffer & in, bool has_quote, const DB::FormatSettings & settings)
{
    static_assert(std::is_same_v<T, double> || std::is_same_v<T, float>, "Argument for readFloatTextImpl must be float or double");
    static_assert('a' > '.' && 'A' > '.' && '\n' < '.' && '\t' < '.' && '\'' < '.' && '"' < '.', "Layout of char is not like ASCII");

    const UInt8 MAX_HEAD_SKIP = 2;
    const UInt8 MAX_TAIL_SKIP = 2;
    UInt8 head_skip = 0;
    UInt8 tail_skip = 0;

    bool negative = false;
    x = 0;
    UInt64 before_point = 0;
    UInt64 after_point = 0;
    int after_point_exponent = 0;
    int exponent = 0;

    if (in.eof())
        return false;

    while (!in.eof())
    {

        if ((*in.position() < '0' || *in.position() > '9') && *in.position() != '-' && *in.position() != '+' && *in.position() != '.'
            && !checkMoneySymbol(in))
        {
            if (!((static_cast<UInt8>(*in.position()) & 0b11000000u) == 0b10000000u)) // learn from UTF8Helpers.h
            {
                head_skip++;
                if (head_skip > MAX_HEAD_SKIP)
                    return false;
            }
            ++in.position();
        }
        else
            break ;
    }


    if (*in.position() == '-')
    {
        negative = true;
        ++in.position();
    }
    else if (*in.position() == '+')
        ++in.position();

    auto count_after_sign = in.count();

    constexpr int significant_digits = std::numeric_limits<UInt64>::digits10;
    if (!local_engine::readUIntTextUpToNSignificantDigits<significant_digits, true>(before_point, in, has_quote, settings))
        return false;

    size_t read_digits = in.count() - count_after_sign;

    if (unlikely(read_digits > significant_digits))
    {
        int before_point_additional_exponent = static_cast<int>(read_digits) - significant_digits;
        x = static_cast<T>(shift10(before_point, before_point_additional_exponent));
    }
    else
    {
        x = before_point;
    }

    if (checkChar('.', in))
    {
        auto after_point_count = in.count();

        while (!in.eof() && *in.position() == '0')
            ++in.position();

        auto after_leading_zeros_count = in.count();
        int after_point_num_leading_zeros = static_cast<int>(after_leading_zeros_count - after_point_count);

        local_engine::readUIntTextUpToNSignificantDigits<significant_digits>(after_point, in, has_quote, settings);
        read_digits = in.count() - after_leading_zeros_count;
        after_point_exponent
            = (read_digits > significant_digits ? -significant_digits : static_cast<int>(-read_digits)) - after_point_num_leading_zeros;
    }

    if (checkChar('e', in) || checkChar('E', in))
    {
        if (in.eof())
            return false;


        bool exponent_negative = false;
        if (*in.position() == '-')
        {
            exponent_negative = true;
            ++in.position();
        }
        else if (*in.position() == '+')
        {
            ++in.position();
        }

        local_engine::readUIntTextUpToNSignificantDigits<4>(exponent, in, has_quote, settings);
        if (exponent_negative)
            exponent = -exponent;
    }


    if (!(*in.position() >= '0' && *in.position() <= '9')) // process suffix
    {
        while (!in.eof())
        {
            if(*in.position() == settings.csv.delimiter ||*in.position() == '\'' ||*in.position() == '\"'
                || *in.position() == '\n' || *in.position() == '\r')
            {
                break;
            }
            if (!((static_cast<UInt8>(*in.position()) & 0b11000000u) == 0b10000000u)) // learn from UTF8Helpers.h
            {
                tail_skip++;
                if (tail_skip>MAX_TAIL_SKIP)
                    return false;
            }
            ++in.position();
        }
    }


    if (after_point)
        x += static_cast<T>(shift10(after_point, after_point_exponent));

    if (exponent)
        x = static_cast<T>(shift10(x, exponent));

    if (negative)
        x = -x;

    auto num_characters_without_sign = in.count() - count_after_sign;

    /// Denormals. At most one character is read before denormal and it is '-'.
    if (num_characters_without_sign == 0)
    {
        if (in.eof())
            return false;

        if (*in.position() == '+')
        {
            ++in.position();
            if (in.eof())
                return false;
            else if (negative)
                return false;
        }

        if (*in.position() == 'i' || *in.position() == 'I')
        {
            if (assertOrParseInfinity<false>(in))
            {
                x = std::numeric_limits<T>::infinity();
                if (negative)
                    x = -x;
                return true;
            }
            return false;
        }
        else if (*in.position() == 'n' || *in.position() == 'N')
        {
            if (assertOrParseNaN<false>(in))
            {
                x = std::numeric_limits<T>::quiet_NaN();
                if (negative)
                    x = -x;
                return true;
            }
            return false;
        }
    }

    return true;
}


template <typename T>
bool readExcelIntTextImpl(T & x, DB::ReadBuffer & buf, bool has_quote, const DB::FormatSettings & settings)
{
    bool number_force = settings.try_infer_integers==1;
    const UInt8 MAX_HEAD_SKIP = 2;
    const UInt8 MAX_TAIL_SKIP = 2;
    UInt8 head_skip=0;
    UInt8 tail_skip=0;
    
    using UnsignedT = make_unsigned_t<T>;

    bool negative = false;
    UnsignedT res{};
    if (buf.eof())
        return false;

    /// '+' or '-'
    bool has_sign = false;
    bool has_number = false;
    UInt32 length = 0;
    while (!buf.eof())
    {
        if (*buf.position() == '+')
        {
            /// 123+ or +123+, just stop after 123 or +123.
            if (has_number)
                break;

            /// No digits read yet, but we already read sign, like ++, -+.
            if (has_sign)
                return false;

            has_sign = true;
            ++buf.position();
        }
        else if (*buf.position() == '-')
        {
            if (has_number)
                break;

            if (has_sign)
                return false;

            if constexpr (is_signed_v<T>)
                negative = true;
            else
                return false;

            has_sign = true;
            ++buf.position();
        }
        else if (*buf.position() == ',')
        {
            /// invalidate like 1,00010
            if (checkNumberComma(buf, has_quote, settings))
            {
                ++buf.position();
                continue;
            }
            else
                break;
        }
        else if (*buf.position() == '.')
        {
            ++buf.position();
            if (has_number)
            {
                while (!buf.eof())
                {
                    if (!(*buf.position() >= '0' && *buf.position() <= '9'))
                    {
                        if (number_force)
                            break;
                        else
                            return false;
                    }
                    else
                    {
                        ++buf.position();
                    }
                }
            }
            else
                return false;
        }
        else if (*buf.position() >= '0' && *buf.position() <= '9')
        {
            has_number = true;
            ++length;
            if (length >= std::numeric_limits<T>::max_digits10)
            {
                if (negative)
                {
                    T signed_res = -res;
                    if (common::mulOverflow<T>(signed_res, 10, signed_res)
                        || common::subOverflow<T>(signed_res, (*buf.position() - '0'), signed_res))
                        return false;

                    res = -static_cast<UnsignedT>(signed_res);
                }
                else
                {
                    T signed_res = res;
                    if (common::mulOverflow<T>(signed_res, 10, signed_res)
                        || common::addOverflow<T>(signed_res, (*buf.position() - '0'), signed_res))
                        return false;

                    res = signed_res;
                }
            }
            else
            {
                res *= 10;
                res += *buf.position() - '0';
            }

            ++buf.position();
        }
        else if (!has_number && !has_sign && checkMoneySymbol(buf))
        {
            continue;
        }
        else if (has_number && !(*buf.position() >= '0' && *buf.position() <= '9') && number_force) // process suffix
        {
            while (!buf.eof())
            {
                if(*buf.position() == settings.csv.delimiter ||*buf.position() == '\'' ||*buf.position() == '\"'
                    || *buf.position() == '\n' || *buf.position() == '\r')
                {
                    break;
                }
                if (!((static_cast<UInt8>(*buf.position()) & 0b11000000u) == 0b10000000u)) // learn from UTF8Helpers.h
                {
                    tail_skip++;
                    if (tail_skip>MAX_TAIL_SKIP)
                        return false;
                }
                ++buf.position();
            }
            break;
        }
        else if (!has_number && !(*buf.position() >= '0' && *buf.position() <= '9') && number_force) // process prefix
        {
            if(*buf.position() == settings.csv.delimiter || *buf.position() == '\n' || *buf.position() == '\r')
            {
                break;
            }

            if (!((static_cast<UInt8>(*buf.position()) & 0b11000000u) == 0b10000000u)) // learn from UTF8Helpers.h
            {
                head_skip++;
                if (head_skip>MAX_HEAD_SKIP)
                    return false;
            }
            ++buf.position();
        }
        else
            break;
    }

    if (!has_number)
        return false;

    x = res;
    if constexpr (is_signed_v<T>)
    {
        if (negative)
        {
            x = -res;
        }
    }

    return true;
}
}
