cpp-ch/local-engine/Functions/SparkFunctionRoundHalfUp.h

/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #pragma once #include <Functions/FunctionsRound.h> namespace DB::ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } namespace local_engine { template <typename T, DB::Vectorize vectorize> class BaseFloatRoundingHalfUpComputation; #ifdef __SSE4_1__ /// vectorized implementation for x86 template <> class BaseFloatRoundingHalfUpComputation<Float32, DB::Vectorize::Yes> { public: using ScalarType = Float32; using VectorType = __m128; static const size_t data_count = 4; static VectorType load(const ScalarType * in) { return _mm_loadu_ps(in); } static VectorType load1(const ScalarType in) { return _mm_load1_ps(&in); } static void store(ScalarType * out, VectorType val) { _mm_storeu_ps(out, val); } static VectorType multiply(VectorType val, VectorType scale) { return _mm_mul_ps(val, scale); } static VectorType divide(VectorType val, VectorType scale) { return _mm_div_ps(val, scale); } template <DB::RoundingMode mode> static VectorType apply(VectorType val) { ScalarType tempFloatsIn[data_count]; ScalarType tempFloatsOut[data_count]; store(tempFloatsIn, val); for (size_t i = 0; i < data_count; ++i) tempFloatsOut[i] = std::roundf(tempFloatsIn[i]); return load(tempFloatsOut); } static VectorType prepare(size_t scale) { return load1(scale); } }; template <> class BaseFloatRoundingHalfUpComputation<Float64, DB::Vectorize::Yes> { public: using ScalarType = Float64; using VectorType = __m128d; static const size_t data_count = 2; static VectorType load(const ScalarType * in) { return _mm_loadu_pd(in); } static VectorType load1(const ScalarType in) { return _mm_load1_pd(&in); } static void store(ScalarType * out, VectorType val) { _mm_storeu_pd(out, val); } static VectorType multiply(VectorType val, VectorType scale) { return _mm_mul_pd(val, scale); } static VectorType divide(VectorType val, VectorType scale) { return _mm_div_pd(val, scale); } template <DB::RoundingMode mode> static VectorType apply(VectorType val) { ScalarType tempFloatsIn[data_count]; ScalarType tempFloatsOut[data_count]; store(tempFloatsIn, val); for (size_t i = 0; i < data_count; ++i) tempFloatsOut[i] = std::round(tempFloatsIn[i]); return load(tempFloatsOut); } static VectorType prepare(size_t scale) { return load1(scale); } }; /// end __SSE4_1__ #endif /// Sequential implementation for ARM. Also used for scalar arguments template <typename T> class BaseFloatRoundingHalfUpComputation<T, DB::Vectorize::No> { public: using ScalarType = T; using VectorType = T; static const size_t data_count = 1; static VectorType load(const ScalarType * in) { return *in; } static VectorType load1(const ScalarType in) { return in; } static VectorType store(ScalarType * out, ScalarType val) { return *out = val;} static VectorType multiply(VectorType val, VectorType scale) { return val * scale; } static VectorType divide(VectorType val, VectorType scale) { return val / scale; } template <DB::RoundingMode mode> static VectorType apply(VectorType val) { if constexpr (std::is_same_v<ScalarType, Float32>) { return std::roundf(val); } else { return std::round(val); } } static VectorType prepare(size_t scale) { return load1(scale); } }; template <> class BaseFloatRoundingHalfUpComputation<BFloat16, DB::Vectorize::No> { public: using ScalarType = BFloat16; using VectorType = BFloat16; static const size_t data_count = 1; static VectorType load(const ScalarType * in) { return *in; } static VectorType load1(const ScalarType in) { return in; } static VectorType store(ScalarType * out, ScalarType val) { return *out = val;} static VectorType multiply(VectorType val, VectorType scale) { return val * scale; } static VectorType divide(VectorType val, VectorType scale) { return val / scale; } template <DB::RoundingMode mode> static VectorType apply(VectorType val) { return BFloat16(std::roundf(static_cast<Float32>(val))); } static VectorType prepare(size_t scale) { return load1(BFloat16(static_cast<Float32>(scale))); } }; /** Implementation of low-level round-off functions for floating-point values. */ template <typename T, DB::RoundingMode rounding_mode, DB::ScaleMode scale_mode, DB::Vectorize vectorize> class FloatRoundingHalfUpComputation : public BaseFloatRoundingHalfUpComputation<T, vectorize> { using Base = BaseFloatRoundingHalfUpComputation<T, vectorize>; public: static inline void compute(const T * __restrict in, const typename Base::VectorType & scale, T * __restrict out) { auto val = Base::load(in); if (scale_mode == DB::ScaleMode::Positive) val = Base::multiply(val, scale); else if (scale_mode == DB::ScaleMode::Negative) val = Base::divide(val, scale); val = Base::template apply<rounding_mode>(val); if (scale_mode == DB::ScaleMode::Positive) val = Base::divide(val, scale); else if (scale_mode == DB::ScaleMode::Negative) val = Base::multiply(val, scale); Base::store(out, val); } }; /** Implementing high-level rounding functions. */ template <typename T, DB::RoundingMode rounding_mode, DB::ScaleMode scale_mode> struct FloatRoundingHalfUpImpl { private: static_assert(!DB::is_decimal<T>); template <DB::Vectorize vectorize = #ifdef __SSE4_1__ std::is_same_v<T, BFloat16> ? DB::Vectorize::No : DB::Vectorize::Yes #else DB::Vectorize::No #endif > using Op = FloatRoundingHalfUpComputation<T, rounding_mode, scale_mode, vectorize>; using Data = std::array<T, Op<>::data_count>; using ColumnType = DB::ColumnVector<T>; using Container = typename ColumnType::Container; public: static NO_INLINE void apply(const Container & in, size_t scale, Container & out) { auto mm_scale = Op<>::prepare(scale); const size_t data_count = std::tuple_size<Data>(); const T * end_in = in.data() + in.size(); const T * limit = in.data() + in.size() / data_count * data_count; const T * __restrict p_in = in.data(); T * __restrict p_out = out.data(); while (p_in < limit) { Op<>::compute(p_in, mm_scale, p_out); p_in += data_count; p_out += data_count; } if (p_in < end_in) { Data tmp_src{{}}; Data tmp_dst; size_t tail_size_bytes = (end_in - p_in) * sizeof(*p_in); memcpy(&tmp_src, p_in, tail_size_bytes); Op<>::compute(reinterpret_cast<T *>(&tmp_src), mm_scale, reinterpret_cast<T *>(&tmp_dst)); memcpy(p_out, &tmp_dst, tail_size_bytes); } } }; /** Select the appropriate processing algorithm depending on the scale. */ template <typename T, DB::RoundingMode rounding_mode, DB::TieBreakingMode tie_breaking_mode> struct DispatcherRoundingHalfUp { template <DB::ScaleMode scale_mode> using FunctionRoundingImpl = std::conditional_t< std::is_floating_point_v<T> || std::is_same_v<T, BFloat16>, FloatRoundingHalfUpImpl<T, rounding_mode, scale_mode>, DB::IntegerRoundingImpl<T, rounding_mode, scale_mode, tie_breaking_mode>>; static DB::ColumnPtr apply(const DB::IColumn * col_general, DB::Scale scale_arg) { const auto * const col = checkAndGetColumn<DB::ColumnVector<T>>(col_general); auto col_res = DB::ColumnVector<T>::create(); typename DB::ColumnVector<T>::Container & vec_res = col_res->getData(); vec_res.resize_exact(col->getData().size()); if (!vec_res.empty()) { if (scale_arg == 0) { size_t scale = 1; FunctionRoundingImpl<DB::ScaleMode::Zero>::apply(col->getData(), scale, vec_res); } else if (scale_arg > 0) { size_t scale = intExp10(scale_arg); FunctionRoundingImpl<DB::ScaleMode::Positive>::apply(col->getData(), scale, vec_res); } else { size_t scale = intExp10(-scale_arg); FunctionRoundingImpl<DB::ScaleMode::Negative>::apply(col->getData(), scale, vec_res); } } return col_res; } }; template <DB::is_decimal T, DB::RoundingMode rounding_mode, DB::TieBreakingMode tie_breaking_mode> struct DispatcherRoundingHalfUp<T, rounding_mode, tie_breaking_mode> { public: static DB::ColumnPtr apply(const DB::IColumn * col_general, DB::Scale scale_arg) { const auto * const col = checkAndGetColumn<DB::ColumnDecimal<T>>(col_general); const typename DB::ColumnDecimal<T>::Container & vec_src = col->getData(); auto col_res = DB::ColumnDecimal<T>::create(vec_src.size(), col->getScale()); auto & vec_res = col_res->getData(); if (!vec_res.empty()) DB::DecimalRoundingImpl<T, rounding_mode, tie_breaking_mode>::apply(col->getData(), col->getScale(), vec_res, scale_arg); return col_res; } }; /** A template for functions that round the value of an input parameter of type * (U)Int8/16/32/64, Float32/64 or Decimal32/64/128, and accept an additional optional parameter (default is 0). */ template <typename Name, DB::RoundingMode rounding_mode, DB::TieBreakingMode tie_breaking_mode> class FunctionRoundingHalfUp : public DB::IFunction { public: static constexpr auto name = "roundHalfUp"; static DB::FunctionPtr create(DB::ContextPtr) { return std::make_shared<FunctionRoundingHalfUp>(); } String getName() const override { return name; } bool isVariadic() const override { return true; } size_t getNumberOfArguments() const override { return 0; } bool isSuitableForShortCircuitArgumentsExecution(const DB::DataTypesWithConstInfo & /*arguments*/) const override { return false; } /// Get result types by argument types. If the function does not apply to these arguments, throw an exception. DB::DataTypePtr getReturnTypeImpl(const DB::DataTypes & arguments) const override { if ((arguments.empty()) || (arguments.size() > 2)) throw DB::Exception( DB::ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Number of arguments for function {} doesn't match: passed {}, should be 1 or 2.", getName(), arguments.size()); for (const auto & type : arguments) if (!isNumber(type)) throw DB::Exception( DB::ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", arguments[0]->getName(), getName()); return arguments[0]; } static DB::Scale getScaleArg(const DB::ColumnsWithTypeAndName & arguments) { if (arguments.size() == 2) { const DB::IColumn & scale_column = *arguments[1].column; if (!isColumnConst(scale_column)) throw DB::Exception(DB::ErrorCodes::ILLEGAL_COLUMN, "DB::Scale argument for rounding functions must be constant"); DB::Field scale_field = assert_cast<const DB::ColumnConst &>(scale_column).getField(); if (scale_field.getType() != DB::Field::Types::UInt64 && scale_field.getType() != DB::Field::Types::Int64) throw DB::Exception(DB::ErrorCodes::ILLEGAL_COLUMN, "DB::Scale argument for rounding functions must have integer type"); Int64 scale64 = scale_field.safeGet<Int64>(); if (scale64 > std::numeric_limits<DB::Scale>::max() || scale64 < std::numeric_limits<DB::Scale>::min()) throw DB::Exception(DB::ErrorCodes::ARGUMENT_OUT_OF_BOUND, "DB::Scale argument for rounding function is too large"); return scale64; } return 0; } bool useDefaultImplementationForConstants() const override { return true; } DB::ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } DB::ColumnPtr executeImpl(const DB::ColumnsWithTypeAndName & arguments, const DB::DataTypePtr &, size_t /*input_rows_count*/) const override { const DB::ColumnWithTypeAndName & column = arguments[0]; DB::Scale scale_arg = getScaleArg(arguments); DB::ColumnPtr res; auto call = [&](const auto & types) -> bool { using Types = std::decay_t<decltype(types)>; using DataType = typename Types::LeftType; if constexpr (DB::IsDataTypeNumber<DataType> || DB::IsDataTypeDecimal<DataType>) { using FieldType = typename DataType::FieldType; res = DispatcherRoundingHalfUp<FieldType, rounding_mode, tie_breaking_mode>::apply(column.column.get(), scale_arg); return true; } return false; }; if (!callOnIndexAndDataType<void>(column.type->getTypeId(), call)) throw DB::Exception(DB::ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", column.name, getName()); return res; } bool hasInformationAboutMonotonicity() const override { return true; } Monotonicity getMonotonicityForRange(const DB::IDataType &, const DB::Field &, const DB::Field &) const override { return {.is_monotonic = true, .is_always_monotonic = true}; } }; struct NameRoundHalfUp { static constexpr auto name = "roundHalfUp"; }; using FunctionRoundHalfUp = FunctionRoundingHalfUp<NameRoundHalfUp, DB::RoundingMode::Round, DB::TieBreakingMode::Auto>; }

cpp-ch/local-engine/Functions/SparkFunctionRoundHalfUp.h (299 lines of code) (raw):