sql_utils/public/functions/convert.h (616 lines of code) (raw):
/*
* Copyright 2023 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef THIRD_PARTY_PY_BIGQUERY_ML_UTILS_SQL_UTILS_PUBLIC_FUNCTIONS_CONVERT_H_
#define THIRD_PARTY_PY_BIGQUERY_ML_UTILS_SQL_UTILS_PUBLIC_FUNCTIONS_CONVERT_H_
#include <math.h> // for round and roundf
#include <cmath>
#include <cstdint>
#include <limits>
#include <type_traits>
#include "sql_utils/base/logging.h"
#include "sql_utils/public/functions/convert_internal.h"
#include "sql_utils/public/functions/util.h"
#include "sql_utils/public/numeric_value.h"
#include <cstdint>
#include "absl/base/optimization.h"
#include "absl/status/statusor.h"
#include "absl/strings/str_cat.h"
#include "sql_utils/base/status.h"
namespace bigquery_ml_utils {
namespace functions {
// Do not use any methods from the internal namespace.
namespace internal {
// Error-message prefixes shared by the conversion routines in this header.
// They are only declared here; the definitions (and the exact wording) live
// elsewhere. Call sites append the offending input value with absl::StrCat
// before handing the message to internal::UpdateError.
// Prefix used when a value cannot be represented as int32_t.
extern const char* const kConvertOverflowInt32;
// Prefix used when a value cannot be represented as int64_t.
extern const char* const kConvertOverflowInt64;
// Prefix used when a value cannot be represented as uint32_t.
extern const char* const kConvertOverflowUint32;
// Prefix used when a value cannot be represented as uint64_t.
extern const char* const kConvertOverflowUint64;
// Prefix used when a value cannot be represented as float.
extern const char* const kConvertOverflowFloat;
// Prefix used when a float/double input fails std::isfinite() and the target
// type is an integer or bool.
extern const char* const kConvertNonFinite;
// Returns true iff `value` lies inside the closed interval
// [numeric_limits<ToType>::lowest(), numeric_limits<ToType>::max()], where
// both bounds are converted to FromType before the comparison.
// FromType must be strictly larger (in bytes) than ToType and is assumed to be
// able to hold every ToType value; that is, any ToType instance i satisfies:
// numeric_limits<FromType>::lowest() <= i <= numeric_limits<FromType>::max()
template <typename FromType, typename ToType>
static inline bool CheckRange(const FromType& value) {
  static_assert(sizeof(FromType) > sizeof(ToType),
                "FromType must be larger than ToType");
  // Checked at run time (debug check) instead of with static_assert because
  // floating point limits are not integral constants.
  SQL_DCHECK_LE(std::numeric_limits<FromType>::lowest(),
                std::numeric_limits<ToType>::lowest());
  SQL_DCHECK_GE(std::numeric_limits<FromType>::max(),
                std::numeric_limits<ToType>::max());
  const FromType lower_bound =
      static_cast<FromType>(std::numeric_limits<ToType>::lowest());
  const FromType upper_bound =
      static_cast<FromType>(std::numeric_limits<ToType>::max());
  // Sanity check on the converted bounds: zero must be inside the interval.
  SQL_DCHECK_LE(lower_bound, 0);
  SQL_DCHECK_LT(0, upper_bound);
  // Written as two positive comparisons so that a NaN `value` yields false.
  return lower_bound <= value && value <= upper_bound;
}
// Reports whether the floating point `value` may be converted to the integer
// type ToType. The actual decision is delegated to
// convert_internal::InRangeNoTruncate<FromType, ToType>.
// FromType is restricted to float/double and ToType to the 32/64-bit signed
// and unsigned integer types; anything else fails to compile.
template <typename FromType, typename ToType>
static inline bool CheckFloatToIntRange(const FromType& value) {
  constexpr bool kSourceIsFloatingPoint =
      std::is_same<FromType, float>::value ||
      std::is_same<FromType, double>::value;
  static_assert(kSourceIsFloatingPoint, "Invalid FromType");
  constexpr bool kTargetIsSupportedInteger =
      std::is_same<ToType, int32_t>::value ||
      std::is_same<ToType, int64_t>::value ||
      std::is_same<ToType, uint32_t>::value ||
      std::is_same<ToType, uint64_t>::value;
  static_assert(kTargetIsSupportedInteger, "Invalid ToType");
  const bool in_range =
      convert_internal::InRangeNoTruncate<FromType, ToType>(value);
  return in_range;
}
// We use an internal struct since function template partial specialization is
// not allowed. Baseline template simply does a static_cast(). It also covers
// the case when FromType is the same as ToType.
template <typename FromType, typename ToType> struct Converter {
static inline bool Convert(
const FromType& in, ToType* out, absl::Status* error) {
*out = static_cast<ToType>(in);
return true;
}
};
// Partial specialization for all conversions to a Boolean value.
template <typename FromType> struct Converter<FromType, bool> {
static inline bool Convert(
const FromType& in, bool* out, absl::Status* error) {
*out = (in != 0);
return true;
}
};
} // namespace internal
template <typename FromType, typename ToType>
inline bool Convert(
const FromType& in, ToType* out, absl::Status* error) {
static_assert((std::is_same<FromType, int32_t>::value ||
std::is_same<FromType, int64_t>::value ||
std::is_same<FromType, uint32_t>::value ||
std::is_same<FromType, uint64_t>::value ||
std::is_same<FromType, bool>::value ||
std::is_same<FromType, float>::value ||
std::is_same<FromType, double>::value ||
std::is_same<FromType, NumericValue>::value ||
std::is_same<FromType, BigNumericValue>::value),
"Invalid FromType");
static_assert((std::is_same<ToType, int32_t>::value ||
std::is_same<ToType, int64_t>::value ||
std::is_same<ToType, uint32_t>::value ||
std::is_same<ToType, uint64_t>::value ||
std::is_same<ToType, bool>::value ||
std::is_same<ToType, float>::value ||
std::is_same<ToType, double>::value ||
std::is_same<ToType, NumericValue>::value ||
std::is_same<ToType, BigNumericValue>::value),
"Invalid ToType");
return internal::Converter<FromType, ToType>::Convert(in, out, error);
}
// The following specializations are done only for the cases where static_cast()
// is not sufficient and overflow checking is required.
// -------------- int32_t --------------
// INT32 -> UINT32. Non-negative inputs are cast and true is returned.
// Negative inputs are reported through internal::UpdateError (message:
// kConvertOverflowUint32 followed by the value) and its result is returned.
template <>
inline bool Convert<int32_t, uint32_t>(const int32_t& in, uint32_t* out,
                                       absl::Status* error) {
  const bool representable = (in >= 0);
  if (representable) {
    *out = static_cast<uint32_t>(in);
    return true;
  }
  return internal::UpdateError(
      error, absl::StrCat(internal::kConvertOverflowUint32, in));
}
// INT32 -> UINT64. Non-negative inputs are cast and true is returned.
// Negative inputs are reported through internal::UpdateError (message:
// kConvertOverflowUint64 followed by the value) and its result is returned.
template <>
inline bool Convert<int32_t, uint64_t>(const int32_t& in, uint64_t* out,
                                       absl::Status* error) {
  const bool representable = (in >= 0);
  if (representable) {
    *out = static_cast<uint64_t>(in);
    return true;
  }
  return internal::UpdateError(
      error, absl::StrCat(internal::kConvertOverflowUint64, in));
}
// INT32 -> NUMERIC. Constructs a NumericValue directly from the integer;
// always returns true and never touches `error`.
template <>
inline bool Convert<int32_t, NumericValue>(const int32_t& in, NumericValue* out,
                                           absl::Status* error) {
  const NumericValue converted(in);
  *out = converted;
  return true;
}
// INT32 -> BIGNUMERIC. Constructs a BigNumericValue directly from the
// integer; always returns true and never touches `error`.
template <>
inline bool Convert<int32_t, BigNumericValue>(const int32_t& in,
                                              BigNumericValue* out,
                                              absl::Status* error) {
  const BigNumericValue converted(in);
  *out = converted;
  return true;
}
// -------------- int64_t --------------
// INT64 -> INT32. Succeeds when internal::CheckRange confirms the value lies
// within the int32_t limits; otherwise the failure is reported through
// internal::UpdateError (message: kConvertOverflowInt32 followed by the value)
// and its result is returned.
template <>
inline bool Convert<int64_t, int32_t>(const int64_t& in, int32_t* out,
                                      absl::Status* error) {
  const bool fits = internal::CheckRange<int64_t, int32_t>(in);
  if (fits) {
    *out = static_cast<int32_t>(in);
    return true;
  }
  return internal::UpdateError(
      error, absl::StrCat(internal::kConvertOverflowInt32, in));
}
// INT64 -> UINT32. Succeeds when internal::CheckRange confirms the value lies
// within the uint32_t limits; otherwise the failure is reported through
// internal::UpdateError (message: kConvertOverflowUint32 followed by the
// value) and its result is returned.
template <>
inline bool Convert<int64_t, uint32_t>(const int64_t& in, uint32_t* out,
                                       absl::Status* error) {
  const bool fits = internal::CheckRange<int64_t, uint32_t>(in);
  if (fits) {
    *out = static_cast<uint32_t>(in);
    return true;
  }
  return internal::UpdateError(
      error, absl::StrCat(internal::kConvertOverflowUint32, in));
}
// INT64 -> UINT64. Non-negative inputs are cast and true is returned.
// Negative inputs are reported through internal::UpdateError (message:
// kConvertOverflowUint64 followed by the value) and its result is returned.
template <>
inline bool Convert<int64_t, uint64_t>(const int64_t& in, uint64_t* out,
                                       absl::Status* error) {
  const bool representable = (in >= 0);
  if (representable) {
    *out = static_cast<uint64_t>(in);
    return true;
  }
  return internal::UpdateError(
      error, absl::StrCat(internal::kConvertOverflowUint64, in));
}
// INT64 -> NUMERIC. Constructs a NumericValue directly from the integer;
// always returns true and never touches `error`.
template <>
inline bool Convert<int64_t, NumericValue>(const int64_t& in, NumericValue* out,
                                           absl::Status* error) {
  const NumericValue converted(in);
  *out = converted;
  return true;
}
// INT64 -> BIGNUMERIC. Constructs a BigNumericValue directly from the
// integer; always returns true and never touches `error`.
template <>
inline bool Convert<int64_t, BigNumericValue>(const int64_t& in,
                                              BigNumericValue* out,
                                              absl::Status* error) {
  const BigNumericValue converted(in);
  *out = converted;
  return true;
}
// -------------- uint32_t --------------
// UINT32 -> INT32. The input is first widened to int64_t so that
// internal::CheckRange<int64_t, int32_t> can be reused for the bounds test.
// Out-of-range inputs are reported through internal::UpdateError (message:
// kConvertOverflowInt32 followed by the original value) and its result is
// returned.
template <>
inline bool Convert<uint32_t, int32_t>(const uint32_t& in, int32_t* out,
                                       absl::Status* error) {
  const int64_t widened = static_cast<int64_t>(in);
  const bool fits = internal::CheckRange<int64_t, int32_t>(widened);
  if (fits) {
    *out = static_cast<int32_t>(in);
    return true;
  }
  return internal::UpdateError(
      error, absl::StrCat(internal::kConvertOverflowInt32, in));
}
// UINT32 -> NUMERIC. Constructs a NumericValue directly from the integer;
// always returns true and never touches `error`.
template <>
inline bool Convert<uint32_t, NumericValue>(const uint32_t& in,
                                            NumericValue* out,
                                            absl::Status* error) {
  const NumericValue converted(in);
  *out = converted;
  return true;
}
// UINT32 -> BIGNUMERIC. Constructs a BigNumericValue directly from the
// integer; always returns true and never touches `error`.
template <>
inline bool Convert<uint32_t, BigNumericValue>(const uint32_t& in,
                                               BigNumericValue* out,
                                               absl::Status* error) {
  const BigNumericValue converted(in);
  *out = converted;
  return true;
}
// -------------- uint64_t --------------
// UINT64 -> INT32. Only an upper-bound test is needed because the input is
// unsigned. Inputs above numeric_limits<int32_t>::max() are reported through
// internal::UpdateError (message: kConvertOverflowInt32 followed by the value)
// and its result is returned.
template <>
inline bool Convert<uint64_t, int32_t>(const uint64_t& in, int32_t* out,
                                       absl::Status* error) {
  const uint64_t largest_allowed =
      static_cast<uint64_t>(std::numeric_limits<int32_t>::max());
  if (in <= largest_allowed) {
    *out = static_cast<int32_t>(in);
    return true;
  }
  return internal::UpdateError(
      error, absl::StrCat(internal::kConvertOverflowInt32, in));
}
// UINT64 -> INT64. Only an upper-bound test is needed because the input is
// unsigned. Inputs above numeric_limits<int64_t>::max() are reported through
// internal::UpdateError (message: kConvertOverflowInt64 followed by the value)
// and its result is returned.
template <>
inline bool Convert<uint64_t, int64_t>(const uint64_t& in, int64_t* out,
                                       absl::Status* error) {
  const uint64_t largest_allowed =
      static_cast<uint64_t>(std::numeric_limits<int64_t>::max());
  if (in <= largest_allowed) {
    *out = static_cast<int64_t>(in);
    return true;
  }
  return internal::UpdateError(
      error, absl::StrCat(internal::kConvertOverflowInt64, in));
}
// UINT64 -> UINT32. Inputs above numeric_limits<uint32_t>::max() are reported
// through internal::UpdateError (message: kConvertOverflowUint32 followed by
// the value) and its result is returned; everything else is cast.
template <>
inline bool Convert<uint64_t, uint32_t>(const uint64_t& in, uint32_t* out,
                                        absl::Status* error) {
  const uint64_t largest_allowed =
      static_cast<uint64_t>(std::numeric_limits<uint32_t>::max());
  if (in <= largest_allowed) {
    *out = static_cast<uint32_t>(in);
    return true;
  }
  return internal::UpdateError(
      error, absl::StrCat(internal::kConvertOverflowUint32, in));
}
// UINT64 -> NUMERIC. Constructs a NumericValue directly from the integer;
// always returns true and never touches `error`.
template <>
inline bool Convert<uint64_t, NumericValue>(const uint64_t& in,
                                            NumericValue* out,
                                            absl::Status* error) {
  const NumericValue converted(in);
  *out = converted;
  return true;
}
// UINT64 -> BIGNUMERIC. Constructs a BigNumericValue directly from the
// integer; always returns true and never touches `error`.
template <>
inline bool Convert<uint64_t, BigNumericValue>(const uint64_t& in,
                                               BigNumericValue* out,
                                               absl::Status* error) {
  const BigNumericValue converted(in);
  *out = converted;
  return true;
}
// -------------- bool --------------
// BOOL -> NUMERIC. SQL does not support casting between BOOL and
// NUMERIC/BIGNUMERIC, but this specialization is still required because of
// template instantiations used in tests.
// The bool is widened to int64_t first and the NumericValue is built from
// that integer. Always returns true.
template <>
inline bool Convert<bool, NumericValue>(const bool& in, NumericValue* out,
                                        absl::Status* error) {
  const int64_t as_integer = static_cast<int64_t>(in);
  *out = NumericValue(as_integer);
  return true;
}
// BOOL -> BIGNUMERIC. The bool is widened to int64_t first and the
// BigNumericValue is built from that integer. Always returns true.
template <>
inline bool Convert<bool, BigNumericValue>(const bool& in, BigNumericValue* out,
                                           absl::Status* error) {
  const int64_t as_integer = static_cast<int64_t>(in);
  *out = BigNumericValue(as_integer);
  return true;
}
// -------------- float --------------
// FLOAT -> INT32.
// 1. NaN/infinity are rejected with a kConvertNonFinite message.
// 2. Values failing internal::CheckFloatToIntRange are rejected with a
//    kConvertOverflowInt32 message.
// 3. Otherwise the value is rounded with roundf() (halfway cases away from
//    zero) and cast.
// Rejections return the result of internal::UpdateError.
template <>
inline bool Convert<float, int32_t>(const float& in, int32_t* out,
                                    absl::Status* error) {
  const bool finite = std::isfinite(in);
  if (!finite) {
    return internal::UpdateError(error,
                                 absl::StrCat(internal::kConvertNonFinite, in));
  }
  const bool fits = internal::CheckFloatToIntRange<float, int32_t>(in);
  if (!fits) {
    return internal::UpdateError(
        error, absl::StrCat(internal::kConvertOverflowInt32, in));
  }
  const float rounded = roundf(in);
  *out = static_cast<int32_t>(rounded);
  return true;
}
// FLOAT -> INT64.
// 1. NaN/infinity are rejected with a kConvertNonFinite message.
// 2. Values failing internal::CheckFloatToIntRange are rejected with a
//    kConvertOverflowInt64 message.
// 3. Otherwise the value is rounded with roundf() (halfway cases away from
//    zero) and cast.
// Rejections return the result of internal::UpdateError.
template <>
inline bool Convert<float, int64_t>(const float& in, int64_t* out,
                                    absl::Status* error) {
  const bool finite = std::isfinite(in);
  if (!finite) {
    return internal::UpdateError(error,
                                 absl::StrCat(internal::kConvertNonFinite, in));
  }
  const bool fits = internal::CheckFloatToIntRange<float, int64_t>(in);
  if (!fits) {
    return internal::UpdateError(
        error, absl::StrCat(internal::kConvertOverflowInt64, in));
  }
  const float rounded = roundf(in);
  *out = static_cast<int64_t>(rounded);
  return true;
}
// FLOAT -> UINT32.
// 1. NaN/infinity are rejected with a kConvertNonFinite message.
// 2. Values failing internal::CheckFloatToIntRange are rejected with a
//    kConvertOverflowUint32 message.
// 3. Otherwise the value is rounded with roundf() (halfway cases away from
//    zero) and cast.
// Rejections return the result of internal::UpdateError.
template <>
inline bool Convert<float, uint32_t>(const float& in, uint32_t* out,
                                     absl::Status* error) {
  const bool finite = std::isfinite(in);
  if (!finite) {
    return internal::UpdateError(error,
                                 absl::StrCat(internal::kConvertNonFinite, in));
  }
  const bool fits = internal::CheckFloatToIntRange<float, uint32_t>(in);
  if (!fits) {
    return internal::UpdateError(
        error, absl::StrCat(internal::kConvertOverflowUint32, in));
  }
  const float rounded = roundf(in);
  *out = static_cast<uint32_t>(rounded);
  return true;
}
// FLOAT -> UINT64.
// 1. NaN/infinity are rejected with a kConvertNonFinite message.
// 2. Values failing internal::CheckFloatToIntRange are rejected with a
//    kConvertOverflowUint64 message.
// 3. Otherwise the value is rounded with roundf() (halfway cases away from
//    zero) and cast.
// Rejections return the result of internal::UpdateError.
template <>
inline bool Convert<float, uint64_t>(const float& in, uint64_t* out,
                                     absl::Status* error) {
  const bool finite = std::isfinite(in);
  if (!finite) {
    return internal::UpdateError(error,
                                 absl::StrCat(internal::kConvertNonFinite, in));
  }
  const bool fits = internal::CheckFloatToIntRange<float, uint64_t>(in);
  if (!fits) {
    return internal::UpdateError(
        error, absl::StrCat(internal::kConvertOverflowUint64, in));
  }
  const float rounded = roundf(in);
  *out = static_cast<uint64_t>(rounded);
  return true;
}
// FLOAT -> BOOL. NaN/infinity are rejected with a kConvertNonFinite message
// (returning the result of internal::UpdateError); any other input maps to
// true exactly when it compares unequal to zero.
template <>
inline bool Convert<float, bool>(const float& in, bool* out,
                                 absl::Status* error) {
  if (std::isfinite(in)) {
    const bool is_nonzero = (in != 0);
    *out = is_nonzero;
    return true;
  }
  return internal::UpdateError(error,
                               absl::StrCat(internal::kConvertNonFinite, in));
}
// FLOAT -> NUMERIC. The float is widened to double and handed to
// NumericValue::FromDouble. If that call fails, its status is copied into
// `*error` (when non-null), `*out` is left untouched and false is returned.
template <>
inline bool Convert<float, NumericValue>(const float& in, NumericValue* out,
                                         absl::Status* error) {
  const absl::StatusOr<NumericValue> result =
      NumericValue::FromDouble(static_cast<double>(in));
  if (ABSL_PREDICT_FALSE(!result.ok())) {
    if (error != nullptr) {
      *error = result.status();
    }
    return false;
  }
  *out = *result;
  return true;
}
// FLOAT -> BIGNUMERIC. The float is widened to double and handed to
// BigNumericValue::FromDouble. If that call fails, its status is copied into
// `*error` (when non-null), `*out` is left untouched and false is returned.
template <>
inline bool Convert<float, BigNumericValue>(const float& in,
                                            BigNumericValue* out,
                                            absl::Status* error) {
  const absl::StatusOr<BigNumericValue> result =
      BigNumericValue::FromDouble(static_cast<double>(in));
  if (ABSL_PREDICT_FALSE(!result.ok())) {
    if (error != nullptr) {
      *error = result.status();
    }
    return false;
  }
  *out = result.value();
  return true;
}
// -------------- double --------------
// DOUBLE -> INT32.
// 1. NaN/infinity are rejected with a kConvertNonFinite message.
// 2. Values failing internal::CheckFloatToIntRange are rejected with a
//    kConvertOverflowInt32 message.
// 3. Otherwise the value is rounded with round() (halfway cases away from
//    zero) and cast.
// Rejections return the result of internal::UpdateError.
template <>
inline bool Convert<double, int32_t>(const double& in, int32_t* out,
                                     absl::Status* error) {
  const bool finite = std::isfinite(in);
  if (!finite) {
    return internal::UpdateError(error,
                                 absl::StrCat(internal::kConvertNonFinite, in));
  }
  const bool fits = internal::CheckFloatToIntRange<double, int32_t>(in);
  if (!fits) {
    return internal::UpdateError(
        error, absl::StrCat(internal::kConvertOverflowInt32, in));
  }
  const double rounded = round(in);
  *out = static_cast<int32_t>(rounded);
  return true;
}
// DOUBLE -> INT64.
// 1. NaN/infinity are rejected with a kConvertNonFinite message.
// 2. Values failing internal::CheckFloatToIntRange are rejected with a
//    kConvertOverflowInt64 message.
// 3. Otherwise the value is rounded with round() (halfway cases away from
//    zero) and cast.
// Rejections return the result of internal::UpdateError.
template <>
inline bool Convert<double, int64_t>(const double& in, int64_t* out,
                                     absl::Status* error) {
  const bool finite = std::isfinite(in);
  if (!finite) {
    return internal::UpdateError(error,
                                 absl::StrCat(internal::kConvertNonFinite, in));
  }
  const bool fits = internal::CheckFloatToIntRange<double, int64_t>(in);
  if (!fits) {
    return internal::UpdateError(
        error, absl::StrCat(internal::kConvertOverflowInt64, in));
  }
  const double rounded = round(in);
  *out = static_cast<int64_t>(rounded);
  return true;
}
// DOUBLE -> UINT32.
// 1. NaN/infinity are rejected with a kConvertNonFinite message.
// 2. Values failing internal::CheckFloatToIntRange are rejected with a
//    kConvertOverflowUint32 message.
// 3. Otherwise the value is rounded with round() (halfway cases away from
//    zero) and cast.
// Rejections return the result of internal::UpdateError.
template <>
inline bool Convert<double, uint32_t>(const double& in, uint32_t* out,
                                      absl::Status* error) {
  const bool finite = std::isfinite(in);
  if (!finite) {
    return internal::UpdateError(error,
                                 absl::StrCat(internal::kConvertNonFinite, in));
  }
  const bool fits = internal::CheckFloatToIntRange<double, uint32_t>(in);
  if (!fits) {
    return internal::UpdateError(
        error, absl::StrCat(internal::kConvertOverflowUint32, in));
  }
  const double rounded = round(in);
  *out = static_cast<uint32_t>(rounded);
  return true;
}
// DOUBLE -> UINT64.
// 1. NaN/infinity are rejected with a kConvertNonFinite message.
// 2. Values failing internal::CheckFloatToIntRange are rejected with a
//    kConvertOverflowUint64 message.
// 3. Otherwise the value is rounded with round() (halfway cases away from
//    zero) and cast.
// Rejections return the result of internal::UpdateError.
template <>
inline bool Convert<double, uint64_t>(const double& in, uint64_t* out,
                                      absl::Status* error) {
  const bool finite = std::isfinite(in);
  if (!finite) {
    return internal::UpdateError(error,
                                 absl::StrCat(internal::kConvertNonFinite, in));
  }
  const bool fits = internal::CheckFloatToIntRange<double, uint64_t>(in);
  if (!fits) {
    return internal::UpdateError(
        error, absl::StrCat(internal::kConvertOverflowUint64, in));
  }
  const double rounded = round(in);
  *out = static_cast<uint64_t>(rounded);
  return true;
}
// DOUBLE -> BOOL. NaN/infinity are rejected with a kConvertNonFinite message
// (returning the result of internal::UpdateError); any other input maps to
// true exactly when it compares unequal to zero.
template <>
inline bool Convert<double, bool>(const double& in, bool* out,
                                  absl::Status* error) {
  if (std::isfinite(in)) {
    const bool is_nonzero = (in != 0);
    *out = is_nonzero;
    return true;
  }
  return internal::UpdateError(error,
                               absl::StrCat(internal::kConvertNonFinite, in));
}
// DOUBLE -> FLOAT. A finite input that internal::CheckRange<double, float>
// places outside the float limits is rejected with a kConvertOverflowFloat
// message (returning the result of internal::UpdateError). Non-finite inputs
// (NaN, +/-infinity) skip the range test and are cast as-is.
template <>
inline bool Convert<double, float>(const double& in, float* out,
                                   absl::Status* error) {
  // Short-circuit keeps CheckRange from being evaluated for NaN/infinity.
  const bool finite_but_too_large =
      std::isfinite(in) && !internal::CheckRange<double, float>(in);
  if (finite_but_too_large) {
    return internal::UpdateError(
        error, absl::StrCat(internal::kConvertOverflowFloat, in));
  }
  *out = static_cast<float>(in);
  return true;
}
// DOUBLE -> NUMERIC via NumericValue::FromDouble. If that call fails, its
// status is copied into `*error` (when non-null), `*out` is left untouched
// and false is returned.
template <>
inline bool Convert<double, NumericValue>(const double& in, NumericValue* out,
                                          absl::Status* error) {
  const absl::StatusOr<NumericValue> result = NumericValue::FromDouble(in);
  if (ABSL_PREDICT_FALSE(!result.ok())) {
    if (error != nullptr) {
      *error = result.status();
    }
    return false;
  }
  *out = *result;
  return true;
}
// DOUBLE -> BIGNUMERIC via BigNumericValue::FromDouble. If that call fails,
// its status is copied into `*error` (when non-null), `*out` is left
// untouched and false is returned.
template <>
inline bool Convert<double, BigNumericValue>(const double& in,
                                             BigNumericValue* out,
                                             absl::Status* error) {
  const absl::StatusOr<BigNumericValue> result =
      BigNumericValue::FromDouble(in);
  if (ABSL_PREDICT_FALSE(!result.ok())) {
    if (error != nullptr) {
      *error = result.status();
    }
    return false;
  }
  *out = result.value();
  return true;
}
// -------------- numeric --------------
// NUMERIC -> INT32 via NumericValue::To<int32_t>(). If that call fails, its
// status is copied into `*error` (when non-null), `*out` is left untouched
// and false is returned.
template <>
inline bool Convert<NumericValue, int32_t>(const NumericValue& in, int32_t* out,
                                           absl::Status* error) {
  const absl::StatusOr<int32_t> result = in.To<int32_t>();
  if (ABSL_PREDICT_FALSE(!result.ok())) {
    if (error != nullptr) {
      *error = result.status();
    }
    return false;
  }
  *out = *result;
  return true;
}
// NUMERIC -> INT64 via NumericValue::To<int64_t>(). If that call fails, its
// status is copied into `*error` (when non-null), `*out` is left untouched
// and false is returned.
template <>
inline bool Convert<NumericValue, int64_t>(const NumericValue& in, int64_t* out,
                                           absl::Status* error) {
  const absl::StatusOr<int64_t> result = in.To<int64_t>();
  if (ABSL_PREDICT_FALSE(!result.ok())) {
    if (error != nullptr) {
      *error = result.status();
    }
    return false;
  }
  *out = *result;
  return true;
}
// NUMERIC -> UINT32 via NumericValue::To<uint32_t>(). If that call fails, its
// status is copied into `*error` (when non-null), `*out` is left untouched
// and false is returned.
template <>
inline bool Convert<NumericValue, uint32_t>(const NumericValue& in,
                                            uint32_t* out,
                                            absl::Status* error) {
  const absl::StatusOr<uint32_t> result = in.To<uint32_t>();
  if (ABSL_PREDICT_FALSE(!result.ok())) {
    if (error != nullptr) {
      *error = result.status();
    }
    return false;
  }
  *out = *result;
  return true;
}
// NUMERIC -> UINT64 via NumericValue::To<uint64_t>(). If that call fails, its
// status is copied into `*error` (when non-null), `*out` is left untouched
// and false is returned.
template <>
inline bool Convert<NumericValue, uint64_t>(const NumericValue& in,
                                            uint64_t* out,
                                            absl::Status* error) {
  const absl::StatusOr<uint64_t> result = in.To<uint64_t>();
  if (ABSL_PREDICT_FALSE(!result.ok())) {
    if (error != nullptr) {
      *error = result.status();
    }
    return false;
  }
  *out = *result;
  return true;
}
// NUMERIC -> FLOAT. Goes through NumericValue::ToDouble() and narrows the
// result with a static_cast. No range check is performed here; always returns
// true and never touches `error`.
template <>
inline bool Convert<NumericValue, float>(const NumericValue& in, float* out,
                                         absl::Status* error) {
  const double as_double = in.ToDouble();
  *out = static_cast<float>(as_double);
  return true;
}
// NUMERIC -> DOUBLE. Stores the result of NumericValue::ToDouble(); always
// returns true and never touches `error`.
template <>
inline bool Convert<NumericValue, double>(const NumericValue& in, double* out,
                                          absl::Status* error) {
  const double as_double = in.ToDouble();
  *out = as_double;
  return true;
}
// NUMERIC -> BIGNUMERIC. Constructs a BigNumericValue from the NumericValue;
// always returns true and never touches `error`.
template <>
inline bool Convert<NumericValue, BigNumericValue>(const NumericValue& in,
                                                   BigNumericValue* out,
                                                   absl::Status* error) {
  const BigNumericValue widened(in);
  *out = widened;
  return true;
}
// NUMERIC -> BOOL. The result is true exactly when the input differs from a
// default-constructed NumericValue (presumably zero -- confirm against
// NumericValue's definition). Always returns true.
template <>
inline bool Convert<NumericValue, bool>(const NumericValue& in, bool* out,
                                        absl::Status* error) {
  const bool differs_from_default = (in != NumericValue());
  *out = differs_from_default;
  return true;
}
// -------------- bignumeric --------------
// BIGNUMERIC -> INT32 via BigNumericValue::To<int32_t>(). If that call fails,
// its status is copied into `*error` (when non-null), `*out` is left
// untouched and false is returned.
template <>
inline bool Convert<BigNumericValue, int32_t>(const BigNumericValue& in,
                                              int32_t* out,
                                              absl::Status* error) {
  const absl::StatusOr<int32_t> result = in.To<int32_t>();
  if (ABSL_PREDICT_FALSE(!result.ok())) {
    if (error != nullptr) {
      *error = result.status();
    }
    return false;
  }
  *out = result.value();
  return true;
}
// BIGNUMERIC -> INT64 via BigNumericValue::To<int64_t>(). If that call fails,
// its status is copied into `*error` (when non-null), `*out` is left
// untouched and false is returned.
template <>
inline bool Convert<BigNumericValue, int64_t>(const BigNumericValue& in,
                                              int64_t* out,
                                              absl::Status* error) {
  const absl::StatusOr<int64_t> result = in.To<int64_t>();
  if (ABSL_PREDICT_FALSE(!result.ok())) {
    if (error != nullptr) {
      *error = result.status();
    }
    return false;
  }
  *out = result.value();
  return true;
}
// BIGNUMERIC -> UINT32 via BigNumericValue::To<uint32_t>(). If that call
// fails, its status is copied into `*error` (when non-null), `*out` is left
// untouched and false is returned.
template <>
inline bool Convert<BigNumericValue, uint32_t>(const BigNumericValue& in,
                                               uint32_t* out,
                                               absl::Status* error) {
  const absl::StatusOr<uint32_t> result = in.To<uint32_t>();
  if (ABSL_PREDICT_FALSE(!result.ok())) {
    if (error != nullptr) {
      *error = result.status();
    }
    return false;
  }
  *out = result.value();
  return true;
}
// BIGNUMERIC -> UINT64 via BigNumericValue::To<uint64_t>(). If that call
// fails, its status is copied into `*error` (when non-null), `*out` is left
// untouched and false is returned.
template <>
inline bool Convert<BigNumericValue, uint64_t>(const BigNumericValue& in,
                                               uint64_t* out,
                                               absl::Status* error) {
  const absl::StatusOr<uint64_t> result = in.To<uint64_t>();
  if (ABSL_PREDICT_FALSE(!result.ok())) {
    if (error != nullptr) {
      *error = result.status();
    }
    return false;
  }
  *out = result.value();
  return true;
}
// BIGNUMERIC -> FLOAT. The value is converted to double and then narrowed to
// float. Going through double can give a slightly different result than a
// direct conversion to float in some edge cases; that is accepted because
// choosing float already implies precision is not critical.
// If the narrowed value is infinite, the overflow is reported through
// internal::UpdateError (message: kConvertOverflowFloat followed by
// in.ToString()) and its result is returned. Note that `*out` has already
// been written with the infinite value at that point.
template <>
inline bool Convert<BigNumericValue, float>(const BigNumericValue& in,
                                            float* out, absl::Status* error) {
  const float narrowed = static_cast<float>(in.ToDouble());
  *out = narrowed;
  if (ABSL_PREDICT_FALSE(std::isinf(narrowed))) {
    return internal::UpdateError(
        error, absl::StrCat(internal::kConvertOverflowFloat, in.ToString()));
  }
  return true;
}
// BIGNUMERIC -> DOUBLE. Stores the result of BigNumericValue::ToDouble();
// always returns true and never touches `error`.
template <>
inline bool Convert<BigNumericValue, double>(const BigNumericValue& in,
                                             double* out, absl::Status* error) {
  const double as_double = in.ToDouble();
  *out = as_double;
  return true;
}
// BIGNUMERIC -> NUMERIC via BigNumericValue::ToNumericValue(). If that call
// fails, its status is copied into `*error` (when non-null), `*out` is left
// untouched and false is returned.
template <>
inline bool Convert<BigNumericValue, NumericValue>(const BigNumericValue& in,
                                                   NumericValue* out,
                                                   absl::Status* error) {
  const absl::StatusOr<NumericValue> result = in.ToNumericValue();
  if (ABSL_PREDICT_FALSE(!result.ok())) {
    if (error != nullptr) {
      *error = result.status();
    }
    return false;
  }
  *out = result.value();
  return true;
}
// BIGNUMERIC -> BOOL. The result is true exactly when the input differs from
// a default-constructed BigNumericValue (presumably zero -- confirm against
// BigNumericValue's definition). Always returns true.
template <>
inline bool Convert<BigNumericValue, bool>(const BigNumericValue& in, bool* out,
                                           absl::Status* error) {
  const bool differs_from_default = (in != BigNumericValue());
  *out = differs_from_default;
  return true;
}
} // namespace functions
} // namespace bigquery_ml_utils
#endif // THIRD_PARTY_PY_BIGQUERY_ML_UTILS_SQL_UTILS_PUBLIC_FUNCTIONS_CONVERT_H_