sql_utils/public/functions/cast_date_time.h (274 lines of code) (raw):
//
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef THIRD_PARTY_PY_BIGQUERY_ML_UTILS_SQL_UTILS_PUBLIC_FUNCTIONS_CAST_DATE_TIME_H_
#define THIRD_PARTY_PY_BIGQUERY_ML_UTILS_SQL_UTILS_PUBLIC_FUNCTIONS_CAST_DATE_TIME_H_
#include <cstdint>
#include <string>
#include <utility>
#include <vector>
#include "absl/time/time.h"
#include "sql_utils/base/status.h"
#include "sql_utils/public/functions/date_time_util.h"
#include "sql_utils/public/type.pb.h"
namespace bigquery_ml_utils {
namespace functions {
// CastStringToTimestamp functions are used for CAST(input AS
// Timestamp FORMAT '...') syntax.
//
// Parses an input <timestamp_string> with the given input <format_string>,
// and produces the appropriate timestamp as output. Timestamp parts that are
// unspecified in the format are derived from 'current_year-current_month-01
// 00:00:00.000000' at the <default_timezone> ('current_year' and
// 'current_month' are from <current_timestamp> at the <default_timezone>).
// Produces <timestamp_micros> at microseconds precision, and returns an error
// if the resulting timestamp is not in the SQL valid range.
//
// <timestamp_micros> is an output parameter.
// NOTE(review): presumably it is a count of microseconds from 1970-01-01 UTC,
// matching the input convention documented for CastFormatTimestampToString
// below - confirm against the implementation.
//
// Requires that the string_view arguments are UTF8.
//
// The supported format elements and their semantics are different from those
// for ParseStringToTimestamp functions and they are defined in:
// (broken link).
absl::Status CastStringToTimestamp(absl::string_view format_string,
absl::string_view timestamp_string,
absl::TimeZone default_timezone,
absl::Time current_timestamp,
int64_t* timestamp_micros);
// Invokes MakeTimeZone() on <default_timezone_string> and then invokes the
// prior function (the overload taking an absl::TimeZone) with the resulting
// time zone. Returns error status if <default_timezone_string> is invalid
// or conversion fails.
absl::Status CastStringToTimestamp(absl::string_view format_string,
absl::string_view timestamp_string,
absl::string_view default_timezone_string,
absl::Time current_timestamp,
int64_t* timestamp_micros);
// The 2 functions below are similar to the above functions but support
// nanoseconds precision. Instead of writing microseconds to an int64_t, they
// write the parsed result to the absl::Time pointed to by <timestamp>.
//
// The first overload takes the default time zone as an absl::TimeZone.
absl::Status CastStringToTimestamp(absl::string_view format_string,
absl::string_view timestamp_string,
absl::TimeZone default_timezone,
absl::Time current_timestamp,
absl::Time* timestamp);
// The second overload takes the default time zone as a string
// (<default_timezone_string>), like the microseconds-precision string overload
// above.
absl::Status CastStringToTimestamp(absl::string_view format_string,
absl::string_view timestamp_string,
absl::string_view default_timezone_string,
absl::Time current_timestamp,
absl::Time* timestamp);
// CastStringToDate function is used for CAST(input AS Date FORMAT '...')
// syntax.
//
// Parses an input <date_string> with the given input <format_string>, and
// produces the appropriate date as output. Date parts that are unspecified
// in the format are derived from 'current_year-current_month-01'
// ('current_year' and 'current_month' are from <current_date>). Returns an
// error if the <format_string> contains format elements unsupported for DATE or
// the resulting date is not in the SQL valid range.
//
// <date> is an output parameter.
// NOTE(review): <current_date> and <date> are presumably numbers of days since
// the epoch (1970-01-01), as documented for CastFormatDateToString below -
// confirm against the implementation.
//
// Requires that the string_view arguments are UTF8.
//
// The supported format elements and their semantics are different from those
// for ParseStringToDate functions and they are defined in:
// (broken link).
absl::Status CastStringToDate(absl::string_view format_string,
absl::string_view date_string,
int32_t current_date, int32_t* date);
// CastStringToTime function is used for CAST(input AS Time FORMAT '...')
// syntax.
//
// Parses an input <time_string> with the given input <format_string>, and
// produces the appropriate time as output. Time parts that are unspecified in
// the format are derived from '00:00:00.000000000'. Returns an error if the
// <format_string> contains a format element unsupported for TIME or the
// resulting time is not in the range of [00:00:00, 24:00:00).
//
// <scale> is used to specify the maximum precision supported for the format
// elements of type "kFFN", which will be 6 and 9 for micros and nanos,
// respectively. The parsed result value also respects this specified <scale>,
// which means that for the format element of "kFFN" type, while parsing will
// consume as many numeric digits as present, the parsed result will truncate
// any digits beyond 6 or 9 for micros and nanos, respectively. The same
// behavior also applies to the <scale> argument for CastStringToDatetime below.
//
// <time> is an output parameter that receives the parsed value.
//
// Requires that the string_view arguments are UTF8.
//
// The supported format elements and their semantics are different from those
// for ParseStringToTime function and they are defined in:
// (broken link).
absl::Status CastStringToTime(absl::string_view format_string,
absl::string_view time_string,
TimestampScale scale, TimeValue* time);
// CastStringToDatetime function is used for CAST(input AS
// Datetime FORMAT '...') syntax.
//
// Parses an input <datetime_string> with the given input <format_string>, and
// produces the appropriate datetime as output. Date and time parts that are
// unspecified in the format are derived from 'current_year-current_month-01
// 00:00:00.000000000' ('current_year' and 'current_month' are from
// <current_date>). Returns an error if the <format_string> contains a format
// element unsupported for DATETIME or the resulting datetime is not in the
// SQL valid range.
//
// <scale> has the same meaning as the <scale> argument of CastStringToTime
// above. <datetime> is an output parameter that receives the parsed value.
//
// Requires that the string_view arguments are UTF8.
//
// The supported format elements and their semantics are different from those
// for ParseStringToDatetime function and they are defined in:
// (broken link).
absl::Status CastStringToDatetime(absl::string_view format_string,
absl::string_view datetime_string,
TimestampScale scale, int32_t current_date,
DatetimeValue* datetime);
// Performs validations on the <format_string> that is used to parse a string
// into a value of the given <out_type>, according to the specifications at
// (broken link). Returns the validation outcome as an absl::Status.
absl::Status ValidateFormatStringForParsing(
absl::string_view format_string, bigquery_ml_utils::TypeKind out_type);
// Performs validations on the <format_string> that is used to format a value
// of the given <out_type>, according to the specifications at (broken link).
// Returns the validation outcome as an absl::Status.
absl::Status ValidateFormatStringForFormatting(
absl::string_view format_string, bigquery_ml_utils::TypeKind out_type);
// Populates <out> using the <format_string> following the formatting rules from
// (broken link).
//
// Assumes <timestamp_micros> is the number of microseconds from
// 1970-01-01 UTC. The timestamp is formatted using the given <timezone>.
// Returns error status if conversion fails.
//
// <format_string> must be a valid utf-8 string, else this and all the other
// CastFormat functions will fail.
//
// Note, this method is not locale aware and generally formats in an en-US
// style. For example, months and days of week will use the en-US names.
absl::Status CastFormatTimestampToString(absl::string_view format_string,
int64_t timestamp_micros,
absl::TimeZone timezone,
std::string* out);
// Invokes MakeTimeZone() on <timezone_string> and then invokes the prior
// function (the overload taking an absl::TimeZone) with the resulting time
// zone. Returns error status if <timezone_string> is invalid or conversion
// fails.
absl::Status CastFormatTimestampToString(absl::string_view format_string,
int64_t timestamp_micros,
absl::string_view timezone_string,
std::string* out);
// Overload of CastFormatTimestampToString that takes the timestamp as an
// absl::Time instead of an int64_t count of microseconds, and the time zone as
// an absl::TimeZone. Populates <out>.
// NOTE(review): presumably exists so that sub-microsecond precision can be
// formatted, mirroring the absl::Time* overloads of CastStringToTimestamp -
// confirm against the implementation.
absl::Status CastFormatTimestampToString(absl::string_view format_string,
absl::Time timestamp,
absl::TimeZone timezone,
std::string* out);
// Overload of CastFormatTimestampToString that takes the timestamp as an
// absl::Time and the time zone as a string (<timezone_string>). Populates
// <out>.
// NOTE(review): presumably resolves <timezone_string> via MakeTimeZone() like
// the int64_t/string overload above - confirm against the implementation.
absl::Status CastFormatTimestampToString(absl::string_view format_string,
absl::Time timestamp,
absl::string_view timezone_string,
std::string* out);
// Populates <out> using the <format_string> following the formatting rules
// from (broken link).
// Assumes <date>: number of days since the epoch (1970-01-01)
//
// Does not allow timezone or time format elements.
// Returns error status if the conversion fails.
absl::Status CastFormatDateToString(absl::string_view format_string,
int32_t date, std::string* out);
// Populates <out> with the text form of <datetime>, using the <format_string>
// following the formatting rules from (broken link).
//
// Does not allow timezone related format elements.
// Returns error status if the conversion fails.
absl::Status CastFormatDatetimeToString(absl::string_view format_string,
const DatetimeValue& datetime,
std::string* out);
// Populates <out> using the <format_string> as defined by absl::FormatTime() in
// base/time.h. Returns error status if conversion fails.
// NOTE(review): the sibling CastFormat*ToString functions in this header refer
// to the CAST ... FORMAT formatting rules rather than to absl::FormatTime();
// the sentence above may be a leftover from a different function - confirm
// against the implementation before relying on it.
//
// Does not allow timezone or date format elements.
absl::Status CastFormatTimeToString(absl::string_view format_string,
const TimeValue& time, std::string* out);
namespace cast_date_time_internal {
// Category that a format element belongs to; stored in
// DateTimeFormatElement::category. The zero value is the "unspecified"
// default used by a default-constructed DateTimeFormatElement.
// NOTE(review): which FormatElementType values map to which category is
// decided by code outside this header - check the .cc for the exact mapping.
enum class FormatElementCategory {
// Default value; no category has been assigned.
kFormatElementCategoryUnspecified = 0,
// Literal text; DateTimeFormatElement documents that <literal_value> is set
// for the "kSimpleLiteral" and "kDoubleQuotedLiteral" element types.
kLiteral,
kYear,
kMonth,
kDay,
kHour,
kMinute,
kSecond,
kMeridianIndicator,
kTimeZone,
kCentury,
kQuarter,
kWeek,
kEraIndicator,
kMisc,
};
// Specific kind of a format element; stored in DateTimeFormatElement::type.
// The zero value is the "unspecified" default used by a default-constructed
// DateTimeFormatElement.
// NOTE(review): the enumerator names appear to mirror the spelling of the
// format element each one stands for (e.g. kYYYY, kHH24) - confirm the exact
// spellings and semantics against the parser in the .cc and the format
// element specification.
enum class FormatElementType {
// Default value; no type has been assigned.
kFormatElementTypeUnspecified = 0,
// Literal element whose <literal_value> equals the original format element
// string (see DateTimeFormatElement::literal_value).
kSimpleLiteral,
// Literal element whose format element string is unquoted and unescaped to
// produce <literal_value> (see DateTimeFormatElement::literal_value).
kDoubleQuotedLiteral,
kWhitespace,
kYYYY,
kYYY,
kYY,
kY,
kRRRR,
kRR,
kYCommaYYY,
kIYYY,
kIYY,
kIY,
kI,
kSYYYY,
kYEAR,
kSYEAR,
kMM,
kMON,
kMONTH,
kRM,
kDDD,
kDD,
kD,
kDAY,
kDY,
kJ,
kHH,
kHH12,
kHH24,
kMI,
kSS,
kSSSSS,
// Subsecond element; DateTimeFormatElement::subsecond_digit_count holds the
// number of subsecond digits for elements of this type.
kFFN,
kAM,
kPM,
kAMWithDots,
kPMWithDots,
kTZH,
kTZM,
kCC,
kSCC,
kQ,
kIW,
kWW,
kW,
kAD,
kBC,
kADWithDots,
kBCWithDots,
kSP,
kTH,
kSPTH,
kTHSP,
kFM
};
// This enum is used to specify the cases of the output letters when
// formatting timestamp with the format element. It is stored in
// DateTimeFormatElement::format_casing_type.
//
// NOTE(review): unlike FormatElementCategory and FormatElementType above, this
// is an unscoped enum, so its enumerators are also visible unqualified inside
// cast_date_time_internal. Converting it to `enum class` would break any code
// that relies on that, so it is left as is.
enum FormatCasingType {
// Default value; no casing has been assigned.
kFormatCasingTypeUnspecified = 0,
// Preserves casing of the output letters in the original input format string.
kPreserveCase,
// All of the letters in the output are capitalized, e.g.
// "TWELVE THIRTY-FOUR".
kAllLettersUppercase,
// For each word in the output, only the first letter is capitalized, and the
// other letters are lowercase, e.g. "Twelve Thirty-Four".
kOnlyFirstLetterUppercase,
// All of the letters in the output are lowercase, e.g. "twelve thirty-four".
kAllLettersLowercase
};
// Description of a single format element taken from a format string. Every
// field starts out at its "unspecified"/zero/empty default.
struct DateTimeFormatElement {
  // Specific kind of this element.
  FormatElementType type{FormatElementType::kFormatElementTypeUnspecified};

  // Category this element belongs to.
  FormatElementCategory category{
      FormatElementCategory::kFormatElementCategoryUnspecified};

  // Number of characters the original format element string occupies in the
  // input format string.
  int len_in_format_str{0};

  // Letter casing to apply to the output produced for this element.
  FormatCasingType format_casing_type{
      FormatCasingType::kFormatCasingTypeUnspecified};

  // Only populated when the element type is "kSimpleLiteral" or
  // "kDoubleQuotedLiteral". For "kSimpleLiteral" it is exactly the original
  // format element string. For "kDoubleQuotedLiteral" the format element
  // string is unquoted and unescaped first, e.g. the format element string
  // R"("abc\\")" yields the <literal_value> R"(abc\)".
  std::string literal_value;

  // Only populated when the element type is "kFFN"; gives the number of
  // digits of the subsecond part.
  int subsecond_digit_count{0};

  // Produces a single quoted string representing this format element, meant
  // to be shown to end users. For elements outside the "kLiteral" category
  // the letter casing of the result may differ from what the user originally
  // wrote in the format string.
  std::string ToString() const;
};
// Produces the DateTimeFormatElement list for <format_str>, or a non-OK status.
// NOTE(review): presumably this splits <format_str> into its consecutive
// format elements in order of appearance and fails on unrecognized input -
// the definition is not in this header, so confirm against the .cc.
absl::StatusOr<std::vector<DateTimeFormatElement>> GetDateTimeFormatElements(
absl::string_view format_str);
} // namespace cast_date_time_internal
// Casts strings to DATE values with a format string that is supplied once, to
// Create(), instead of on every call.
class StringToDateCaster {
 public:
  // Factory: yields a caster for <format_string>, or a non-OK status.
  // NOTE(review): presumably fails when <format_string> is not valid for
  // parsing a DATE - confirm against the definition.
  static absl::StatusOr<StringToDateCaster> Create(
      absl::string_view format_string);

  // Casts <date_string> to a date using the format string given to Create().
  // The parameters correspond to those of the free function CastStringToDate;
  // <date> is the output.
  absl::Status Cast(absl::string_view date_string, int32_t current_date,
                    int32_t* date) const;

 private:
  // Format elements this caster operates on; set once at construction.
  std::vector<cast_date_time_internal::DateTimeFormatElement> format_elements_;

  // Takes over the contents of <elements>. Private, so instances can only be
  // obtained through Create().
  explicit StringToDateCaster(
      std::vector<cast_date_time_internal::DateTimeFormatElement>&& elements)
      : format_elements_(std::move(elements)) {}
};
// Casts strings to TIME values with a format string that is supplied once, to
// Create(), instead of on every call.
class StringToTimeCaster {
 public:
  // Factory: yields a caster for <format_string>, or a non-OK status.
  // NOTE(review): presumably fails when <format_string> is not valid for
  // parsing a TIME - confirm against the definition.
  static absl::StatusOr<StringToTimeCaster> Create(
      absl::string_view format_string);

  // Casts <time_string> to a time using the format string given to Create().
  // The parameters correspond to those of the free function CastStringToTime;
  // <time> is the output.
  absl::Status Cast(absl::string_view time_string, TimestampScale scale,
                    TimeValue* time) const;

 private:
  // Format elements this caster operates on; set once at construction.
  std::vector<cast_date_time_internal::DateTimeFormatElement> format_elements_;

  // Takes over the contents of <elements>. Private, so instances can only be
  // obtained through Create().
  explicit StringToTimeCaster(
      std::vector<cast_date_time_internal::DateTimeFormatElement>&& elements)
      : format_elements_(std::move(elements)) {}
};
// Casts strings to DATETIME values with a format string that is supplied once,
// to Create(), instead of on every call.
class StringToDatetimeCaster {
 public:
  // Factory: yields a caster for <format_string>, or a non-OK status.
  // NOTE(review): presumably fails when <format_string> is not valid for
  // parsing a DATETIME - confirm against the definition.
  static absl::StatusOr<StringToDatetimeCaster> Create(
      absl::string_view format_string);

  // Casts <datetime_string> to a datetime using the format string given to
  // Create(). The parameters correspond to those of the free function
  // CastStringToDatetime; <datetime> is the output.
  absl::Status Cast(absl::string_view datetime_string, TimestampScale scale,
                    int32_t current_date, DatetimeValue* datetime) const;

 private:
  // Format elements this caster operates on; set once at construction.
  std::vector<cast_date_time_internal::DateTimeFormatElement> format_elements_;

  // Takes over the contents of <elements>. Private, so instances can only be
  // obtained through Create().
  explicit StringToDatetimeCaster(
      std::vector<cast_date_time_internal::DateTimeFormatElement>&& elements)
      : format_elements_(std::move(elements)) {}
};
// Casts strings to TIMESTAMP values with a format string that is supplied
// once, to Create(), instead of on every call.
//
// NOTE(review): only a microseconds (int64_t*) output is offered here, whereas
// the free CastStringToTimestamp functions also have absl::Time* overloads.
class StringToTimestampCaster {
 public:
  // Factory: yields a caster for <format_string>, or a non-OK status.
  // NOTE(review): presumably fails when <format_string> is not valid for
  // parsing a TIMESTAMP - confirm against the definition.
  static absl::StatusOr<StringToTimestampCaster> Create(
      absl::string_view format_string);

  // Casts <timestamp_string> to a timestamp using the format string given to
  // Create(). The parameters correspond to those of the free function
  // CastStringToTimestamp taking an absl::TimeZone; <timestamp_micros> is the
  // output.
  absl::Status Cast(absl::string_view timestamp_string,
                    absl::TimeZone default_timezone,
                    absl::Time current_timestamp,
                    int64_t* timestamp_micros) const;

 private:
  // Format elements this caster operates on; set once at construction.
  std::vector<cast_date_time_internal::DateTimeFormatElement> format_elements_;

  // Takes over the contents of <elements>. Private, so instances can only be
  // obtained through Create().
  explicit StringToTimestampCaster(
      std::vector<cast_date_time_internal::DateTimeFormatElement>&& elements)
      : format_elements_(std::move(elements)) {}
};
// Formats DATE values as strings with a format string that is supplied once,
// to Create(), instead of on every call.
class DateToStringCaster {
 public:
  // Factory: yields a caster for <format_string>, or a non-OK status.
  // NOTE(review): presumably fails when <format_string> is not valid for
  // formatting a DATE - confirm against the definition.
  static absl::StatusOr<DateToStringCaster> Create(
      absl::string_view format_string);

  // Casts <date> to a string using the format string given to Create(),
  // writing the text to <out>. The parameters correspond to those of the free
  // function CastFormatDateToString.
  absl::Status Cast(int32_t date, std::string* out) const;

 private:
  // Format elements this caster operates on; set once at construction.
  std::vector<cast_date_time_internal::DateTimeFormatElement> format_elements_;

  // Takes over the contents of <elements>. Private, so instances can only be
  // obtained through Create().
  explicit DateToStringCaster(
      std::vector<cast_date_time_internal::DateTimeFormatElement>&& elements)
      : format_elements_(std::move(elements)) {}
};
// Formats DATETIME values as strings with a format string that is supplied
// once, to Create(), instead of on every call.
class DatetimeToStringCaster {
 public:
  // Factory: yields a caster for <format_string>, or a non-OK status.
  // NOTE(review): presumably fails when <format_string> is not valid for
  // formatting a DATETIME - confirm against the definition.
  static absl::StatusOr<DatetimeToStringCaster> Create(
      absl::string_view format_string);

  // Casts <datetime> to a string using the format string given to Create(),
  // writing the text to <out>. The parameters correspond to those of the free
  // function CastFormatDatetimeToString.
  absl::Status Cast(const DatetimeValue& datetime, std::string* out) const;

 private:
  // Format elements this caster operates on; set once at construction.
  std::vector<cast_date_time_internal::DateTimeFormatElement> format_elements_;

  // Takes over the contents of <elements>. Private, so instances can only be
  // obtained through Create().
  explicit DatetimeToStringCaster(
      std::vector<cast_date_time_internal::DateTimeFormatElement>&& elements)
      : format_elements_(std::move(elements)) {}
};
// Formats TIME values as strings with a format string that is supplied once,
// to Create(), instead of on every call.
class TimeToStringCaster {
 public:
  // Factory: yields a caster for <format_string>, or a non-OK status.
  // NOTE(review): presumably fails when <format_string> is not valid for
  // formatting a TIME - confirm against the definition.
  static absl::StatusOr<TimeToStringCaster> Create(
      absl::string_view format_string);

  // Casts <time> to a string using the format string given to Create(),
  // writing the text to <out>. The parameters correspond to those of the free
  // function CastFormatTimeToString.
  absl::Status Cast(const TimeValue& time, std::string* out) const;

 private:
  // Format elements this caster operates on; set once at construction.
  std::vector<cast_date_time_internal::DateTimeFormatElement> format_elements_;

  // Takes over the contents of <elements>. Private, so instances can only be
  // obtained through Create().
  explicit TimeToStringCaster(
      std::vector<cast_date_time_internal::DateTimeFormatElement>&& elements)
      : format_elements_(std::move(elements)) {}
};
// Formats TIMESTAMP values as strings with a format string that is supplied
// once, to Create(), instead of on every call.
class TimestampToStringCaster {
 public:
  // Factory: yields a caster for <format_string>, or a non-OK status.
  // NOTE(review): presumably fails when <format_string> is not valid for
  // formatting a TIMESTAMP - confirm against the definition.
  static absl::StatusOr<TimestampToStringCaster> Create(
      absl::string_view format_string);

  // Casts the timestamp to a string using the format string given to Create(),
  // writing the text to <out>. This overload takes the timestamp as
  // <timestamp_micros>; the parameters correspond to those of the free
  // function CastFormatTimestampToString taking an int64_t and an
  // absl::TimeZone.
  absl::Status Cast(int64_t timestamp_micros, absl::TimeZone timezone,
                    std::string* out) const;

  // Same as above, but takes the timestamp as an absl::Time.
  absl::Status Cast(absl::Time timestamp, absl::TimeZone timezone,
                    std::string* out) const;

 private:
  // Format elements this caster operates on; set once at construction.
  std::vector<cast_date_time_internal::DateTimeFormatElement> format_elements_;

  // Takes over the contents of <elements>. Private, so instances can only be
  // obtained through Create().
  explicit TimestampToStringCaster(
      std::vector<cast_date_time_internal::DateTimeFormatElement>&& elements)
      : format_elements_(std::move(elements)) {}
};
} // namespace functions
} // namespace bigquery_ml_utils
#endif // THIRD_PARTY_PY_BIGQUERY_ML_UTILS_SQL_UTILS_PUBLIC_FUNCTIONS_CAST_DATE_TIME_H_