velox/functions/prestosql/DateTimeFunctions.h (638 lines of code) (raw):
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "velox/core/QueryConfig.h"
#include "velox/external/date/tz.h"
#include "velox/functions/Macros.h"
#include "velox/functions/lib/DateTimeFormatter.h"
#include "velox/functions/lib/JodaDateTime.h"
#include "velox/functions/prestosql/DateTimeImpl.h"
#include "velox/functions/prestosql/types/TimestampWithTimeZoneType.h"
#include "velox/type/tz/TimeZoneMap.h"
namespace facebook::velox::functions {
/// Converts a timestamp into a double holding seconds since the Unix epoch.
/// Both overloads always report a non-null result.
template <typename T>
struct ToUnixtimeFunction {
  VELOX_DEFINE_FUNCTION_TYPES(T);

  /// TIMESTAMP overload: forwards to the toUnixtime() helper.
  FOLLY_ALWAYS_INLINE bool call(
      double& result,
      const arg_type<Timestamp>& timestamp) {
    result = toUnixtime(timestamp);
    return true;
  }

  /// TIMESTAMP WITH TIME ZONE overload: field 0 of the input is read as a
  /// millisecond count and scaled down to (fractional) seconds. The time zone
  /// field is not consulted.
  FOLLY_ALWAYS_INLINE bool call(
      double& result,
      const arg_type<TimestampWithTimezone>& timestampWithTimezone) {
    const auto epochMillis = *timestampWithTimezone.template at<0>();
    result = static_cast<double>(epochMillis) / kMillisecondsInSecond;
    return true;
  }
};
/// Builds a Timestamp from a double via the fromUnixtime() helper.
template <typename T>
struct FromUnixtimeFunction {
  VELOX_DEFINE_FUNCTION_TYPES(T);

  /// Returns true and fills 'result' when fromUnixtime() produced a value;
  /// returns false (leaving 'result' untouched) when it did not.
  FOLLY_ALWAYS_INLINE bool call(
      Timestamp& result,
      const arg_type<double>& unixtime) {
    auto converted = fromUnixtime(unixtime);
    if (!LIKELY(converted.has_value())) {
      return false;
    }
    result = converted.value();
    return true;
  }
};
namespace {
/// Number of seconds in one day (24 * 60 * 60).
inline constexpr int64_t kSecondsInDay = 86'400;
/// Looks up the session time zone described by the query config.
///
/// Returns nullptr when adjustTimestampToTimezone() is off or when the
/// session time zone name is empty; otherwise returns whatever
/// date::locate_zone() yields for that name.
FOLLY_ALWAYS_INLINE const date::time_zone* getTimeZoneFromConfig(
    const core::QueryConfig& config) {
  if (!config.adjustTimestampToTimezone()) {
    return nullptr;
  }

  auto zoneName = config.sessionTimezone();
  if (zoneName.empty()) {
    return nullptr;
  }
  return date::locate_zone(zoneName);
}
/// Returns the seconds component of 'timestamp'. When 'timeZone' is non-null
/// the (by-value) timestamp is first passed through toTimezone() for that
/// zone; the caller's timestamp is never modified.
FOLLY_ALWAYS_INLINE int64_t
getSeconds(Timestamp timestamp, const date::time_zone* timeZone) {
  if (timeZone != nullptr) {
    timestamp.toTimezone(*timeZone);
  }
  return timestamp.getSeconds();
}
/// Breaks 'timestamp' down into calendar fields.
///
/// The seconds value is obtained from getSeconds(), i.e. it is shifted into
/// 'timeZone' first when 'timeZone' is non-null, and is then decomposed with
/// gmtime_r().
///
/// @param timestamp Timestamp to decompose (taken by value).
/// @param timeZone Zone to shift into before decomposing, or nullptr.
/// @return The broken-down time. If gmtime_r() cannot represent the value the
/// returned struct is all zeros rather than indeterminate.
FOLLY_ALWAYS_INLINE
std::tm getDateTime(Timestamp timestamp, const date::time_zone* timeZone) {
  // Convert by value into a genuine time_t. Reinterpreting the address of an
  // int64_t as a 'const time_t*' reads the wrong bytes when time_t is not a
  // 64-bit type and is type punning even when the widths happen to match.
  const time_t epochSeconds =
      static_cast<time_t>(getSeconds(timestamp, timeZone));

  // Value-initialize so a gmtime_r() failure (e.g. the year does not fit in
  // tm_year) never hands uninitialized fields back to the caller.
  // NOTE(review): consider raising a user error on failure instead.
  std::tm dateTime{};
  gmtime_r(&epochSeconds, &dateTime);
  return dateTime;
}
/// Breaks 'date' down into calendar fields at midnight of that day.
///
/// @param date Date whose days() count is scaled to seconds and decomposed
/// with gmtime_r().
/// @return The broken-down time. If gmtime_r() cannot represent the value the
/// returned struct is all zeros rather than indeterminate.
FOLLY_ALWAYS_INLINE
std::tm getDateTime(Date date) {
  // kSecondsInDay is int64_t, so the product is computed in 64 bits.
  const int64_t seconds = date.days() * kSecondsInDay;

  // Convert by value into a genuine time_t instead of reinterpreting the
  // address of an int64_t, which is only valid when time_t is that exact
  // type.
  const time_t epochSeconds = static_cast<time_t>(seconds);

  // Value-initialize so a gmtime_r() failure never exposes uninitialized
  // fields. NOTE(review): consider raising a user error on failure instead.
  std::tm dateTime{};
  gmtime_r(&epochSeconds, &dateTime);
  return dateTime;
}
/// Mixin for functions that take a single TIMESTAMP argument and need the
/// session time zone. Derived functions read the zone through 'timeZone_'.
template <typename T>
struct InitSessionTimezone {
  VELOX_DEFINE_FUNCTION_TYPES(T);

  /// Session time zone resolved in initialize(); stays nullptr when
  /// getTimeZoneFromConfig() returns nullptr.
  const date::time_zone* timeZone_ = nullptr;

  /// Captures the session time zone from the query config. The timestamp
  /// argument is not inspected.
  FOLLY_ALWAYS_INLINE void initialize(
      const core::QueryConfig& config,
      const arg_type<Timestamp>* /*timestamp*/) {
    timeZone_ = getTimeZoneFromConfig(config);
  }
};
} // namespace
/// Extracts the calendar year from a TIMESTAMP (in the session time zone when
/// one was initialized) or from a DATE.
template <typename T>
struct YearFunction : public InitSessionTimezone<T> {
  VELOX_DEFINE_FUNCTION_TYPES(T);

  FOLLY_ALWAYS_INLINE bool call(
      int64_t& result,
      const arg_type<Timestamp>& timestamp) {
    const std::tm fields = getDateTime(timestamp, this->timeZone_);
    // tm_year counts years since 1900.
    result = 1900 + fields.tm_year;
    return true;
  }

  FOLLY_ALWAYS_INLINE bool call(int64_t& result, const arg_type<Date>& date) {
    const std::tm fields = getDateTime(date);
    result = 1900 + fields.tm_year;
    return true;
  }
};
/// Extracts the quarter of the year (1 through 4) from a TIMESTAMP (in the
/// session time zone when one was initialized) or from a DATE.
template <typename T>
struct QuarterFunction : public InitSessionTimezone<T> {
  VELOX_DEFINE_FUNCTION_TYPES(T);

  FOLLY_ALWAYS_INLINE bool call(
      int64_t& result,
      const arg_type<Timestamp>& timestamp) {
    const std::tm fields = getDateTime(timestamp, this->timeZone_);
    // tm_mon is zero-based, so months 0-2 map to quarter 1, 3-5 to 2, etc.
    result = fields.tm_mon / 3 + 1;
    return true;
  }

  FOLLY_ALWAYS_INLINE bool call(int64_t& result, const arg_type<Date>& date) {
    const std::tm fields = getDateTime(date);
    result = fields.tm_mon / 3 + 1;
    return true;
  }
};
/// Extracts the month of the year (1 through 12) from a TIMESTAMP (in the
/// session time zone when one was initialized) or from a DATE.
template <typename T>
struct MonthFunction : public InitSessionTimezone<T> {
  VELOX_DEFINE_FUNCTION_TYPES(T);

  FOLLY_ALWAYS_INLINE bool call(
      int64_t& result,
      const arg_type<Timestamp>& timestamp) {
    const std::tm fields = getDateTime(timestamp, this->timeZone_);
    // tm_mon is zero-based.
    result = 1 + fields.tm_mon;
    return true;
  }

  FOLLY_ALWAYS_INLINE bool call(int64_t& result, const arg_type<Date>& date) {
    const std::tm fields = getDateTime(date);
    result = 1 + fields.tm_mon;
    return true;
  }
};
/// Extracts the day of the month from a TIMESTAMP (in the session time zone
/// when one was initialized) or from a DATE.
template <typename T>
struct DayFunction : public InitSessionTimezone<T> {
  VELOX_DEFINE_FUNCTION_TYPES(T);

  FOLLY_ALWAYS_INLINE bool call(
      int64_t& result,
      const arg_type<Timestamp>& timestamp) {
    const std::tm fields = getDateTime(timestamp, this->timeZone_);
    result = fields.tm_mday;
    return true;
  }

  FOLLY_ALWAYS_INLINE bool call(int64_t& result, const arg_type<Date>& date) {
    const std::tm fields = getDateTime(date);
    result = fields.tm_mday;
    return true;
  }
};
/// Extracts the day of the week, numbered 1 (Monday) through 7 (Sunday), from
/// a TIMESTAMP (in the session time zone when one was initialized) or from a
/// DATE.
template <typename T>
struct DayOfWeekFunction : public InitSessionTimezone<T> {
  VELOX_DEFINE_FUNCTION_TYPES(T);

  FOLLY_ALWAYS_INLINE bool call(
      int64_t& result,
      const arg_type<Timestamp>& timestamp) {
    const std::tm fields = getDateTime(timestamp, this->timeZone_);
    // tm_wday uses 0 for Sunday; remap it to 7 and keep 1-6 as they are.
    if (fields.tm_wday == 0) {
      result = 7;
    } else {
      result = fields.tm_wday;
    }
    return true;
  }

  FOLLY_ALWAYS_INLINE bool call(int64_t& result, const arg_type<Date>& date) {
    const std::tm fields = getDateTime(date);
    if (fields.tm_wday == 0) {
      result = 7;
    } else {
      result = fields.tm_wday;
    }
    return true;
  }
};
/// Extracts the one-based day of the year from a TIMESTAMP (in the session
/// time zone when one was initialized) or from a DATE.
template <typename T>
struct DayOfYearFunction : public InitSessionTimezone<T> {
  VELOX_DEFINE_FUNCTION_TYPES(T);

  FOLLY_ALWAYS_INLINE bool call(
      int64_t& result,
      const arg_type<Timestamp>& timestamp) {
    const std::tm fields = getDateTime(timestamp, this->timeZone_);
    // tm_yday is zero-based.
    result = 1 + fields.tm_yday;
    return true;
  }

  FOLLY_ALWAYS_INLINE bool call(int64_t& result, const arg_type<Date>& date) {
    const std::tm fields = getDateTime(date);
    result = 1 + fields.tm_yday;
    return true;
  }
};
/// Extracts the week-based year: the year that owns the Monday-to-Sunday week
/// containing the given day. Accepts a TIMESTAMP (in the session time zone
/// when one was initialized) or a DATE.
template <typename T>
struct YearOfWeekFunction : public InitSessionTimezone<T> {
  VELOX_DEFINE_FUNCTION_TYPES(T);

  /// Derives the week-based year from already broken-down calendar fields.
  /// Only tm_year, tm_mon, tm_mday and tm_wday are read.
  FOLLY_ALWAYS_INLINE int64_t computeYearOfWeek(const std::tm& dateTime) {
    const int calendarYear = 1900 + dateTime.tm_year;
    // Day of week numbered 1 (Monday) through 7 (Sunday).
    const int isoWeekDay = dateTime.tm_wday == 0 ? 7 : dateTime.tm_wday;

    // December 29-31 belong to the following year when they share a week
    // with the following January 1 and that January 1 is a Thursday or
    // earlier, i.e. the Monday of this week falls on December 29 or later.
    const bool ownedByNextYear = dateTime.tm_mon == 11 &&
        dateTime.tm_mday >= 29 && dateTime.tm_mday - isoWeekDay >= 31 - 3;
    if (UNLIKELY(ownedByNextYear)) {
      return calendarYear + 1;
    }

    // January 1-3 belong to the previous year when they share a week with
    // January 1 and January 1 is a Friday or later.
    const bool ownedByPreviousYear = dateTime.tm_mon == 0 &&
        dateTime.tm_mday <= 3 && isoWeekDay - (dateTime.tm_mday - 1) >= 5;
    if (UNLIKELY(ownedByPreviousYear)) {
      return calendarYear - 1;
    }

    return calendarYear;
  }

  FOLLY_ALWAYS_INLINE bool call(
      int64_t& result,
      const arg_type<Timestamp>& timestamp) {
    result = computeYearOfWeek(getDateTime(timestamp, this->timeZone_));
    return true;
  }

  FOLLY_ALWAYS_INLINE bool call(int64_t& result, const arg_type<Date>& date) {
    result = computeYearOfWeek(getDateTime(date));
    return true;
  }
};
/// Extracts the hour of the day (0 through 23).
///
/// Overloads:
///  - TIMESTAMP: evaluated in the session time zone when one was initialized.
///  - DATE: evaluated at the midnight getDateTime(Date) produces.
///  - TIMESTAMP WITH TIME ZONE: evaluated in the time zone carried by the
///    value itself; the session time zone is not consulted.
template <typename T>
struct HourFunction : public InitSessionTimezone<T> {
  VELOX_DEFINE_FUNCTION_TYPES(T);

  FOLLY_ALWAYS_INLINE bool call(
      int64_t& result,
      const arg_type<Timestamp>& timestamp) {
    result = getDateTime(timestamp, this->timeZone_).tm_hour;
    return true;
  }

  FOLLY_ALWAYS_INLINE bool call(int64_t& result, const arg_type<Date>& date) {
    result = getDateTime(date).tm_hour;
    return true;
  }

  /// Field 0 of the input is the millisecond count and field 1 the time zone
  /// id handed to Timestamp::toTimezone().
  FOLLY_ALWAYS_INLINE void call(
      int64_t& result,
      const arg_type<TimestampWithTimezone>& timestampWithTimezone) {
    const auto milliseconds = *timestampWithTimezone.template at<0>();

    // Integer division truncates toward zero, which would move a negative
    // millisecond count that is not a whole second forward into the next
    // second (e.g. -500 ms -> second 0, reporting hour 0 instead of 23 in
    // UTC). Round toward negative infinity so the instant lands in the second
    // that actually contains it.
    int64_t seconds = milliseconds / kMillisecondsInSecond;
    if (seconds * kMillisecondsInSecond > milliseconds) {
      --seconds;
    }

    Timestamp timestamp{seconds, 0UL};
    timestamp.toTimezone(*timestampWithTimezone.template at<1>());
    // The timestamp is already shifted, so no further zone is applied here.
    result = getDateTime(timestamp, nullptr).tm_hour;
  }
};
/// Extracts the minute of the hour from a TIMESTAMP (in the session time zone
/// when one was initialized) or from a DATE.
template <typename T>
struct MinuteFunction : public InitSessionTimezone<T> {
  VELOX_DEFINE_FUNCTION_TYPES(T);

  FOLLY_ALWAYS_INLINE bool call(
      int64_t& result,
      const arg_type<Timestamp>& timestamp) {
    const std::tm fields = getDateTime(timestamp, this->timeZone_);
    result = fields.tm_min;
    return true;
  }

  FOLLY_ALWAYS_INLINE bool call(int64_t& result, const arg_type<Date>& date) {
    const std::tm fields = getDateTime(date);
    result = fields.tm_min;
    return true;
  }
};
/// Extracts the second of the minute from a TIMESTAMP or a DATE. No time zone
/// adjustment is applied: the TIMESTAMP overload always passes a null zone.
template <typename T>
struct SecondFunction {
  VELOX_DEFINE_FUNCTION_TYPES(T);

  FOLLY_ALWAYS_INLINE bool call(
      int64_t& result,
      const arg_type<Timestamp>& timestamp) {
    const std::tm fields = getDateTime(timestamp, nullptr);
    result = fields.tm_sec;
    return true;
  }

  FOLLY_ALWAYS_INLINE bool call(int64_t& result, const arg_type<Date>& date) {
    const std::tm fields = getDateTime(date);
    result = fields.tm_sec;
    return true;
  }
};
/// Extracts the millisecond-of-second component from a TIMESTAMP or a DATE.
template <typename T>
struct MillisecondFunction {
  VELOX_DEFINE_FUNCTION_TYPES(T);

  /// Scales the timestamp's nanosecond component down to milliseconds.
  FOLLY_ALWAYS_INLINE bool call(
      int64_t& result,
      const arg_type<Timestamp>& timestamp) {
    const auto nanos = timestamp.getNanos();
    result = nanos / kNanosecondsInMillisecond;
    return true;
  }

  /// A DATE carries no sub-day precision, so the answer is always zero.
  FOLLY_ALWAYS_INLINE bool call(
      int64_t& result,
      const arg_type<Date>& /*date*/) {
    result = 0;
    return true;
  }
};
namespace {
inline std::optional<DateTimeUnit> fromDateTimeUnitString(
const StringView& unitString,
bool throwIfInvalid) {
static const StringView kMillisecond("millisecond");
static const StringView kSecond("second");
static const StringView kMinute("minute");
static const StringView kHour("hour");
static const StringView kDay("day");
static const StringView kMonth("month");
static const StringView kQuarter("quarter");
static const StringView kYear("year");
if (unitString == kMillisecond) {
return DateTimeUnit::kMillisecond;
}
if (unitString == kSecond) {
return DateTimeUnit::kSecond;
}
if (unitString == kMinute) {
return DateTimeUnit::kMinute;
}
if (unitString == kHour) {
return DateTimeUnit::kHour;
}
if (unitString == kDay) {
return DateTimeUnit::kDay;
}
if (unitString == kMonth) {
return DateTimeUnit::kMonth;
}
if (unitString == kQuarter) {
return DateTimeUnit::kQuarter;
}
if (unitString == kYear) {
return DateTimeUnit::kYear;
}
// TODO Add support for "week".
if (throwIfInvalid) {
VELOX_UNSUPPORTED("Unsupported datetime unit: {}", unitString);
}
return std::nullopt;
}
/// Returns true for the sub-day units: millisecond, second, minute and hour.
inline bool isTimeUnit(const DateTimeUnit unit) {
  switch (unit) {
    case DateTimeUnit::kMillisecond:
    case DateTimeUnit::kSecond:
    case DateTimeUnit::kMinute:
    case DateTimeUnit::kHour:
      return true;
    default:
      return false;
  }
}
/// Returns true for the units of a day or longer: day, month, quarter and
/// year.
inline bool isDateUnit(const DateTimeUnit unit) {
  switch (unit) {
    case DateTimeUnit::kDay:
    case DateTimeUnit::kMonth:
    case DateTimeUnit::kQuarter:
    case DateTimeUnit::kYear:
      return true;
    default:
      return false;
  }
}
inline std::optional<DateTimeUnit> getDateUnit(
const StringView& unitString,
bool throwIfInvalid) {
std::optional<DateTimeUnit> unit =
fromDateTimeUnitString(unitString, throwIfInvalid);
if (unit.has_value() && !isDateUnit(unit.value())) {
if (throwIfInvalid) {
VELOX_USER_FAIL("{} is not a valid DATE field", unitString);
}
return std::nullopt;
}
return unit;
}
} // namespace
/// Truncates a TIMESTAMP or DATE down to the start of the given unit.
///
/// When the unit argument is constant it is parsed once in initialize() and
/// cached in 'unit_'; otherwise (or when the constant could not be parsed) it
/// is parsed on every call, raising an error for invalid names.
template <typename T>
struct DateTruncFunction {
  VELOX_DEFINE_FUNCTION_TYPES(T);

  /// Session time zone; only set by the TIMESTAMP initialize() overload.
  const date::time_zone* timeZone_ = nullptr;
  /// Unit parsed from a constant unit argument, if any.
  std::optional<DateTimeUnit> unit_;

  /// TIMESTAMP variant. "millisecond" is rejected up front because it is not
  /// an accepted truncation unit for timestamps.
  FOLLY_ALWAYS_INLINE void initialize(
      const core::QueryConfig& config,
      const arg_type<Varchar>* unitString,
      const arg_type<Timestamp>* /*timestamp*/) {
    timeZone_ = getTimeZoneFromConfig(config);
    if (unitString == nullptr) {
      return;
    }

    unit_ = fromDateTimeUnitString(*unitString, false /*throwIfInvalid*/);
    VELOX_USER_CHECK(
        !(unit_.has_value() && unit_.value() == DateTimeUnit::kMillisecond),
        "{} is not a valid TIMESTAMP field",
        *unitString);
  }

  /// DATE variant. Invalid names are left for call() to report.
  FOLLY_ALWAYS_INLINE void initialize(
      const core::QueryConfig& /*config*/,
      const arg_type<Varchar>* unitString,
      const arg_type<Date>* /*date*/) {
    if (unitString == nullptr) {
      return;
    }
    unit_ = getDateUnit(*unitString, false);
  }

  /// Zeroes every field of 'dateTime' finer than 'unit'. Each case
  /// deliberately falls through into the next finer unit. Units other than
  /// year, quarter, month, day, hour and minute must be handled by the caller.
  FOLLY_ALWAYS_INLINE void adjustDateTime(
      std::tm& dateTime,
      const DateTimeUnit& unit) {
    switch (unit) {
      case DateTimeUnit::kYear:
        dateTime.tm_mon = 0;
        dateTime.tm_yday = 0;
        [[fallthrough]];
      case DateTimeUnit::kQuarter:
        // Snap to the first month of the quarter.
        dateTime.tm_mon = dateTime.tm_mon / 3 * 3;
        [[fallthrough]];
      case DateTimeUnit::kMonth:
        dateTime.tm_mday = 1;
        [[fallthrough]];
      case DateTimeUnit::kDay:
        dateTime.tm_hour = 0;
        [[fallthrough]];
      case DateTimeUnit::kHour:
        dateTime.tm_min = 0;
        [[fallthrough]];
      case DateTimeUnit::kMinute:
        dateTime.tm_sec = 0;
        break;
      default:
        VELOX_UNREACHABLE();
    }
  }

  FOLLY_ALWAYS_INLINE bool call(
      out_type<Timestamp>& result,
      const arg_type<Varchar>& unitString,
      const arg_type<Timestamp>& timestamp) {
    DateTimeUnit unit;
    if (!unit_.has_value()) {
      unit =
          fromDateTimeUnitString(unitString, true /*throwIfInvalid*/).value();
      VELOX_USER_CHECK(
          unit != DateTimeUnit::kMillisecond,
          "{} is not a valid TIMESTAMP field",
          unitString);
    } else {
      unit = unit_.value();
    }

    // Truncating to the second only drops the sub-second part.
    if (unit == DateTimeUnit::kSecond) {
      result = Timestamp(timestamp.getSeconds(), 0);
      return true;
    }

    // Truncate in the session zone (when set), then convert back.
    std::tm fields = getDateTime(timestamp, timeZone_);
    adjustDateTime(fields, unit);
    result = Timestamp(timegm(&fields), 0);
    if (timeZone_ != nullptr) {
      result.toGMT(*timeZone_);
    }
    return true;
  }

  FOLLY_ALWAYS_INLINE bool call(
      out_type<Date>& result,
      const arg_type<Varchar>& unitString,
      const arg_type<Date>& date) {
    const DateTimeUnit unit = unit_.has_value()
        ? unit_.value()
        : getDateUnit(unitString, true).value();

    // A DATE is already at day granularity.
    if (unit == DateTimeUnit::kDay) {
      result = Date(date.days());
      return true;
    }

    std::tm fields = getDateTime(date);
    adjustDateTime(fields, unit);
    result = Date(timegm(&fields) / kSecondsInDay);
    return true;
  }
};
/// Adds 'value' units to a TIMESTAMP or DATE.
///
/// A constant unit argument is parsed once in initialize() and cached in
/// 'unit_'; otherwise it is parsed on each call. 'value' must fit in 32 bits.
template <typename T>
struct DateAddFunction {
  VELOX_DEFINE_FUNCTION_TYPES(T);

  /// Session time zone; only set by the TIMESTAMP initialize() overload.
  const date::time_zone* sessionTimeZone_ = nullptr;
  /// Unit parsed from a constant unit argument, if any.
  std::optional<DateTimeUnit> unit_ = std::nullopt;

  FOLLY_ALWAYS_INLINE void initialize(
      const core::QueryConfig& config,
      const arg_type<Varchar>* unitString,
      const int64_t* /*value*/,
      const arg_type<Timestamp>* /*timestamp*/) {
    sessionTimeZone_ = getTimeZoneFromConfig(config);
    if (unitString == nullptr) {
      return;
    }
    unit_ = fromDateTimeUnitString(*unitString, false /*throwIfInvalid*/);
  }

  FOLLY_ALWAYS_INLINE void initialize(
      const core::QueryConfig& /*config*/,
      const arg_type<Varchar>* unitString,
      const int64_t* /*value*/,
      const arg_type<Date>* /*date*/) {
    if (unitString == nullptr) {
      return;
    }
    unit_ = getDateUnit(*unitString, false);
  }

  FOLLY_ALWAYS_INLINE bool call(
      out_type<Timestamp>& result,
      const arg_type<Varchar>& unitString,
      const int64_t value,
      const arg_type<Timestamp>& timestamp) {
    const auto unit = unit_.has_value()
        ? unit_.value()
        : fromDateTimeUnitString(unitString, true /*throwIfInvalid*/).value();

    // The underlying helper takes a 32-bit amount.
    const auto amount = static_cast<int32_t>(value);
    if (amount != value) {
      VELOX_UNSUPPORTED("integer overflow");
    }

    if (LIKELY(sessionTimeZone_ != nullptr)) {
      // A non-null session zone means the adjust_timestamp_to_timezone
      // config is on, so the arithmetic happens in that zone.
      Timestamp localTimestamp = timestamp;
      localTimestamp.toTimezone(*sessionTimeZone_);

      Timestamp shifted = addToTimestamp(localTimestamp, unit, amount);

      if (!isTimeUnit(unit)) {
        // Date-sized units: convert the local result back through the zone.
        shifted.toGMT(*sessionTimeZone_);
        result = shifted;
      } else {
        // Time-sized units: undo exactly the offset that was applied to the
        // input instead of re-resolving the zone for the result.
        const int64_t offset =
            timestamp.getSeconds() - localTimestamp.getSeconds();
        result = Timestamp(shifted.getSeconds() + offset, shifted.getNanos());
      }
    } else {
      result = addToTimestamp(timestamp, unit, amount);
    }
    return true;
  }

  FOLLY_ALWAYS_INLINE bool call(
      out_type<Date>& result,
      const arg_type<Varchar>& unitString,
      const int64_t value,
      const arg_type<Date>& date) {
    const DateTimeUnit unit = unit_.has_value()
        ? unit_.value()
        : getDateUnit(unitString, true).value();

    const auto amount = static_cast<int32_t>(value);
    if (amount != value) {
      VELOX_UNSUPPORTED("integer overflow");
    }

    result = addToDate(date, unit, amount);
    return true;
  }
};
/// Computes the difference between two TIMESTAMPs or two DATEs in the given
/// unit by delegating to diffTimestamp() / diffDate().
///
/// A constant unit argument is parsed once in initialize() and cached in
/// 'unit_'; otherwise it is parsed on each call.
template <typename T>
struct DateDiffFunction {
  VELOX_DEFINE_FUNCTION_TYPES(T);

  /// Session time zone; only set by the TIMESTAMP initialize() overload.
  const date::time_zone* sessionTimeZone_ = nullptr;
  /// Unit parsed from a constant unit argument, if any.
  std::optional<DateTimeUnit> unit_ = std::nullopt;

  FOLLY_ALWAYS_INLINE void initialize(
      const core::QueryConfig& config,
      const arg_type<Varchar>* unitString,
      const arg_type<Timestamp>* /*timestamp1*/,
      const arg_type<Timestamp>* /*timestamp2*/) {
    if (unitString != nullptr) {
      unit_ = fromDateTimeUnitString(*unitString, false /*throwIfInvalid*/);
    }
    sessionTimeZone_ = getTimeZoneFromConfig(config);
  }

  FOLLY_ALWAYS_INLINE void initialize(
      const core::QueryConfig& /*config*/,
      const arg_type<Varchar>* unitString,
      const arg_type<Date>* /*date1*/,
      const arg_type<Date>* /*date2*/) {
    if (unitString == nullptr) {
      return;
    }
    unit_ = getDateUnit(*unitString, false);
  }

  FOLLY_ALWAYS_INLINE bool call(
      int64_t& result,
      const arg_type<Varchar>& unitString,
      const arg_type<Timestamp>& timestamp1,
      const arg_type<Timestamp>& timestamp2) {
    const auto unit = unit_.has_value()
        ? unit_.value()
        : fromDateTimeUnitString(unitString, true /*throwIfInvalid*/).value();

    if (LIKELY(sessionTimeZone_ != nullptr)) {
      // A non-null session zone means the adjust_timestamp_to_timezone
      // config is on, so both operands are compared in that zone.
      Timestamp localFrom = timestamp1;
      localFrom.toTimezone(*sessionTimeZone_);

      Timestamp localTo = timestamp2;
      if (!isTimeUnit(unit)) {
        // Date-sized units: shift the second operand through the zone too.
        localTo.toTimezone(*sessionTimeZone_);
      } else {
        // Time-sized units: apply to the second operand exactly the offset
        // the first one received.
        const int64_t offset = timestamp1.getSeconds() - localFrom.getSeconds();
        localTo = Timestamp(localTo.getSeconds() - offset, localTo.getNanos());
      }

      result = diffTimestamp(unit, localFrom, localTo);
    } else {
      result = diffTimestamp(unit, timestamp1, timestamp2);
    }
    return true;
  }

  FOLLY_ALWAYS_INLINE bool call(
      int64_t& result,
      const arg_type<Varchar>& unitString,
      const arg_type<Date>& date1,
      const arg_type<Date>& date2) {
    const DateTimeUnit unit = unit_.has_value()
        ? unit_.value()
        : getDateUnit(unitString, true).value();
    result = diffDate(unit, date1, date2);
    return true;
  }
};
/// Formats a TIMESTAMP as text using a formatter built by
/// buildMysqlDateTimeFormatter() from the format argument.
///
/// A constant format string is compiled once in initialize(); a non-constant
/// one is recompiled on every call.
template <typename T>
struct DateFormatFunction {
  VELOX_DEFINE_FUNCTION_TYPES(T);

  /// Session time zone handed to the formatter; may be nullptr.
  const date::time_zone* sessionTimeZone_ = nullptr;
  /// Formatter for the current (or constant) format string.
  std::shared_ptr<DateTimeFormatter> mysqlDateTime_;
  /// True when the formatter was built from a constant format string.
  bool isConstFormat_ = false;

  FOLLY_ALWAYS_INLINE void initialize(
      const core::QueryConfig& config,
      const arg_type<Timestamp>* /*timestamp*/,
      const arg_type<Varchar>* formatString) {
    sessionTimeZone_ = getTimeZoneFromConfig(config);
    if (formatString == nullptr) {
      return;
    }

    mysqlDateTime_ = buildMysqlDateTimeFormatter(
        std::string_view(formatString->data(), formatString->size()));
    isConstFormat_ = true;
  }

  FOLLY_ALWAYS_INLINE bool call(
      out_type<Varchar>& result,
      const arg_type<Timestamp>& timestamp,
      const arg_type<Varchar>& formatString) {
    if (!isConstFormat_) {
      mysqlDateTime_ = buildMysqlDateTimeFormatter(
          std::string_view(formatString.data(), formatString.size()));
    }

    auto text = mysqlDateTime_->format(timestamp, sessionTimeZone_);
    auto length = text.size();

    result.resize(length);
    // Skip the copy for an empty result.
    if (length != 0) {
      std::memcpy(result.data(), text.data(), length);
    }
    return true;
  }
};
/// Parses text into a TIMESTAMP WITH TIME ZONE using a JodaFormatter.
///
/// A constant format string is compiled once in initialize(); otherwise a
/// formatter is built for every call.
template <typename T>
struct ParseDateTimeFunction {
  VELOX_DEFINE_FUNCTION_TYPES(T);

  /// Formatter built from a constant format argument, if any.
  std::optional<JodaFormatter> format_;
  /// Id of the session time zone when the config names one.
  std::optional<int64_t> sessionTzID_;

  FOLLY_ALWAYS_INLINE void initialize(
      const core::QueryConfig& config,
      const arg_type<Varchar>* /*input*/,
      const arg_type<Varchar>* format) {
    if (format != nullptr) {
      format_.emplace(*format);
    }

    auto zoneName = config.sessionTimezone();
    if (zoneName.empty()) {
      return;
    }
    sessionTzID_ = util::getTimeZoneID(zoneName);
  }

  FOLLY_ALWAYS_INLINE bool call(
      out_type<TimestampWithTimezone>& result,
      const arg_type<Varchar>& input,
      const arg_type<Varchar>& format) {
    auto parsed = format_.has_value() ? format_->parse(input)
                                      : JodaFormatter(format).parse(input);

    // A parsed id of -1 means the input carried no time zone. In that case
    // use the session time zone, and when there is none use 0 (GMT).
    const bool zoneWasParsed = parsed.timezoneId != -1;
    int16_t timezoneId =
        zoneWasParsed ? parsed.timezoneId : sessionTzID_.value_or(0);

    parsed.timestamp.toGMT(timezoneId);
    result = std::make_tuple(parsed.timestamp.toMillis(), timezoneId);
    return true;
  }
};
} // namespace facebook::velox::functions