sql_utils/public/civil_time.cc (315 lines of code) (raw):
/*
* Copyright 2023 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "sql_utils/public/civil_time.h"
#include <cstdint>
#include <string>
#include "absl/base/casts.h"
#include "absl/strings/str_format.h"
#include "absl/strings/string_view.h"
#include "absl/strings/strip.h"
#include "absl/time/civil_time.h"
#include "sql_utils/base/logging.h"
#include "sql_utils/base/mathutil.h"
namespace {
// With the sub-second part stripped, Y-m-D H:M:S is packed into the low 40 bits
// of a 64-bit word as follows:
//        6         5         4         3         2         1
// MSB 3210987654321098765432109876543210987654321098765432109876543210 LSB
//                             |--- year ---||m || D || H || M  || S  |
//
// That is: second = bits 0-5, minute = bits 6-11, hour = bits 12-16,
// day = bits 17-21, month = bits 22-25, year = bits 26 and up.
// The year has no mask on purpose: it is always read as "everything from
// kYearShift upwards" so that stray high bits make the year out of range.
constexpr int kSecondShift = 0;
constexpr int kMinuteShift = 6;
constexpr int kHourShift = 12;
constexpr int kDayShift = 17;
constexpr int kMonthShift = 22;
constexpr int kYearShift = 26;

constexpr uint64_t kSecondMask = uint64_t{0b111111} << kSecondShift;
constexpr uint64_t kMinuteMask = uint64_t{0b111111} << kMinuteShift;
constexpr uint64_t kHourMask = uint64_t{0b11111} << kHourShift;
constexpr uint64_t kDayMask = uint64_t{0b11111} << kDayShift;
constexpr uint64_t kMonthMask = uint64_t{0b1111} << kMonthShift;
// Number of nanoseconds in one second.
constexpr int64_t kNanosPerSecond = 1'000'000'000;

// Returns true iff `nanoseconds` lies in the half-open range
// [0, kNanosPerSecond), i.e. it is a valid sub-second component.
inline bool IsValidNanoseconds(int64_t nanoseconds) {
  if (nanoseconds < 0) {
    return false;
  }
  return nanoseconds < kNanosPerSecond;
}
// Returns true iff the fields form a valid time of day: hour in [0, 24),
// minute in [0, 60), second in [0, 60) and a valid nanosecond component.
// A day is strictly 24 hours, an hour is 60 minutes and a minute is 60
// seconds; leap seconds are not allowed.
inline bool IsValidTimeFields(int64_t hour, int64_t minute, int64_t second,
                              int64_t nanosecond) {
  if (hour < 0 || hour >= 24) return false;
  if (minute < 0 || minute >= 60) return false;
  if (second < 0 || second >= 60) return false;
  return IsValidNanoseconds(nanosecond);
}
// A day is strictly 24 hours, an hour is 60 minutes and a minute is 60 seconds.
// Leap seconds are not allowed.
inline bool IsValidDatetimeFields(int64_t year, int64_t month, int64_t day,
int64_t hour, int64_t minute, int64_t second,
int64_t nanosecond) {
// CivilDay helps to determine if the specified day is valid for the month,
// and in the case of February, the year.
const absl::CivilDay civil_day(year, month, day);
return year >= 1 && year <= 9999 && month >= 1 && month <= 12 && day >= 1 &&
day <= 31 && civil_day.day() == day &&
IsValidTimeFields(hour, minute, second, nanosecond);
}
// Extracts the field selected by `mask` from `bit_field` and shifts it right
// by `shift` bits so the field's lowest bit ends up at bit 0.
inline int64_t GetPartFromBitField(uint64_t bit_field, uint64_t mask,
                                   int shift) {
  const uint64_t field_bits = (bit_field & mask) >> shift;
  return absl::bit_cast<int64_t>(field_bits);
}
// Extracts the most significant part of a packed value - hour for TimeValue,
// year for DatetimeValue. The bits are deliberately NOT masked: anything set
// in the higher, unused bits is carried into the returned part and pushes it
// out of its valid range.
inline int64_t GetLargestPartFromBitField(uint64_t bit_field, int shift) {
  const uint64_t upper_bits = bit_field >> shift;
  return absl::bit_cast<int64_t>(upper_bits);
}
// Splits a packed time-of-day (seconds precision) into its hour, minute and
// second parts, written to `*h`, `*m` and `*s` respectively.
void UnpackHMS(uint64_t bit_field, int64_t* h, int64_t* m, int64_t* s) {
  *s = GetPartFromBitField(bit_field, kSecondMask, kSecondShift);
  *m = GetPartFromBitField(bit_field, kMinuteMask, kMinuteShift);
  // Hour is the topmost field, so it is read unmasked.
  *h = GetLargestPartFromBitField(bit_field, kHourShift);
}
// Normalize time parts by carrying any overage of the legal range of each part
// into adjacent fields. Hours are wrapped around the 24 hour clock, so
// hour 24 -> hour 0, hour 25 -> hour 1, hour -1 -> hour 23, etc.
void NormalizeTime(int32_t* h, int32_t* m, int32_t* s, int64_t* ns) {
int64_t carry_seconds =
bigquery_ml_utils_base::MathUtil::FloorOfRatio(*ns, kNanosPerSecond);
absl::CivilSecond cs(1970, 1, 1, *h, *m, *s);
cs += carry_seconds;
*h = cs.hour();
*m = cs.minute();
*s = cs.second();
*ns -= (carry_seconds * kNanosPerSecond);
// The CivilTime constructor should have coerced all the values to the
// appropriate range.
SQL_DCHECK(IsValidTimeFields(*h, *m, *s, *ns));
}
// Normalize date and time parts by carrying any overage of the legal range of
// each part into adjacent fields.
void NormalizeDatetime(int64_t* y, int32_t* mo, int32_t* d, int32_t* h,
int32_t* m, int32_t* s, int64_t* ns) {
int64_t carry_seconds =
bigquery_ml_utils_base::MathUtil::FloorOfRatio(*ns, kNanosPerSecond);
absl::CivilSecond cs(*y, *mo, *d, *h, *m, *s);
cs += carry_seconds;
*y = cs.year();
*mo = cs.month();
*d = cs.day();
*h = cs.hour();
*m = cs.minute();
*s = cs.second();
*ns -= (carry_seconds * kNanosPerSecond);
// The CivilTime constructor should have coerced all the time values to the
// appropriate range.
SQL_DCHECK(IsValidTimeFields(*h, *m, *s, *ns));
}
} // namespace
namespace bigquery_ml_utils {
static_assert(sizeof(TimeValue) <= 8, "TimeValue is larger than 8 bytes");
TimeValue::TimeValue()
: valid_(true), hour_(0), minute_(0), second_(0), nanosecond_(0) {}
// Builds a TimeValue from the given parts without normalizing them. The
// result is marked invalid if any part is out of range.
TimeValue TimeValue::FromHMSAndNanos(int32_t hour, int32_t minute,
                                     int32_t second, int32_t nanosecond) {
  return FromHMSAndNanosInternal(int64_t{hour}, int64_t{minute},
                                 int64_t{second}, int64_t{nanosecond});
}
// Builds a TimeValue after normalizing the parts with NormalizeTime, so
// out-of-range parts are carried into adjacent fields and hours wrap around
// the 24 hour clock.
TimeValue TimeValue::FromHMSAndNanosNormalized(int32_t hour, int32_t minute,
                                               int32_t second,
                                               int32_t nanosecond) {
  // NormalizeTime operates on a 64-bit nanosecond value.
  int64_t wide_nanos = nanosecond;
  NormalizeTime(&hour, &minute, &second, &wide_nanos);
  TimeValue normalized =
      FromHMSAndNanosInternal(hour, minute, second, wide_nanos);
  SQL_DCHECK(normalized.IsValid());
  return normalized;
}
// Builds a TimeValue from the given parts, with the sub-second part given in
// microseconds, without normalizing them. The result is marked invalid if any
// part is out of range.
TimeValue TimeValue::FromHMSAndMicros(int32_t hour, int32_t minute,
                                      int32_t second, int32_t microsecond) {
  // Widen before multiplying so the conversion to nanoseconds cannot overflow.
  const int64_t nanos = int64_t{microsecond} * 1000;
  return FromHMSAndNanosInternal(hour, minute, second, nanos);
}
// Builds a TimeValue after normalizing the parts with NormalizeTime; the
// sub-second part is given in microseconds.
TimeValue TimeValue::FromHMSAndMicrosNormalized(int32_t hour, int32_t minute,
                                                int32_t second,
                                                int32_t microsecond) {
  // Widen before multiplying so the conversion to nanoseconds cannot overflow.
  int64_t wide_nanos = int64_t{microsecond} * 1000;
  NormalizeTime(&hour, &minute, &second, &wide_nanos);
  TimeValue normalized =
      FromHMSAndNanosInternal(hour, minute, second, wide_nanos);
  SQL_DCHECK(normalized.IsValid());
  return normalized;
}
// Shared construction path: validates the parts with IsValidTimeFields and
// stores them. When validation fails the result is marked invalid.
TimeValue TimeValue::FromHMSAndNanosInternal(int64_t hour, int64_t minute,
                                             int64_t second,
                                             int64_t nanosecond) {
  TimeValue result;
  if (!IsValidTimeFields(hour, minute, second, nanosecond)) {
    result.valid_ = false;
    // When TimeValue is invalid, also set hour to -1 to make it more likely
    // the difference between an invalid and default initialized TimeValue
    // will be noticed.
    result.hour_ = -1;
    return result;
  }

  result.valid_ = true;
  // These are narrowing casts. They do not lose information because
  // IsValidTimeFields has checked that the value ranges are appropriate.
  result.hour_ = static_cast<int8_t>(hour);
  result.minute_ = static_cast<int8_t>(minute);
  result.second_ = static_cast<int8_t>(second);
  result.nanosecond_ = static_cast<int32_t>(nanosecond);
  return result;
}
// Builds a TimeValue from a packed hour/minute/second bit field plus a
// separate nanosecond part. The unpacked parts are validated, so a malformed
// bit field yields an invalid TimeValue.
TimeValue TimeValue::InternalFromPacked64SecondsAndNanos(
    uint64_t bit_field_time_seconds, int64_t nanosecond) {
  int64_t unpacked_hour;
  int64_t unpacked_minute;
  int64_t unpacked_second;
  UnpackHMS(bit_field_time_seconds, &unpacked_hour, &unpacked_minute,
            &unpacked_second);
  return FromHMSAndNanosInternal(unpacked_hour, unpacked_minute,
                                 unpacked_second, nanosecond);
}
// Decodes a packed time whose low bits (selected by kMicrosMask) hold the
// microseconds and whose remaining bits, above kMicrosShift, hold the packed
// hour/minute/second.
TimeValue TimeValue::FromPacked64Micros(int64_t bit_field_time_micros) {
  const uint64_t packed = absl::bit_cast<uint64_t>(bit_field_time_micros);
  const int64_t micros = GetPartFromBitField(packed, kMicrosMask, /*shift=*/0);
  // Cannot overflow because micros is less than 1 << 20.
  SQL_DCHECK_LT(micros, 1 << 20);
  const int64_t nanos = micros * 1000;
  return InternalFromPacked64SecondsAndNanos(packed >> kMicrosShift, nanos);
}
// Decodes a packed time whose low bits (selected by kNanosMask) hold the
// nanoseconds and whose remaining bits, above kNanosShift, hold the packed
// hour/minute/second.
TimeValue TimeValue::FromPacked64Nanos(int64_t bit_field_time_nanos) {
  const uint64_t packed = absl::bit_cast<uint64_t>(bit_field_time_nanos);
  const int64_t nanos = GetPartFromBitField(packed, kNanosMask, /*shift=*/0);
  return InternalFromPacked64SecondsAndNanos(packed >> kNanosShift, nanos);
}
// Decodes a 32-bit packed hour/minute/second value together with a separate
// nanosecond part.
TimeValue TimeValue::FromPacked32SecondsAndNanos(int32_t bit_field_time_seconds,
                                                 int32_t nanosecond) {
  // Reinterpret as unsigned first so that widening to 64 bits zero-extends.
  const uint32_t packed_seconds =
      absl::bit_cast<uint32_t>(bit_field_time_seconds);
  return InternalFromPacked64SecondsAndNanos(packed_seconds, nanosecond);
}
// Decodes a 32-bit packed hour/minute/second value together with a separate
// microsecond part.
TimeValue TimeValue::FromPacked32SecondsAndMicros(
    int32_t bit_field_time_seconds, int32_t microsecond) {
  // Reinterpret as unsigned first so that widening to 64 bits zero-extends.
  const uint32_t packed_seconds =
      absl::bit_cast<uint32_t>(bit_field_time_seconds);
  // Widen before multiplying so the conversion to nanoseconds cannot overflow.
  const int64_t nanos = int64_t{microsecond} * 1000;
  return InternalFromPacked64SecondsAndNanos(packed_seconds, nanos);
}
int32_t TimeValue::Packed32TimeSeconds() const {
return (hour_ << kHourShift) |
(minute_ << kMinuteShift) |
(second_ << kSecondShift);
}
// Packs the time into 64 bits: the packed hour/minute/second shifted up by
// kMicrosShift, combined with the microseconds in the low bits.
int64_t TimeValue::Packed64TimeMicros() const {
  const uint64_t seconds_part =
      static_cast<uint64_t>(Packed32TimeSeconds()) << kMicrosShift;
  return seconds_part | Microseconds();
}
// Packs the time into 64 bits: the packed hour/minute/second shifted up by
// kNanosShift, combined with the nanoseconds in the low bits.
int64_t TimeValue::Packed64TimeNanos() const {
  const uint64_t seconds_part =
      static_cast<uint64_t>(Packed32TimeSeconds()) << kNanosShift;
  return seconds_part | nanosecond_;
}
std::string TimeValue::DebugString() const {
if (!IsValid()) {
return "[INVALID]";
}
std::string raw_output = absl::StrFormat("%02d:%02d:%02d.%09d", hour_,
minute_, second_, nanosecond_);
absl::string_view output(raw_output);
while (absl::ConsumeSuffix(&output, "000")) {
// Do nothing more
}
absl::ConsumeSuffix(&output, ".");
return std::string(output);
}
static_assert(sizeof(DatetimeValue) <= 12,
"DatetimeValue is larger than 12 bytes");
DatetimeValue::DatetimeValue()
: year_(1970),
month_(1),
day_(1),
hour_(0),
minute_(0),
second_(0),
valid_(true),
nanosecond_(0) {}
// Shared construction path: validates the parts with IsValidDatetimeFields
// and stores them. When validation fails the result is marked invalid.
DatetimeValue DatetimeValue::FromYMDHMSAndNanosInternal(
    int64_t year, int64_t month, int64_t day, int64_t hour, int64_t minute,
    int64_t second, int64_t nanosecond) {
  DatetimeValue result;
  if (!IsValidDatetimeFields(year, month, day, hour, minute, second,
                             nanosecond)) {
    result.valid_ = false;
    // When DatetimeValue is invalid, also set year to -1 to make it more
    // likely the difference between an invalid and default initialized
    // DatetimeValue will be noticed.
    result.year_ = -1;
    return result;
  }

  result.valid_ = true;
  // Narrowing casts; the values have just been range-checked.
  result.year_ = static_cast<int16_t>(year);
  result.month_ = static_cast<int8_t>(month);
  result.day_ = static_cast<int8_t>(day);
  result.hour_ = static_cast<int8_t>(hour);
  result.minute_ = static_cast<int8_t>(minute);
  result.second_ = static_cast<int8_t>(second);
  result.nanosecond_ = static_cast<int32_t>(nanosecond);
  return result;
}
// Builds a DatetimeValue from the given parts, with the sub-second part given
// in microseconds, without normalizing them. The result is marked invalid if
// the parts do not form a valid datetime.
DatetimeValue DatetimeValue::FromYMDHMSAndMicros(int32_t year, int32_t month,
                                                 int32_t day, int32_t hour,
                                                 int32_t minute, int32_t second,
                                                 int32_t microsecond) {
  // Widen before multiplying so the conversion to nanoseconds cannot overflow.
  const int64_t wide_nanos = int64_t{microsecond} * 1000;
  return FromYMDHMSAndNanosInternal(year, month, day, hour, minute, second,
                                    wide_nanos);
}
// Builds a DatetimeValue after normalizing the parts with NormalizeDatetime;
// the sub-second part is given in microseconds. The normalized parts are still
// validated, so the result can be invalid (e.g. year out of range).
DatetimeValue DatetimeValue::FromYMDHMSAndMicrosNormalized(
    int32_t year, int32_t month, int32_t day, int32_t hour, int32_t minute,
    int32_t second, int32_t microsecond) {
  // NormalizeDatetime operates on 64-bit year and nanosecond values.
  int64_t wide_year = year;
  int64_t wide_nanos = int64_t{microsecond} * 1000;
  NormalizeDatetime(&wide_year, &month, &day, &hour, &minute, &second,
                    &wide_nanos);
  return FromYMDHMSAndNanosInternal(wide_year, month, day, hour, minute,
                                    second, wide_nanos);
}
// Builds a DatetimeValue from the given parts without normalizing them. The
// result is marked invalid if the parts do not form a valid datetime.
DatetimeValue DatetimeValue::FromYMDHMSAndNanos(int32_t year, int32_t month,
                                                int32_t day, int32_t hour,
                                                int32_t minute, int32_t second,
                                                int32_t nanosecond) {
  const int64_t wide_nanos = nanosecond;
  return FromYMDHMSAndNanosInternal(year, month, day, hour, minute, second,
                                    wide_nanos);
}
// Builds a DatetimeValue after normalizing the parts with NormalizeDatetime.
// The normalized parts are still validated, so the result can be invalid
// (e.g. year out of range).
DatetimeValue DatetimeValue::FromYMDHMSAndNanosNormalized(
    int32_t year, int32_t month, int32_t day, int32_t hour, int32_t minute,
    int32_t second, int32_t nanosecond) {
  // NormalizeDatetime operates on 64-bit year and nanosecond values.
  int64_t wide_year = year;
  int64_t wide_nanos = nanosecond;
  NormalizeDatetime(&wide_year, &month, &day, &hour, &minute, &second,
                    &wide_nanos);
  return FromYMDHMSAndNanosInternal(wide_year, month, day, hour, minute,
                                    second, wide_nanos);
}
// Builds a DatetimeValue from an absl::CivilSecond plus a microsecond part.
DatetimeValue DatetimeValue::FromCivilSecondAndMicros(
    absl::CivilSecond civil_second, int32_t microsecond) {
  // Widen before multiplying so the conversion to nanoseconds cannot overflow.
  const int64_t wide_nanos = int64_t{microsecond} * 1000;
  return FromCivilSecondAndNanosInternal(civil_second, wide_nanos);
}
// Builds a DatetimeValue from an absl::CivilSecond plus a nanosecond part.
DatetimeValue DatetimeValue::FromCivilSecondAndNanos(
    absl::CivilSecond civil_second, int32_t nanosecond) {
  const int64_t wide_nanos = nanosecond;
  return FromCivilSecondAndNanosInternal(civil_second, wide_nanos);
}
// Breaks `civil_second` into its calendar and clock fields and forwards them,
// with `nanosecond`, to the validating constructor path.
DatetimeValue DatetimeValue::FromCivilSecondAndNanosInternal(
    absl::CivilSecond civil_second, int64_t nanosecond) {
  const int64_t year = civil_second.year();
  const int64_t month = civil_second.month();
  const int64_t day = civil_second.day();
  const int64_t hour = civil_second.hour();
  const int64_t minute = civil_second.minute();
  const int64_t second = civil_second.second();
  return FromYMDHMSAndNanosInternal(year, month, day, hour, minute, second,
                                    nanosecond);
}
// Decodes a packed datetime (seconds precision) together with a separate
// microsecond part.
DatetimeValue DatetimeValue::FromPacked64SecondsAndMicros(
    int64_t bit_field_datetime_seconds, int32_t microsecond) {
  const uint64_t packed_seconds =
      absl::bit_cast<uint64_t>(bit_field_datetime_seconds);
  // Widen before multiplying so the conversion to nanoseconds cannot overflow.
  const int64_t wide_nanos = int64_t{microsecond} * 1000;
  return FromPacked64SecondsAndNanosInternal(packed_seconds, wide_nanos);
}
// Decodes a packed datetime (seconds precision) together with a separate
// nanosecond part.
DatetimeValue DatetimeValue::FromPacked64SecondsAndNanos(
    int64_t bit_field_datetime_seconds, int32_t nanosecond) {
  const uint64_t packed_seconds =
      absl::bit_cast<uint64_t>(bit_field_datetime_seconds);
  const int64_t wide_nanos = nanosecond;
  return FromPacked64SecondsAndNanosInternal(packed_seconds, wide_nanos);
}
// Decodes a packed datetime whose low bits (selected by kMicrosMask) hold the
// microseconds and whose remaining bits, above kMicrosShift, hold the packed
// date and time of day.
DatetimeValue DatetimeValue::FromPacked64Micros(
    int64_t bit_field_datetime_micros) {
  const uint64_t packed = absl::bit_cast<uint64_t>(bit_field_datetime_micros);
  const int64_t wide_nanos = (packed & kMicrosMask) * 1000;
  const uint64_t packed_seconds = packed >> kMicrosShift;
  return FromPacked64SecondsAndNanosInternal(packed_seconds, wide_nanos);
}
// Unpacks every field from `bit_field` and forwards them, with `nanosecond`,
// to the validating constructor path. The year is read unmasked so that any
// bits set above the defined layout make the year out of range.
DatetimeValue DatetimeValue::FromPacked64SecondsAndNanosInternal(
    uint64_t bit_field, int64_t nanosecond) {
  const int64_t year = GetLargestPartFromBitField(bit_field, kYearShift);
  const int64_t month = GetPartFromBitField(bit_field, kMonthMask, kMonthShift);
  const int64_t day = GetPartFromBitField(bit_field, kDayMask, kDayShift);
  const int64_t hour = GetPartFromBitField(bit_field, kHourMask, kHourShift);
  const int64_t minute =
      GetPartFromBitField(bit_field, kMinuteMask, kMinuteShift);
  const int64_t second =
      GetPartFromBitField(bit_field, kSecondMask, kSecondShift);
  return FromYMDHMSAndNanosInternal(year, month, day, hour, minute, second,
                                    nanosecond);
}
// Packs year, month, day, hour, minute and second into a 64-bit bit field
// using the k*Shift constants. The sub-second part is not included.
int64_t DatetimeValue::Packed64DatetimeSeconds() const {
  // The year is widened to 64 bits before shifting; the remaining fields all
  // sit below kYearShift and are combined at int width.
  const uint64_t year_bits = static_cast<uint64_t>(year_) << kYearShift;
  const int lower_bits = (month_ << kMonthShift) | (day_ << kDayShift) |
                         (hour_ << kHourShift) | (minute_ << kMinuteShift) |
                         (second_ << kSecondShift);
  return year_bits | lower_bits;
}
// Packs the datetime into 64 bits: the packed date and time of day shifted up
// by kMicrosShift, combined with the microseconds (nanoseconds / 1000,
// truncated) in the low bits.
int64_t DatetimeValue::Packed64DatetimeMicros() const {
  const uint64_t seconds_bits =
      static_cast<uint64_t>(Packed64DatetimeSeconds());
  const uint64_t packed =
      (seconds_bits << kMicrosShift) | (nanosecond_ / 1000);
  return static_cast<int64_t>(packed);
}
std::string DatetimeValue::DebugString() const {
if (!valid_) {
return "[INVALID]";
}
std::string raw_output =
absl::StrFormat("%04d-%02d-%02d %02d:%02d:%02d.%09d", year_, month_, day_,
hour_, minute_, second_, nanosecond_);
absl::string_view output(raw_output);
while (absl::ConsumeSuffix(&output, "000")) {
// Do nothing more
}
absl::ConsumeSuffix(&output, ".");
return std::string(output);
}
} // namespace bigquery_ml_utils