sql_utils/public/civil_time.h (115 lines of code) (raw):
/*
* Copyright 2023 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef THIRD_PARTY_PY_BIGQUERY_ML_UTILS_SQL_UTILS_PUBLIC_CIVIL_TIME_H_
#define THIRD_PARTY_PY_BIGQUERY_ML_UTILS_SQL_UTILS_PUBLIC_CIVIL_TIME_H_
#include <cstdint>
#include <string>
#include <cstdint>
#include "absl/time/civil_time.h"
namespace bigquery_ml_utils {
// The TimeValue and DatetimeValue classes below are used to represent TIME
// and DATETIME values. These wrapper classes include conversion methods to
// convert the values to/from an integer bitfield encoding.
//
// The valid range and the number of bits required by each date/time field are
// as follows:
//
//   Field     Range              #Bits
//   Year      [1, 9999]          14
//   Month     [1, 12]             4
//   Day       [1, 31]             5
//   Hour      [0, 23]             5
//   Minute    [0, 59]             6
//   Second    [0, 59]*            6
//   Micros    [0, 999999]        20
//   Nanos     [0, 999999999]     30
//
// * Leap second is not supported.
//
// Generally, when encoding the TIME or DATETIME into a bit field, larger
// date/time field is on the more significant side.
//
//
// At whole-second precision:
//
// TIME values, containing hour/minute/second, are encoded into a 4-byte
// bit field as follows:
//      3         2         1
// MSB 10987654321098765432109876543210 LSB
//                    | H ||  M ||  S |
//
// DATETIME values, containing year/month/day/hour/minute/second, are
// encoded into an 8-byte bit field as follows:
//        6         5         4         3         2         1
// MSB 3210987654321098765432109876543210987654321098765432109876543210 LSB
//                             |--- year ---||m || D || H ||  M ||  S |
//
//
// At microsecond precision:
//
// TIME values, containing hour/minute/second/micros, are encoded into an
// 8-byte bit field as follows:
//        6         5         4         3         2         1
// MSB 3210987654321098765432109876543210987654321098765432109876543210 LSB
//                                | H ||  M ||  S ||-------micros-----|
//
// DATETIME values, containing year/month/day/hour/minute/second/micros, are
// encoded into an 8-byte bit field as follows:
//        6         5         4         3         2         1
// MSB 3210987654321098765432109876543210987654321098765432109876543210 LSB
//         |--- year ---||m || D || H ||  M ||  S ||-------micros-----|
//
//
// At nanosecond precision:
//
// TIME values, containing hour/minute/second/nanos, are encoded into an
// 8-byte bit field as follows:
//        6         5         4         3         2         1
// MSB 3210987654321098765432109876543210987654321098765432109876543210 LSB
//                      | H ||  M ||  S ||---------- nanos -----------|
//
// However, DATETIME values with nanosecond precision cannot fit into 8-byte
// bit fields, thus there's no encoding method for it.
// A struct for TIME data type, keeping H:M:S.D* within 24 hours.
//
// Valid range is [00:00:00, 24:00:00)
//
// Each time part also has a valid range:
// * Hour: [00, 24)
// * Minute: [00, 60)
// * Second: [00, 60)
// * Nanosecond: [00, 1000000000)
//
// This class does not support leap seconds, so 60 is considered out of range
// for the Second part.
//
// When the TimeValue is invalid, the time parts stored in the TimeValue
// instance are undefined.
//
// Internally this class always keeps sub-seconds with nanosecond precision,
// but it can be constructed and used with microsecond if desired.
//
// Some factory methods perform normalization of time parts that are out of
// range. The normalization will coerce each field within its valid range, and
// adjust adjacent fields to accommodate any relevant carry or borrow. Values
// will wrap around within 24 hours during normalization.
//
// For example:
// // no special handling for leap second
// 12:34:60 -> 12:35:00
// 12:34:-01 -> 12:33:59
// 12:34:60::123456789 -> 12:35:00::123456789
// 12:34:60::-000000001 -> 12:34:59::999999999
//
class TimeValue {
 public:
  // Creates a valid TimeValue holding midnight, i.e. 00:00:00.000000000.
  TimeValue();

  // ---- Factories working at microsecond precision -------------------------

  // Builds a TimeValue from separate hour, minute, second and microsecond
  // parts. The result is invalid if any part lies outside its valid range.
  static TimeValue FromHMSAndMicros(int32_t hour, int32_t minute,
                                    int32_t second, int32_t microsecond);

  // Same inputs as FromHMSAndMicros, but parts that fall outside their
  // expected range are normalized rather than rejected, so the returned
  // TimeValue is always valid.
  static TimeValue FromHMSAndMicrosNormalized(int32_t hour, int32_t minute,
                                              int32_t second,
                                              int32_t microsecond);

  // Builds a TimeValue from a bit field that encodes hour/minute/second plus
  // a separate integer carrying the microseconds. The result is invalid if
  // any time part lies outside its valid range.
  static TimeValue FromPacked32SecondsAndMicros(int32_t bit_field_time_seconds,
                                                int32_t microsecond);

  // Builds a TimeValue from a single bit field that encodes
  // hour/minute/second/micros. The result is invalid if any time part lies
  // outside its valid range.
  static TimeValue FromPacked64Micros(int64_t bit_field_time_micros);

  // ---- Factories working at nanosecond precision --------------------------

  // Builds a TimeValue from separate hour, minute, second and nanosecond
  // parts. The result is invalid if any part lies outside its valid range.
  static TimeValue FromHMSAndNanos(int32_t hour, int32_t minute, int32_t second,
                                   int32_t nanosecond);

  // Same inputs as FromHMSAndNanos, but parts that fall outside their
  // expected range are normalized rather than rejected, so the returned
  // TimeValue is always valid.
  static TimeValue FromHMSAndNanosNormalized(int32_t hour, int32_t minute,
                                             int32_t second,
                                             int32_t nanosecond);

  // Builds a TimeValue from a bit field that encodes hour/minute/second plus
  // a separate integer carrying the nanoseconds. The result is invalid if any
  // time part lies outside its valid range.
  static TimeValue FromPacked32SecondsAndNanos(int32_t bit_field_time_seconds,
                                               int32_t nanosecond);

  // Builds a TimeValue from a single bit field that encodes
  // hour/minute/second/nanos. The result is invalid if any time part lies
  // outside its valid range.
  static TimeValue FromPacked64Nanos(int64_t bit_field_time_nanos);

  // ---- Inspection ---------------------------------------------------------

  // Produces a human-readable form such as
  //   "03:04:05.123456789"
  // Trailing groups of 000 in the sub-second part are dropped, and when the
  // sub-second part is zero the trailing '.' is dropped as well.
  std::string DebugString() const;

  // False when a non-normalizing factory function was handed a time part
  // (hour, minute, ...) outside its specified range.
  bool IsValid() const { return valid_; }

  int Hour() const { return static_cast<int>(hour_); }
  int Minute() const { return static_cast<int>(minute_); }
  int Second() const { return static_cast<int>(second_); }

  // Sub-second part at microsecond precision. The stored nanoseconds are
  // truncated, not rounded: 01:02:03.123456789 yields 123456.
  int Microseconds() const { return static_cast<int>(nanosecond_ / 1000); }

  // Sub-second part at full nanosecond precision.
  int Nanoseconds() const { return static_cast<int>(nanosecond_); }

  // ---- Packing ------------------------------------------------------------

  // Encodes hour/minute/second as a bit field.
  int32_t Packed32TimeSeconds() const;

  // Encodes hour/minute/second/micros as a bit field.
  int64_t Packed64TimeMicros() const;

  // Encodes hour/minute/second/nanos as a bit field.
  int64_t Packed64TimeNanos() const;

 private:
  // Internal construction helpers taking wider integer types than the public
  // factories; they are only declared here and implemented out of line.
  static TimeValue FromHMSAndNanosInternal(int64_t hour, int64_t minute,
                                           int64_t second, int64_t nanosecond);
  static TimeValue InternalFromPacked64SecondsAndNanos(
      uint64_t bit_field_time_seconds, int64_t nanosecond);

  bool valid_;          // Reported by IsValid().
  int8_t hour_;         // Reported by Hour().
  int8_t minute_;       // Reported by Minute().
  int8_t second_;       // Reported by Second().
  int32_t nanosecond_;  // Sub-second part, always kept in nanoseconds.

  // This class is copyable.
};
// A struct for DATETIME data type, keeping Y-m-d H:M:S.D*.
// Valid range is [0001-01-01 00:00:00, 10000-01-01 00:00:00)
// Leap second is not allowed, so 60 in the SECOND field is considered invalid.
// Internally this class always keeps sub-seconds with nanosecond precision,
// but it can be constructed and used with microsecond if desired.
//
// Some factory functions perform normalization of date or time parts that are
// out of range. The normalization will coerce each field within its valid
// range, and adjust adjacent fields to accommodate any relevant carry or
// borrow. If the years field is out of range after other fields are normalized
// then the produced DatetimeValue is invalid.
//
// For example:
// 2015-11-09 12:34:60 -> 2015-11-09 12:35:00 // leap second
// 2015-11-09 12:34:70 -> 2015-11-09 12:35:10
// 2015-11-09 -5:34:70 -> 2015-11-08 19:35:10
// 2015-02-29 12:34:56 -> 2015-03-01 12:34:56
// 2015-12-31 23:59:60 -> 2016-01-01 00:00:00 // leap second
//
// 0 or negative value is not valid for day-of-month, and it's normalized to
// the day(s) before the first day of the month.
// 2015-11-00 12:34:56 -> 2015-10-31 12:34:56
// 2015-11--1 12:34:56 -> 2015-10-30 12:34:56
// 2015-11-01 -5:34:56 -> 2015-10-31 19:34:56
//
// 0 or negative value is not valid for month, and it's normalized to
// the month(s) before the first month of the year
// 2015-00-15 12:34:56 -> 2014-12-15 12:34:56
//
// It's possible for the day-of-month to be invalid for the month after
// month normalization, then it will be normalized again to adjust the month
// and day-of-month.
// 2015--1-31 12:34:56 -> 2014-11-31 12:34:56 -> 2014-12-01 12:34:56
//
// It's possible for the normalization result to be out-of-range and become
// invalid.
// 9999-12-31 23:59:60 -> 10000-01-01 00:00:00 // invalid result
//
// Note that when parsing a DatetimeValue value from a string (using
// functions/date_time_util.h), there is a special case for leap seconds.
// Literal times with second :60 will have their subsecond part truncated to
// preserve time ordering as closely as possible.
class DatetimeValue {
 public:
  // Default-constructs an object representing
  // 1970-01-01 00:00:00.000000000.
  DatetimeValue();

  // ---- Factories working at microsecond precision -------------------------

  // Builds a DatetimeValue from separate year, month, day, hour, minute,
  // second and microsecond parts.
  static DatetimeValue FromYMDHMSAndMicros(int32_t year, int32_t month,
                                           int32_t day, int32_t hour,
                                           int32_t minute, int32_t second,
                                           int32_t microsecond);

  // Same inputs as FromYMDHMSAndMicros, but the values are normalized.
  static DatetimeValue FromYMDHMSAndMicrosNormalized(
      int32_t year, int32_t month, int32_t day, int32_t hour, int32_t minute,
      int32_t second, int32_t microsecond);

  // Builds a DatetimeValue from an absl::CivilSecond plus an integer carrying
  // the microseconds.
  static DatetimeValue FromCivilSecondAndMicros(absl::CivilSecond civil_second,
                                                int32_t microsecond);

  // Builds a DatetimeValue from a bit field that encodes
  // year/month/day/hour/minute/second plus an integer carrying the
  // microseconds.
  static DatetimeValue FromPacked64SecondsAndMicros(
      int64_t bit_field_datetime_seconds, int32_t microsecond);

  // Builds a DatetimeValue from a single bit field that encodes
  // year/month/day/hour/minute/second/micros.
  static DatetimeValue FromPacked64Micros(int64_t bit_field_datetime_micros);

  // ---- Factories working at nanosecond precision --------------------------
  //
  // All fields of a nanosecond-precision datetime cannot be encoded in a
  // single 8-byte integer, so there is deliberately no factory that takes one
  // int64_t here.

  // Builds a DatetimeValue from separate year, month, day, hour, minute,
  // second and nanosecond parts.
  static DatetimeValue FromYMDHMSAndNanos(int32_t year, int32_t month,
                                          int32_t day, int32_t hour,
                                          int32_t minute, int32_t second,
                                          int32_t nanosecond);

  // Same inputs as FromYMDHMSAndNanos, but the values are normalized.
  static DatetimeValue FromYMDHMSAndNanosNormalized(int32_t year, int32_t month,
                                                    int32_t day, int32_t hour,
                                                    int32_t minute,
                                                    int32_t second,
                                                    int32_t nanosecond);

  // Builds a DatetimeValue from an absl::CivilSecond plus an integer carrying
  // the nanoseconds.
  static DatetimeValue FromCivilSecondAndNanos(absl::CivilSecond civil_second,
                                               int32_t nanosecond);

  // Builds a DatetimeValue from a bit field that encodes
  // year/month/day/hour/minute/second plus an integer carrying the
  // nanoseconds.
  static DatetimeValue FromPacked64SecondsAndNanos(
      int64_t bit_field_datetime_seconds, int32_t nanosecond);

  // ---- Inspection ---------------------------------------------------------

  // Produces a human-readable form such as
  //   "2006-01-02 03:04:05.123456789"
  // Trailing groups of 000 in the sub-second part are dropped, and when the
  // sub-second part is zero the trailing '.' is dropped as well.
  //
  // An invalid value yields "[INVALID]".
  std::string DebugString() const;

  // False when a non-normalizing factory function was handed a date or time
  // part (year, hour, ...) outside its specified range, or when normalization
  // pushed the year part out of range.
  bool IsValid() const { return valid_; }

  int Year() const { return static_cast<int>(year_); }
  int Month() const { return static_cast<int>(month_); }
  int Day() const { return static_cast<int>(day_); }
  int Hour() const { return static_cast<int>(hour_); }
  int Minute() const { return static_cast<int>(minute_); }
  int Second() const { return static_cast<int>(second_); }

  // Sub-second part at microsecond precision. The stored nanoseconds are
  // truncated, not rounded: for a DatetimeValue whose time of day is
  // 01:02:03.123456789 this returns 123456.
  int Microseconds() const { return static_cast<int>(nanosecond_ / 1000); }

  // Sub-second part at full nanosecond precision.
  int Nanoseconds() const { return static_cast<int>(nanosecond_); }

  // ---- Packing and conversion ---------------------------------------------
  //
  // All fields of a nanosecond-precision datetime cannot be encoded in a
  // single 8-byte integer, so no nanosecond packing function is offered.

  // Encodes year/month/day/hour/minute/second as a bit field.
  int64_t Packed64DatetimeSeconds() const;

  // Encodes year/month/day/hour/minute/second/micros as a bit field.
  int64_t Packed64DatetimeMicros() const;

  // Returns the date and whole-second time fields as an absl::CivilSecond;
  // the sub-second part is not carried over.
  absl::CivilSecond ConvertToCivilSecond() const {
    const absl::CivilSecond whole_seconds(year_, month_, day_, hour_, minute_,
                                          second_);
    return whole_seconds;
  }

 private:
  // Internal construction helpers taking wider integer types than the public
  // factories; they are only declared here and implemented out of line.
  static DatetimeValue FromYMDHMSAndNanosInternal(int64_t year, int64_t month,
                                                  int64_t day, int64_t hour,
                                                  int64_t minute,
                                                  int64_t second,
                                                  int64_t nanosecond);
  static DatetimeValue FromCivilSecondAndNanosInternal(
      absl::CivilSecond civil_second, int64_t nanosecond);
  static DatetimeValue FromPacked64SecondsAndNanosInternal(uint64_t bit_field,
                                                           int64_t nanosecond);

  int16_t year_;        // Reported by Year().
  int8_t month_;        // Reported by Month().
  int8_t day_;          // Reported by Day().
  int8_t hour_;         // Reported by Hour().
  int8_t minute_;       // Reported by Minute().
  int8_t second_;       // Reported by Second().
  bool valid_;          // Reported by IsValid().
  int32_t nanosecond_;  // Sub-second part, always kept in nanoseconds.

  // This class is copyable.
};
// Widths, in bits, of the micros and nanos fields in the packed encodings.
static constexpr int kMicrosShift = 20;
static constexpr int kNanosShift = 30;
// Masks of micros and nanos are always applied to the least significant bits,
// so each mask is the low `shift` bits set.
static constexpr unsigned int kMicrosMask =
    (1u << kMicrosShift) - 1u;  // 0xFFFFF, 20 bits
static constexpr unsigned int kNanosMask =
    (1u << kNanosShift) - 1u;  // 0x3FFFFFFF, 30 bits
} // namespace bigquery_ml_utils
#endif // THIRD_PARTY_PY_BIGQUERY_ML_UTILS_SQL_UTILS_PUBLIC_CIVIL_TIME_H_