in sql_utils/public/functions/date_time_util.cc [4416:4498]
absl::Status DateBucket(int32_t input_date,
bigquery_ml_utils::IntervalValue bucket_width,
int32_t origin_date, int32_t* output_date) {
if (bucket_width.get_micros() > 0 || bucket_width.get_nano_fractions() > 0) {
return MakeEvalError() << "DATE_BUCKET only supports bucket width INTERVAL "
"with MONTH and DAY parts";
}
if (bucket_width.get_months() < 0 || bucket_width.get_days() < 0) {
return MakeEvalError() << "DATE_BUCKET doesn't support negative bucket "
"width INTERVAL";
}
if ((bucket_width.get_months() > 0) == (bucket_width.get_days() > 0)) {
return MakeEvalError() << "DATE_BUCKET requires exactly one non-zero "
"INTERVAL part in bucket width";
}
// Here we branch out into handling MONTHs and DAY interval types.
// MONTH is special since it's a non-fixed interval, therefore it requires
// use of civil time library to perform all arithmetic on dates.
if (bucket_width.get_months() > 0) {
absl::CivilDay input_civil = EpochDaysToCivilDay(input_date);
absl::CivilDay origin_civil = EpochDaysToCivilDay(origin_date);
absl::CivilMonth input_month = absl::CivilMonth(input_civil);
absl::CivilMonth origin_month = absl::CivilMonth(origin_civil);
int64_t rem = (input_month - origin_month) % bucket_width.get_months();
absl::CivilMonth result = input_month - rem;
// We consider input and origin day to be equal when they both are the
// ends of the month, so when that happens we just set input day to the
// origin day, which we only use for comparison purposes.
int input_day = input_civil.day();
int origin_day = origin_civil.day();
if (input_day < origin_day &&
IsLastDayOfTheMonth(static_cast<int>(origin_civil.year()),
origin_civil.month(), origin_civil.day()) &&
IsLastDayOfTheMonth(static_cast<int>(input_civil.year()),
input_civil.month(), input_civil.day())) {
input_day = origin_day;
}
// Negative remainder indicates that input < origin. When input precedes
// origin we need shift the result backwards by one bucket.
//
// We also shift the result to the previous bucket when the input day is
// less than origin day. This compensates for the fact that when we did the
// math on CivilMonth we completely discarded days.
if (rem < 0 || (rem == 0 && input_day < origin_day)) {
result -= bucket_width.get_months();
}
// cast is safe, given method contract.
int year = static_cast<int32_t>(result.year());
int month = result.month();
int day = origin_civil.day();
// AdjustYearMonthDay takes care of handling last day of the month case: if
// the resulting month has fewer days than the origin's month, then the
// result day is the last day of the result month.
AdjustYearMonthDay(&year, &month, &day);
*output_date = CivilDayToEpochDays(absl::CivilDay(year, month, day));
} else {
int64_t bucket_size = bucket_width.get_days();
// Note that it's safe to cast the remainder to int32_t, since both input
// and origin are int32_t, therefore the result will never be larger than
// "input - origin", which is guaranteed to fit into 32 bits.
int32_t rem =
static_cast<int32_t>((input_date - origin_date) % bucket_size);
int32_t result = input_date - rem;
if (rem < 0) {
// Negative remainder indicates that input < origin. When input precedes
// origin we need shift the result backwards by one bucket.
result -= bucket_size;
}
*output_date = result;
}
if (ABSL_PREDICT_FALSE(!IsValidDate(*output_date))) {
std::string input_date_str;
SQL_RETURN_IF_ERROR(ConvertDateToString(input_date, &input_date_str));
return MakeEvalError() << "Bucket for " << input_date_str
<< " is outside of date range";
}
return absl::OkStatus();
}