absl::Status DateBucket()

in sql_utils/public/functions/date_time_util.cc [4416:4498]


absl::Status DateBucket(int32_t input_date,
                        bigquery_ml_utils::IntervalValue bucket_width,
                        int32_t origin_date, int32_t* output_date) {
  if (bucket_width.get_micros() > 0 || bucket_width.get_nano_fractions() > 0) {
    return MakeEvalError() << "DATE_BUCKET only supports bucket width INTERVAL "
                              "with MONTH and DAY parts";
  }
  if (bucket_width.get_months() < 0 || bucket_width.get_days() < 0) {
    return MakeEvalError() << "DATE_BUCKET doesn't support negative bucket "
                              "width INTERVAL";
  }
  if ((bucket_width.get_months() > 0) == (bucket_width.get_days() > 0)) {
    return MakeEvalError() << "DATE_BUCKET requires exactly one non-zero "
                              "INTERVAL part in bucket width";
  }

  // Here we branch out into handling MONTHs and DAY interval types.
  // MONTH is special since it's a non-fixed interval, therefore it requires
  // use of civil time library to perform all arithmetic on dates.
  if (bucket_width.get_months() > 0) {
    absl::CivilDay input_civil = EpochDaysToCivilDay(input_date);
    absl::CivilDay origin_civil = EpochDaysToCivilDay(origin_date);
    absl::CivilMonth input_month = absl::CivilMonth(input_civil);
    absl::CivilMonth origin_month = absl::CivilMonth(origin_civil);
    int64_t rem = (input_month - origin_month) % bucket_width.get_months();
    absl::CivilMonth result = input_month - rem;

    // We consider input and origin day to be equal when they both are the
    // ends of the month, so when that happens we just set input day to the
    // origin day, which we only use for comparison purposes.
    int input_day = input_civil.day();
    int origin_day = origin_civil.day();
    if (input_day < origin_day &&
        IsLastDayOfTheMonth(static_cast<int>(origin_civil.year()),
                            origin_civil.month(), origin_civil.day()) &&
        IsLastDayOfTheMonth(static_cast<int>(input_civil.year()),
                            input_civil.month(), input_civil.day())) {
      input_day = origin_day;
    }

    // Negative remainder indicates that input < origin. When input precedes
    // origin we need shift the result backwards by one bucket.
    //
    // We also shift the result to the previous bucket when the input day is
    // less than origin day. This compensates for the fact that when we did the
    // math on CivilMonth we completely discarded days.
    if (rem < 0 || (rem == 0 && input_day < origin_day)) {
      result -= bucket_width.get_months();
    }

    // cast is safe, given method contract.
    int year = static_cast<int32_t>(result.year());
    int month = result.month();
    int day = origin_civil.day();
    // AdjustYearMonthDay takes care of handling last day of the month case: if
    // the resulting month has fewer days than the origin's month, then the
    // result day is the last day of the result month.
    AdjustYearMonthDay(&year, &month, &day);
    *output_date = CivilDayToEpochDays(absl::CivilDay(year, month, day));
  } else {
    int64_t bucket_size = bucket_width.get_days();
    // Note that it's safe to cast the remainder to int32_t, since both input
    // and origin are int32_t, therefore the result will never be larger than
    // "input - origin", which is guaranteed to fit into 32 bits.
    int32_t rem =
        static_cast<int32_t>((input_date - origin_date) % bucket_size);
    int32_t result = input_date - rem;
    if (rem < 0) {
      // Negative remainder indicates that input < origin. When input precedes
      // origin we need shift the result backwards by one bucket.
      result -= bucket_size;
    }
    *output_date = result;
  }

  if (ABSL_PREDICT_FALSE(!IsValidDate(*output_date))) {
    std::string input_date_str;
    SQL_RETURN_IF_ERROR(ConvertDateToString(input_date, &input_date_str));
    return MakeEvalError() << "Bucket for " << input_date_str
                           << " is outside of date range";
  }
  return absl::OkStatus();
}