absl::Status ParseTimeWithFormatElements()

in sql_utils/public/functions/cast_date_time.cc [757:1091]


// Parses <timestamp_string> by matching it, left to right, against the
// already-parsed <format_elements>, and stores the resulting time in
// <*timestamp>.
//
// Fields that no format element sets keep these defaults: year and month are
// taken from <current_timestamp> as seen in <default_timezone>, day of month is
// 1, and hour/minute/second/subsecond are all zero. If a TZH or TZM element is
// present, the result is interpreted in the time zone built from the parsed
// offset; otherwise <default_timezone> is used. <scale> is only forwarded to
// the subsecond parser.
//
// Leading whitespace in <timestamp_string> is skipped before matching, and
// trailing whitespace is skipped after matching.
//
// Returns an error when:
//   - ComputeDigitCountRanges() or MakeTimeZone() fails (status is propagated),
//   - a format element cannot be matched against the input,
//   - non-whitespace input remains after all format elements are processed,
//   - the input is exhausted while format elements (other than trailing empty
//     double-quoted literals) remain,
//   - the parsed year/month/day is not a valid calendar date, or
//   - the final time does not pass IsValidTime().
absl::Status ParseTimeWithFormatElements(
    const std::vector<DateTimeFormatElement>& format_elements,
    absl::string_view timestamp_string, const absl::TimeZone default_timezone,
    const absl::Time current_timestamp, TimestampScale scale,
    absl::Time* timestamp) {
  // The number of format elements from <format_elements> that have been
  // successfully processed so far.
  size_t processed_format_element_count = 0;
  // The number of characters of <timestamp_string> that have been successfully
  // parsed so far.
  size_t timestamp_str_parsed_length = 0;

  // Civil time of "now" in the default time zone; supplies the default year
  // and month below.
  absl::TimeZone::CivilInfo now_info = default_timezone.At(current_timestamp);
  absl::CivilSecond cs_now = now_info.cs;

  // Parsed field values, pre-populated with their defaults.
  int year = static_cast<int>(cs_now.year());
  int month = cs_now.month();
  int mday = 1;
  int hour = 0;
  int min = 0;
  int sec = 0;
  // Hour as parsed by HH/HH12; 0 means "not parsed". It is combined with
  // <afternoon> into <hour> after the parsing loop.
  int hour_in_12_hour_clock = 0;
  // True when the meridian indicator matched "P.M.".
  bool afternoon = false;
  absl::Duration subseconds = absl::ZeroDuration();

  // Indicates whether TZH or TZM appears in the format string.
  bool timezone_specified_in_format = false;

  // Components of the time zone offset parsed from TZH/TZM.
  bool positive_timezone_offset = true;
  int timezone_offset_hour = 0;
  int timezone_offset_min = 0;

  bool error_in_parsing = false;
  // One digit-count range per format element, indexed in parallel with
  // <format_elements>; used as min/max widths for the numeric elements.
  SQL_ASSIGN_OR_RETURN(const std::vector<DigitCountRange> digit_count_ranges,
                       ComputeDigitCountRanges(format_elements));

  // Skips leading whitespaces.
  timestamp_str_parsed_length +=
      TrimLeadingUnicodeWhiteSpaces(timestamp_string);
  // Consumes one format element per iteration. The loop stops at the first
  // parse error, when the input string is exhausted, or when every format
  // element has been processed.
  while (!error_in_parsing &&
         timestamp_str_parsed_length < timestamp_string.size() &&
         processed_format_element_count < format_elements.size()) {
    // Number of input characters consumed by the current element. It stays
    // at npos if the element fails to match or its type is not handled.
    size_t parsed_length = absl::string_view::npos;
    absl::string_view timestamp_str_to_parse =
        timestamp_string.substr(timestamp_str_parsed_length);
    const DateTimeFormatElement& format_element =
        format_elements[processed_format_element_count];
    DigitCountRange digit_count_range =
        digit_count_ranges[processed_format_element_count];

    switch (format_element.type) {
      // Literal elements must match the input exactly.
      case FormatElementType::kSimpleLiteral:
      case FormatElementType::kDoubleQuotedLiteral:
        parsed_length = ParseStringByExactMatch(timestamp_str_to_parse,
                                                format_element.literal_value);
        break;
      case FormatElementType::kWhitespace:
        // Format element of "kWhitespace" type matches 1 or more Unicode
        // whitespaces.
        parsed_length = TrimLeadingUnicodeWhiteSpaces(timestamp_str_to_parse);
        if (parsed_length == 0) {
          // Matches 0 Unicode whitespace, so we set <error_in_parsing> to true
          // to indicate an error.
          error_in_parsing = true;
        }
        break;
      // Parses for entire year value. For example, for input string "1234", the
      // output <year> is 1234. The accepted value range passed to ParseInt is
      // 0 to 10000.
      case FormatElementType::kYYYY:
      case FormatElementType::kRRRR:
        parsed_length = ParseInt(timestamp_str_to_parse,
                                 /*min_width=*/digit_count_range.min,
                                 /*max_width=*/digit_count_range.max,
                                 /*min=*/0, /*max=*/10000, &year);
        break;
      // Parses for the last 3/2/1 digits of the year value depending on the
      // length of the element. For example, assuming <current_year> is 1970:
      //   - for input "123", the output <year> with "YYY" is 1123,
      //   - for input "12", the output <year> with "YY" is 1912,
      //   - for input "1", the output <year> with "Y" is 1971.
      case FormatElementType::kYYY:
      case FormatElementType::kYY:
      case FormatElementType::kY: {
        int element_length = format_element.len_in_format_str;
        // Guards the lookup into <powers_of_ten> below.
        SQL_RET_CHECK(element_length >= 0 &&
                      element_length < ABSL_ARRAYSIZE(powers_of_ten));
        int element_length_power_of_ten =
            static_cast<int>(powers_of_ten[element_length]);
        int parsed_year_part;
        parsed_length = ParseInt(
            timestamp_str_to_parse, /*min_width=*/digit_count_range.min,
            /*max_width=*/digit_count_range.max, /*min=*/0,
            /*max=*/element_length_power_of_ten - 1, &parsed_year_part);
        if (parsed_length != absl::string_view::npos) {
          // Replaces the low-order digits of <year> with the parsed digits and
          // keeps the high-order digits unchanged.
          year = year - year % element_length_power_of_ten + parsed_year_part;
        }
        break;
      }
      // Parses for the last 2 digits of the year value. The first 2 digits
      // of the output can be different from that of current year.
      // For example, if the current year is 2002:
      //   - for input "12", the output <year>  is 2012,
      //   - for input "51", the output <year>  is 1951.
      // If the current year is 2299,
      //   - for input "12", the output <year> is 2312,
      //   - for input "51", the output <year> is 2251.
      case FormatElementType::kRR: {
        parsed_length = ParseWithFormatElementOfTypeRR(
            timestamp_str_to_parse,
            /*current_year=*/year, digit_count_range, &year);
        break;
      }
      // Parses for entire year value with a string in pattern "X,XXX" or
      // "XX,XXX". For example,
      //   - for input "1,234", the output <year> is 1234,
      //   - for input "10,000", the output <year> is 10000.
      case FormatElementType::kYCommaYYY:
        parsed_length = ParseWithFormatElementOfTypeYCommaYYY(
            timestamp_str_to_parse, &year);
        break;
      // Parses for month value 1-12. For example, for input "11", the output
      // <month> is 11.
      case FormatElementType::kMM:
        parsed_length = ParseInt(timestamp_str_to_parse,
                                 /*min_width=*/digit_count_range.min,
                                 /*max_width=*/digit_count_range.max, /*min=*/1,
                                 /*max=*/12, &month);
        break;
      // Parses abbreviated month names with "MON" element and full month
      // names with "MONTH" element. The parsing is case-insensitive.
      // For example,
      //   - for input "Jan"/"jAN", the output <month> with "MON" is 1,
      //   - for input "JUNE"/"juNe", the output <month> with "MONTH" is 6.
      case FormatElementType::kMON:
      case FormatElementType::kMONTH:
        parsed_length = ParseMonthNames(
            timestamp_str_to_parse,
            /*abbreviated=*/format_element.type == FormatElementType::kMON,
            &month);
        break;
      // Parses for day of month value. For example, for input "20", the
      // output <mday> is 20. Only the range 1-31 is checked here; whether the
      // day exists in the given month is validated after the loop.
      case FormatElementType::kDD:
        parsed_length = ParseInt(timestamp_str_to_parse,
                                 /*min_width=*/digit_count_range.min,
                                 /*max_width=*/digit_count_range.max, /*min=*/1,
                                 /*max=*/31, &mday);
        break;
      // kHH/kHH12 and kAMWithDots/kPMWithDots are used to parse hour value
      // of a 12-hour clock. The matching for meridian indicator part is
      // case-insensitive. For example,
      //   - if input for kHH/kHH12 is "11" and input for
      //     kAMWithDots/kPMWithDots is "A.M."/"A.m.", the output <hour> is 11.
      //   - if input for kHH/kHH12 is "12" and input for
      //     kAMWithDots/kPMWithDots is "a.M."/"a.m.", the output <hour> is 0.
      // The two parts are only recorded here; they are combined into <hour>
      // after the loop.
      case FormatElementType::kHH:
      case FormatElementType::kHH12:
        parsed_length = ParseInt(timestamp_str_to_parse,
                                 /*min_width=*/digit_count_range.min,
                                 /*max_width=*/digit_count_range.max, /*min=*/1,
                                 /*max=*/12, &hour_in_12_hour_clock);
        break;
      // Either meridian element accepts both "A.M." and "P.M." in the input.
      case FormatElementType::kAMWithDots:
      case FormatElementType::kPMWithDots: {
        ParseWithCandidatesResult parse_result = ParseStringWithCandidates(
            timestamp_str_to_parse, {"A.M.", "P.M."}, /*ignore_case=*/true);
        parsed_length = parse_result.parsed_length;
        if (parsed_length != absl::string_view::npos) {
          // Candidate index 1 is "P.M." in the list above.
          afternoon = (parse_result.matched_candidate_index == 1);
        }
        break;
      }
      // Parses for hour value in a 24-hour clock. For example, for input "12",
      // the output <hour> is 12.
      case FormatElementType::kHH24:
        parsed_length = ParseInt(timestamp_str_to_parse,
                                 /*min_width=*/digit_count_range.min,
                                 /*max_width=*/digit_count_range.max, /*min=*/0,
                                 /*max=*/23, &hour);
        break;
      // Parses for minute value 0-59. For example, for input "20", the output
      // <min> is 20.
      case FormatElementType::kMI:
        parsed_length = ParseInt(timestamp_str_to_parse,
                                 /*min_width=*/digit_count_range.min,
                                 /*max_width=*/digit_count_range.max, /*min=*/0,
                                 /*max=*/59, &min);
        break;
      // Parses for second value 0-59. For example, for input "30", the output
      // <sec> is 30.
      case FormatElementType::kSS:
        parsed_length = ParseInt(timestamp_str_to_parse,
                                 /*min_width=*/digit_count_range.min,
                                 /*max_width=*/digit_count_range.max, /*min=*/0,
                                 /*max=*/59, &sec);
        break;
      // Parses for number of seconds past midnight, in the range
      // 0 ~ kNaiveNumSecondsPerDay-1 (presumably 24*60*60-1; the constant is
      // defined elsewhere). For example, for input "3662", the output <hour>,
      // <min> and <sec> are 1, 1, 2 respectively (since 3662 seconds past
      // midnight corresponds to time "01:01:02").
      case FormatElementType::kSSSSS: {
        int sec_of_day;
        parsed_length =
            ParseInt(timestamp_str_to_parse,
                     /*min_width=*/digit_count_range.min,
                     /*max_width=*/digit_count_range.max, /*min=*/0,
                     /*max=*/kNaiveNumSecondsPerDay - 1, &sec_of_day);
        if (parsed_length != absl::string_view::npos) {
          // Splits the second-of-day value into hour, minute and second.
          hour = sec_of_day / kNaiveNumSecondsPerHour;
          min = (sec_of_day % kNaiveNumSecondsPerHour) /
                kNaiveNumSecondsPerMinute;
          sec = sec_of_day % kNaiveNumSecondsPerMinute;
        }
        break;
      }
      // Parses for subsecond value. Additional digits beyond the input <scale>
      // are truncated (6 for micros, 9 for nanos). For example,
      //   - for input "123", the output subsecond with "FF3" is 123.
      //   - for input "1234567", the output subsecond with "FF7" is 123456
      //     under micros scale, or 1234567 under nano scale.
      case FormatElementType::kFFN: {
        // Sanity check on the format element itself, not on the input.
        SQL_RET_CHECK(format_element.subsecond_digit_count > 0 &&
                      format_element.subsecond_digit_count <= 9);
        parsed_length = ParseSubSeconds(timestamp_str_to_parse,
                                        /*min_width=*/digit_count_range.min,
                                        /*max_width=*/digit_count_range.max,
                                        scale, &subseconds);
        break;
      }
      // Parses for the sign and hour value of the time zone offset. For
      // example,
      //   - for input "+10"/" 10", the sign and hour value of output time zone
      //     are "+10".
      //   - for input "-09", the sign and hour value of output time zone are
      //     "-09".
      case FormatElementType::kTZH: {
        timezone_specified_in_format = true;
        parsed_length = ParseWithFormatElementOfTypeTZH(
            timestamp_str_to_parse, digit_count_range,
            &positive_timezone_offset, &timezone_offset_hour);
        break;
      }
      // Parses for the minute value of the time zone offset. For example, for
      // input "13", the minute value of output time zone is 13.
      case FormatElementType::kTZM:
        timezone_specified_in_format = true;
        parsed_length = ParseInt(timestamp_str_to_parse,
                                 /*min_width=*/digit_count_range.min,
                                 /*max_width=*/digit_count_range.max, /*min=*/0,
                                 /*max=*/59, &timezone_offset_min);
        break;
      // Any element type not handled above leaves <parsed_length> at npos and
      // is therefore reported as a parse error below.
      default:
        break;
    }

    if (parsed_length == absl::string_view::npos) {
      // If <parsed_length> is absl::string_view::npos, we set
      // <error_in_parsing> to be true to indicate an error.
      error_in_parsing = true;
    }

    if (!error_in_parsing) {
      // We successfully processed a format element, so update the number of
      // elements and characters processed.
      processed_format_element_count++;
      timestamp_str_parsed_length += parsed_length;
    }
  }

  // <error_in_parsing> is only set inside the loop, where
  // <processed_format_element_count> is a valid index that is not advanced on
  // failure, so it identifies the element that failed to match.
  if (error_in_parsing) {
    return MakeEvalError()
           << "Failed to parse input timestamp string at "
           << timestamp_str_parsed_length << " with format element "
           << format_elements[processed_format_element_count].ToString();
  }

  // Skips any remaining whitespace.
  timestamp_str_parsed_length += TrimLeadingUnicodeWhiteSpaces(
      timestamp_string.substr(timestamp_str_parsed_length));

  // Skips trailing empty format elements {kDoubleQuotedLiteral, ""} which match
  // "" in input string.
  while (
      processed_format_element_count < format_elements.size() &&
      format_elements[processed_format_element_count].type ==
          FormatElementType::kDoubleQuotedLiteral &&
      format_elements[processed_format_element_count].literal_value.empty()) {
    processed_format_element_count++;
  }

  // All format elements were consumed but input remains.
  if (timestamp_str_parsed_length < timestamp_string.size()) {
    return MakeEvalError() << "Illegal non-space trailing data '"
                           << timestamp_string.substr(
                                  timestamp_str_parsed_length)
                           << "' in timestamp string";
  }

  // The input was consumed but format elements remain unmatched.
  if (processed_format_element_count < format_elements.size()) {
    return MakeEvalError()
           << "Entire timestamp string has been parsed before dealing with"
           << " format element "
           << format_elements[processed_format_element_count].ToString();
  }

  // Calculates the <hour> in 24-hour clock if hour value of a 12-hour clock is
  // parsed. <hour_in_12_hour_clock> starts at 0 and HH/HH12 is parsed with
  // /*min=*/1, so a non-zero value indicates that an HH/HH12 element was
  // parsed. "% 12" maps 12 to 0 so that 12 A.M. becomes hour 0 and 12 P.M.
  // becomes hour 12. When applied, this overwrites any value already in
  // <hour>.
  if (hour_in_12_hour_clock != 0) {
    hour = hour_in_12_hour_clock % 12 + (afternoon ? 12 : 0);
  }

  const absl::CivilSecond cs(year, month, mday, hour, min, sec);
  // absl::CivilSecond will 'normalize' its arguments, so we simply compare
  // the input against the result to check whether a YMD is valid.
  if (cs.year() != year || cs.month() != month || cs.day() != mday) {
    return MakeEvalError()
           << "Invalid result from year, month, day values after parsing";
  }

  absl::TimeZone timezone;
  if (timezone_specified_in_format) {
    // Builds the offset string as sign, two-digit hour and two-digit minute
    // with no separator, and lets MakeTimeZone() turn it into a time zone.
    SQL_RETURN_IF_ERROR(MakeTimeZone(
        absl::StrFormat("%c%02d%02d", positive_timezone_offset ? '+' : '-',
                        timezone_offset_hour, timezone_offset_min),
        &timezone));
  } else {
    timezone = default_timezone;
  }
  // NOTE(review): `.pre` is presumably absl's pre-transition interpretation
  // for civil times that are skipped or repeated in <timezone> - confirm
  // against the absl::TimeZone::TimeInfo documentation.
  *timestamp = timezone.At(cs).pre + subseconds;
  if (!IsValidTime(*timestamp)) {
    return MakeEvalError() << "The parsing result is out of valid time range";
  }
  return absl::OkStatus();
}