in sql_utils/public/functions/cast_date_time.cc [757:1091]
// Parses <timestamp_string> according to the already-tokenized
// <format_elements> and stores the resulting instant in <timestamp>.
//
// Defaults for fields that no format element sets:
// - year and month: taken from <current_timestamp> as seen in
//   <default_timezone>;
// - day of month: 1;
// - hour, minute, second, subseconds: 0;
// - time zone: <default_timezone>, unless the format contains TZH or TZM, in
//   which case the zone is built from the parsed offset (sign defaults to '+',
//   and the hour/minute part that is not present defaults to 0).
//
// Leading whitespace of <timestamp_string> is skipped before matching and any
// trailing whitespace is skipped after matching. <scale> is forwarded to
// ParseSubSeconds for the FFN element.
//
// Returns a non-OK status when:
// - ComputeDigitCountRanges() fails for <format_elements>;
// - an internal SQL_RET_CHECK on a format element's fields fails;
// - a format element does not match the input at the current position;
// - non-whitespace input remains after all format elements were processed;
// - the input is exhausted while format elements (other than trailing empty
//   double-quoted literals) are still pending;
// - the parsed year/month/day do not form a valid civil date;
// - MakeTimeZone() rejects the parsed offset;
// - IsValidTime() rejects the final result.
//
// NOTE(review): <timestamp> is assigned before the IsValidTime() check, so on
// that last error path it has already been overwritten.
absl::Status ParseTimeWithFormatElements(
const std::vector<DateTimeFormatElement>& format_elements,
absl::string_view timestamp_string, const absl::TimeZone default_timezone,
const absl::Time current_timestamp, TimestampScale scale,
absl::Time* timestamp) {
// The number of format elements from <format_elements> that have been
// successfully processed so far.
size_t processed_format_element_count = 0;
// The number of characters of <timestamp_string> that have been successfully
// parsed so far.
size_t timestamp_str_parsed_length = 0;
// Civil "now" in <default_timezone>; only its year and month are used, as
// defaults for the parsed result.
absl::TimeZone::CivilInfo now_info = default_timezone.At(current_timestamp);
absl::CivilSecond cs_now = now_info.cs;
int year = static_cast<int>(cs_now.year());
int month = cs_now.month();
int mday = 1;
int hour = 0;
int min = 0;
int sec = 0;
// Stays 0 unless an HH/HH12 element is parsed (those are parsed with a
// minimum value of 1), so 0 doubles as the "not parsed" marker below.
int hour_in_12_hour_clock = 0;
bool afternoon = false;
absl::Duration subseconds = absl::ZeroDuration();
// Indicates whether TZH or TZM appears in the format string.
bool timezone_specified_in_format = false;
bool positive_timezone_offset = true;
int timezone_offset_hour = 0;
int timezone_offset_min = 0;
bool error_in_parsing = false;
// Per-element digit-count bounds, indexed in parallel with <format_elements>.
SQL_ASSIGN_OR_RETURN(const std::vector<DigitCountRange> digit_count_ranges,
ComputeDigitCountRanges(format_elements));
// Skips leading whitespaces.
timestamp_str_parsed_length +=
TrimLeadingUnicodeWhiteSpaces(timestamp_string);
// Consumes one format element per iteration until an element fails to match,
// the input is exhausted, or all format elements have been processed.
while (!error_in_parsing &&
timestamp_str_parsed_length < timestamp_string.size() &&
processed_format_element_count < format_elements.size()) {
// npos means "no match"; every case below must overwrite it on success.
size_t parsed_length = absl::string_view::npos;
absl::string_view timestamp_str_to_parse =
timestamp_string.substr(timestamp_str_parsed_length);
const DateTimeFormatElement& format_element =
format_elements[processed_format_element_count];
DigitCountRange digit_count_range =
digit_count_ranges[processed_format_element_count];
switch (format_element.type) {
// Literals must match the input exactly.
case FormatElementType::kSimpleLiteral:
case FormatElementType::kDoubleQuotedLiteral:
parsed_length = ParseStringByExactMatch(timestamp_str_to_parse,
format_element.literal_value);
break;
case FormatElementType::kWhitespace:
// Format element of "kWhitespace" type matches 1 or more Unicode
// whitespaces.
parsed_length = TrimLeadingUnicodeWhiteSpaces(timestamp_str_to_parse);
if (parsed_length == 0) {
// Matches 0 Unicode whitespace, so we set <error_in_parsing> to true
// to indicate an error.
error_in_parsing = true;
}
break;
// Parses for entire year value in the range 0-10000. For example, for
// input string "1234", the output <year> is 1234.
case FormatElementType::kYYYY:
case FormatElementType::kRRRR:
parsed_length = ParseInt(timestamp_str_to_parse,
/*min_width=*/digit_count_range.min,
/*max_width=*/digit_count_range.max,
/*min=*/0, /*max=*/10000, &year);
break;
// Parses for the last 3/2/1 digits of the year value depending on the
// length of the element; the leading digits are kept from the value
// <year> holds at this point (the current year unless an earlier element
// changed it). For example, assuming <year> is 1970:
// - for input "123", the output <year> with "YYY" is 1123,
// - for input "12", the output <year> with "YY" is 1912,
// - for input "1", the output <year> with "Y" is 1971.
case FormatElementType::kYYY:
case FormatElementType::kYY:
case FormatElementType::kY: {
int element_length = format_element.len_in_format_str;
// Guards the <powers_of_ten> lookup below.
SQL_RET_CHECK(element_length >= 0 &&
element_length < ABSL_ARRAYSIZE(powers_of_ten));
int element_length_power_of_ten =
static_cast<int>(powers_of_ten[element_length]);
int parsed_year_part;
parsed_length = ParseInt(
timestamp_str_to_parse, /*min_width=*/digit_count_range.min,
/*max_width=*/digit_count_range.max, /*min=*/0,
/*max=*/element_length_power_of_ten - 1, &parsed_year_part);
if (parsed_length != absl::string_view::npos) {
// Replaces the trailing <element_length> digits of <year>.
year = year - year % element_length_power_of_ten + parsed_year_part;
}
break;
}
// Parses for the last 2 digits of the year value. The first 2 digits
// of the output can be different from that of current year; the rounding
// rule is implemented in ParseWithFormatElementOfTypeRR.
// For example, if the current year is 2002:
// - for input "12", the output <year> is 2012,
// - for input "51", the output <year> is 1951.
// If the current year is 2299,
// - for input "12", the output <year> is 2312,
// - for input "51", the output <year> is 2251.
case FormatElementType::kRR: {
parsed_length = ParseWithFormatElementOfTypeRR(
timestamp_str_to_parse,
/*current_year=*/year, digit_count_range, &year);
break;
}
// Parses for entire year value with a string in pattern "X,XXX" or
// "XX,XXX". For example,
// - for input "1,234", the output <year> is 1234,
// - for input "10,000", the output <year> is 10000.
case FormatElementType::kYCommaYYY:
parsed_length = ParseWithFormatElementOfTypeYCommaYYY(
timestamp_str_to_parse, &year);
break;
// Parses for month value 1-12. For example, for input "11", the output
// <month> is 11.
case FormatElementType::kMM:
parsed_length = ParseInt(timestamp_str_to_parse,
/*min_width=*/digit_count_range.min,
/*max_width=*/digit_count_range.max, /*min=*/1,
/*max=*/12, &month);
break;
// Parses abbreviated month names with "MON" element and full month
// names with "MONTH" element. The parsing is case-insensitive.
// For example,
// - for input "Jan"/"jAN", the output <month> with "MON" is 1,
// - for input "JUNE"/"juNe", the output <month> with "MONTH" is 6.
case FormatElementType::kMON:
case FormatElementType::kMONTH:
parsed_length = ParseMonthNames(
timestamp_str_to_parse,
/*abbreviated=*/format_element.type == FormatElementType::kMON,
&month);
break;
// Parses for day of month value 1-31. For example, for input "20", the
// output <mday> is 20. Whether the day exists in the parsed month is
// checked after the loop.
case FormatElementType::kDD:
parsed_length = ParseInt(timestamp_str_to_parse,
/*min_width=*/digit_count_range.min,
/*max_width=*/digit_count_range.max, /*min=*/1,
/*max=*/31, &mday);
break;
// kHH/kHH12 and kAMWithDots/kPMWithDots are used to parse hour value
// of a 12-hour clock. The matching for meridian indicator part is
// case-insensitive. The two parts are combined into <hour> after the
// loop. For example,
// - if input for kHH/kHH12 is "11" and input for
// kAMWithDots/kPMWithDots is "A.M."/"A.m.", the output <hour> is 11.
// - if input for kHH/kHH12 is "12" and input for
// kAMWithDots/kPMWithDots is "a.M."/"a.m.", the output <hour> is 0.
case FormatElementType::kHH:
case FormatElementType::kHH12:
parsed_length = ParseInt(timestamp_str_to_parse,
/*min_width=*/digit_count_range.min,
/*max_width=*/digit_count_range.max, /*min=*/1,
/*max=*/12, &hour_in_12_hour_clock);
break;
// Either meridian element accepts both "A.M." and "P.M." in the input;
// the input, not the element type, decides <afternoon>.
case FormatElementType::kAMWithDots:
case FormatElementType::kPMWithDots: {
ParseWithCandidatesResult parse_result = ParseStringWithCandidates(
timestamp_str_to_parse, {"A.M.", "P.M."}, /*ignore_case=*/true);
parsed_length = parse_result.parsed_length;
if (parsed_length != absl::string_view::npos) {
// Candidate index 1 is "P.M.".
afternoon = (parse_result.matched_candidate_index == 1);
}
break;
}
// Parses for hour value in a 24-hour clock. For example, for input "12",
// the output <hour> is 12.
case FormatElementType::kHH24:
parsed_length = ParseInt(timestamp_str_to_parse,
/*min_width=*/digit_count_range.min,
/*max_width=*/digit_count_range.max, /*min=*/0,
/*max=*/23, &hour);
break;
// Parses for minute value 0-59. For example, for input "20", the output
// <min> is 20.
case FormatElementType::kMI:
parsed_length = ParseInt(timestamp_str_to_parse,
/*min_width=*/digit_count_range.min,
/*max_width=*/digit_count_range.max, /*min=*/0,
/*max=*/59, &min);
break;
// Parses for second value 0-59. For example, for input "30", the output
// <sec> is 30.
case FormatElementType::kSS:
parsed_length = ParseInt(timestamp_str_to_parse,
/*min_width=*/digit_count_range.min,
/*max_width=*/digit_count_range.max, /*min=*/0,
/*max=*/59, &sec);
break;
// Parses for number of seconds past midnight, in the range
// 0 ~ kNaiveNumSecondsPerDay-1 (presumably 24*60*60-1; the constant is
// defined outside this function). For example, for input "3662", the
// output <hour>, <min> and <sec> are 1, 1, 2 respectively (since 3662
// seconds past midnight corresponds to time "01:01:02").
case FormatElementType::kSSSSS: {
int sec_of_day;
parsed_length =
ParseInt(timestamp_str_to_parse,
/*min_width=*/digit_count_range.min,
/*max_width=*/digit_count_range.max, /*min=*/0,
/*max=*/kNaiveNumSecondsPerDay - 1, &sec_of_day);
if (parsed_length != absl::string_view::npos) {
hour = sec_of_day / kNaiveNumSecondsPerHour;
min = (sec_of_day % kNaiveNumSecondsPerHour) /
kNaiveNumSecondsPerMinute;
sec = sec_of_day % kNaiveNumSecondsPerMinute;
}
break;
}
// Parses for subsecond value. Additional digits beyond the input <scale>
// are truncated (6 for micros, 9 for nanos). For example,
// - for input "123", the output subsecond with "FF3" is 123.
// - for input "1234567", the output subsecond with "FF7" is 123456
// under micros scale, or 1234567 under nano scale.
case FormatElementType::kFFN: {
// The element must request between 1 and 9 subsecond digits.
SQL_RET_CHECK(format_element.subsecond_digit_count > 0 &&
format_element.subsecond_digit_count <= 9);
parsed_length = ParseSubSeconds(timestamp_str_to_parse,
/*min_width=*/digit_count_range.min,
/*max_width=*/digit_count_range.max,
scale, &subseconds);
break;
}
// Parses for the sign and hour value of the time zone offset. For
// example,
// - for input "+10"/" 10", the sign and hour value of output time zone
// are "+10".
// - for input "-09", the sign and hour value of output time zone are
// "-09".
case FormatElementType::kTZH: {
// Set even if the parse below fails; the error path does not read it.
timezone_specified_in_format = true;
parsed_length = ParseWithFormatElementOfTypeTZH(
timestamp_str_to_parse, digit_count_range,
&positive_timezone_offset, &timezone_offset_hour);
break;
}
// Parses for the minute value of the time zone offset. For example, for
// input "13", the minute value of output time zone is 13.
case FormatElementType::kTZM:
timezone_specified_in_format = true;
parsed_length = ParseInt(timestamp_str_to_parse,
/*min_width=*/digit_count_range.min,
/*max_width=*/digit_count_range.max, /*min=*/0,
/*max=*/59, &timezone_offset_min);
break;
// Any element type not handled above leaves <parsed_length> at npos and
// is therefore reported as a parse failure below.
default:
break;
}
if (parsed_length == absl::string_view::npos) {
// If <parsed_length> is absl::string_view::npos, we set
// <error_in_parsing> to be true to indicate an error.
error_in_parsing = true;
}
if (!error_in_parsing) {
// We successfully processed a format element, so update the number of
// elements and characters processed.
processed_format_element_count++;
timestamp_str_parsed_length += parsed_length;
}
}
// On failure neither counter was advanced, so they identify the input
// position and the format element that failed to match.
if (error_in_parsing) {
return MakeEvalError()
<< "Failed to parse input timestamp string at "
<< timestamp_str_parsed_length << " with format element "
<< format_elements[processed_format_element_count].ToString();
}
// Skips any remaining whitespace.
timestamp_str_parsed_length += TrimLeadingUnicodeWhiteSpaces(
timestamp_string.substr(timestamp_str_parsed_length));
// Skips trailing empty format elements {kDoubleQuotedLiteral, ""} which match
// "" in input string.
while (
processed_format_element_count < format_elements.size() &&
format_elements[processed_format_element_count].type ==
FormatElementType::kDoubleQuotedLiteral &&
format_elements[processed_format_element_count].literal_value.empty()) {
processed_format_element_count++;
}
// Leftover input is reported before leftover format elements.
if (timestamp_str_parsed_length < timestamp_string.size()) {
return MakeEvalError() << "Illegal non-space trailing data '"
<< timestamp_string.substr(
timestamp_str_parsed_length)
<< "' in timestamp string";
}
if (processed_format_element_count < format_elements.size()) {
return MakeEvalError()
<< "Entire timestamp string has been parsed before dealing with"
<< " format element "
<< format_elements[processed_format_element_count].ToString();
}
// Calculates the <hour> in 24-hour clock if hour value of a 12-hour clock is
// parsed. This overwrites any <hour> set by an HH24 or SSSSS element, and a
// meridian indicator without HH/HH12 has no effect on <hour>.
if (hour_in_12_hour_clock != 0) {
// "% 12" maps 12 to 0, so 12 A.M. -> 0 and 12 P.M. -> 12.
hour = hour_in_12_hour_clock % 12 + (afternoon ? 12 : 0);
}
const absl::CivilSecond cs(year, month, mday, hour, min, sec);
// absl::CivilSecond will 'normalize' its arguments, so we simply compare
// the input against the result to check whether a YMD is valid.
if (cs.year() != year || cs.month() != month || cs.day() != mday) {
return MakeEvalError()
<< "Invalid result from year, month, day values after parsing";
}
absl::TimeZone timezone;
if (timezone_specified_in_format) {
// Builds an offset string of the form "+HHMM"/"-HHMM" from the parsed
// parts and lets MakeTimeZone() validate and convert it.
SQL_RETURN_IF_ERROR(MakeTimeZone(
absl::StrFormat("%c%02d%02d", positive_timezone_offset ? '+' : '-',
timezone_offset_hour, timezone_offset_min),
&timezone));
} else {
timezone = default_timezone;
}
// Uses the <pre> member of the absl::TimeZone::TimeInfo returned for <cs>.
// The output is written here, before the range check below.
*timestamp = timezone.At(cs).pre + subseconds;
if (!IsValidTime(*timestamp)) {
return MakeEvalError() << "The parsing result is out of valid time range";
}
return absl::OkStatus();
}