in be/src/vec/runtime/vdatetime_value.cpp [1983:2222]
// Parses a date / datetime string into this value.
//
// Two layouts are recognised:
//   * delimited: numeric fields separated by characters accepted by check_date_punct()
//     (whitespace is additionally allowed after the day and after the fraction);
//   * "interval" format: one undelimited run of digits such as YYYYMMDD or YYYYMMDDHHMMSS,
//     optionally with a 'T' between the date and the time part.
// Either layout may carry a fractional-second part after '.' and a trailing time zone.
//
// @param date_str        start of the input; it is read up to date_str + len only.
// @param len             number of bytes available at date_str.
// @param scale           for datetime types: number of fractional digits to keep. One extra
//                        digit is read and used for rounding; rounding is applied only when
//                        scale >= 0.
// @param local_time_zone zone the result is converted to when the input carries a time zone.
//                        If it is nullptr and the input has a time zone, parsing fails.
// @param convert_zero    when true, an all-zero year/month/day that is_invalid() rejects is
//                        replaced by month 1 / day 1 instead of failing.
// @return true if the string was parsed and stored in this value, false otherwise.
bool DateV2Value<T>::from_date_str_base(const char* date_str, int len, int scale,
                                        const cctz::time_zone* local_time_zone, bool convert_zero) {
    const char* ptr = date_str;
    const char* end = date_str + len;
    // Bit i set => whitespace may follow field i. ONLY fields 2 and 6 can be followed by a space.
    const static int allow_space_mask = 4 | 64;
    uint32_t date_val[MAX_DATE_PARTS] = {0};
    int32_t date_len[MAX_DATE_PARTS] = {0};

    // isdigit() has undefined behavior for negative arguments other than EOF, which is what a
    // plain (signed) char holding a byte >= 0x80 produces. Always convert through unsigned char.
    const auto is_digit = [](char c) { return isdigit(static_cast<unsigned char>(c)) != 0; };

    // Skip leading space characters
    while (ptr < end && check_space(*ptr)) {
        ptr++;
    }
    if (ptr == end || !is_digit(*ptr)) {
        return false;
    }

    // Measure the leading run of digits ('T' is counted as part of the run) to decide the
    // year length and whether the input is in interval format.
    const char* pos = ptr;
    while (pos < end && (is_digit(*pos) || *pos == 'T')) {
        pos++;
    }
    int year_len = 4;
    int digits = pos - ptr;
    bool is_interval_format = false;
    bool has_bar = false;

    // Compatible with MySQL.
    // For YYYYMMDD/YYYYMMDDHHMMSS is 4 digits years
    if (pos == end || *pos == '.' ||
        time_zone_begins(pos, end)) { // no delimiter until ./Asia/Z/GMT...
        if (digits == 4 || digits == 8 || digits >= 14) {
            year_len = 4;
        } else {
            year_len = 2;
        }
        is_interval_format = true;
    }

    int field_idx = 0;
    int field_len = year_len;
    // Seconds to add after the fields are stored: rounding carry plus time zone shift.
    long sec_offset = 0;
    bool need_use_timezone = false;

    while (ptr < end && is_digit(*ptr) && field_idx < MAX_DATE_PARTS) {
        const char* start = ptr;
        int temp_val = 0;
        // Delimited fields run until the next non-digit; interval-format fields and the
        // fraction (field 6) consume at most field_len digits.
        bool scan_to_delim = (!is_interval_format) && (field_idx != 6);
        while (ptr < end && is_digit(*ptr) && (scan_to_delim || field_len--)) { // field_len <= 7
            // A value that is already above 999999 and still growing can only end up being
            // rejected by the range check below, so fail now. This keeps temp_val from
            // overflowing (and possibly wrapping into a "valid" value) on long digit runs.
            if (temp_val > 999999L) {
                return false;
            }
            temp_val = temp_val * 10 + (*ptr - '0');
            ptr++;
        }
        if (ptr == start) {
            return false;
        }

        if (field_idx == 6) {
            if constexpr (is_datetime) {
                // round of microseconds
                // 1. normalize to 7 digits for rounding
                // 2. rounding
                // 3. normalize to 6 digits for storage
                if (scale >= 0) {
                    // do normalization
                    const auto ms_digit_count = ptr - start;
                    const auto normalizer = int_exp10(std::abs(7 - ms_digit_count));
                    temp_val *= normalizer;
                    // check round
                    const auto rounder = int_exp10(std::abs(7 - scale));
                    const auto reminder = temp_val % rounder;
                    temp_val -= reminder;
                    if (reminder >= 5 * normalizer) {
                        temp_val += rounder;
                    }
                    // truncate to 6 digits
                    if (temp_val == int_exp10(7)) {
                        // The fraction rounded up to a whole second: carry it into sec_offset.
                        temp_val = 0;
                        sec_offset += 1;
                    } else {
                        temp_val /= 10;
                    }
                }
                // move ptr to start of timezone or end
                while (ptr < end && is_digit(*ptr)) {
                    ptr++;
                }
            } else {
                // Microsecond: pad the digits that were read up to 6 digits.
                const auto ms_part = ptr - start;
                temp_val *= int_exp10(std::max(0L, 6 - ms_part));
            }
        }

        // Impossible
        if (temp_val > 999999L) {
            return false;
        }
        date_val[field_idx] = temp_val;

        if (field_idx == 6) {
            // select cast("2020-01-01 12:00:00.12345" as Datetime(4))
            // ptr - start will be 5, but scale is 4
            date_len[field_idx] = std::min(static_cast<int>(ptr - start), scale);
        } else {
            date_len[field_idx] = ptr - start;
        }
        field_len = 2;

        if (ptr == end) {
            field_idx++;
            break;
        }

        // timezone
        if (UNLIKELY((field_idx > 2 ||
                      !has_bar) /*dont treat xxxx-xx-xx:xx:xx as xxxx-xx(-xx:xx:xx)*/
                     && time_zone_begins(ptr, end))) {
            if (local_time_zone == nullptr) {
                return false;
            }
            need_use_timezone = true;
            field_idx++;
            break;
        }

        if (field_idx == 2 && *ptr == 'T') {
            // YYYYMMDDTHHMMDD, skip 'T' and continue
            ptr++;
            field_idx++;
            continue;
        }

        // Second part
        if (field_idx == 5) {
            if (*ptr == '.') {
                ptr++;
                // for datetime, we need to discard the fraction part
                // that beyond the scale + 1, and scale + 1 digit will
                // be used to round the fraction part
                if constexpr (is_datetime) {
                    field_len = std::min(7, scale + 1);
                } else {
                    field_len = 6;
                }
            } else if (is_digit(*ptr)) {
                // More digits directly after the seconds field: stop parsing fields here.
                field_idx++;
                break;
            }
            field_idx++;
            continue;
        }

        // escape separator
        while (ptr < end && (check_date_punct(*ptr) || check_space(*ptr))) {
            if (check_space(*ptr)) {
                if (((1 << field_idx) & allow_space_mask) == 0) {
                    return false;
                }
            }
            if (*ptr == '-') {
                has_bar = true;
            }
            ptr++;
        }
        field_idx++;
    }

    int num_field = field_idx;
    if (!is_interval_format) {
        // For delimited input the year length is whatever was actually written.
        year_len = date_len[0];
    }
    // Fields that were not present in the input are zero.
    for (; field_idx < MAX_DATE_PARTS; ++field_idx) {
        date_val[field_idx] = 0;
    }

    // Expand a two-digit year around the YY_PART_YEAR pivot.
    if (year_len == 2) {
        if (date_val[0] < YY_PART_YEAR) {
            date_val[0] += 2000;
        } else {
            date_val[0] += 1900;
        }
    }

    // At least year, month and day are required.
    if (num_field < 3) {
        return false;
    }
    if (is_invalid(date_val[0], date_val[1], date_val[2], 0, 0, 0, 0)) {
        if (date_val[0] == 0 && date_val[1] == 0 && date_val[2] == 0 && convert_zero) {
            date_val[1] = 1;
            date_val[2] = 1;
        } else {
            return false;
        }
    }

    if (need_use_timezone) {
        // ptr was left at the first character of the time zone text.
        cctz::time_zone given_tz {};
        if (!TimezoneUtils::find_cctz_time_zone(std::string {ptr, end}, given_tz)) {
            return false; // invalid format
        }
        auto given = cctz::convert(cctz::civil_second {}, given_tz);
        auto local = cctz::convert(cctz::civil_second {}, *local_time_zone);
        // `given` and `local` are the absolute instants of the same civil time in each zone, so
        // (given - local) is the number of seconds that moves a wall-clock time written in the
        // given zone to the local zone. Accumulate instead of assigning so that a rounding
        // carry recorded above is not lost.
        sec_offset += std::chrono::duration_cast<std::chrono::seconds>(given - local).count();
    }

    // In check_range_and_set_time, for Date type the time part will be truncated. So if the timezone offset should make
    // rounding to date part, it would be lost. To avoid this, we use a Datetime type to do these calc. It will save the
    // time part and apply the offset. Then convert to Date type back.
    // see https://github.com/apache/doris/pull/33553 for more details.
    if constexpr (!is_datetime) {
        if (sec_offset) {
            DateV2Value<DateTimeV2ValueType> tmp;
            if (!tmp.check_range_and_set_time(date_val[0], date_val[1], date_val[2], date_val[3],
                                              date_val[4], date_val[5], date_val[6])) {
                return false;
            }
            if (!tmp.date_add_interval<TimeUnit::SECOND>(
                        TimeInterval {TimeUnit::SECOND, sec_offset, false})) {
                return false;
            }
            this->assign_from(tmp);
            return true;
        }
    }

    if (!check_range_and_set_time(date_val[0], date_val[1], date_val[2], date_val[3], date_val[4],
                                  date_val[5], date_val[6])) {
        return false;
    }
    return sec_offset ? date_add_interval<TimeUnit::SECOND>(
                                TimeInterval {TimeUnit::SECOND, sec_offset, false})
                      : true;
}