in be/src/vec/runtime/vdatetime_value.cpp [1207:1630]
// Parses `value` according to a MySQL-style `format` string (for example
// "%Y-%m-%d %H:%i:%s") and stores the result in this object.
//
// Parameters:
//   format, format_len : the format string and its length in bytes.
//   value, value_len   : the text to parse and its length in bytes.
//   sub_val_end        : optional. When non-null it receives a pointer to the first byte
//                        of `value` that was not consumed. It is written after the text
//                        has been scanned and the 12-hour check has passed, i.e. before
//                        the final date/time range validation.
//
// Returns true on success. Returns false when:
//   * value_len <= 0,
//   * a specifier cannot be parsed from the input, a literal character does not match,
//     or the format contains an unknown specifier,
//   * no date/time part was recognised at all,
//   * a 12-hour specifier was used and the hour is outside [1, 12],
//   * the week-number specifiers are inconsistent (%V/%v require a matching %X/%x),
//   * the resulting fields are rejected by get_date_from_daynr / check_range_and_set_time.
//
// Whitespace in `value` is skipped before every format element; whitespace in `format`
// is skipped without consuming input.
bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, const char* value,
                                            int64_t value_len, const char** sub_val_end) {
    if (value_len <= 0) [[unlikely]] {
        return false;
    }
    const char* ptr = format;
    const char* end = format + format_len;
    const char* val = value;
    const char* val_end = value + value_len;

    bool already_set_time_part = false; // skip time part in the end's setting.

    // Bitmask of the components seen so far (in the input or in the format).
    uint32_t part_used = 0;
    constexpr int YEAR_PART = 1U << 0;
    constexpr int MONTH_PART = 1U << 1;
    constexpr int DAY_PART = 1U << 2;
    constexpr int NORMAL_DATE_PART = YEAR_PART | MONTH_PART | DAY_PART;
    constexpr int WEEKDAY_PART = 1U << 3;
    constexpr int YEARDAY_PART = 1U << 4;
    constexpr int WEEK_NUM_PART = 1U << 5;
    constexpr int SPECIAL_DATE_PART = WEEKDAY_PART | YEARDAY_PART | WEEK_NUM_PART;
    [[maybe_unused]] constexpr int DATE_PART = NORMAL_DATE_PART | SPECIAL_DATE_PART;
    constexpr int HOUR_PART = 1U << 6;
    constexpr int MINUTE_PART = 1U << 7;
    constexpr int SECOND_PART = 1U << 8;
    constexpr int TIME_PART = HOUR_PART | MINUTE_PART | SECOND_PART;

    int half_day = 0; // 0 for am/none, 12 for pm.
    int weekday = -1;
    int yearday = -1;
    int week_num = -1; // week idx in one year

    bool strict_week_number = false;
    bool sunday_first = false;
    bool strict_week_number_year_type = false;
    int strict_week_number_year = -1;
    bool hour_system_12 = false;

    auto [year, month, day, hour, minute, second] = std::tuple {0, 0, 0, 0, 0, 0};

    while (ptr < end && val < val_end) {
        // Skip space character
        while (val < val_end && check_space(*val)) {
            val++;
        }
        // Skipping whitespace may have consumed the rest of the input. Stop here so that
        // nothing below dereferences `val` past `val_end`; specifiers still left in the
        // format are handled by the trailing-format loop after this one.
        if (val >= val_end) {
            break;
        }
        // Check switch
        if (*ptr == '%' && ptr + 1 < end) {
            const char* tmp = nullptr;
            int64_t int_value = 0;
            ptr++;
            switch (*ptr++) {
                // Year
            case 'y':
                // Year, numeric (two digits)
                tmp = val + min(2, val_end - val);
                if (!str_to_int64(val, &tmp, &int_value)) {
                    return false;
                }
                // Two-digit year pivot: 70..99 -> 19xx, 00..69 -> 20xx.
                int_value += int_value >= 70 ? 1900 : 2000;
                year = int_value;
                val = tmp;
                part_used |= YEAR_PART;
                break;
            case 'Y':
                // Year, numeric, four digits
                tmp = val + min(4, val_end - val);
                if (!str_to_int64(val, &tmp, &int_value)) {
                    return false;
                }
                // At most two characters were consumed: apply the two-digit year pivot.
                if (tmp - val <= 2) {
                    int_value += int_value >= 70 ? 1900 : 2000;
                }
                year = int_value;
                val = tmp;
                part_used |= YEAR_PART;
                break;
                // Month
            case 'm':
            case 'c':
                tmp = val + min(2, val_end - val);
                if (!str_to_int64(val, &tmp, &int_value)) {
                    return false;
                }
                month = int_value;
                val = tmp;
                part_used |= MONTH_PART;
                break;
            case 'M':
                int_value = check_word(const_cast<const char**>(s_month_name), val, val_end, &val);
                if (int_value < 0) {
                    return false;
                }
                month = int_value;
                part_used |= MONTH_PART;
                break;
            case 'b':
                int_value = check_word(s_ab_month_name, val, val_end, &val);
                if (int_value < 0) {
                    return false;
                }
                month = int_value;
                part_used |= MONTH_PART;
                break;
                // Day
            case 'd':
            case 'e':
                tmp = val + min(2, val_end - val);
                if (!str_to_int64(val, &tmp, &int_value)) {
                    return false;
                }
                day = int_value;
                val = tmp;
                part_used |= DAY_PART;
                break;
            case 'D':
                tmp = val + min(2, val_end - val);
                if (!str_to_int64(val, &tmp, &int_value)) {
                    return false;
                }
                day = int_value;
                // Step over up to two more characters after the number
                // (the ordinal suffix, e.g. "st"/"nd"/"rd"/"th").
                val = tmp + min(2, val_end - tmp);
                part_used |= DAY_PART;
                break;
                // Hour
            case 'h':
            case 'I':
            case 'l':
                hour_system_12 = true;
                part_used |= HOUR_PART;
                [[fallthrough]];
            case 'k':
            case 'H':
                tmp = val + min(2, val_end - val);
                if (!str_to_int64(val, &tmp, &int_value)) {
                    return false;
                }
                hour = int_value;
                val = tmp;
                part_used |= HOUR_PART;
                break;
                // Minute
            case 'i':
                tmp = val + min(2, val_end - val);
                if (!str_to_int64(val, &tmp, &int_value)) {
                    return false;
                }
                minute = int_value;
                val = tmp;
                part_used |= MINUTE_PART;
                break;
                // Second
            case 's':
            case 'S':
                tmp = val + min(2, val_end - val);
                if (!str_to_int64(val, &tmp, &int_value)) {
                    return false;
                }
                second = int_value;
                val = tmp;
                part_used |= SECOND_PART;
                break;
                // Micro second
            case 'f':
                // _microsecond is removed, but need to eat this val
                tmp = val + min(6, val_end - val);
                if (!str_to_int64(val, &tmp, &int_value)) {
                    return false;
                }
                val = tmp;
                break;
                // AM/PM, only meaningful for 12-hour system.
            case 'p':
                // The <cctype> functions require a value representable as unsigned char,
                // so cast before calling them (plain char may be negative).
                if ((val_end - val) < 2 ||
                    toupper(static_cast<unsigned char>(*(val + 1))) != 'M' || !hour_system_12) {
                    return false;
                }
                if (toupper(static_cast<unsigned char>(*val)) == 'P') {
                    // PM
                    half_day = 12;
                }
                val += 2;
                break;
                // Weekday
            case 'W':
                int_value = check_word(const_cast<const char**>(s_day_name), val, val_end, &val);
                if (int_value < 0) {
                    return false;
                }
                int_value++;
                weekday = int_value;
                part_used |= WEEKDAY_PART;
                break;
            case 'a':
                int_value = check_word(s_ab_day_name, val, val_end, &val);
                if (int_value < 0) {
                    return false;
                }
                int_value++;
                weekday = int_value;
                part_used |= WEEKDAY_PART;
                break;
            case 'w':
                tmp = val + min(1, val_end - val);
                if (!str_to_int64(val, &tmp, &int_value)) {
                    return false;
                }
                if (int_value >= 7) {
                    return false;
                }
                // An input of 0 is stored as weekday 7.
                if (int_value == 0) {
                    int_value = 7;
                }
                weekday = int_value;
                val = tmp;
                part_used |= WEEKDAY_PART;
                break;
            case 'j':
                tmp = val + min(3, val_end - val);
                if (!str_to_int64(val, &tmp, &int_value)) {
                    return false;
                }
                yearday = int_value;
                val = tmp;
                part_used |= YEARDAY_PART;
                break;
            case 'u':
            case 'v':
            case 'U':
            case 'V':
                // `ptr` was already advanced past the specifier, so look one back.
                sunday_first = (*(ptr - 1) == 'U' || *(ptr - 1) == 'V');
                // Used to check if there is %x or %X
                strict_week_number = (*(ptr - 1) == 'V' || *(ptr - 1) == 'v');
                tmp = val + min(2, val_end - val);
                if (!str_to_int64(val, &tmp, &int_value)) {
                    return false;
                }
                week_num = int_value;
                if (week_num > 53 || (strict_week_number && week_num == 0)) {
                    return false;
                }
                val = tmp;
                part_used |= WEEK_NUM_PART;
                break;
                // strict week number, must be used with %V or %v
            case 'x':
            case 'X':
                strict_week_number_year_type = (*(ptr - 1) == 'X');
                tmp = val + min(4, val_end - val);
                if (!str_to_int64(val, &tmp, &int_value)) {
                    return false;
                }
                strict_week_number_year = int_value;
                val = tmp;
                part_used |= WEEK_NUM_PART;
                break;
            case 'r': {
                // Composite specifier: parsed by a nested call with a fixed sub-format.
                VecDateTimeValue tmp_val;
                if (!tmp_val.from_date_format_str("%I:%i:%S %p", 11, val, val_end - val, &tmp)) {
                    return false;
                }
                this->_hour = tmp_val._hour;
                this->_minute = tmp_val._minute;
                this->_second = tmp_val._second;
                val = tmp;
                part_used |= TIME_PART;
                already_set_time_part = true;
                break;
            }
            case 'T': {
                // Composite specifier: parsed by a nested call with a fixed sub-format.
                VecDateTimeValue tmp_val;
                if (!tmp_val.from_date_format_str("%H:%i:%S", 8, val, val_end - val, &tmp)) {
                    return false;
                }
                this->_hour = tmp_val._hour;
                this->_minute = tmp_val._minute;
                this->_second = tmp_val._second;
                part_used |= TIME_PART;
                already_set_time_part = true;
                val = tmp;
                break;
            }
            case '.':
                // Consume a run of punctuation characters.
                while (val < val_end && ispunct(static_cast<unsigned char>(*val))) {
                    val++;
                }
                break;
            case '@':
                // Consume a run of alphabetic characters.
                while (val < val_end && isalpha(static_cast<unsigned char>(*val))) {
                    val++;
                }
                break;
            case '#':
                // Consume a run of digits.
                while (val < val_end && isdigit(static_cast<unsigned char>(*val))) {
                    val++;
                }
                break;
            case '%': // %%, escape the %
                if ('%' != *val) {
                    return false;
                }
                val++;
                break;
            default:
                return false;
            }
        } else if (!check_space(*ptr)) {
            // Literal format character: must match the input exactly.
            if (*ptr != *val) {
                return false;
            }
            ptr++;
            val++;
        } else {
            // Whitespace in the format consumes no input.
            ptr++;
        }
    }

    // for compatible with mysql, like something have %H:%i:%s format but no relative content...
    while (ptr < end) {
        if (*ptr == '%' && ptr + 1 < end) {
            ptr++;
            switch (*ptr++) {
            case 'H':
            case 'h':
            case 'I':
            case 'i':
            case 'k':
            case 'l':
            case 'r':
            case 's':
            case 'f':
            case 'S':
            case 'p':
            case 'T':
                part_used |= TIME_PART;
                break;
            default:
                break;
            }
        } else {
            ptr++;
        }
    }

    if (!part_used) {
        return false;
    }

    if (hour_system_12) {
        if (hour > 12 || hour < 1) {
            return false;
        }
        // 12 maps to 0 before the AM/PM offset is added (12 AM -> 0, 12 PM -> 12).
        hour = (hour % 12) + half_day;
    }
    if (sub_val_end) {
        *sub_val_end = val;
    }

    // Compute timestamp type
    if (part_used & DATE_PART) {
        if (part_used & TIME_PART) {
            _type = TIME_DATETIME;
        } else {
            _type = TIME_DATE;
        }
    } else {
        _type = TIME_TIME;
    }

    _neg = false;

    // Year day
    if (yearday > 0) {
        uint64_t days = doris::calc_daynr(year, 1, 1) + yearday - 1;
        if (!get_date_from_daynr(days)) {
            return false;
        }
    }
    // weekday
    if (week_num >= 0 && weekday > 0) {
        // Check: a strict week number (%V/%v) needs a week-year (%X/%x) of the matching
        // kind, and a week-year without a strict week number is rejected.
        if ((strict_week_number &&
             (strict_week_number_year < 0 || strict_week_number_year_type != sunday_first)) ||
            (!strict_week_number && strict_week_number_year >= 0)) {
            return false;
        }
        uint64_t days =
                doris::calc_daynr(strict_week_number ? strict_week_number_year : year, 1, 1);

        uint8_t weekday_b = doris::calc_weekday(days, sunday_first);

        if (sunday_first) {
            days += ((weekday_b == 0) ? 0 : 7) - weekday_b + (week_num - 1) * 7 + weekday % 7;
        } else {
            days += ((weekday_b <= 3) ? 0 : 7) - weekday_b + (week_num - 1) * 7 + weekday - 1;
        }
        if (!get_date_from_daynr(days)) {
            return false;
        }
    }
    // 1. already_set_date_part means _year, _month, _day be set, so we only set time part
    // 2. already_set_time_part means _hour, _minute, _second, _microsecond be set,
    //    so we only need to set date part
    // 3. if both are true, means all part of date_time be set, no need check_range_and_set_time
    bool already_set_date_part = yearday > 0 || (week_num >= 0 && weekday > 0);
    if (already_set_date_part && already_set_time_part) {
        return true;
    }
    // complete default month/day
    if (!(part_used & ~NORMAL_DATE_PART)) { // Ymd part only
        if (!(part_used & DAY_PART)) {
            day = 1;
            if (!(part_used & MONTH_PART)) {
                month = 1;
            }
        }
    }

    if (already_set_date_part) {
        return check_range_and_set_time(_year, _month, _day, hour, minute, second, _type);
    }
    if (already_set_time_part) {
        return check_range_and_set_time(year, month, day, _hour, _minute, _second, _type);
    }
    return check_range_and_set_time(year, month, day, hour, minute, second, _type);
}