in native/spark-expr/src/conversion_funcs/cast.rs [1998:2117]
fn date_parser(date_str: &str, eval_mode: EvalMode) -> SparkResult<Option<i32>> {
// local functions
fn get_trimmed_start(bytes: &[u8]) -> usize {
let mut start = 0;
while start < bytes.len() && is_whitespace_or_iso_control(bytes[start]) {
start += 1;
}
start
}
fn get_trimmed_end(start: usize, bytes: &[u8]) -> usize {
let mut end = bytes.len() - 1;
while end > start && is_whitespace_or_iso_control(bytes[end]) {
end -= 1;
}
end + 1
}
fn is_whitespace_or_iso_control(byte: u8) -> bool {
byte.is_ascii_whitespace() || byte.is_ascii_control()
}
fn is_valid_digits(segment: i32, digits: usize) -> bool {
// An integer is able to represent a date within [+-]5 million years.
let max_digits_year = 7;
//year (segment 0) can be between 4 to 7 digits,
//month and day (segment 1 and 2) can be between 1 to 2 digits
(segment == 0 && digits >= 4 && digits <= max_digits_year)
|| (segment != 0 && digits > 0 && digits <= 2)
}
fn return_result(date_str: &str, eval_mode: EvalMode) -> SparkResult<Option<i32>> {
if eval_mode == EvalMode::Ansi {
Err(SparkError::CastInvalidValue {
value: date_str.to_string(),
from_type: "STRING".to_string(),
to_type: "DATE".to_string(),
})
} else {
Ok(None)
}
}
// end local functions
if date_str.is_empty() {
return return_result(date_str, eval_mode);
}
//values of date segments year, month and day defaulting to 1
let mut date_segments = [1, 1, 1];
let mut sign = 1;
let mut current_segment = 0;
let mut current_segment_value = Wrapping(0);
let mut current_segment_digits = 0;
let bytes = date_str.as_bytes();
let mut j = get_trimmed_start(bytes);
let str_end_trimmed = get_trimmed_end(j, bytes);
if j == str_end_trimmed {
return return_result(date_str, eval_mode);
}
//assign a sign to the date
if bytes[j] == b'-' || bytes[j] == b'+' {
sign = if bytes[j] == b'-' { -1 } else { 1 };
j += 1;
}
//loop to the end of string until we have processed 3 segments,
//exit loop on encountering any space ' ' or 'T' after the 3rd segment
while j < str_end_trimmed && (current_segment < 3 && !(bytes[j] == b' ' || bytes[j] == b'T')) {
let b = bytes[j];
if current_segment < 2 && b == b'-' {
//check for validity of year and month segments if current byte is separator
if !is_valid_digits(current_segment, current_segment_digits) {
return return_result(date_str, eval_mode);
}
//if valid update corresponding segment with the current segment value.
date_segments[current_segment as usize] = current_segment_value.0;
current_segment_value = Wrapping(0);
current_segment_digits = 0;
current_segment += 1;
} else if !b.is_ascii_digit() {
return return_result(date_str, eval_mode);
} else {
//increment value of current segment by the next digit
let parsed_value = Wrapping((b - b'0') as i32);
current_segment_value = current_segment_value * Wrapping(10) + parsed_value;
current_segment_digits += 1;
}
j += 1;
}
//check for validity of last segment
if !is_valid_digits(current_segment, current_segment_digits) {
return return_result(date_str, eval_mode);
}
if current_segment < 2 && j < str_end_trimmed {
// For the `yyyy` and `yyyy-[m]m` formats, entire input must be consumed.
return return_result(date_str, eval_mode);
}
date_segments[current_segment as usize] = current_segment_value.0;
match NaiveDate::from_ymd_opt(
sign * date_segments[0],
date_segments[1] as u32,
date_segments[2] as u32,
) {
Some(date) => {
let duration_since_epoch = date
.signed_duration_since(NaiveDateTime::UNIX_EPOCH.date())
.num_days();
Ok(Some(duration_since_epoch.to_i32().unwrap()))
}
None => Ok(None),
}
}