fn parse_date()

in arrow-cast/src/parse.rs [588:680]


/// Parses `string` as a calendar date, returning `None` when it is not in a supported
/// format or does not name a real day.
///
/// Supported inputs:
/// * `YYYY-MM-DD`, where month and day may each be one or two digits (`2020-9-8`);
/// * `YYYYMMDD`, exactly eight digits;
/// * a signed, extended year of at least four digits, e.g. `+10999-12-31` or
///   `-0012-05-06`;
/// * unsigned inputs longer than ten bytes, which are delegated to `string_to_datetime`
///   (called with `Utc`) and reduced to their date part.
///
/// The whole input must match: bytes trailing the day are rejected rather than ignored.
fn parse_date(string: &str) -> Option<NaiveDate> {
    // Extended (signed) years such as "+10999-12-31" or "-0012-05-06".
    //
    // Per the ISO_LOCAL_DATE description, years have four digits or more: years in the
    // range 0000 to 9999 are zero-padded to four digits, and years outside that range
    // carry a positive or negative sign prefix.
    //
    // See https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/time/format/DateTimeFormatter.html#ISO_LOCAL_DATE
    if string.starts_with('+') || string.starts_with('-') {
        // Month and day must consist of ASCII digits only. The check is explicit because
        // `u32`'s `FromStr` would otherwise accept a leading '+' (e.g. "+12").
        let parse_field = |field: &str| -> Option<u32> {
            if field.bytes().all(|b| b.is_ascii_digit()) {
                field.parse().ok()
            } else {
                None
            }
        };

        // The sign is a single ASCII byte, so slicing at 1 is on a char boundary.
        // The first '-' after the sign terminates the year digits; there must be at
        // least four of them.
        let year_digits = string[1..].find('-')?;
        if year_digits < 4 {
            return None;
        }
        // e.g. for "+10999-12-31", `year_digits` is 5, so the split yields
        // "+10999" and "-12-31".
        let (year, month_day) = string.split_at(year_digits + 1);
        let year: i32 = year.parse().ok()?;
        // Drop the separator after the year, then split month from day at the next one.
        let (month, day) = month_day.strip_prefix('-')?.split_once('-')?;
        return NaiveDate::from_ymd_opt(year, parse_field(month)?, parse_field(day)?);
    }

    if string.len() > 10 {
        // Too long for a bare date: try to parse as datetime and keep just the date part.
        return string_to_datetime(&Utc, string)
            .map(|dt| dt.date_naive())
            .ok();
    }

    // `digits[i]` holds byte `i` minus b'0' (wrapping); bit `i` of `mask` is set when
    // that byte is an ASCII digit. Positions past the end of the input stay zero / unset.
    let mut digits = [0u8; 10];
    let mut mask = 0u16;

    // Treating all bytes the same way, helps LLVM vectorise this correctly
    for (idx, (o, i)) in digits.iter_mut().zip(string.bytes()).enumerate() {
        *o = i.wrapping_sub(b'0');
        mask |= ((*o < 10) as u16) << idx
    }

    const HYPHEN: u8 = b'-'.wrapping_sub(b'0');

    // The mask alone cannot tell "input ended" from "non-digit byte here": both leave
    // the bit unset. Every pattern is therefore paired with the exact input length, so
    // that e.g. "2020-1-1x" is not mistaken for "2020-1-1".
    //
    //  refer to https://www.rfc-editor.org/rfc/rfc3339#section-3
    let (month, day) = if digits[4] != HYPHEN {
        // No separator after the year: only the compact YYYYMMDD form is accepted.
        match (mask, string.len()) {
            (0b11111111, 8) => (digits[4] * 10 + digits[5], digits[6] * 10 + digits[7]),
            _ => return None,
        }
    } else {
        match (mask, string.len()) {
            // YYYY-MM-DD
            (0b1101101111, 10) if digits[7] == HYPHEN => {
                (digits[5] * 10 + digits[6], digits[8] * 10 + digits[9])
            }
            // YYYY-MM-D
            (0b101101111, 9) if digits[7] == HYPHEN => (digits[5] * 10 + digits[6], digits[8]),
            // YYYY-M-DD
            (0b110101111, 9) if digits[6] == HYPHEN => (digits[5], digits[7] * 10 + digits[8]),
            // YYYY-M-D
            (0b10101111, 8) if digits[6] == HYPHEN => (digits[5], digits[7]),
            _ => return None,
        }
    };

    // Every accepted pattern has digits in positions 0..4; widen before scaling so the
    // four-digit year cannot overflow `u8`.
    let year = u16::from(digits[0]) * 1000
        + u16::from(digits[1]) * 100
        + u16::from(digits[2]) * 10
        + u16::from(digits[3]);

    NaiveDate::from_ymd_opt(i32::from(year), u32::from(month), u32::from(day))
}