in cpp/src/arrow/util/value_parsing.h [661:789]
static inline bool ParseTimestampISO8601(const char* s, size_t length,
TimeUnit::type unit, TimestampType::c_type* out,
bool* out_zone_offset_present = NULLPTR) {
using seconds_type = std::chrono::duration<TimestampType::c_type>;
// We allow the following zone offset formats:
// - (none)
// - Z
// - [+-]HH(:?MM)?
//
// We allow the following formats for all units:
// - "YYYY-MM-DD"
// - "YYYY-MM-DD[ T]hhZ?"
// - "YYYY-MM-DD[ T]hh:mmZ?"
// - "YYYY-MM-DD[ T]hh:mm:ssZ?"
//
// We allow the following formats for unit == MILLI, MICRO, or NANO:
// - "YYYY-MM-DD[ T]hh:mm:ss.s{1,3}Z?"
//
// We allow the following formats for unit == MICRO, or NANO:
// - "YYYY-MM-DD[ T]hh:mm:ss.s{4,6}Z?"
//
// We allow the following formats for unit == NANO:
// - "YYYY-MM-DD[ T]hh:mm:ss.s{7,9}Z?"
//
// UTC is always assumed, and the DataType's timezone is ignored.
//
if (ARROW_PREDICT_FALSE(length < 10)) return false;
seconds_type seconds_since_epoch;
if (ARROW_PREDICT_FALSE(!ParseYYYY_MM_DD(s, &seconds_since_epoch))) {
return false;
}
if (length == 10) {
*out = util::CastSecondsToUnit(unit, seconds_since_epoch.count());
return true;
}
if (ARROW_PREDICT_FALSE(s[10] != ' ') && ARROW_PREDICT_FALSE(s[10] != 'T')) {
return false;
}
if (out_zone_offset_present) {
*out_zone_offset_present = false;
}
seconds_type zone_offset(0);
if (s[length - 1] == 'Z') {
--length;
if (out_zone_offset_present) *out_zone_offset_present = true;
} else if (s[length - 3] == '+' || s[length - 3] == '-') {
// [+-]HH
length -= 3;
if (ARROW_PREDICT_FALSE(!detail::ParseHH(s + length + 1, &zone_offset))) {
return false;
}
if (s[length] == '+') zone_offset *= -1;
if (out_zone_offset_present) *out_zone_offset_present = true;
} else if (s[length - 5] == '+' || s[length - 5] == '-') {
// [+-]HHMM
length -= 5;
if (ARROW_PREDICT_FALSE(!detail::ParseHHMM(s + length + 1, &zone_offset))) {
return false;
}
if (s[length] == '+') zone_offset *= -1;
if (out_zone_offset_present) *out_zone_offset_present = true;
} else if ((s[length - 6] == '+' || s[length - 6] == '-') && (s[length - 3] == ':')) {
// [+-]HH:MM
length -= 6;
if (ARROW_PREDICT_FALSE(!detail::ParseHH_MM(s + length + 1, &zone_offset))) {
return false;
}
if (s[length] == '+') zone_offset *= -1;
if (out_zone_offset_present) *out_zone_offset_present = true;
}
seconds_type seconds_since_midnight;
switch (length) {
case 13: // YYYY-MM-DD[ T]hh
if (ARROW_PREDICT_FALSE(!detail::ParseHH(s + 11, &seconds_since_midnight))) {
return false;
}
break;
case 16: // YYYY-MM-DD[ T]hh:mm
if (ARROW_PREDICT_FALSE(!detail::ParseHH_MM(s + 11, &seconds_since_midnight))) {
return false;
}
break;
case 19: // YYYY-MM-DD[ T]hh:mm:ss
case 21: // YYYY-MM-DD[ T]hh:mm:ss.s
case 22: // YYYY-MM-DD[ T]hh:mm:ss.ss
case 23: // YYYY-MM-DD[ T]hh:mm:ss.sss
case 24: // YYYY-MM-DD[ T]hh:mm:ss.ssss
case 25: // YYYY-MM-DD[ T]hh:mm:ss.sssss
case 26: // YYYY-MM-DD[ T]hh:mm:ss.ssssss
case 27: // YYYY-MM-DD[ T]hh:mm:ss.sssssss
case 28: // YYYY-MM-DD[ T]hh:mm:ss.ssssssss
case 29: // YYYY-MM-DD[ T]hh:mm:ss.sssssssss
if (ARROW_PREDICT_FALSE(!detail::ParseHH_MM_SS(s + 11, &seconds_since_midnight))) {
return false;
}
break;
default:
return false;
}
seconds_since_epoch += seconds_since_midnight;
seconds_since_epoch += zone_offset;
if (length <= 19) {
*out = util::CastSecondsToUnit(unit, seconds_since_epoch.count());
return true;
}
if (ARROW_PREDICT_FALSE(s[19] != '.')) {
return false;
}
uint32_t subseconds = 0;
if (ARROW_PREDICT_FALSE(
!detail::ParseSubSeconds(s + 20, length - 20, unit, &subseconds))) {
return false;
}
*out = util::CastSecondsToUnit(unit, seconds_since_epoch.count()) + subseconds;
return true;
}