in sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala [478:627]
/**
 * Splits a timestamp string into its numeric segments, an optional time zone and a flag that
 * tells whether only a time-of-day was given.
 *
 * On success the result is `(segments, zoneId, justTime)` where `segments` has 9 slots:
 * year, month, day, hour, minute, second, fraction of second scaled to exactly six digits,
 * followed by two slots (7 and 8) that this method leaves at 0. Year, month and day default
 * to 1 and the remaining slots to 0 when the corresponding part is absent. A leading `+` or
 * `-` is applied to the year.
 *
 * The text after the seconds (or after the fraction) is taken verbatim as the zone name and
 * resolved through `getZoneId`; whatever that helper does for an unknown zone propagates to
 * the caller.
 *
 * @param s the input string; may be null
 * @return `(Array.empty, None, false)` when `s` is null, blank after trimming, or malformed;
 *         otherwise the parsed segments, the resolved zone (if any) and the time-only flag
 */
def parseTimestampString(s: UTF8String): (Array[Int], Option[ZoneId], Boolean) = {
  def isValidDigits(segment: Int, digits: Int): Boolean = {
    // A Long is able to represent a timestamp within [+-]200 thousand years
    val maxDigitsYear = 6
    segment match {
      // For the fraction part any number of digits is allowed; extra digits are truncated.
      case 6 => true
      case 0 => digits >= 4 && digits <= maxDigitsYear
      // The zoneId segment (7) may have zero digits when it is a region-based zone ID.
      case 7 => digits <= 2
      case _ => digits > 0 && digits <= 2
    }
  }
  if (s == null) {
    return (Array.empty, None, false)
  }
  var tz: Option[String] = None
  val segments: Array[Int] = Array[Int](1, 1, 1, 0, 0, 0, 0, 0, 0)
  // Index of the segment currently being accumulated.
  var i = 0
  var currentSegmentValue = 0
  var currentSegmentDigits = 0
  val bytes = s.getBytes
  // Remember where the trimmed content starts: the "leading T" check below must be made
  // relative to this position, not to absolute offset 0, otherwise inputs with leading
  // whitespace such as " T12:30:00" are rejected while "T12:30:00" is accepted.
  val trimmedStart = getTrimmedStart(bytes)
  var j = trimmedStart
  val strEndTrimmed = getTrimmedEnd(j, bytes)
  if (j == strEndTrimmed) {
    return (Array.empty, None, false)
  }
  // Number of fraction digits seen; used to scale the fraction to six digits at the end.
  var digitsMilli = 0
  var justTime = false
  var yearSign: Option[Int] = None
  if (bytes(j) == '-' || bytes(j) == '+') {
    yearSign = if (bytes(j) == '-') Some(-1) else Some(1)
    j += 1
  }
  while (j < strEndTrimmed) {
    val b = bytes(j)
    val parsedValue = b - '0'.toByte
    if (parsedValue < 0 || parsedValue > 9) {
      // Non-digit byte: it must be a separator that is legal for the current segment.
      if (j == trimmedStart && b == 'T') {
        // A 'T' as the very first character (no sign before it) introduces a time-only value.
        justTime = true
        i += 3
      } else if (i < 2) {
        if (b == '-') {
          if (!isValidDigits(i, currentSegmentDigits)) {
            return (Array.empty, None, false)
          }
          segments(i) = currentSegmentValue
          currentSegmentValue = 0
          currentSegmentDigits = 0
          i += 1
        } else if (i == 0 && b == ':' && yearSign.isEmpty) {
          // "hh:..." without a date part: the digits read so far are the hour.
          justTime = true
          if (!isValidDigits(3, currentSegmentDigits)) {
            return (Array.empty, None, false)
          }
          segments(3) = currentSegmentValue
          currentSegmentValue = 0
          currentSegmentDigits = 0
          i = 4
        } else {
          return (Array.empty, None, false)
        }
      } else if (i == 2) {
        if (b == ' ' || b == 'T') {
          if (!isValidDigits(i, currentSegmentDigits)) {
            return (Array.empty, None, false)
          }
          segments(i) = currentSegmentValue
          currentSegmentValue = 0
          currentSegmentDigits = 0
          i += 1
        } else {
          return (Array.empty, None, false)
        }
      } else if (i == 3 || i == 4) {
        if (b == ':') {
          if (!isValidDigits(i, currentSegmentDigits)) {
            return (Array.empty, None, false)
          }
          segments(i) = currentSegmentValue
          currentSegmentValue = 0
          currentSegmentDigits = 0
          i += 1
        } else {
          return (Array.empty, None, false)
        }
      } else if (i == 5 || i == 6) {
        if (!isValidDigits(i, currentSegmentDigits)) {
          return (Array.empty, None, false)
        }
        segments(i) = currentSegmentValue
        currentSegmentValue = 0
        currentSegmentDigits = 0
        if (b == '.' && i == 5) {
          // Seconds are complete; the fraction follows.
          i += 1
        } else {
          // Anything else after the seconds or the fraction starts the zone text, which
          // runs to the end of the trimmed input. Jump to the last byte so the loop ends
          // after this iteration, and continue with the zone segment (7).
          tz = Some(new String(bytes, j, strEndTrimmed - j))
          j = strEndTrimmed - 1
          i = 7
        }
      } else {
        if (i < segments.length && (b == ':' || b == ' ')) {
          if (!isValidDigits(i, currentSegmentDigits)) {
            return (Array.empty, None, false)
          }
          segments(i) = currentSegmentValue
          currentSegmentValue = 0
          currentSegmentDigits = 0
          i += 1
        } else {
          return (Array.empty, None, false)
        }
      }
    } else {
      if (i == 6) {
        digitsMilli += 1
      }
      // We will truncate the fraction part if there are more than 6 digits, which results
      // in loss of precision
      if (i != 6 || currentSegmentDigits < 6) {
        currentSegmentValue = currentSegmentValue * 10 + parsedValue
      }
      currentSegmentDigits += 1
    }
    j += 1
  }
  // Flush the segment that was still being accumulated when the input ended.
  if (!isValidDigits(i, currentSegmentDigits)) {
    return (Array.empty, None, false)
  }
  segments(i) = currentSegmentValue
  // Right-pad the fraction with zeros so that it always represents six digits.
  while (digitsMilli < 6) {
    segments(6) *= 10
    digitsMilli += 1
  }
  // This step also validates time zone part
  val zoneId = tz.map(zoneName => getZoneId(zoneName.trim))
  segments(0) *= yearSign.getOrElse(1)
  (segments, zoneId, justTime)
}