in sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkIntervalUtils.scala [110:374]
def microsToDuration(micros: Long): Duration = {
  // Start from the zero duration and add `micros` microseconds to it.
  Duration.ZERO.plus(micros, ChronoUnit.MICROS)
}
/**
 * Builds a normalized [[Period]] from a month count. The resulting period has zero days.
 *
 * <p> Normalization keeps the absolute value of the months unit below 12 and moves the excess
 * into the years unit, so an input of 27 months yields "2 years and 3 months".
 *
 * <p> After normalization the years and months units carry the same sign, so an input of -13
 * months yields "-1 year and -1 month".
 *
 * @param months
 *   The number of months, positive or negative
 * @return
 *   The normalized period, not null
 */
def monthsToPeriod(months: Int): Period = {
  val monthsOnly = Period.of(0, months, 0)
  monthsOnly.normalized()
}
/**
 * Parses the textual form of an interval into a [[CalendarInterval]], ignoring letter case.
 *
 * The input is trimmed and lower-cased, then scanned byte by byte with a small state machine.
 * An optional leading `intervalStr` keyword is followed by any number of whitespace-separated
 * `[sign] number unit` terms. Each unit keyword may be followed by a single trailing `s`, and
 * only a unit starting with `s` (seconds) may carry a fractional part. Months, days and
 * microseconds are accumulated separately using overflow-checked arithmetic.
 *
 * @param input
 *   The interval text to parse
 * @return
 *   The interval holding the accumulated months, days and microseconds
 * @throws SparkIllegalArgumentException
 *   with an `INVALID_INTERVAL_FORMAT.*` error class if the input is null, empty or not in a
 *   valid interval format, or if a value overflows.
 */
def stringToInterval(input: UTF8String): CalendarInterval = {
  import ParseState._

  // Raises the given parse error. Every error (except the null-input one) reports the original
  // input text, optionally followed by extra message parameters.
  def fail(errorClass: String, extra: (String, String)*): Nothing = {
    throw new SparkIllegalArgumentException(
      errorClass = errorClass,
      messageParameters = Map("input" -> input.toString) ++ extra)
  }

  if (input == null) {
    throw new SparkIllegalArgumentException(
      errorClass = "INVALID_INTERVAL_FORMAT.INPUT_IS_NULL",
      messageParameters = Map("input" -> "null"))
  }

  // scalastyle:off caselocale .toLowerCase
  val text = input.trimAll().toLowerCase
  // scalastyle:on
  val raw = text.getBytes
  if (raw.isEmpty) {
    fail("INVALID_INTERVAL_FORMAT.INPUT_IS_EMPTY")
  }

  // Scanner position and current state of the state machine.
  var phase = PREFIX
  var pos = 0

  // Accumulators for the term currently being parsed.
  var wholePart: Long = 0
  var negative: Boolean = false
  var fractionPart: Int = 0
  // Weight of the next fractional digit; -1 means "no fractional part seen in this term".
  var digitWeight: Int = 0
  val firstDigitWeight = (NANOS_PER_SECOND / 10).toInt
  // True when the number started directly with '.', e.g. '.11 second'.
  var startsWithPoint: Boolean = false

  // Totals over all terms parsed so far.
  var totalMonths: Int = 0
  var totalDays: Int = 0
  var totalMicros: Long = 0

  // Consumes one whitespace byte, or switches to `next` (without consuming) on anything else.
  def skipBlankOrEnter(ch: Byte, next: ParseState): Unit = {
    if (!Character.isWhitespace(ch)) {
      phase = next
    } else {
      pos += 1
    }
  }

  // The whitespace-delimited word containing the scanner position; used in error messages only.
  def wordAtCursor: String = {
    val whitespace = "\\s+"
    val allWords = text.toString.split(whitespace)
    val remaining = text.substring(pos, text.numBytes()).toString.split(whitespace)
    allWords(allWords.length - remaining.length)
  }

  while (pos < raw.length) {
    val ch = raw(pos)
    phase match {
      case PREFIX =>
        if (text.startsWith(intervalStr)) {
          val prefixLen = intervalStr.numBytes()
          if (text.numBytes() == prefixLen) {
            // Nothing but the prefix keyword.
            fail("INVALID_INTERVAL_FORMAT.INPUT_IS_EMPTY")
          }
          if (!Character.isWhitespace(raw(pos + prefixLen))) {
            // The keyword must be separated from the first term by whitespace.
            fail("INVALID_INTERVAL_FORMAT.INVALID_PREFIX", "prefix" -> wordAtCursor)
          }
          pos += prefixLen + 1
        }
        phase = TRIM_BEFORE_SIGN

      case TRIM_BEFORE_SIGN =>
        skipBlankOrEnter(ch, SIGN)

      case SIGN =>
        // A new term starts here: reset the per-term accumulators.
        wholePart = 0
        fractionPart = 0
        // An invalid (negative) weight records that no fraction has been seen yet; UNIT_BEGIN
        // relies on this to detect a fractional part. Meeting '.' makes the weight valid.
        digitWeight = -1
        startsWithPoint = false
        // By default the value follows the sign. The only exception is a number made of just a
        // fractional part, such as '.11 second' (0.11 seconds): on '.' we jump straight to
        // VALUE_FRACTIONAL_PART instead.
        phase = TRIM_BEFORE_VALUE
        if (ch == '-' || ch == '+') {
          negative = ch == '-'
          pos += 1
        } else if ('0' <= ch && ch <= '9') {
          // Unsigned number: leave the digit for the VALUE state.
          negative = false
        } else if (ch == '.') {
          negative = false
          digitWeight = firstDigitWeight
          startsWithPoint = true
          pos += 1
          phase = VALUE_FRACTIONAL_PART
        } else {
          fail("INVALID_INTERVAL_FORMAT.UNRECOGNIZED_NUMBER", "number" -> wordAtCursor)
        }

      case TRIM_BEFORE_VALUE =>
        skipBlankOrEnter(ch, VALUE)

      case VALUE =>
        if ('0' <= ch && ch <= '9') {
          wholePart = try {
            Math.addExact(Math.multiplyExact(10, wholePart), (ch - '0'))
          } catch {
            case _: ArithmeticException =>
              fail("INVALID_INTERVAL_FORMAT.ARITHMETIC_EXCEPTION")
          }
        } else if (Character.isWhitespace(ch)) {
          phase = TRIM_BEFORE_UNIT
        } else if (ch == '.') {
          digitWeight = firstDigitWeight
          phase = VALUE_FRACTIONAL_PART
        } else {
          fail("INVALID_INTERVAL_FORMAT.INVALID_VALUE", "value" -> wordAtCursor)
        }
        pos += 1

      case VALUE_FRACTIONAL_PART =>
        if ('0' <= ch && ch <= '9') {
          if (digitWeight > 0) {
            fractionPart += (ch - '0') * digitWeight
            digitWeight /= 10
          } else {
            // The weight is exhausted: more fractional digits than can be represented.
            fail("INVALID_INTERVAL_FORMAT.INVALID_PRECISION", "value" -> wordAtCursor)
          }
        } else if (Character.isWhitespace(ch) &&
          !(startsWithPoint && digitWeight >= firstDigitWeight)) {
          // End of the number. A number that started with '.' must have at least one digit.
          fractionPart /= NANOS_PER_MICROS.toInt
          phase = TRIM_BEFORE_UNIT
        } else {
          fail("INVALID_INTERVAL_FORMAT.INVALID_VALUE", "value" -> wordAtCursor)
        }
        pos += 1

      case TRIM_BEFORE_UNIT =>
        skipBlankOrEnter(ch, UNIT_BEGIN)

      case UNIT_BEGIN =>
        // A fractional part is only accepted for the seconds unit.
        if (digitWeight >= 0 && ch != 's') {
          fail("INVALID_INTERVAL_FORMAT.INVALID_FRACTION", "unit" -> wordAtCursor)
        }
        if (negative) {
          wholePart = -wholePart
          fractionPart = -fractionPart
        }
        try {
          ch match {
            case 'y' if text.matchAt(yearStr, pos) =>
              totalMonths = Math.toIntExact(
                Math.addExact(totalMonths, Math.multiplyExact(MONTHS_PER_YEAR, wholePart)))
              pos += yearStr.numBytes()
            case 'm' if text.matchAt(monthStr, pos) =>
              totalMonths = Math.addExact(totalMonths, Math.toIntExact(wholePart))
              pos += monthStr.numBytes()
            case 'w' if text.matchAt(weekStr, pos) =>
              totalDays = Math.toIntExact(
                Math.addExact(totalDays, Math.multiplyExact(DAYS_PER_WEEK, wholePart)))
              pos += weekStr.numBytes()
            case 'd' if text.matchAt(dayStr, pos) =>
              totalDays = Math.addExact(totalDays, Math.toIntExact(wholePart))
              pos += dayStr.numBytes()
            case 'h' if text.matchAt(hourStr, pos) =>
              totalMicros =
                Math.addExact(totalMicros, Math.multiplyExact(wholePart, MICROS_PER_HOUR))
              pos += hourStr.numBytes()
            case 'm' if text.matchAt(minuteStr, pos) =>
              totalMicros =
                Math.addExact(totalMicros, Math.multiplyExact(wholePart, MICROS_PER_MINUTE))
              pos += minuteStr.numBytes()
            case 's' if text.matchAt(secondStr, pos) =>
              val wholeSecondsUs = Math.multiplyExact(wholePart, MICROS_PER_SECOND)
              totalMicros =
                Math.addExact(Math.addExact(totalMicros, wholeSecondsUs), fractionPart)
              pos += secondStr.numBytes()
            case 'm' if text.matchAt(millisStr, pos) =>
              totalMicros =
                Math.addExact(totalMicros, SparkDateTimeUtils.millisToMicros(wholePart))
              pos += millisStr.numBytes()
            case 'm' if text.matchAt(microsStr, pos) =>
              totalMicros = Math.addExact(totalMicros, wholePart)
              pos += microsStr.numBytes()
            case _ =>
              fail("INVALID_INTERVAL_FORMAT.INVALID_UNIT", "unit" -> wordAtCursor)
          }
        } catch {
          case _: ArithmeticException =>
            fail("INVALID_INTERVAL_FORMAT.ARITHMETIC_EXCEPTION")
        }
        phase = UNIT_SUFFIX

      case UNIT_SUFFIX =>
        // After the unit keyword: either a trailing 's' or the whitespace that ends the term.
        phase =
          if (ch == 's') {
            UNIT_END
          } else if (Character.isWhitespace(ch)) {
            TRIM_BEFORE_SIGN
          } else {
            fail("INVALID_INTERVAL_FORMAT.INVALID_UNIT", "unit" -> wordAtCursor)
          }
        pos += 1

      case UNIT_END =>
        // Only whitespace may follow the trailing 's' of a unit.
        if (!Character.isWhitespace(ch)) {
          fail("INVALID_INTERVAL_FORMAT.INVALID_UNIT", "unit" -> wordAtCursor)
        }
        pos += 1
        phase = TRIM_BEFORE_SIGN
    }
  }

  // The input is exhausted; the state we stopped in tells whether the last term was complete.
  phase match {
    case UNIT_SUFFIX | UNIT_END | TRIM_BEFORE_SIGN =>
      new CalendarInterval(totalMonths, totalDays, totalMicros)
    case TRIM_BEFORE_VALUE =>
      fail("INVALID_INTERVAL_FORMAT.MISSING_NUMBER", "word" -> wordAtCursor)
    case VALUE | VALUE_FRACTIONAL_PART =>
      fail("INVALID_INTERVAL_FORMAT.MISSING_UNIT", "word" -> wordAtCursor)
    case _ =>
      fail("INVALID_INTERVAL_FORMAT.UNKNOWN_PARSING_ERROR", "word" -> wordAtCursor)
  }
}