in daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/ConvertTextStandardNumberParser.scala [169:321]
// The only runtime dependency declared by this parser is the text number format
// evaluatable, which parse() evaluates against the parse state to obtain the
// DecimalFormat it uses.
override def runtimeDependencies = Vector(textNumberFormatEv)
// The element's primitive type viewed as a numeric primitive. Uses Option.get and an
// unchecked cast, so this fails at construction time if the context has no primitive
// type or the type is not a PrimNumeric.
// NOTE(review): presumably the schema compiler only builds this parser for numeric
// simple types -- confirm.
private val primNumeric = context.optPrimType.get.asInstanceOf[NodeInfo.PrimType.PrimNumeric]
/**
 * Converts the string value currently held by the simple element into a number and
 * overwrites the element's data value with the result.
 *
 * The steps are:
 *  1. An empty string is a parse error.
 *  2. If the string matches one of the zero representation regexes, the value is zero.
 *  3. Otherwise the string is parsed with the ICU DecimalFormat obtained from
 *     textNumberFormatEv (trimmed first when the format is not strict), the entire
 *     string must have been consumed (allowing for trailing pad characters when the
 *     pad position is PAD_AFTER_SUFFIX), the virtual decimal point is applied, and the
 *     result is converted to the primitive type via primNumeric.fromNumber.
 *
 * Every failure is reported through PE and the method returns without overwriting the
 * element's value.
 *
 * @param start the parse state; supplies the current simple element and receives any
 *              parse error
 */
def parse(start: PState): Unit = {
  val node: DISimple = start.simpleElement
  val str = node.dataValueAsString
  Assert.invariant(str != null) // worst case it should be empty string. But not null.
  if (str == "") {
    PE(start, "Unable to parse %s from empty string", context.optPrimType.get.globalQName)
    return
  }

  // All "could not make a number out of this text" failures report the same message
  // using the original, untrimmed text.
  def unableToParse(): Unit =
    PE(start, "Unable to parse %s from text: %s", context.optPrimType.get.globalQName, str)

  // because of the way the zero rep regular expressions are generated, they
  // will match either all or none of 'str', never part of it. Thus,
  // findFirstIn() either matches and it's a zero rep, or it doesn't and it's
  // not a zero
  val numValue: DataValueNumber = zeroRepsRegex.find { _.findFirstIn(str).isDefined } match {
    case Some(_) => primNumeric.fromNumber(0)
    case None => {
      val df = textNumberFormatEv.evaluate(start)
      val strToParse = if (df.isParseStrict) str else str.trim
      val pos = new ParsePosition(0)
      val icuNum: JNumber = df.parse(strToParse, pos) match {
        case null => {
          val infNaN: JDouble =
            if (df.isDecimalPatternMatchRequired) {
              // ICU failed to parse. But there is a bug in ICU4J (ICU-22303) that if there is
              // a decimal in the pattern and we've set that decimal to be required (due to
              // strict mode), then it will fail to parse Inf/NaN representations. As a
              // workaround, we clone the DecimalFormat, disable requiring the decimal, and
              // reparse. We only accept successful Inf/NaN parses though--everything else is
              // considered a parse error since it meant the decimal point was missing or
              // wasn't either inf/nan or a valid number. If ICU fixes this bug, we should
              // remove this infNaN variable and its use, as it is likely pretty expensive to
              // clone, change a setting, and reparse. Fortunately, it is only in the error
              // case of strict parsing so should be rare.
              pos.setIndex(0)
              val newDF = df.clone().asInstanceOf[DecimalFormat]
              newDF.setDecimalPatternMatchRequired(false)
              newDF.parse(strToParse, pos) match {
                // ICU also returns a Double for negative zero. A negative zero that only
                // parses once the decimal requirement is dropped is missing its required
                // decimal point, so it must become a parse error rather than be accepted
                // (or trip an invariant).
                case d: JDouble if d.isNaN || d.isInfinite => d
                case _ => null
              }
            } else {
              null
            }
          if (infNaN != null) {
            infNaN
          } else {
            unableToParse()
            return
          }
        }
        case d: JDouble => {
          // ICU returns a Double only if it parsed NaN, Infinity, -Infinity, or negative
          // zero. We will later pass this value to primNumeric.fromNumber, which will fail
          // if the primitive type does not allow NaN/Infinity
          Assert.invariant(
            d.isNaN || d.isInfinite || JDouble.doubleToLongBits(d) == 0x8000000000000000L
          )
          d
        }
        case bd: ICUBigDecimal => {
          // ICU will return their own custom BigDecimal if the value cannot fit in a Long and
          // isn't infinity/NaN. We only want Java types, so detect this and convert it to the
          // appropriate type. Additionally, due to ICU lax parsing, ICU could successfully
          // parse something with a non-zero fractional part even if the pattern does not
          // specify a decimal. So in cases where decimals are not allowed (e.g. integer
          // primitives, virtual decimal points), we create a PE.
          val fractionalPartMustBeZero = primNumeric.isInteger || textDecimalVirtualPoint > 0
          if (bd.scale == 0) bd.unscaledValue
          else if (!fractionalPartMustBeZero) {
            bd.toBigDecimal
          } else {
            unableToParse()
            return
          }
        }
        case l: JLong => l
        // $COVERAGE-OFF$
        case num: JNumber => {
          Assert.invariantFailed(
            "ICU returned an unexpected type. Expected either Double, ICU BigDecimal, or Long, but got " + num.getClass.getName
          )
        }
        // $COVERAGE-ON$
      }
      // Verify that everything passed to ICU was consumed.
      // Use pos to verify all characters consumed & check for errors!
      if (pos.getIndex != strToParse.length) {
        val isValid =
          if (df.getPadPosition == DecimalFormat.PAD_AFTER_SUFFIX) {
            // If the DecimalFormat pad position is PAD_AFTER_SUFFIX, ICU
            // does not update the parse position to be at the end of the
            // padding, but instead sets the position to the end of the
            // suffix. So we need to manually check to see if all characters
            // after the parse position are the pad character.
            //
            // The parse position is an index into strToParse (which may be a
            // trimmed copy of str), so the remainder must be taken from
            // strToParse; indexing into str would be offset by any leading
            // whitespace that was trimmed.
            val padChar = df.getPadCharacter
            val afterPosition = strToParse.substring(pos.getIndex)
            afterPosition.forall(_ == padChar)
          } else {
            // For all other padPositions, the parse position must be at the
            // end of the string. That's not the case here, so it's not valid
            false
          }
        if (!isValid) {
          unableToParse()
          return
        }
      }
      val num: JNumber = applyTextDecimalVirtualPointForParse(icuNum)
      try {
        primNumeric.fromNumber(num)
      } catch {
        // The parsed value is not legal for the primitive type (e.g. out of range, or
        // NaN/Infinity for a type that does not allow them).
        case e: InvalidPrimitiveDataException => {
          PE(start, "%s", e.getMessage)
          return
        }
      }
    }
  }
  node.overwriteDataValue(numValue)
}