in endorsed/src/org.apache.sis.util/main/org/apache/sis/measure/UnitFormat.java [1112:1329]
public Unit<?> parse(CharSequence symbols, final ParsePosition position) throws MeasurementParseException {
    /*
     * Parses a portion of the given text as a unit, starting at the index given by `position`
     * (leading whitespaces are skipped). The work is done in three steps:
     *
     *   1) If the text is recognized as an authority code (e.g. "urn:ogc:def:uom:EPSG::9001"),
     *      the unit is resolved from that code and the position is moved to the end of the text.
     *   2) Otherwise the text is scanned for multiplication, division and exponentiation operators
     *      and for parentheses. Each term found between operators is parsed by `parseTerm(…)` and
     *      combined with the previous terms by `operation.apply(…)`.
     *   3) The characters remaining after the last operator are parsed as the last term, with a
     *      special handling for labels or names containing spaces.
     *
     * On normal return, the index of `position` is set to the index after the last parsed character.
     * A `MeasurementParseException` is thrown if an authority code is not recognized or if a
     * parenthesis is not closed; other failures may be thrown by the helper methods invoked here.
     */
    ArgumentChecks.ensureNonNull("symbols", symbols);
    ArgumentChecks.ensureNonNull("position", position);
    /*
     * Check for authority codes (currently only EPSG, but more could be added later).
     * Example: "urn:ogc:def:uom:EPSG::9001". If the unit is not an authority code
     * (which is the most common case), only then we will parse the unit symbols.
     *
     * The check is applied on the characters starting at `start` only, not on the whole text,
     * because characters before the parse position are not part of the unit to parse.
     */
    int end = symbols.length();
    int start = CharSequences.skipLeadingWhitespaces(symbols, position.getIndex(), end);
    if (AUTHORITIES != null) {
        final CharSequence uom = symbols.subSequence(start, end);
        final Map.Entry<Integer, String> entry = DefinitionURI.codeOf("uom", AUTHORITIES, uom);
        if (entry != null) {
            Unit<?> unit = null;
            NumberFormatException failure = null;
            final String code = entry.getValue();
            switch (entry.getKey()) {
                case 0: { // EPSG
                    try {
                        unit = Units.valueOfEPSG(Integer.parseInt(code));
                    } catch (NumberFormatException e) {
                        failure = e;        // Will be the cause of the exception thrown below.
                    }
                    break;
                }
                case 1: { // UCUM
                    unit = parse(code);
                    break;
                }
            }
            if (unit != null) {
                position.setIndex(end);
                finish(position);
                return unit;
            }
            /*
             * The error offset is the location of the code in the original text. Because the search
             * is done in the sub-sequence starting at `start`, the index found is relative to `start`.
             * If the code is not found verbatim, the offset defaults to `start`.
             *
             * NOTE(review): the message always uses the EPSG prefix, whatever the entry key was.
             * Confirm whether this point can be reached for an authority other than EPSG.
             */
            throw (MeasurementParseException) new MeasurementParseException(
                    Errors.format(Errors.Keys.UnknownUnit_1,
                          Constants.EPSG + Constants.DEFAULT_SEPARATOR + code), symbols,
                          start + Math.max(0, uom.toString().lastIndexOf(code))).initCause(failure);
        }
    }
    /*
     * Split the unit around the multiplication and division operators and parse each term individually.
     * Note that exponentiation needs to be kept as part of a single unit symbol.
     *
     * The `start` variable is the index of the first character of the next unit term to parse.
     * The `i` variable is the index of the character currently examined and `n` is the number
     * of `char` elements used by that character.
     */
    final Operation operation = new Operation(symbols); // Enumeration value: NOOP, IMPLICIT, MULTIPLY, DIVIDE.
    Unit<?> unit = null;
    boolean hasSpaces = false;
    int i = start;
scan:   for (int n; i < end; i += n) {
        final int c = Character.codePointAt(symbols, i);
        n = Character.charCount(c);
        final int next;
        switch (c) {
            /*
             * The minus sign can be both part of a number or part of a symbol. If the minus sign is followed
             * by a digit, then handle it as part of a number, in which case the action is only "continue".
             * Otherwise handle as part of a symbol, in which case the action is in the default case below.
             * The intent is to prevent the replacement of Operation.IMPLICIT by Operation.MULTIPLY in symbol
             * like "(m²⋅s)-1" because we want the "-1" part to be handled as Operation.EXPONENT instead.
             */
            case '-': {
                if (i + n < end && Character.isDigit(Character.codePointAt(symbols, i + n))) {
                    continue;
                }
                // else fall through.
            }
            /*
             * For any character that is not an operator or parenthesis, either continue the scanning of
             * characters or stop it, depending on whether the character is valid for a unit symbol or not.
             * In the latter case, we consider that we reached the end of a unit symbol.
             */
            default: {
                if (AbstractUnit.isSymbolChar(c)) {
                    if (operation.code == Operation.IMPLICIT) {
                        operation.code = Operation.MULTIPLY;
                    }
                    continue;
                }
                if (Character.isDigit(c) || Characters.isSuperScript(c)) {
                    continue;
                }
                if (Character.isSpaceChar(c)) { // NOT Character.isWhitespace(int)
                    hasSpaces = true;
                    continue;
                }
                break scan;
            }
            /*
             * Star is for exponentiation in UCUM syntax, but some symbols may use it for unit multiplication.
             * We interpret the symbol as a multiplication if the characters before or after it seem to be for
             * a unit symbol. A negative value returned by `exponentOperator(…)` selects the multiplication;
             * otherwise the returned value is added to `i` before to continue as an exponent.
             */
            case Style.EXPONENT_OR_MULTIPLY: {
                final int w = exponentOperator(symbols, i, end);
                if (w < 0) {
                    next = Operation.MULTIPLY;
                    break;
                }
                i += w;
                // else fall through.
            }
            case Style.EXPONENT: {
                if (operation.code == Operation.IMPLICIT) {
                    next = Operation.EXPONENT;
                    break;
                }
                continue;
            }
            /*
             * The period is the multiplication operator in UCUM format. According UCUM there is no ambiguity
             * with the decimal separator since unit terms should not contain floating point numbers. However
             * we relax this rule in order to support scale factor of angular units (e.g. π/180). The period
             * is interpreted as a decimal separator if there is a decimal digit before and after it.
             */
            case '.': if (isDecimalSeparator(symbols, i, end)) continue;
            case '×': // Fall through
            case AbstractUnit.MULTIPLY: next = Operation.MULTIPLY; break;
            case '÷':
            case '⁄': // Fraction slash
            case '/':
            case AbstractUnit.DIVIDE: next = Operation.DIVIDE; break;
            /*
             * If we find an '(' parenthesis, invoke recursively this method for the part inside parenthesis.
             * The parsing should end at the ')' parenthesis since it is not a valid unit symbol. If we do not
             * find that closing parenthesis, this will be considered an error.
             */
            case Style.OPEN: {
                final int pos = i + Character.charCount(c);
                final ParsePosition sub = new ParsePosition(pos);
                final Unit<?> term = parse(symbols, sub);
                i = CharSequences.skipLeadingWhitespaces(symbols, sub.getIndex(), end);
                if (i >= end || Character.codePointAt(symbols, i) != Style.CLOSE) {
                    throw new MeasurementParseException(Errors.format(Errors.Keys.NonEquilibratedParenthesis_2,
                           symbols.subSequence(start, i), Style.CLOSE), symbols, start);
                }
                unit = operation.apply(unit, term, pos);
                operation.code = Operation.IMPLICIT; // Default operation if there is no × or / symbol after parenthesis.
                start = i + (n = 1); // Skip the ')' character, on which `i` is positioned at this point.
                continue;
            }
        }
        /*
         * We reach this point only if we found some operator (division or multiplication).
         * If the operator has been found between two digits, we consider it as part of the
         * term. For example, "m2/3" is considered as a single term where "2/3" is the exponent.
         */
        if (i > start && i+n < end
                && Character.isDigit(Character.codePointBefore(symbols, i))
                && Character.isDigit(Character.codePointAt(symbols, i+n)))
        {
            continue;
        }
        /*
         * At this point, we have either a first unit to parse (NOOP), or a multiplication or division to apply
         * between the previously parsed units and the next unit to parse. A special case is IMPLICIT, which is
         * a multiplication without explicit × symbol after the parenthesis. The implicit multiplication can be
         * overridden by an explicit × or / symbol, which is what happened if we reach this point (tip: look in
         * the above `switch` statement all cases that end with `break`, not `break scan` or `continue`).
         */
        if (operation.code != Operation.IMPLICIT) {
            unit = operation.apply(unit, parseTerm(symbols, start, i, operation), start);
        }
        hasSpaces = false;
        operation.code = next;
        start = i + n;
    }
    /*
     * At this point we either found an unrecognized character or reached the end of string. We will
     * parse the remaining characters as a unit and apply the pending unit operation (multiplication
     * or division). But before, we need to check if the parsing should stop at the first whitespace.
     * This verification assumes that spaces are allowed only in labels specified by the label(…)
     * method and in resource bundles, not in labels specified by AbstractUnit.alternate(String).
     */
    Unit<?> component = null;
    if (hasSpaces) {
        end = i;
        start = CharSequences.skipLeadingWhitespaces(symbols, start, i);
        /*
         * Try the longest substring first, then remove the last word on each iteration
         * until a label or a name is recognized or until only the first word is remaining.
         */
search: while ((i = CharSequences.skipTrailingWhitespaces(symbols, start, i)) > start) {
            final String uom = symbols.subSequence(start, i).toString();
            if ((component = labelToUnit.get(uom)) != null) break;
            if ((component = fromName(uom)) != null) break;
            int j=i, c;
            do {
                c = Character.codePointBefore(symbols, j);
                j -= Character.charCount(c);
                if (j <= start) break search;
            } while (!Character.isWhitespace(c));
            /*
             * Really use Character.isWhitespace(c) above, not Character.isSpaceChar(c), because we want
             * to exclude non-breaking spaces. This block should be the only place in UnitFormat class
             * where we use isWhitespace(c) instead of isSpaceChar(c).
             */
            i = j; // Will become the index of first space after search loop completion.
        }
        /*
         * If we did not find any user-specified label or localized name matching the substring, then
         * assume that the parsing should stop at the first space, on the basis that spaces are not allowed
         * in unit symbols. We make an exception if we detect that the part before the first space contains
         * digits (not allowed in unit symbols either), in which case the substring may be something like
         * "100 feet".
         */
        if (hasDigit(symbols, start, i)) {
            i = end; // Restore the full length (until the first illegal character).
        }
    }
    if (!(operation.finished = (component != null))) {
        component = parseTerm(symbols, start, i, operation); // May set `operation.finished` flag.
    }
    if (operation.finished) {
        finish(position); // For preventing interpretation of "degree minute" as "degree × minute".
    }
    unit = operation.apply(unit, component, start);
    position.setIndex(i);
    return unit;
}