public Unit parse()

in endorsed/src/org.apache.sis.util/main/org/apache/sis/measure/UnitFormat.java [1112:1329]
146 lines of code
45 McCabe index (conditional complexity)

    /**
     * Parses a portion of the given text as a unit, starting at the index given by {@code position}.
     * Leading whitespaces are skipped. The parsing proceeds in three steps:
     *
     * <ol>
     *   <li>If the text is recognized as an authority code in the "uom" namespace
     *       (for example {@code "urn:ogc:def:uom:EPSG::9001"}), the unit is resolved from that code
     *       and the whole text is considered consumed.</li>
     *   <li>Otherwise the text is scanned and split around multiplication and division operators.
     *       Each term is parsed individually and combined with the previously parsed terms.
     *       Parts between parenthesis are parsed by a recursive invocation of this method.</li>
     *   <li>The remaining characters (after the last operator) are parsed as the last term.
     *       If those characters contain spaces, user-specified labels and localized names are tried first
     *       on progressively shorter substrings.</li>
     * </ol>
     *
     * On successful return, the index of {@code position} is set to the index after the last parsed character.
     *
     * @param  symbols   the text to parse. Cannot be null.
     * @param  position  on input, index of the first character to parse.
     *                   On output, index after the last parsed character. Cannot be null.
     * @return the unit parsed from the specified symbols.
     * @throws MeasurementParseException if an authority code is unknown, if a parenthesis is not closed,
     *         or if a term cannot be parsed.
     */
    public Unit<?> parse(CharSequence symbols, final ParsePosition position) throws MeasurementParseException {
        ArgumentChecks.ensureNonNull("symbols",  symbols);
        ArgumentChecks.ensureNonNull("position", position);
        /*
         * Check for authority codes (currently only EPSG and UCUM, but more could be added later).
         * Example: "urn:ogc:def:uom:EPSG::9001". If the unit is not an authority code
         * (which is the most common case), only then we will parse the unit symbols.
         */
        int end   = symbols.length();
        int start = CharSequences.skipLeadingWhitespaces(symbols, position.getIndex(), end);
        if (AUTHORITIES != null) {
            final Map.Entry<Integer, String> entry = DefinitionURI.codeOf("uom", AUTHORITIES, symbols);
            if (entry != null) {
                Unit<?> unit = null;
                NumberFormatException failure = null;
                final String code = entry.getValue();
                switch (entry.getKey()) {
                    case 0: {                   // EPSG
                        try {
                            unit = Units.valueOfEPSG(Integer.parseInt(code));
                        } catch (NumberFormatException e) {
                            failure = e;        // Will be the cause of the exception thrown below.
                        }
                        break;
                    }
                    case 1: {                   // UCUM
                        unit = parse(code);
                        break;
                    }
                }
                if (unit != null) {
                    position.setIndex(end);
                    finish(position);
                    return unit;
                }
                /*
                 * The code is searched in the whole `symbols` string, so the index returned by `lastIndexOf`
                 * is already an absolute position. It must not be added to `start`, otherwise the reported
                 * error position would be shifted when the text has leading spaces or a non-zero start index.
                 * The `start` index is used only as a fallback if the code cannot be located.
                 */
                final int codeIndex = symbols.toString().lastIndexOf(code);
                throw (MeasurementParseException) new MeasurementParseException(
                        Errors.format(Errors.Keys.UnknownUnit_1,
                        Constants.EPSG + Constants.DEFAULT_SEPARATOR + code), symbols,
                        (codeIndex >= 0) ? codeIndex : start).initCause(failure);
            }
        }
        /*
         * Split the unit around the multiplication and division operators and parse each term individually.
         * Note that exponentiation needs to be kept as part of a single unit symbol.
         *
         * The `start` variable is the index of the first character of the next unit term to parse.
         */
        final Operation operation = new Operation(symbols);    // Enumeration value: NOOP, IMPLICIT, MULTIPLY, DIVIDE.
        Unit<?> unit = null;
        boolean hasSpaces = false;
        int i = start;
scan:   for (int n; i < end; i += n) {
            final int c = Character.codePointAt(symbols, i);
            n = Character.charCount(c);
            final int next;
            switch (c) {
                /*
                 * The minus sign can be both part of a number or part of a symbol. If the minus sign is followed
                 * by a digit, then handle it as part of a number, in which case the action is only "continue".
                 * Otherwise handle as part of a symbol, in which case the action is in the default case below.
                 * The intent is to prevent the replacement of Operation.IMPLICIT by Operation.MULTIPLY in symbol
                 * like "(m²⋅s)-1" because we want the "-1" part to be handled as Operation.EXPONENT instead.
                 */
                case '-': {
                    if (i + n < end && Character.isDigit(Character.codePointAt(symbols, i + n))) {
                        continue;
                    }
                    // else fall through.
                }
                /*
                 * For any character that is not an operator or parenthesis, either continue the scanning of
                 * characters or stop it, depending on whether the character is valid for a unit symbol or not.
                 * In the latter case, we consider that we reached the end of a unit symbol.
                 */
                default:  {
                    if (AbstractUnit.isSymbolChar(c)) {
                        if (operation.code == Operation.IMPLICIT) {
                            operation.code =  Operation.MULTIPLY;
                        }
                        continue;
                    }
                    if (Character.isDigit(c) || Characters.isSuperScript(c)) {
                        continue;
                    }
                    if (Character.isSpaceChar(c)) {                         // NOT Character.isWhitespace(int)
                        hasSpaces = true;
                        continue;
                    }
                    break scan;
                }
                /*
                 * Star is for exponentiation in UCUM syntax, but some symbols may use it for unit multiplication.
                 * We interpret the symbol as a multiplication if the characters before or after it seem to be for
                 * a unit symbol.
                 */
                case Style.EXPONENT_OR_MULTIPLY: {
                    final int w = exponentOperator(symbols, i, end);
                    if (w < 0) {
                        next = Operation.MULTIPLY;
                        break;
                    }
                    i += w;
                    // else fall through.
                }
                case Style.EXPONENT: {
                    if (operation.code == Operation.IMPLICIT) {
                        next = Operation.EXPONENT;
                        break;
                    }
                    continue;
                }
                /*
                 * The period is the multiplication operator in UCUM format. According to UCUM there is no ambiguity
                 * with the decimal separator since unit terms should not contain floating point numbers. However
                 * we relax this rule in order to support scale factor of angular units (e.g. π/180).  The period
                 * is interpreted as a decimal separator if there is a decimal digit before and after it.
                 */
                case '.': if (isDecimalSeparator(symbols, i, end)) continue;
                case '×': // Fall through
                case AbstractUnit.MULTIPLY: next = Operation.MULTIPLY; break;
                case '÷':
                case '⁄': // Fraction slash
                case '/':
                case AbstractUnit.DIVIDE: next = Operation.DIVIDE; break;
                /*
                 * If we find an '(' parenthesis, invoke recursively this method for the part inside parenthesis.
                 * The parsing should end at the ')' parenthesis since it is not a valid unit symbol. If we do not
                 * find that closing parenthesis, this will be considered an error.
                 */
                case Style.OPEN: {
                    final int pos = i + Character.charCount(c);
                    final ParsePosition sub = new ParsePosition(pos);
                    final Unit<?> term = parse(symbols, sub);
                    i = CharSequences.skipLeadingWhitespaces(symbols, sub.getIndex(), end);
                    if (i >= end || Character.codePointAt(symbols, i) != Style.CLOSE) {
                        throw new MeasurementParseException(Errors.format(Errors.Keys.NonEquilibratedParenthesis_2,
                                    symbols.subSequence(start, i), Style.CLOSE), symbols, start);
                    }
                    unit = operation.apply(unit, term, pos);
                    operation.code = Operation.IMPLICIT;    // Default operation if there is no × or / symbol after parenthesis.
                    start = i + (n = 1);                    // Skip the closing parenthesis (`n` is also the loop increment).
                    continue;
                }
            }
            /*
             * We reach this point only if we found some operator (division, multiplication or exponentiation).
             * If the operator has been found between two digits, we consider it as part of the
             * term. For example, "m2/3" is considered as a single term where "2/3" is the exponent.
             */
            if (i > start && i+n < end
                    && Character.isDigit(Character.codePointBefore(symbols, i))
                    && Character.isDigit(Character.codePointAt(symbols, i+n)))
            {
                continue;
            }
            /*
             * At this point, we have either a first unit to parse (NOOP), or a multiplication or division to apply
             * between the previously parsed units and the next unit to parse. A special case is IMPLICIT, which is
             * a multiplication without explicit × symbol after the parenthesis. The implicit multiplication can be
             * overridden by an explicit × or / symbol, which is what happened if we reach this point (tip: look in
             * the above `switch` statement all cases that end with `break`, not `break scan` or `continue`).
             */
            if (operation.code != Operation.IMPLICIT) {
                unit = operation.apply(unit, parseTerm(symbols, start, i, operation), start);
            }
            hasSpaces = false;
            operation.code = next;
            start = i + n;
        }
        /*
         * At this point we either found an unrecognized character or reached the end of string. We will
         * parse the remaining characters as a unit and apply the pending unit operation (multiplication
         * or division). But before, we need to check if the parsing should stop at the first whitespace.
         * This verification assumes that spaces are allowed only in labels specified by the label(…)
         * method and in resource bundles, not in labels specified by AbstractUnit.alternate(String).
         */
        Unit<?> component = null;
        if (hasSpaces) {
            end = i;
            start = CharSequences.skipLeadingWhitespaces(symbols, start, i);
search:     while ((i = CharSequences.skipTrailingWhitespaces(symbols, start, i)) > start) {
                final String uom = symbols.subSequence(start, i).toString();
                if ((component = labelToUnit.get(uom)) != null) break;
                if ((component =        fromName(uom)) != null) break;
                // No match: move backward to the previous whitespace and retry with the shorter substring.
                int j=i, c;
                do {
                    c = Character.codePointBefore(symbols, j);
                    j -= Character.charCount(c);
                    if (j <= start) break search;
                } while (!Character.isWhitespace(c));
                /*
                 * Really use Character.isWhitespace(c) above, not Character.isSpaceChar(c), because we want
                 * to exclude non-breaking spaces.   This block should be the only place in UnitFormat class
                 * where we use isWhitespace(c) instead of isSpaceChar(c).
                 */
                i = j;                  // Will become the index of first space after search loop completion.
            }
            /*
             * At this point we did not find any user-specified label or localized name matching the substring.
             * Assume that the parsing should stop at the first space, on the basis that spaces are not allowed
             * in unit symbols. We make an exception if we detect that the part before the first space contains
             * digits (not allowed in unit symbols either), in which case the substring may be something like
             * "100 feet".
             */
            if (hasDigit(symbols, start, i)) {
                i = end;                        // Restore the full length (until the first illegal character).
            }
        }
        if (!(operation.finished = (component != null))) {
            component = parseTerm(symbols, start, i, operation);            // May set `operation.finished` flag.
        }
        if (operation.finished) {
            finish(position);           // For preventing interpretation of "degree minute" as "degree × minute".
        }
        unit = operation.apply(unit, component, start);
        position.setIndex(i);
        return unit;
    }