static void parse()

in endorsed/src/org.apache.sis.storage/main/org/apache/sis/storage/base/LegalSymbols.java [96:273]


    /**
     * Parses the given legal notice and stores the extracted information in the given constraints.
     * The notice is scanned one code point at a time:
     *
     * <ul>
     *   <li>Words found outside brackets that match one of the {@code VALUES} symbols (the comments below
     *       mention "(C)" and "All rights reserved" as examples) are dropped from the text,
     *       and the restriction associated to the symbol is added to the use constraints.</li>
     *   <li>After a copyright symbol, a number between 1800 and 9999 which is followed by a space,
     *       a punctuation or the end of the notice is taken as the copyright year.
     *       It is stored as a citation date.</li>
     *   <li>The remaining text, without trailing spaces and punctuations, is taken as the owner name
     *       and stored as a responsible party having the {@code OWNER} role.</li>
     * </ul>
     *
     * The complete notice is stored as the title of a citation added to the constraints references,
     * unless an existing citation could be updated for the given locale.
     *
     * @param locale       the locale of the notice, or {@code null} if none. When null, existing citations
     *                     and parties are never updated; new ones are always created.
     * @param notice       the legal notice to parse.
     * @param constraints  where to store the restrictions, the citation and the owner.
     */
    static void parse(final Locale locale, final String notice, final DefaultLegalConstraints constraints) {
        final int length = notice.length();
        final var buffer = new StringBuilder(length);
        int     year           = 0;         // The copyright year, or 0 if none.
        int     quoteLevel     = 0;         // Incremented on ( [ « characters, decremented on ) ] » characters.
        boolean isCopyright    = false;     // Whether a word parsed by a previous iteration was "Copyright" or "(C)".
        boolean wasSeparator   = true;      // Whether the character parsed by the previous iteration was a word separator.
        boolean wasPunctuation = true;      // Whether the previous character was a punctuation of Unicode category "other".
        boolean skipNextChars  = true;      // Whether the next spaces and some punctuation characters should be ignored.
parse:  for (int i = 0; i < length;) {
            final int c = notice.codePointAt(i);
            final int n = Character.charCount(c);
            int     quoteChange   = 0;
            boolean isSeparator   = false;
            boolean isPunctuation;
            switch (Character.getType(c)) {
                case Character.INITIAL_QUOTE_PUNCTUATION:
                case Character.START_PUNCTUATION: {
                    quoteChange   = +1;                     //  ( [ «  etc.
                    skipNextChars = false;
                    isPunctuation = false;
                    break;
                }
                case Character.FINAL_QUOTE_PUNCTUATION:
                case Character.END_PUNCTUATION: {
                    quoteChange   = -1;                     //  ) ] »  etc.
                    skipNextChars = false;
                    isPunctuation = false;
                    break;
                }
                case Character.OTHER_PUNCTUATION: {         //  , . : ; / " etc. but not -.
                    isPunctuation = true;
                    isSeparator   = true;
                    break;
                }
                case Character.LINE_SEPARATOR:
                case Character.SPACE_SEPARATOR:
                case Character.PARAGRAPH_SEPARATOR: {
                    isPunctuation = wasPunctuation;         // Spaces do not cancel a previous punctuation.
                    isSeparator   = true;
                    break;
                }
                default: {                                  // Letter, digit, hyphen, etc.
                    skipNextChars = false;
                    isPunctuation = false;
                    break;
                }
            }
            if (wasSeparator && !isSeparator && quoteLevel == 0) {
                /*
                 * Found the beginning of a new word. Ignore texts like "(C)" or "All rights reserved".
                 * Some of those texts are implied by the metadata where the legal notice will be stored.
                 */
                for (final LegalSymbols r : VALUES) {
                    for (final String symbol : r.symbols) {
                        if (notice.regionMatches(true, i, symbol, 0, symbol.length())) {
                            final int after = i + symbol.length();
                            if (after >= length || isSpaceOrPunctuation(notice.codePointAt(after))) {
                                isCopyright |= (r.restriction == Restriction.COPYRIGHT);
                                constraints.getUseConstraints().add(r.restriction);
                                wasPunctuation = true;      // Pretend that "Copyright" was followed by a comma.
                                skipNextChars  = true;      // Ignore spaces and punctuations until the next word.
                                i = after;                  // Skip the "Copyright" (or other) word.
                                continue parse;
                            }
                        }
                    }
                }
                /*
                 * If a copyright notice is followed by digits, assume that those digits are the copyright year.
                 * We require the year to be followed by a space, a punctuation or the end of the notice in order
                 * to reduce the risk of confusion with postal addresses. So this block should accept "John, 1992."
                 * but not "1992-1 Nowhere road".
                 */
                if (isCopyright && wasPunctuation && year == 0 && c >= '0' && c <= '9') {
                    int endOfDigits = i + n;            // After the last digit in sequence.
                    while (endOfDigits < length) {
                        final int d = notice.codePointAt(endOfDigits);
                        if (d < '0' || d > '9') break;
                        endOfDigits++;              // ASCII digits are single `char`: no need for Character.charCount(d).
                    }
                    /*
                     * Verify if the digits are followed by a space or punctuation. The end of the notice is
                     * accepted too, in the same way as for the symbols above. The `endOfToken >= length` check
                     * must be done before `codePointAt(endOfToken)`, otherwise a notice ending with the year
                     * (for example "Copyright 1992") would cause a StringIndexOutOfBoundsException.
                     */
                    final int endOfToken = CharSequences.skipLeadingWhitespaces(notice, endOfDigits, length);
                    final boolean isDelimited = (endOfToken > endOfDigits) || (endOfToken >= length)
                            || isSpaceOrPunctuation(notice.codePointAt(endOfToken));
                    if (isDelimited) try {
                        year = Integer.parseInt(notice.substring(i, endOfDigits));
                        if (year >= 1800 && year <= 9999) {                     // Those limits are arbitrary.
                            skipNextChars = true;
                            i = endOfToken;
                            continue;
                        }
                        year = 0;                                               // Reject as not a copyright year.
                    } catch (NumberFormatException e) {
                        // Too many digits for an integer. `year` is still 0; the digits will be handled as text.
                    }
                }
            }
            /*
             * End of the block that was executed at the beginning of each new word.
             * Following is executed for every character, except if the above block
             * skipped a portion of the input string.
             */
            wasPunctuation = isPunctuation;
            wasSeparator   = isSeparator;
            quoteLevel    += quoteChange;
            if (!skipNextChars && !Character.isIdentifierIgnorable(c)) {
                buffer.appendCodePoint(c);
            }
            i += n;
        }
        /*
         * End of parsing. Omit trailing spaces and some punctuations if any, then store the result.
         * If a `Citation` already exists and could be for the same legal notice in different locales,
         * it will be completed. Otherwise, a new citation will be created.
         */
        int i = buffer.length();
        while (i > 0) {
            final int c = buffer.codePointBefore(i);
            if (!isSpaceOrPunctuation(c)) break;
            i -= Character.charCount(c);
        }
        DefaultCitation citation = null;
        if (locale != null) {
            for (Citation c : constraints.getReferences()) {
                if (c instanceof DefaultCitation) {
                    if (update(c.getTitle(), locale, notice)) {
                        citation = (DefaultCitation) c;             // Set only on success.
                        break;
                    }
                }
            }
        }
        if (citation == null) {
            citation = new DefaultCitation(i18n(locale, notice));
            constraints.getReferences().add(citation);
        }
        if (year != 0) {
            final var date = new DefaultCitationDate(Year.of(year), DateType.valueOf("IN_FORCE"));
            final var dates = citation.getDates();
            if (!dates.contains(date)) {
                dates.add(date);
            }
        }
        /*
         * At this point, the citation has been created and added to the constraints.
         * If a party already exists, try to update the owner's name. This is based
         * on the assumption that `LegalSymbols` is the only code putting 'i18n' in
         * the constraints.
         */
        if (i != 0) {
            buffer.setLength(i);
            final String owner = buffer.toString();
            if (locale != null) {
                for (final var cited : citation.getCitedResponsibleParties()) {
                    if (cited.getRole() == Role.OWNER && cited instanceof DefaultResponsibleParty) {
                        for (final var party : ((DefaultResponsibleParty) cited).getParties()) {
                            final var i18n = party.getName();
                            if (i18n == null) {
                                continue;               // Unnamed party: nothing to compare or to update.
                            }
                            if (CharSequences.startsWith(owner, i18n.toString(Locale.ENGLISH), true)) {
                                /*
                                 * Use case: name is followed by unwanted text because the
                                 * `VALUES` special cases are provided in English only.
                                 * Example: "John Smith, Tous droits réservés."
                                 */
                                return;
                            }
                            if (update(i18n, locale, owner)) {
                                return;
                            }
                        }
                    }
                }
            }
            // Same limitation as MetadataBuilder.party().
            var party = new AbstractParty(i18n(locale, owner), null);
            var cited = new DefaultResponsibleParty(Role.OWNER);
            cited.getParties().add(party);
            citation.getCitedResponsibleParties().add(cited);
        }
    }