protected RangeToken parseCharacterClass()

in src/main/java/org/apache/xmlbeans/impl/regex/RegexParser.java [788:902]


    protected RangeToken parseCharacterClass(boolean useNrange) throws ParseException {
        this.setContext(S_INBRACKETS);
        this.next();                            // '['
        boolean nrange = false;
        RangeToken base = null;
        RangeToken tok;
        if (this.read() == T_CHAR && this.chardata == '^') {
            nrange = true;
            this.next();                        // '^'
            if (useNrange) {
                tok = Token.createNRange();
            } else {
                base = Token.createRange();
                base.addRange(0, Token.UTF16_MAX);
                tok = Token.createRange();
            }
        } else {
            tok = Token.createRange();
        }
        int type;
        boolean firstloop = true;
        while ((type = this.read()) != T_EOF) {
            if (type == T_CHAR && this.chardata == ']' && !firstloop)
                break;
            firstloop = false;
            int c = this.chardata;
            boolean end = false;
            if (type == T_BACKSOLIDUS) {
                switch (c) {
                  case 'd':  case 'D':
                  case 'w':  case 'W':
                  case 's':  case 'S':
                    tok.mergeRanges(this.getTokenForShorthand(c));
                    end = true;
                    break;

                  case 'i':  case 'I':
                  case 'c':  case 'C':
                    c = this.processCIinCharacterClass(tok, c);
                    if (c < 0)  end = true;
                    break;

                  case 'p':
                  case 'P':
                    int pstart = this.offset;
                    RangeToken tok2 = this.processBacksolidus_pP(c);
                    if (tok2 == null)  throw this.ex("parser.atom.5", pstart);
                    tok.mergeRanges(tok2);
                    end = true;
                    break;

                  default:
                    c = this.decodeEscaped();
                } // \ + c
            } // backsolidus
                                                // POSIX Character class such as [:alnum:]
            else if (type == T_POSIX_CHARCLASS_START) {
                int nameend = this.regex.indexOf(':', this.offset);
                if (nameend < 0) throw this.ex("parser.cc.1", this.offset);
                boolean positive = true;
                if (this.regex.charAt(this.offset) == '^') {
                    this.offset ++;
                    positive = false;
                }
                String name = this.regex.substring(this.offset, nameend);
                RangeToken range = Token.getRange(name, positive,
                                                  this.isSet(RegularExpression.XMLSCHEMA_MODE));
                if (range == null)  throw this.ex("parser.cc.3", this.offset);
                tok.mergeRanges(range);
                end = true;
                if (nameend+1 >= this.regexlen || this.regex.charAt(nameend+1) != ']')
                    throw this.ex("parser.cc.1", nameend);
                this.offset = nameend+2;
            }
            this.next();
            if (!end) {                         // if not shorthands...
                if (this.read() != T_CHAR || this.chardata != '-') { // Here is no '-'.
                    tok.addRange(c, c);
                } else {
                    this.next(); // Skips '-'
                    if ((type = this.read()) == T_EOF)  throw this.ex("parser.cc.2", this.offset);
                    if (type == T_CHAR && this.chardata == ']') {
                        tok.addRange(c, c);
                        tok.addRange('-', '-');
                    } else {
                        int rangeend = this.chardata;
                        if (type == T_BACKSOLIDUS)
                            rangeend = this.decodeEscaped();
                        this.next();
                        tok.addRange(c, rangeend);
                    }
                }
            }
            if (this.isSet(RegularExpression.SPECIAL_COMMA)
                && this.read() == T_CHAR && this.chardata == ',')
                this.next();
        }
        if (this.read() == T_EOF)
            throw this.ex("parser.cc.2", this.offset);
        if (!useNrange && nrange) {
            base.subtractRanges(tok);
            tok = base;
        }
        tok.sortRanges();
        tok.compactRanges();
        //tok.dumpRanges();
        /*
        if (this.isSet(RegularExpression.IGNORE_CASE))
            tok = RangeToken.createCaseInsensitiveToken(tok);
        */
        this.setContext(S_NORMAL);
        this.next();                    // Skips ']'

        return tok;
    }