Token parseAtom()

in src/org/apache/xerces/impl/xpath/regex/RegexParser.java [749:837]


    /**
     * Parses one atom of the pattern, dispatching on the token type reported
     * by {@code read()}.
     *
     * <ul>
     *   <li>Group-like constructs, character classes, set operations and the
     *       escapes {@code \c \C \i \I \g \X} and {@code \1}..{@code \9} are
     *       handed to their dedicated {@code process...}/{@code parse...}
     *       methods, whose result is returned unchanged.</li>
     *   <li>{@code '.'} yields {@code Token.token_dot}.</li>
     *   <li>Every other backslash escape builds its token here, then calls
     *       {@code next()} once before returning.</li>
     *   <li>A plain character yields a char token; when it is a high
     *       surrogate immediately followed by a low-surrogate character
     *       token, the pair is combined into a string token wrapped by
     *       {@code Token.createParen(..., 0)}.</li>
     * </ul>
     *
     * @return the token built for the atom
     * @throws ParseException with key {@code "parser.atom.4"} when the current
     *         token is a literal {@code ']'}, <code>'{'</code> or
     *         <code>'}'</code>, or is of a type no case handles; with key
     *         {@code "parser.atom.5"} when {@code processBacksolidus_pP}
     *         returns {@code null} for a {@code \p} / {@code \P} escape
     */
    Token parseAtom() throws ParseException {
        final int type = this.read();
        switch (type) {
          case T_LPAREN:
            return this.processParen();
          case T_LPAREN2:                   // '(?:'
            return this.processParen2();
          case T_CONDITION:                 // '(?('
            return this.processCondition();
          case T_MODIFIERS:                 // (?modifiers ... )
            return this.processModifiers();
          case T_INDEPENDENT:
            return this.processIndependent();

          case T_DOT:
            this.next();                    // step over '.'
            return Token.token_dot;

          /*
           * char-class ::= '[' ( '^'? range ','?)+ ']'
           * range ::= '\d' | '\w' | '\s' | category-block | range-char
           *           | range-char '-' range-char
           * range-char ::= '\[' | '\]' | '\\' | '\' [,-efnrtv] | bmp-code | character-2
           * bmp-code ::= '\' 'u' [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]
           */
          case T_LBRACKET:
            return this.parseCharacterClass(true);
          case T_SET_OPERATIONS:
            return this.parseSetOperations();

          case T_BACKSOLIDUS: {
            // this.chardata holds the character that follows the backslash.
            Token escaped;
            switch (this.chardata) {
              case 'd':  case 'D':
              case 'w':  case 'W':
              case 's':  case 'S':
                escaped = this.getTokenForShorthand(this.chardata);
                break;

              case 'e':  case 'f':  case 'n':  case 'r':
              case 't':  case 'u':  case 'v':  case 'x': {
                final int code = this.decodeEscaped();
                if (code >= 0x10000) {
                    // Does not fit in a single char: emit the surrogate pair as a string.
                    escaped = Token.createString(REUtil.decomposeToSurrogates(code));
                } else {
                    escaped = Token.createChar(code);
                }
                break;
              }

              case 'c': return this.processBacksolidus_c();
              case 'C': return this.processBacksolidus_C();
              case 'i': return this.processBacksolidus_i();
              case 'I': return this.processBacksolidus_I();
              case 'g': return this.processBacksolidus_g();
              case 'X': return this.processBacksolidus_X();

              case '1':  case '2':  case '3':  case '4':
              case '5':  case '6':  case '7':  case '8':  case '9':
                return this.processBackreference();

              case 'P':
              case 'p': {
                // Capture the offset first so the error is reported at the escape's start.
                final int escapeStart = this.offset;
                escaped = processBacksolidus_pP(this.chardata);
                if (escaped == null) {
                    throw this.ex("parser.atom.5", escapeStart);
                }
                break;
              }

              default:
                // Any other escaped character stands for itself.
                escaped = Token.createChar(this.chardata);
                break;
            }
            // Shared tail for every escape that built its token in place.
            this.next();
            return escaped;
          }

          case T_CHAR: {
            final int lead = this.chardata;
            if (lead == ']' || lead == '{' || lead == '}') {
                throw this.ex("parser.atom.4", this.offset-1);
            }
            final Token single = Token.createChar(lead);
            this.next();

            // Only look at the following token when 'lead' is a high surrogate;
            // the short-circuit order keeps read() from being called otherwise.
            if (!REUtil.isHighSurrogate(lead)
                || this.read() != T_CHAR
                || !REUtil.isLowSurrogate(this.chardata)) {
                return single;
            }

            final char[] pair = { (char)lead, (char)this.chardata };
            final Token combined = Token.createParen(Token.createString(new String(pair)), 0);
            this.next();
            return combined;
          }

          default:
            throw this.ex("parser.atom.4", this.offset-1);
        }
    }