in src/org/apache/xerces/impl/xpath/XPath.java [1325:1894]
/**
 * Tokenizes the XPath expression held in {@code data[currentOffset, endOffset)}
 * and appends the recognized tokens to {@code tokens}.
 *
 * <p>The scanner follows the lexical rules of XPath 1.0 section 3.7. The local
 * flag {@code starIsMultiplyOperator} records whether the previous token forces
 * a following {@code '*'} to be a MultiplyOperator and a following NCName to be
 * an OperatorName ({@code and}, {@code or}, {@code mod}, {@code div}).
 *
 * <p>Names are compared against the keyword symbols ({@code fAndSymbol},
 * {@code fChildSymbol}, ...) by reference; this presumably relies on both sides
 * being interned through the same symbol table -- confirm against the field
 * initializers, which are not visible in this block.
 *
 * <p>OperatorName, NodeType and AxisName are only recognized for unprefixed
 * names. A prefixed QName such as {@code p:and} or {@code p:child::} is never
 * one of these keywords: in operator or axis position it makes the scan fail,
 * and before {@code '('} it is reported as a FunctionName.
 *
 * @param symbolTable   table used to intern literals, prefixes and local names
 *                      before they are stored in {@code tokens}
 * @param tokens        token list that receives the scanned tokens
 * @param data          text containing the expression
 * @param currentOffset index of the first character to scan
 * @param endOffset     index one past the last character to scan
 * @return {@code true} if the whole range was tokenized; {@code false} if a
 *         construct could not be recognized (lone {@code ':'} or {@code '!'},
 *         unterminated literal, missing name, unexpected character, a name
 *         where an operator is required, an unknown axis name, ...)
 * @throws XPathException with key {@code "c-general-xpath"} when a single
 *         {@code '.'} is followed by something other than end of input,
 *         {@code '/'}, {@code '|'}, or whitespace leading to end of input or
 *         {@code '|'}; the helpers called here (not visible in this block) may
 *         presumably raise it as well
 */
public boolean scanExpr(SymbolTable symbolTable,
                        XPath.Tokens tokens, String data,
                        int currentOffset, int endOffset)
    throws XPathException {

    int nameOffset;
    String nameHandle, prefixHandle;
    boolean starIsMultiplyOperator = false;
    int ch;

    while (currentOffset != endOffset) {
        ch = data.charAt(currentOffset);
        //
        // [39] ExprWhitespace ::= S
        //
        while (ch == ' ' || ch == 0x0A || ch == 0x09 || ch == 0x0D) {
            if (++currentOffset == endOffset) {
                break;
            }
            ch = data.charAt(currentOffset);
        }
        if (currentOffset == endOffset) {
            break;
        }
        //
        // [28] ExprToken ::= '(' | ')' | '[' | ']' | '.' | '..' | '@' | ',' | '::'
        //                  | NameTest | NodeType | Operator | FunctionName
        //                  | AxisName | Literal | Number | VariableReference
        //
        byte chartype = (ch >= 0x80) ? CHARTYPE_NONASCII : fASCIICharMap[ch];
        switch (chartype) {
            case CHARTYPE_OPEN_PAREN: // '('
                addToken(tokens, XPath.Tokens.EXPRTOKEN_OPEN_PAREN);
                starIsMultiplyOperator = false;
                currentOffset++;
                break;
            case CHARTYPE_CLOSE_PAREN: // ')'
                addToken(tokens, XPath.Tokens.EXPRTOKEN_CLOSE_PAREN);
                starIsMultiplyOperator = true;
                currentOffset++;
                break;
            case CHARTYPE_OPEN_BRACKET: // '['
                addToken(tokens, XPath.Tokens.EXPRTOKEN_OPEN_BRACKET);
                starIsMultiplyOperator = false;
                currentOffset++;
                break;
            case CHARTYPE_CLOSE_BRACKET: // ']'
                addToken(tokens, XPath.Tokens.EXPRTOKEN_CLOSE_BRACKET);
                starIsMultiplyOperator = true;
                currentOffset++;
                break;
            //
            // [30] Number ::= Digits ('.' Digits?)? | '.' Digits
            //                                         ^^^^^^^^^^
            //
            case CHARTYPE_PERIOD: // '.', '..' or '.' Digits
                if (currentOffset + 1 == endOffset) {
                    // A lone '.' at the very end of the expression.
                    addToken(tokens, XPath.Tokens.EXPRTOKEN_PERIOD);
                    starIsMultiplyOperator = true;
                    currentOffset++;
                    break;
                }
                ch = data.charAt(currentOffset + 1);
                if (ch == '.') { // '..'
                    addToken(tokens, XPath.Tokens.EXPRTOKEN_DOUBLE_PERIOD);
                    starIsMultiplyOperator = true;
                    currentOffset += 2;
                } else if (ch >= '0' && ch <= '9') { // '.' Digits
                    addToken(tokens, XPath.Tokens.EXPRTOKEN_NUMBER);
                    starIsMultiplyOperator = true;
                    currentOffset = scanNumber(tokens, data, endOffset, currentOffset);
                } else if (ch == '/' || ch == '|') {
                    // Consume only the '.'; the operator is scanned on the next pass.
                    addToken(tokens, XPath.Tokens.EXPRTOKEN_PERIOD);
                    starIsMultiplyOperator = true;
                    currentOffset++;
                } else if (ch == ' ' || ch == 0x0A || ch == 0x09 || ch == 0x0D) {
                    // This is legal only if the next token is non-existent or '|'.
                    do {
                        if (++currentOffset == endOffset) {
                            break;
                        }
                        ch = data.charAt(currentOffset);
                    } while (ch == ' ' || ch == 0x0A || ch == 0x09 || ch == 0x0D);
                    if (currentOffset == endOffset || ch == '|') {
                        addToken(tokens, XPath.Tokens.EXPRTOKEN_PERIOD);
                        starIsMultiplyOperator = true;
                        break;
                    }
                    throw new XPathException ("c-general-xpath");
                } else { // '.' followed by anything else
                    throw new XPathException ("c-general-xpath");
                }
                break;
            case CHARTYPE_ATSIGN: // '@'
                addToken(tokens, XPath.Tokens.EXPRTOKEN_ATSIGN);
                starIsMultiplyOperator = false;
                currentOffset++;
                break;
            case CHARTYPE_COMMA: // ','
                addToken(tokens, XPath.Tokens.EXPRTOKEN_COMMA);
                starIsMultiplyOperator = false;
                currentOffset++;
                break;
            case CHARTYPE_COLON: // '::'
                if (++currentOffset == endOffset) {
                    return false; // single ':' at the end of the expression
                }
                ch = data.charAt(currentOffset);
                if (ch != ':') {
                    return false; // single ':' is not a token on its own
                }
                addToken(tokens, XPath.Tokens.EXPRTOKEN_DOUBLE_COLON);
                starIsMultiplyOperator = false;
                currentOffset++;
                break;
            case CHARTYPE_SLASH: // '/' and '//'
                if (++currentOffset == endOffset) {
                    addToken(tokens, XPath.Tokens.EXPRTOKEN_OPERATOR_SLASH);
                    starIsMultiplyOperator = false;
                    break;
                }
                ch = data.charAt(currentOffset);
                if (ch == '/') { // '//'
                    addToken(tokens, XPath.Tokens.EXPRTOKEN_OPERATOR_DOUBLE_SLASH);
                    starIsMultiplyOperator = false;
                    currentOffset++;
                } else {
                    addToken(tokens, XPath.Tokens.EXPRTOKEN_OPERATOR_SLASH);
                    starIsMultiplyOperator = false;
                }
                break;
            case CHARTYPE_UNION: // '|'
                addToken(tokens, XPath.Tokens.EXPRTOKEN_OPERATOR_UNION);
                starIsMultiplyOperator = false;
                currentOffset++;
                break;
            case CHARTYPE_PLUS: // '+'
                addToken(tokens, XPath.Tokens.EXPRTOKEN_OPERATOR_PLUS);
                starIsMultiplyOperator = false;
                currentOffset++;
                break;
            case CHARTYPE_MINUS: // '-'
                addToken(tokens, XPath.Tokens.EXPRTOKEN_OPERATOR_MINUS);
                starIsMultiplyOperator = false;
                currentOffset++;
                break;
            case CHARTYPE_EQUAL: // '='
                addToken(tokens, XPath.Tokens.EXPRTOKEN_OPERATOR_EQUAL);
                starIsMultiplyOperator = false;
                currentOffset++;
                break;
            case CHARTYPE_EXCLAMATION: // '!='
                if (++currentOffset == endOffset) {
                    return false; // '!' at the end of the expression
                }
                ch = data.charAt(currentOffset);
                if (ch != '=') {
                    return false; // '!' is only valid as part of '!='
                }
                addToken(tokens, XPath.Tokens.EXPRTOKEN_OPERATOR_NOT_EQUAL);
                starIsMultiplyOperator = false;
                currentOffset++;
                break;
            case CHARTYPE_LESS: // '<' and '<='
                if (++currentOffset == endOffset) {
                    addToken(tokens, XPath.Tokens.EXPRTOKEN_OPERATOR_LESS);
                    starIsMultiplyOperator = false;
                    break;
                }
                ch = data.charAt(currentOffset);
                if (ch == '=') { // '<='
                    addToken(tokens, XPath.Tokens.EXPRTOKEN_OPERATOR_LESS_EQUAL);
                    starIsMultiplyOperator = false;
                    currentOffset++;
                } else {
                    addToken(tokens, XPath.Tokens.EXPRTOKEN_OPERATOR_LESS);
                    starIsMultiplyOperator = false;
                }
                break;
            case CHARTYPE_GREATER: // '>' and '>='
                if (++currentOffset == endOffset) {
                    addToken(tokens, XPath.Tokens.EXPRTOKEN_OPERATOR_GREATER);
                    starIsMultiplyOperator = false;
                    break;
                }
                ch = data.charAt(currentOffset);
                if (ch == '=') { // '>='
                    addToken(tokens, XPath.Tokens.EXPRTOKEN_OPERATOR_GREATER_EQUAL);
                    starIsMultiplyOperator = false;
                    currentOffset++;
                } else {
                    addToken(tokens, XPath.Tokens.EXPRTOKEN_OPERATOR_GREATER);
                    starIsMultiplyOperator = false;
                }
                break;
            //
            // [29] Literal ::= '"' [^"]* '"' | "'" [^']* "'"
            //
            case CHARTYPE_QUOTE: { // '\"' or '\''
                int qchar = ch;
                if (++currentOffset == endOffset) {
                    return false; // opening quote is the last character
                }
                ch = data.charAt(currentOffset);
                int litOffset = currentOffset;
                while (ch != qchar) {
                    if (++currentOffset == endOffset) {
                        return false; // literal is never closed
                    }
                    ch = data.charAt(currentOffset);
                }
                addToken(tokens, XPath.Tokens.EXPRTOKEN_LITERAL);
                starIsMultiplyOperator = true;
                // The literal's content (without quotes) follows the token code.
                tokens.addToken(symbolTable.addSymbol(data.substring(litOffset, currentOffset)));
                currentOffset++; // step over the closing quote
                break;
            }
            //
            // [30] Number ::= Digits ('.' Digits?)? | '.' Digits
            // [31] Digits ::= [0-9]+
            //
            case CHARTYPE_DIGIT:
                addToken(tokens, XPath.Tokens.EXPRTOKEN_NUMBER);
                starIsMultiplyOperator = true;
                currentOffset = scanNumber(tokens, data, endOffset, currentOffset);
                break;
            //
            // [36] VariableReference ::= '$' QName
            //
            case CHARTYPE_DOLLAR:
                if (++currentOffset == endOffset) {
                    return false; // '$' with no name after it
                }
                nameOffset = currentOffset;
                currentOffset = scanNCName(data, endOffset, currentOffset);
                if (currentOffset == nameOffset) {
                    return false; // '$' not followed by an NCName
                }
                ch = (currentOffset < endOffset) ? data.charAt(currentOffset) : -1;
                nameHandle = symbolTable.addSymbol(data.substring(nameOffset, currentOffset));
                if (ch != ':') {
                    prefixHandle = XMLSymbols.EMPTY_STRING;
                } else {
                    // What was scanned so far is the prefix; scan the local part.
                    prefixHandle = nameHandle;
                    if (++currentOffset == endOffset) {
                        return false; // "prefix:" with nothing after the colon
                    }
                    nameOffset = currentOffset;
                    currentOffset = scanNCName(data, endOffset, currentOffset);
                    if (currentOffset == nameOffset) {
                        return false; // "prefix:" not followed by an NCName
                    }
                    nameHandle = symbolTable.addSymbol(data.substring(nameOffset, currentOffset));
                }
                addToken(tokens, XPath.Tokens.EXPRTOKEN_VARIABLE_REFERENCE);
                starIsMultiplyOperator = true;
                tokens.addToken(prefixHandle);
                tokens.addToken(nameHandle);
                break;
            //
            // [37] NameTest ::= '*' | NCName ':' '*' | QName
            // [34] MultiplyOperator ::= '*'
            //
            case CHARTYPE_STAR: // '*'
                //
                // 3.7 Lexical Structure
                //
                //  If there is a preceding token and the preceding token is not one of @, ::, (, [, , or
                //  an Operator, then a * must be recognized as a MultiplyOperator.
                //
                // Otherwise, the token must not be recognized as a MultiplyOperator.
                //
                if (starIsMultiplyOperator) {
                    addToken(tokens, XPath.Tokens.EXPRTOKEN_OPERATOR_MULT);
                    starIsMultiplyOperator = false;
                } else {
                    addToken(tokens, XPath.Tokens.EXPRTOKEN_NAMETEST_ANY);
                    starIsMultiplyOperator = true;
                }
                currentOffset++;
                break;
            //
            // NCName, QName and non-terminals
            //
            case CHARTYPE_NONASCII: // possibly a valid non-ascii 'Letter' (BaseChar | Ideographic)
            case CHARTYPE_LETTER:
            case CHARTYPE_UNDERSCORE: {
                //
                // 3.7 Lexical Structure
                //
                //  If there is a preceding token and the preceding token is not one of @, ::, (, [, , or
                //  an Operator, then an NCName must be recognized as an OperatorName.
                //
                //  If the character following an NCName (possibly after intervening ExprWhitespace) is (,
                //  then the token must be recognized as a NodeType or a FunctionName.
                //
                //  If the two characters following an NCName (possibly after intervening ExprWhitespace)
                //  are ::, then the token must be recognized as an AxisName.
                //
                //  Otherwise, the token must not be recognized as an OperatorName, a NodeType, a
                //  FunctionName, or an AxisName.
                //
                // [33] OperatorName ::= 'and' | 'or' | 'mod' | 'div'
                // [38] NodeType ::= 'comment' | 'text' | 'processing-instruction' | 'node'
                // [35] FunctionName ::= QName - NodeType
                // [6] AxisName ::= (see above)
                //
                // [37] NameTest ::= '*' | NCName ':' '*' | QName
                // [5] NCName ::= (Letter | '_') (NCNameChar)*
                // [?] NCNameChar ::= Letter | Digit | '.' | '-' | '_'  (ascii subset of 'NCNameChar')
                // [?] QName ::= (NCName ':')? NCName
                // [?] Letter ::= [A-Za-z]                              (ascii subset of 'Letter')
                // [?] Digit ::= [0-9]                                  (ascii subset of 'Digit')
                //
                nameOffset = currentOffset;
                currentOffset = scanNCName(data, endOffset, currentOffset);
                if (currentOffset == nameOffset) {
                    return false; // character cannot start an NCName
                }
                ch = (currentOffset < endOffset) ? data.charAt(currentOffset) : -1;
                nameHandle = symbolTable.addSymbol(data.substring(nameOffset, currentOffset));
                boolean isNameTestNCName = false; // saw "NCName:*"
                boolean isAxisName = false;       // saw "NCName::" with no whitespace
                boolean isPrefixedName = false;   // saw "prefix:local"
                prefixHandle = XMLSymbols.EMPTY_STRING;
                if (ch == ':') {
                    if (++currentOffset == endOffset) {
                        return false; // "name:" at the end of the expression
                    }
                    ch = data.charAt(currentOffset);
                    if (ch == '*') {
                        // At end of input ch keeps the value '*', which none of the checks below match.
                        if (++currentOffset < endOffset) {
                            ch = data.charAt(currentOffset);
                        }
                        isNameTestNCName = true;
                    } else if (ch == ':') {
                        // At end of input ch keeps the value ':'; isAxisName already decides the outcome.
                        if (++currentOffset < endOffset) {
                            ch = data.charAt(currentOffset);
                        }
                        isAxisName = true;
                    } else {
                        // What was scanned so far is the prefix; scan the local part.
                        prefixHandle = nameHandle;
                        isPrefixedName = true;
                        nameOffset = currentOffset;
                        currentOffset = scanNCName(data, endOffset, currentOffset);
                        if (currentOffset == nameOffset) {
                            return false; // "prefix:" not followed by an NCName
                        }
                        ch = (currentOffset < endOffset) ? data.charAt(currentOffset) : -1;
                        nameHandle = symbolTable.addSymbol(data.substring(nameOffset, currentOffset));
                    }
                }
                //
                // [39] ExprWhitespace ::= S
                //
                while (ch == ' ' || ch == 0x0A || ch == 0x09 || ch == 0x0D) {
                    if (++currentOffset == endOffset) {
                        break;
                    }
                    ch = data.charAt(currentOffset);
                }
                //
                //  If there is a preceding token and the preceding token is not one of @, ::, (, [, , or
                //  an Operator, then an NCName must be recognized as an OperatorName.
                //
                if (starIsMultiplyOperator) {
                    if (isPrefixedName) {
                        // An OperatorName is a bare keyword; "p:and" is a QName, not an operator.
                        return false;
                    }
                    if (nameHandle == fAndSymbol) {
                        addToken(tokens, XPath.Tokens.EXPRTOKEN_OPERATOR_AND);
                        starIsMultiplyOperator = false;
                    } else if (nameHandle == fOrSymbol) {
                        addToken(tokens, XPath.Tokens.EXPRTOKEN_OPERATOR_OR);
                        starIsMultiplyOperator = false;
                    } else if (nameHandle == fModSymbol) {
                        addToken(tokens, XPath.Tokens.EXPRTOKEN_OPERATOR_MOD);
                        starIsMultiplyOperator = false;
                    } else if (nameHandle == fDivSymbol) {
                        addToken(tokens, XPath.Tokens.EXPRTOKEN_OPERATOR_DIV);
                        starIsMultiplyOperator = false;
                    } else {
                        return false; // a name where an OperatorName is required
                    }
                    if (isNameTestNCName) {
                        return false; // NCName:* where an OperatorName is required
                    } else if (isAxisName) {
                        return false; // AxisName:: where an OperatorName is required
                    }
                    break;
                }
                //
                //  If the character following an NCName (possibly after intervening ExprWhitespace) is (,
                //  then the token must be recognized as a NodeType or a FunctionName.
                //
                if (ch == '(' && !isNameTestNCName && !isAxisName) {
                    // NodeType keywords are unprefixed; "p:text(" is a call to function p:text.
                    if (!isPrefixedName && nameHandle == fCommentSymbol) {
                        addToken(tokens, XPath.Tokens.EXPRTOKEN_NODETYPE_COMMENT);
                    } else if (!isPrefixedName && nameHandle == fTextSymbol) {
                        addToken(tokens, XPath.Tokens.EXPRTOKEN_NODETYPE_TEXT);
                    } else if (!isPrefixedName && nameHandle == fPISymbol) {
                        addToken(tokens, XPath.Tokens.EXPRTOKEN_NODETYPE_PI);
                    } else if (!isPrefixedName && nameHandle == fNodeSymbol) {
                        addToken(tokens, XPath.Tokens.EXPRTOKEN_NODETYPE_NODE);
                    } else {
                        addToken(tokens, XPath.Tokens.EXPRTOKEN_FUNCTION_NAME);
                        tokens.addToken(prefixHandle);
                        tokens.addToken(nameHandle);
                    }
                    // The '(' is consumed here together with the name.
                    addToken(tokens, XPath.Tokens.EXPRTOKEN_OPEN_PAREN);
                    starIsMultiplyOperator = false;
                    currentOffset++;
                    break;
                }
                //
                //  If the two characters following an NCName (possibly after intervening ExprWhitespace)
                //  are ::, then the token must be recognized as an AxisName.
                //
                if (isAxisName ||
                    (ch == ':' && currentOffset + 1 < endOffset &&
                     data.charAt(currentOffset + 1) == ':')) {
                    if (isPrefixedName) {
                        // An AxisName is a bare keyword; "p:child::" is not an axis specifier.
                        return false;
                    }
                    if (nameHandle == fAncestorSymbol) {
                        addToken(tokens, XPath.Tokens.EXPRTOKEN_AXISNAME_ANCESTOR);
                    } else if (nameHandle == fAncestorOrSelfSymbol) {
                        addToken(tokens, XPath.Tokens.EXPRTOKEN_AXISNAME_ANCESTOR_OR_SELF);
                    } else if (nameHandle == fAttributeSymbol) {
                        addToken(tokens, XPath.Tokens.EXPRTOKEN_AXISNAME_ATTRIBUTE);
                    } else if (nameHandle == fChildSymbol) {
                        addToken(tokens, XPath.Tokens.EXPRTOKEN_AXISNAME_CHILD);
                    } else if (nameHandle == fDescendantSymbol) {
                        addToken(tokens, XPath.Tokens.EXPRTOKEN_AXISNAME_DESCENDANT);
                    } else if (nameHandle == fDescendantOrSelfSymbol) {
                        addToken(tokens, XPath.Tokens.EXPRTOKEN_AXISNAME_DESCENDANT_OR_SELF);
                    } else if (nameHandle == fFollowingSymbol) {
                        addToken(tokens, XPath.Tokens.EXPRTOKEN_AXISNAME_FOLLOWING);
                    } else if (nameHandle == fFollowingSiblingSymbol) {
                        addToken(tokens, XPath.Tokens.EXPRTOKEN_AXISNAME_FOLLOWING_SIBLING);
                    } else if (nameHandle == fNamespaceSymbol) {
                        addToken(tokens, XPath.Tokens.EXPRTOKEN_AXISNAME_NAMESPACE);
                    } else if (nameHandle == fParentSymbol) {
                        addToken(tokens, XPath.Tokens.EXPRTOKEN_AXISNAME_PARENT);
                    } else if (nameHandle == fPrecedingSymbol) {
                        addToken(tokens, XPath.Tokens.EXPRTOKEN_AXISNAME_PRECEDING);
                    } else if (nameHandle == fPrecedingSiblingSymbol) {
                        addToken(tokens, XPath.Tokens.EXPRTOKEN_AXISNAME_PRECEDING_SIBLING);
                    } else if (nameHandle == fSelfSymbol) {
                        addToken(tokens, XPath.Tokens.EXPRTOKEN_AXISNAME_SELF);
                    } else {
                        return false; // name before '::' is not a known axis
                    }
                    if (isNameTestNCName) {
                        return false; // "NCName:* ::" where "AxisName ::" is required
                    }
                    addToken(tokens, XPath.Tokens.EXPRTOKEN_DOUBLE_COLON);
                    starIsMultiplyOperator = false;
                    if (!isAxisName) {
                        // The '::' was only peeked at (it followed whitespace); consume it now.
                        currentOffset += 2;
                    }
                    break;
                }
                //
                //  Otherwise, the token must not be recognized as an OperatorName, a NodeType, a
                //  FunctionName, or an AxisName.
                //
                if (isNameTestNCName) {
                    addToken(tokens, XPath.Tokens.EXPRTOKEN_NAMETEST_NAMESPACE);
                    starIsMultiplyOperator = true;
                    tokens.addToken(nameHandle);
                } else {
                    addToken(tokens, XPath.Tokens.EXPRTOKEN_NAMETEST_QNAME);
                    starIsMultiplyOperator = true;
                    tokens.addToken(prefixHandle);
                    tokens.addToken(nameHandle);
                }
                break;
            }
            default:
                // CHARTYPE_INVALID or CHARTYPE_OTHER
                // We're not expecting to find either of these in a valid expression.
                return false;
        }
    }
    if (XPath.Tokens.DUMP_TOKENS) {
        tokens.dumpTokens();
    }
    return true;
}