in src/xercesc/validators/schema/identity/XercesXPath.cpp [791:1344]
//
//  Scans the XPath expression stored in data[currentOffset, endOffset) and
//  appends its tokens to 'tokens'.
//
//  Every recognised token is reported through addToken(). Tokens that carry a
//  payload (literals, variable references, function names and name tests) are
//  followed by string pool handles added directly with tokens->addElement().
//  Names and literal values are interned with fStringPool->addOrFind(); a
//  prefix handle of -1 means that no prefix was scanned. Numbers are delegated
//  to scanNumber().
//
//  'starIsMultiplyOperator' implements the disambiguation rule of XPath 1.0
//  section 3.7: it is true when the previously emitted token allows a
//  following '*' to be a MultiplyOperator and a following NCName to be an
//  OperatorName.
//
//  @param data           the characters of the expression
//  @param currentOffset  index of the first character to scan
//  @param endOffset      index one past the last character to scan
//  @param tokens         receives the tokens; its memory manager is used for
//                        the scratch buffer and for any exception thrown
//  @return true when the whole range was scanned; false when the scanner gives
//          up on a construct (the "REVISIT" exits below). Tokens emitted before
//          such an exit are left in 'tokens'.
//  @throws XPathException (XMLExcepts::XPath_InvalidChar) when a character
//          cannot start a token, or when '.' is followed by a character this
//          scanner does not accept after it.
//
bool XPathScanner::scanExpression(const XMLCh* const data,
                                  XMLSize_t currentOffset,
                                  const XMLSize_t endOffset,
                                  ValueVectorOf<int>* const tokens) {

    bool      starIsMultiplyOperator = false;
    XMLSize_t nameOffset = 0;
    int       nameHandle = -1;
    int       prefixHandle = -1;
    XMLCh     ch;
    // Scratch buffer used to null-terminate names/literals before interning.
    XMLBuffer dataBuffer(128, tokens->getMemoryManager());

    while (currentOffset != endOffset) {

        // Skip leading whitespace; stop scanning if only whitespace remains.
        ch = data[currentOffset];
        while (XMLChar1_0::isWhitespace(ch)) {
            if (++currentOffset == endOffset) {
                break;
            }
            ch = data[currentOffset];
        }
        if (currentOffset == endOffset) {
            break;
        }

        //
        // [28] ExprToken ::= '(' | ')' | '[' | ']' | '.' | '..' | '@' | ',' | '::'
        //                  | NameTest | NodeType | Operator | FunctionName
        //                  | AxisName | Literal | Number | VariableReference
        //
        // Only characters below 0x80 are classified through the lookup table;
        // everything else is treated as a potential name start character.
        XMLByte chartype = (ch >= 0x80) ? (XMLByte)CHARTYPE_NONASCII : fASCIICharMap[ch];

        switch (chartype) {
        case CHARTYPE_OPEN_PAREN:       // '('
            addToken(tokens, XercesXPath::EXPRTOKEN_OPEN_PAREN);
            starIsMultiplyOperator = false;
            ++currentOffset;
            break;
        case CHARTYPE_CLOSE_PAREN:      // ')'
            addToken(tokens, XercesXPath::EXPRTOKEN_CLOSE_PAREN);
            starIsMultiplyOperator = true;
            ++currentOffset;
            break;
        case CHARTYPE_OPEN_BRACKET:     // '['
            addToken(tokens, XercesXPath::EXPRTOKEN_OPEN_BRACKET);
            starIsMultiplyOperator = false;
            ++currentOffset;
            break;
        case CHARTYPE_CLOSE_BRACKET:    // ']'
            addToken(tokens, XercesXPath::EXPRTOKEN_CLOSE_BRACKET);
            starIsMultiplyOperator = true;
            ++currentOffset;
            break;
        //
        // [30] Number ::= Digits ('.' Digits?)? | '.' Digits
        //                                         ^^^^^^^^^^
        //
        case CHARTYPE_PERIOD:           // '.', '..' or '.' Digits
            if (currentOffset + 1 == endOffset) {
                // A lone '.' at the very end of the expression.
                addToken(tokens, XercesXPath::EXPRTOKEN_PERIOD);
                starIsMultiplyOperator = true;
                currentOffset++;
                break;
            }
            // Look at the character after the '.' to decide what it starts.
            ch = data[currentOffset + 1];
            if (ch == chPeriod) {            // '..'
                addToken(tokens, XercesXPath::EXPRTOKEN_DOUBLE_PERIOD);
                starIsMultiplyOperator = true;
                currentOffset += 2;
            } else if (ch >= chDigit_0 && ch <= chDigit_9) {
                addToken(tokens, XercesXPath::EXPRTOKEN_NUMBER);
                starIsMultiplyOperator = true;
                currentOffset = scanNumber(data, endOffset, currentOffset, tokens);
            } else if (ch == chForwardSlash) {
                // Only the '.' is consumed; the '/' is scanned on the next pass.
                addToken(tokens, XercesXPath::EXPRTOKEN_PERIOD);
                starIsMultiplyOperator = true;
                currentOffset++;
            } else if (ch == chPipe) {       // '|'
                // Only the '.' is consumed; the '|' is scanned on the next pass.
                addToken(tokens, XercesXPath::EXPRTOKEN_PERIOD);
                starIsMultiplyOperator = true;
                currentOffset++;
            } else if (XMLChar1_0::isWhitespace(ch)) {
                // '.' followed by whitespace is only accepted when the next
                // significant thing is the end of the expression, '|' or '/'.
                do {
                    if (++currentOffset == endOffset)
                        break;
                    ch = data[currentOffset];
                } while (XMLChar1_0::isWhitespace(ch));

                if (currentOffset == endOffset || ch == chPipe || ch == chForwardSlash) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_PERIOD);
                    starIsMultiplyOperator = true;
                    break;
                }

                // Anything else is rejected exactly like the same character
                // directly following the '.' (see the branch below). Without
                // this, the '.' would be dropped without emitting a token and
                // ". foo" would be scanned as if it were "foo".
                XMLCh str[2]= {ch, 0 };
                ThrowXMLwithMemMgr1(XPathException, XMLExcepts::XPath_InvalidChar, str, tokens->getMemoryManager());
            } else {
                XMLCh str[2]= {ch, 0 };
                ThrowXMLwithMemMgr1(XPathException, XMLExcepts::XPath_InvalidChar, str, tokens->getMemoryManager());
            }
            break;
        case CHARTYPE_ATSIGN:           // '@'
            addToken(tokens, XercesXPath::EXPRTOKEN_ATSIGN);
            starIsMultiplyOperator = false;
            ++currentOffset;
            break;
        case CHARTYPE_COMMA:            // ','
            addToken(tokens, XercesXPath::EXPRTOKEN_COMMA);
            starIsMultiplyOperator = false;
            ++currentOffset;
            break;
        case CHARTYPE_COLON:            // '::'
            // A single ':' is not a token on its own.
            if (++currentOffset == endOffset) {
                return false; // REVISIT
            }
            ch = data[currentOffset];
            if (ch != chColon) {
                return false; // REVISIT
            }
            addToken(tokens, XercesXPath::EXPRTOKEN_DOUBLE_COLON);
            starIsMultiplyOperator = false;
            ++currentOffset;
            break;
        case CHARTYPE_SLASH:            // '/' and '//'
            if (++currentOffset == endOffset) {
                addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_SLASH);
                starIsMultiplyOperator = false;
                break;
            }
            ch = data[currentOffset];
            if (ch == chForwardSlash) { // '//'
                addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_DOUBLE_SLASH);
                starIsMultiplyOperator = false;
                ++currentOffset;
            } else {
                // The character after '/' is left for the next pass.
                addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_SLASH);
                starIsMultiplyOperator = false;
            }
            break;
        case CHARTYPE_UNION:            // '|'
            addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_UNION);
            starIsMultiplyOperator = false;
            ++currentOffset;
            break;
        case CHARTYPE_PLUS:             // '+'
            addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_PLUS);
            starIsMultiplyOperator = false;
            ++currentOffset;
            break;
        case CHARTYPE_MINUS:            // '-'
            addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_MINUS);
            starIsMultiplyOperator = false;
            ++currentOffset;
            break;
        case CHARTYPE_EQUAL:            // '='
            addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_EQUAL);
            starIsMultiplyOperator = false;
            ++currentOffset;
            break;
        case CHARTYPE_EXCLAMATION:      // '!='
            // '!' is only valid as the first half of '!='.
            if (++currentOffset == endOffset) {
                return false; // REVISIT
            }
            ch = data[currentOffset];
            if (ch != chEqual) {
                return false; // REVISIT
            }
            addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_NOT_EQUAL);
            starIsMultiplyOperator = false;
            ++currentOffset;
            break;
        case CHARTYPE_LESS:             // '<' and '<='
            if (++currentOffset == endOffset) {
                addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_LESS);
                starIsMultiplyOperator = false;
                break;
            }
            ch = data[currentOffset];
            if (ch == chEqual) {        // '<='
                addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_LESS_EQUAL);
                starIsMultiplyOperator = false;
                ++currentOffset;
            } else {
                addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_LESS);
                starIsMultiplyOperator = false;
            }
            break;
        case CHARTYPE_GREATER:          // '>' and '>='
            if (++currentOffset == endOffset) {
                addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_GREATER);
                starIsMultiplyOperator = false;
                break;
            }
            ch = data[currentOffset];
            if (ch == chEqual) {        // '>='
                addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_GREATER_EQUAL);
                starIsMultiplyOperator = false;
                ++currentOffset;
            } else {
                addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_GREATER);
                starIsMultiplyOperator = false;
            }
            break;
        //
        // [29] Literal ::= '"' [^"]* '"' | "'" [^']* "'"
        //
        case CHARTYPE_QUOTE:            // '\"' or '\''
        {
            // Remember which quote opened the literal; only the same
            // character closes it.
            XMLCh qchar = ch;
            if (++currentOffset == endOffset) {
                return false; // REVISIT
            }
            ch = data[currentOffset];
            XMLSize_t litOffset = currentOffset;
            while (ch != qchar) {
                // Unterminated literal: give up.
                if (++currentOffset == endOffset) {
                    return false; // REVISIT
                }
                ch = data[currentOffset];
            }
            addToken(tokens, XercesXPath::EXPRTOKEN_LITERAL);
            starIsMultiplyOperator = true;
            // The payload is the text between the quotes, interned in the pool.
            dataBuffer.set(data + litOffset, currentOffset - litOffset);
            tokens->addElement(fStringPool->addOrFind(dataBuffer.getRawBuffer()));
            ++currentOffset;            // step over the closing quote
            break;
        }
        //
        // [30] Number ::= Digits ('.' Digits?)? | '.' Digits
        // [31] Digits ::= [0-9]+
        //
        case CHARTYPE_DIGIT:
            addToken(tokens, XercesXPath::EXPRTOKEN_NUMBER);
            starIsMultiplyOperator = true;
            currentOffset = scanNumber(data, endOffset, currentOffset, tokens);
            break;
        //
        // [36] VariableReference ::= '$' QName
        //
        case CHARTYPE_DOLLAR:
            if (++currentOffset == endOffset) {
                return false; // REVISIT
            }
            nameOffset = currentOffset;
            currentOffset = scanNCName(data, endOffset, currentOffset);
            // scanNCName() not advancing means no name was found after '$'.
            if (currentOffset == nameOffset) {
                return false; // REVISIT
            }
            if (currentOffset < endOffset) {
                ch = data[currentOffset];
            }
            else {
                ch = 0;
            }
            dataBuffer.set(data + nameOffset, currentOffset - nameOffset);
            nameHandle = fStringPool->addOrFind(dataBuffer.getRawBuffer());
            prefixHandle = -1;
            if (ch == chColon) {
                // What was scanned so far is the prefix; the local part follows.
                prefixHandle = nameHandle;
                if (++currentOffset == endOffset) {
                    return false; // REVISIT
                }
                nameOffset = currentOffset;
                currentOffset = scanNCName(data, endOffset, currentOffset);
                if (currentOffset == nameOffset) {
                    return false; // REVISIT
                }
                dataBuffer.set(data + nameOffset, currentOffset - nameOffset);
                nameHandle = fStringPool->addOrFind(dataBuffer.getRawBuffer());
            }
            addToken(tokens, XercesXPath::EXPRTOKEN_VARIABLE_REFERENCE);
            starIsMultiplyOperator = true;
            tokens->addElement(prefixHandle);
            tokens->addElement(nameHandle);
            break;
        //
        // [37] NameTest ::= '*' | NCName ':' '*' | QName
        // [34] MultiplyOperator ::= '*'
        //
        case CHARTYPE_STAR:             // '*'
            //
            // 3.7 Lexical Structure
            //
            //  If there is a preceding token and the preceding token is not one of @, ::, (, [, , or
            //  an Operator, then a * must be recognized as a MultiplyOperator.
            //
            // Otherwise, the token must not be recognized as a MultiplyOperator.
            //
            if (starIsMultiplyOperator) {
                addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_MULT);
                starIsMultiplyOperator = false;
            } else {
                addToken(tokens, XercesXPath::EXPRTOKEN_NAMETEST_ANY);
                starIsMultiplyOperator = true;
            }
            ++currentOffset;
            break;
        //
        // NCName, QName and non-terminals
        //
        case CHARTYPE_NONASCII: // possibly a valid non-ascii 'Letter' (BaseChar | Ideographic)
        case CHARTYPE_LETTER:
        case CHARTYPE_UNDERSCORE:
        {
            //
            // 3.7 Lexical Structure
            //
            //  If there is a preceding token and the preceding token is not one of @, ::, (, [, , or
            //  an Operator, then an NCName must be recognized as an OperatorName.
            //
            //  If the character following an NCName (possibly after intervening ExprWhitespace) is (,
            //  then the token must be recognized as a NodeType or a FunctionName.
            //
            //  If the two characters following an NCName (possibly after intervening ExprWhitespace)
            //  are ::, then the token must be recognized as an AxisName.
            //
            //  Otherwise, the token must not be recognized as an OperatorName, a NodeType, a
            //  FunctionName, or an AxisName.
            //
            // [33] OperatorName ::= 'and' | 'or' | 'mod' | 'div'
            // [38] NodeType ::= 'comment' | 'text' | 'processing-instruction' | 'node'
            // [35] FunctionName ::= QName - NodeType
            // [6] AxisName ::= (see above)
            //
            // [37] NameTest ::= '*' | NCName ':' '*' | QName
            // [5] NCName ::= (Letter | '_') (NCNameChar)*
            // [?] NCNameChar ::= Letter | Digit | '.' | '-' | '_'  (ascii subset of 'NCNameChar')
            // [?] QName ::= (NCName ':')? NCName
            // [?] Letter ::= [A-Za-z]                              (ascii subset of 'Letter')
            // [?] Digit ::= [0-9]                                  (ascii subset of 'Digit')
            //
            nameOffset = currentOffset;
            currentOffset = scanNCName(data, endOffset, currentOffset);
            // scanNCName() not advancing means the character does not start a name.
            if (currentOffset == nameOffset) {
                return false; // REVISIT
            }
            if (currentOffset < endOffset) {
                ch = data[currentOffset];
            }
            else {
                ch = 0;
            }
            dataBuffer.set(data + nameOffset, currentOffset - nameOffset);
            nameHandle = fStringPool->addOrFind(dataBuffer.getRawBuffer());

            bool isNameTestNCName = false;  // saw "NCName:*"
            bool isAxisName = false;        // saw "NCName::" with no whitespace
            prefixHandle = -1;
            if (ch == chColon) {
                if (++currentOffset == endOffset) {
                    return false; // REVISIT
                }
                ch = data[currentOffset];
                if (ch == chAsterisk) {
                    // "NCName:*"; peek at the following character if there is one.
                    if (++currentOffset < endOffset) {
                        ch = data[currentOffset];
                    }
                    isNameTestNCName = true;
                } else if (ch == chColon) {
                    // "NCName::"; both colons are consumed here.
                    if (++currentOffset < endOffset) {
                        ch = data[currentOffset];
                    }
                    isAxisName = true;
                } else {
                    // "prefix:local"; the first name becomes the prefix.
                    prefixHandle = nameHandle;
                    nameOffset = currentOffset;
                    currentOffset = scanNCName(data, endOffset, currentOffset);
                    if (currentOffset == nameOffset) {
                        return false; // REVISIT
                    }
                    if (currentOffset < endOffset) {
                        ch = data[currentOffset];
                    }
                    else {
                        ch = 0;
                    }
                    dataBuffer.set(data + nameOffset, currentOffset - nameOffset);
                    nameHandle = fStringPool->addOrFind(dataBuffer.getRawBuffer());
                }
            }
            //
            // [39] ExprWhitespace ::= S
            //
            while (XMLChar1_0::isWhitespace(ch)) {
                if (++currentOffset == endOffset) {
                    break;
                }
                ch = data[currentOffset];
            }
            //
            //  If there is a preceding token and the preceding token is not one of @, ::, (, [, , or
            //  an Operator, then an NCName must be recognized as an OperatorName.
            //
            if (starIsMultiplyOperator) {
                if (nameHandle == fAndSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_AND);
                    starIsMultiplyOperator = false;
                } else if (nameHandle == fOrSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_OR);
                    starIsMultiplyOperator = false;
                } else if (nameHandle == fModSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_MOD);
                    starIsMultiplyOperator = false;
                } else if (nameHandle == fDivSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_DIV);
                    starIsMultiplyOperator = false;
                } else {
                    return false; // REVISIT
                }
                if (isNameTestNCName) {
                    return false; // REVISIT - NCName:* where an OperatorName is required
                } else if (isAxisName) {
                    return false; // REVISIT - AxisName:: where an OperatorName is required
                }
                break;
            }
            //
            //  If the character following an NCName (possibly after intervening ExprWhitespace) is (,
            //  then the token must be recognized as a NodeType or a FunctionName.
            //
            if (ch == chOpenParen && !isNameTestNCName && !isAxisName) {
                if (nameHandle == fCommentSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_NODETYPE_COMMENT);
                } else if (nameHandle == fTextSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_NODETYPE_TEXT);
                } else if (nameHandle == fPISymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_NODETYPE_PI);
                } else if (nameHandle == fNodeSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_NODETYPE_NODE);
                } else {
                    addToken(tokens, XercesXPath::EXPRTOKEN_FUNCTION_NAME);
                    tokens->addElement(prefixHandle);
                    tokens->addElement(nameHandle);
                }
                // The '(' is consumed together with the name.
                addToken(tokens, XercesXPath::EXPRTOKEN_OPEN_PAREN);
                starIsMultiplyOperator = false;
                ++currentOffset;
                break;
            }
            //
            //  If the two characters following an NCName (possibly after intervening ExprWhitespace)
            //  are ::, then the token must be recognized as an AxisName.
            //
            if (isAxisName ||
                (ch == chColon && currentOffset + 1 < endOffset &&
                 data[currentOffset + 1] == chColon)) {
                if (nameHandle == fAncestorSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_AXISNAME_ANCESTOR);
                } else if (nameHandle == fAncestorOrSelfSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_AXISNAME_ANCESTOR_OR_SELF);
                } else if (nameHandle == fAttributeSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_AXISNAME_ATTRIBUTE);
                } else if (nameHandle == fChildSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_AXISNAME_CHILD);
                } else if (nameHandle == fDescendantSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_AXISNAME_DESCENDANT);
                } else if (nameHandle == fDescendantOrSelfSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_AXISNAME_DESCENDANT_OR_SELF);
                } else if (nameHandle == fFollowingSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_AXISNAME_FOLLOWING);
                } else if (nameHandle == fFollowingSiblingSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_AXISNAME_FOLLOWING_SIBLING);
                } else if (nameHandle == fNamespaceSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_AXISNAME_NAMESPACE);
                } else if (nameHandle == fParentSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_AXISNAME_PARENT);
                } else if (nameHandle == fPrecedingSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_AXISNAME_PRECEDING);
                } else if (nameHandle == fPrecedingSiblingSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_AXISNAME_PRECEDING_SIBLING);
                } else if (nameHandle == fSelfSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_AXISNAME_SELF);
                } else {
                    return false; // REVISIT
                }
                if (isNameTestNCName) {
                    return false; // REVISIT - "NCName:* ::" where "AxisName ::" is required
                }
                addToken(tokens, XercesXPath::EXPRTOKEN_DOUBLE_COLON);
                starIsMultiplyOperator = false;
                // When the '::' was found after whitespace it has only been
                // peeked at, so it still has to be consumed here.
                if (!isAxisName) {
                    currentOffset += 2;
                }
                break;
            }
            //
            //  Otherwise, the token must not be recognized as an OperatorName, a NodeType, a
            //  FunctionName, or an AxisName.
            //
            if (isNameTestNCName) {
                addToken(tokens, XercesXPath::EXPRTOKEN_NAMETEST_NAMESPACE);
                tokens->addElement(nameHandle);
            } else {
                addToken(tokens, XercesXPath::EXPRTOKEN_NAMETEST_QNAME);
                tokens->addElement(prefixHandle);
                tokens->addElement(nameHandle);
            }
            starIsMultiplyOperator = true;
            break;
        }
        default:
        {
            // The character cannot start any token known to this scanner.
            XMLCh str[2]= {ch, 0 };
            ThrowXMLwithMemMgr1(XPathException, XMLExcepts::XPath_InvalidChar, str, tokens->getMemoryManager());
            break;
        }
        }
    }

    return true;
}