bool XPathScanner::scanExpression()

in src/xercesc/validators/schema/identity/XercesXPath.cpp [791:1344]


bool XPathScanner::scanExpression(const XMLCh* const data,
                                  XMLSize_t currentOffset,
                                  const XMLSize_t endOffset,
                                  ValueVectorOf<int>* const tokens) {

    bool      starIsMultiplyOperator = false;
    XMLSize_t nameOffset = 0;
    int       nameHandle = -1;
    int       prefixHandle = -1;
    XMLCh     ch;
    XMLBuffer dataBuffer(128, tokens->getMemoryManager());

    while (currentOffset != endOffset) {

        ch = data[currentOffset];

        while (XMLChar1_0::isWhitespace(ch)) {

            if (++currentOffset == endOffset) {
                break;
            }

            ch = data[currentOffset];
        }

        if (currentOffset == endOffset) {
            break;
        }
        //
        // [28] ExprToken ::= '(' | ')' | '[' | ']' | '.' | '..' | '@' | ',' | '::'
        //                  | NameTest | NodeType | Operator | FunctionName
        //                  | AxisName | Literal | Number | VariableReference
        //
        XMLByte chartype = (ch >= 0x80) ? (XMLByte)CHARTYPE_NONASCII : fASCIICharMap[ch];

        switch (chartype) {
        case CHARTYPE_OPEN_PAREN:       // '('
            addToken(tokens, XercesXPath::EXPRTOKEN_OPEN_PAREN);
            starIsMultiplyOperator = false;
            ++currentOffset;
            break;
        case CHARTYPE_CLOSE_PAREN:      // ')'
            addToken(tokens, XercesXPath::EXPRTOKEN_CLOSE_PAREN);
            starIsMultiplyOperator = true;
            ++currentOffset;
            break;
        case CHARTYPE_OPEN_BRACKET:     // '['
            addToken(tokens, XercesXPath::EXPRTOKEN_OPEN_BRACKET);
            starIsMultiplyOperator = false;
            ++currentOffset;
            break;
        case CHARTYPE_CLOSE_BRACKET:    // ']'
            addToken(tokens, XercesXPath::EXPRTOKEN_CLOSE_BRACKET);
            starIsMultiplyOperator = true;
            ++currentOffset;
            break;
                //
                // [30] Number ::= Digits ('.' Digits?)? | '.' Digits
                //                                         ^^^^^^^^^^
                //
        case CHARTYPE_PERIOD:           // '.', '..' or '.' Digits
            if (currentOffset + 1 == endOffset) {
                addToken(tokens, XercesXPath::EXPRTOKEN_PERIOD);
                starIsMultiplyOperator = true;
                currentOffset++;
                break;
            }

            ch = data[currentOffset + 1];

            if (ch == chPeriod) {            // '..'
                addToken(tokens, XercesXPath::EXPRTOKEN_DOUBLE_PERIOD);
                starIsMultiplyOperator = true;
                currentOffset += 2;
            } else if (ch >= chDigit_0 && ch <= chDigit_9) {
                addToken(tokens, XercesXPath::EXPRTOKEN_NUMBER);
                starIsMultiplyOperator = true;
                currentOffset = scanNumber(data, endOffset, currentOffset, tokens);
            } else if (ch == chForwardSlash) {
                addToken(tokens, XercesXPath::EXPRTOKEN_PERIOD);
                starIsMultiplyOperator = true;
                currentOffset++;
            } else if (ch == chPipe) { // '|'
                addToken(tokens, XercesXPath::EXPRTOKEN_PERIOD);
                starIsMultiplyOperator = true;
                currentOffset++;
            } else if (XMLChar1_0::isWhitespace(ch)) {
                do {
                    if (++currentOffset == endOffset)
                        break;

                    ch = data[currentOffset];
                } while (XMLChar1_0::isWhitespace(ch));

                if (currentOffset == endOffset || ch == chPipe || ch == chForwardSlash) {
				    addToken(tokens, XercesXPath::EXPRTOKEN_PERIOD);
                    starIsMultiplyOperator = true;
                    break;
                }
            } else {
                XMLCh str[2]= {ch, 0 };
                ThrowXMLwithMemMgr1(XPathException, XMLExcepts::XPath_InvalidChar, str, tokens->getMemoryManager());
            }

            break;
        case CHARTYPE_ATSIGN:           // '@'
            addToken(tokens, XercesXPath::EXPRTOKEN_ATSIGN);
            starIsMultiplyOperator = false;
            ++currentOffset;
            break;
        case CHARTYPE_COMMA:            // ','
            addToken(tokens, XercesXPath::EXPRTOKEN_COMMA);
            starIsMultiplyOperator = false;
            ++currentOffset;
            break;
        case CHARTYPE_COLON:            // '::'
            if (++currentOffset == endOffset) {
                return false; // REVISIT
            }
            ch = data[currentOffset];

            if (ch != chColon) {
                return false; // REVISIT
            }
            addToken(tokens, XercesXPath::EXPRTOKEN_DOUBLE_COLON);
            starIsMultiplyOperator = false;
            ++currentOffset;
            break;
        case CHARTYPE_SLASH:            // '/' and '//'
            if (++currentOffset == endOffset) {
                addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_SLASH);
                starIsMultiplyOperator = false;
                break;
            }

            ch = data[currentOffset];

            if (ch == chForwardSlash) { // '//'
                addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_DOUBLE_SLASH);
                starIsMultiplyOperator = false;
                ++currentOffset;
            } else {
                addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_SLASH);
                starIsMultiplyOperator = false;
            }
            break;
        case CHARTYPE_UNION:            // '|'
            addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_UNION);
            starIsMultiplyOperator = false;
            ++currentOffset;
            break;
        case CHARTYPE_PLUS:             // '+'
            addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_PLUS);
            starIsMultiplyOperator = false;
            ++currentOffset;
            break;
        case CHARTYPE_MINUS:            // '-'
            addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_MINUS);
            starIsMultiplyOperator = false;
            ++currentOffset;
            break;
        case CHARTYPE_EQUAL:            // '='
            addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_EQUAL);
            starIsMultiplyOperator = false;
            ++currentOffset;
            break;
        case CHARTYPE_EXCLAMATION:      // '!='
            if (++currentOffset == endOffset) {
                return false; // REVISIT
            }

            ch = data[currentOffset];

            if (ch != chEqual) {
                return false; // REVISIT
            }

            addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_NOT_EQUAL);
            starIsMultiplyOperator = false;
            ++currentOffset;
            break;
        case CHARTYPE_LESS: // '<' and '<='
            if (++currentOffset == endOffset) {
                addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_LESS);
                starIsMultiplyOperator = false;
                break;
            }

            ch = data[currentOffset];

            if (ch == chEqual) { // '<='
                addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_LESS_EQUAL);
                starIsMultiplyOperator = false;
                ++currentOffset;
            } else {
                addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_LESS);
                starIsMultiplyOperator = false;
            }
            break;
        case CHARTYPE_GREATER: // '>' and '>='
            if (++currentOffset == endOffset) {
                addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_GREATER);
                starIsMultiplyOperator = false;
                break;
            }

            ch = data[currentOffset];

            if (ch == chEqual) { // '>='
                addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_GREATER_EQUAL);
                starIsMultiplyOperator = false;
                ++currentOffset;
            } else {
                addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_GREATER);
                starIsMultiplyOperator = false;
            }
            break;
        //
        // [29] Literal ::= '"' [^"]* '"' | "'" [^']* "'"
        //
        case CHARTYPE_QUOTE:            // '\"' or '\''
            {
                XMLCh qchar = ch;
                if (++currentOffset == endOffset) {
                    return false; // REVISIT
                }

                ch = data[currentOffset];

                XMLSize_t litOffset = currentOffset;
                while (ch != qchar) {
                    if (++currentOffset == endOffset) {
                        return false; // REVISIT
                    }

                    ch = data[currentOffset];
                }

                addToken(tokens, XercesXPath::EXPRTOKEN_LITERAL);
                starIsMultiplyOperator = true;

                dataBuffer.set(data + litOffset, currentOffset - litOffset);
                tokens->addElement(fStringPool->addOrFind(dataBuffer.getRawBuffer()));
                ++currentOffset;
                break;
            }
        //
        // [30] Number ::= Digits ('.' Digits?)? | '.' Digits
        // [31] Digits ::= [0-9]+
        //
        case CHARTYPE_DIGIT:
            addToken(tokens, XercesXPath::EXPRTOKEN_NUMBER);
            starIsMultiplyOperator = true;
            currentOffset = scanNumber(data, endOffset, currentOffset, tokens);
            break;
        //
        // [36] VariableReference ::= '$' QName
        //
        case CHARTYPE_DOLLAR:
            if (++currentOffset == endOffset) {
                return false; // REVISIT
            }
            nameOffset = currentOffset;
            currentOffset = scanNCName(data, endOffset, currentOffset);

            if (currentOffset == nameOffset) {
                return false; // REVISIT
            }

            if (currentOffset < endOffset) {
                ch = data[currentOffset];
            }
            else {
                ch = 0;
            }

            dataBuffer.set(data + nameOffset, currentOffset - nameOffset);
            nameHandle = fStringPool->addOrFind(dataBuffer.getRawBuffer());
            prefixHandle = -1;

            if (ch == chColon) {

                prefixHandle = nameHandle;
                if (++currentOffset == endOffset) {
                    return false; // REVISIT
                }
                nameOffset = currentOffset;
                currentOffset = scanNCName(data, endOffset, currentOffset);

                if (currentOffset == nameOffset) {
                    return false; // REVISIT
                }

                dataBuffer.set(data + nameOffset, currentOffset - nameOffset);
                nameHandle = fStringPool->addOrFind(dataBuffer.getRawBuffer());
            }
            addToken(tokens, XercesXPath::EXPRTOKEN_VARIABLE_REFERENCE);
            starIsMultiplyOperator = true;
            tokens->addElement(prefixHandle);
            tokens->addElement(nameHandle);
            break;
        //
        // [37] NameTest ::= '*' | NCName ':' '*' | QName
        // [34] MultiplyOperator ::= '*'
        //
        case CHARTYPE_STAR:             // '*'
            //
            // 3.7 Lexical Structure
            //
            //  If there is a preceding token and the preceding token is not one of @, ::, (, [, , or
            //  an Operator, then a * must be recognized as a MultiplyOperator.
            //
            // Otherwise, the token must not be recognized as a MultiplyOperator.
            //
            if (starIsMultiplyOperator) {
                addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_MULT);
                starIsMultiplyOperator = false;
            } else {
                addToken(tokens, XercesXPath::EXPRTOKEN_NAMETEST_ANY);
                starIsMultiplyOperator = true;
            }

            ++currentOffset;
            break;
        //
        // NCName, QName and non-terminals
        //
        case CHARTYPE_NONASCII: // possibly a valid non-ascii 'Letter' (BaseChar | Ideographic)
        case CHARTYPE_LETTER:
        case CHARTYPE_UNDERSCORE:
            {
            //
            // 3.7 Lexical Structure
            //
            //  If there is a preceding token and the preceding token is not one of @, ::, (, [, , or
            //  an Operator, then an NCName must be recognized as an OperatorName.
            //
            //  If the character following an NCName (possibly after intervening ExprWhitespace) is (,
            //  then the token must be recognized as a NodeType or a FunctionName.
            //
            //  If the two characters following an NCName (possibly after intervening ExprWhitespace)
            //  are ::, then the token must be recognized as an AxisName.
            //
            //  Otherwise, the token must not be recognized as an OperatorName, a NodeType, a
            //  FunctionName, or an AxisName.
            //
            // [33] OperatorName ::= 'and' | 'or' | 'mod' | 'div'
            // [38] NodeType ::= 'comment' | 'text' | 'processing-instruction' | 'node'
            // [35] FunctionName ::= QName - NodeType
            // [6] AxisName ::= (see above)
            //
            // [37] NameTest ::= '*' | NCName ':' '*' | QName
            // [5] NCName ::= (Letter | '_') (NCNameChar)*
            // [?] NCNameChar ::= Letter | Digit | '.' | '-' | '_'  (ascii subset of 'NCNameChar')
            // [?] QName ::= (NCName ':')? NCName
            // [?] Letter ::= [A-Za-z]                              (ascii subset of 'Letter')
            // [?] Digit ::= [0-9]                                  (ascii subset of 'Digit')
            //
            nameOffset = currentOffset;
            currentOffset = scanNCName(data, endOffset, currentOffset);
            if (currentOffset == nameOffset) {
                return false; // REVISIT
			}

            if (currentOffset < endOffset) {
                ch = data[currentOffset];
            }
            else {
                ch = 0;
            }

            dataBuffer.set(data + nameOffset, currentOffset - nameOffset);
            nameHandle = fStringPool->addOrFind(dataBuffer.getRawBuffer());

            bool isNameTestNCName = false;
            bool isAxisName = false;
            prefixHandle = -1;

            if (ch == chColon) {

                if (++currentOffset == endOffset) {
                    return false; // REVISIT
                }

                ch = data[currentOffset];

                if (ch == chAsterisk) {
                    if (++currentOffset < endOffset) {
                        ch = data[currentOffset];
                    }

                    isNameTestNCName = true;
                } else if (ch == chColon) {
                    if (++currentOffset < endOffset) {
                        ch = data[currentOffset];
                    }

                    isAxisName = true;
                } else {
                    prefixHandle = nameHandle;
                    nameOffset = currentOffset;
                    currentOffset = scanNCName(data, endOffset, currentOffset);
                    if (currentOffset == nameOffset) {
                        return false; // REVISIT
                    }
                    if (currentOffset < endOffset) {
                        ch = data[currentOffset];
                    }
                    else {
                        ch = 0;
                    }

                    dataBuffer.set(data + nameOffset, currentOffset - nameOffset);
                    nameHandle = fStringPool->addOrFind(dataBuffer.getRawBuffer());
                }
            }
            //
            // [39] ExprWhitespace ::= S
            //
            while (XMLChar1_0::isWhitespace(ch)) {
                if (++currentOffset == endOffset) {
                    break;
                }
                ch = data[currentOffset];
            }

            //
            //  If there is a preceding token and the preceding token is not one of @, ::, (, [, , or
            //  an Operator, then an NCName must be recognized as an OperatorName.
            //
            if (starIsMultiplyOperator) {
                if (nameHandle == fAndSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_AND);
                    starIsMultiplyOperator = false;
                } else if (nameHandle == fOrSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_OR);
                    starIsMultiplyOperator = false;
                } else if (nameHandle == fModSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_MOD);
                    starIsMultiplyOperator = false;
                } else if (nameHandle == fDivSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_OPERATOR_DIV);
                    starIsMultiplyOperator = false;
                } else {
                    return false; // REVISIT
                }

                if (isNameTestNCName) {
                    return false; // REVISIT - NCName:* where an OperatorName is required
                } else if (isAxisName) {
                    return false; // REVISIT - AxisName:: where an OperatorName is required
                }
                break;
            }
            //
            //  If the character following an NCName (possibly after intervening ExprWhitespace) is (,
            //  then the token must be recognized as a NodeType or a FunctionName.
            //
            if (ch == chOpenParen && !isNameTestNCName && !isAxisName) {
                if (nameHandle == fCommentSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_NODETYPE_COMMENT);
                } else if (nameHandle == fTextSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_NODETYPE_TEXT);
                } else if (nameHandle == fPISymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_NODETYPE_PI);
                } else if (nameHandle == fNodeSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_NODETYPE_NODE);
                } else {
                    addToken(tokens, XercesXPath::EXPRTOKEN_FUNCTION_NAME);
                    tokens->addElement(prefixHandle);
                    tokens->addElement(nameHandle);
                }
                addToken(tokens, XercesXPath::EXPRTOKEN_OPEN_PAREN);
                starIsMultiplyOperator = false;
                ++currentOffset;
                break;
            }

            //
            //  If the two characters following an NCName (possibly after intervening ExprWhitespace)
            //  are ::, then the token must be recognized as an AxisName.
            //
            if (isAxisName ||
                (ch == chColon && currentOffset + 1 < endOffset &&
                 data[currentOffset + 1] == chColon)) {

                if (nameHandle == fAncestorSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_AXISNAME_ANCESTOR);
                } else if (nameHandle == fAncestorOrSelfSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_AXISNAME_ANCESTOR_OR_SELF);
                } else if (nameHandle == fAttributeSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_AXISNAME_ATTRIBUTE);
                } else if (nameHandle == fChildSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_AXISNAME_CHILD);
                } else if (nameHandle == fDescendantSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_AXISNAME_DESCENDANT);
                } else if (nameHandle == fDescendantOrSelfSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_AXISNAME_DESCENDANT_OR_SELF);
                } else if (nameHandle == fFollowingSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_AXISNAME_FOLLOWING);
                } else if (nameHandle == fFollowingSiblingSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_AXISNAME_FOLLOWING_SIBLING);
                } else if (nameHandle == fNamespaceSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_AXISNAME_NAMESPACE);
                } else if (nameHandle == fParentSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_AXISNAME_PARENT);
                } else if (nameHandle == fPrecedingSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_AXISNAME_PRECEDING);
                } else if (nameHandle == fPrecedingSiblingSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_AXISNAME_PRECEDING_SIBLING);
                } else if (nameHandle == fSelfSymbol) {
                    addToken(tokens, XercesXPath::EXPRTOKEN_AXISNAME_SELF);
                } else {
                    return false; // REVISIT
                }

                if (isNameTestNCName) {
                    return false; // REVISIT - "NCName:* ::" where "AxisName ::" is required
                }

                addToken(tokens, XercesXPath::EXPRTOKEN_DOUBLE_COLON);
                starIsMultiplyOperator = false;
                if (!isAxisName) {
                    currentOffset += 2;
                }
                break;
            }
            //
            //  Otherwise, the token must not be recognized as an OperatorName, a NodeType, a
            //  FunctionName, or an AxisName.
            //
            if (isNameTestNCName) {
                addToken(tokens, XercesXPath::EXPRTOKEN_NAMETEST_NAMESPACE);
                tokens->addElement(nameHandle);
            } else {
                addToken(tokens, XercesXPath::EXPRTOKEN_NAMETEST_QNAME);
                tokens->addElement(prefixHandle);
                tokens->addElement(nameHandle);
            }

            starIsMultiplyOperator = true;
            break;
            }
        default:
            {
            XMLCh str[2]= {ch, 0 };
            ThrowXMLwithMemMgr1(XPathException, XMLExcepts::XPath_InvalidChar, str, tokens->getMemoryManager());
            break;
            }
        }
    }

    return true;
}