bool WFXMLScanner::scanStartTagNS()

in src/xercesc/internal/WFXMLScanner.cpp [1064:1503]


bool WFXMLScanner::scanStartTagNS(bool& gotData)
{
    //  Assume we will still have data until proven otherwise. It will only
    //  ever be false if this is the root and its empty.
    gotData = true;

    //  The current position is after the open bracket, so we need to read in
    //  in the element name.
    int colonPosition;
    if (!fReaderMgr.getQName(fQNameBuf, &colonPosition))
    {        
        if (fQNameBuf.isEmpty())
            emitError(XMLErrs::ExpectedElementName);
        else
            emitError(XMLErrs::InvalidElementName, fQNameBuf.getRawBuffer());
        fReaderMgr.skipToChar(chOpenAngle);
        return false;
    }

    // See if its the root element
    const bool isRoot = fElemStack.isEmpty();

	// Assume it won't be an empty tag
    bool isEmpty = false;

    // Skip any whitespace after the name
    fReaderMgr.skipPastSpaces();

    //  Lets try to look up the element
    const XMLCh* qnameRawBuf = fQNameBuf.getRawBuffer();
    XMLElementDecl* elemDecl = fElementLookup->get(qnameRawBuf);

    if (!elemDecl) {
        if (!XMLString::compareNString(qnameRawBuf, XMLUni::fgXMLNSColonString, 6))
            emitError(XMLErrs::NoXMLNSAsElementPrefix, qnameRawBuf);

        if (fElementIndex < fElements->size()) {
            elemDecl = fElements->elementAt(fElementIndex);
        }
        else {
            elemDecl = new (fGrammarPoolMemoryManager) DTDElementDecl
            (
                fGrammarPoolMemoryManager
            );
            fElements->addElement(elemDecl);
        }

        elemDecl->setElementName(qnameRawBuf, fEmptyNamespaceId);
        fElementLookup->put((void*)elemDecl->getFullName(), elemDecl);
        fElementIndex++;
    }

    // Expand the element stack and add the new element
    fElemStack.addLevel(elemDecl, fReaderMgr.getCurrentReaderNum());

    // reset NS attribute list
    fAttrNSList->removeAllElements();

    // We loop until we either see a /> or >, handling attribute/value
    // pairs until we get there.
    XMLSize_t attCount = 0;
    XMLSize_t curAttListSize = fAttrList->size();
    while (true)
    {
        // And get the next non-space character
        XMLCh nextCh = fReaderMgr.peekNextChar();

        //  If the next character is not a slash or closed angle bracket,
        //  then it must be whitespace, since whitespace is required
        //  between the end of the last attribute and the name of the next
        //  one.
        if (attCount)
        {
            if ((nextCh != chForwardSlash) && (nextCh != chCloseAngle))
            {
                bool bFoundSpace;
                fReaderMgr.skipPastSpaces(bFoundSpace);
                if (!bFoundSpace)
                {
                    // Emit the error but keep on going
                    emitError(XMLErrs::ExpectedWhitespace);
                }
                // Ok, peek another char
                nextCh = fReaderMgr.peekNextChar();
            }
        }

        //  Ok, here we first check for any of the special case characters.
        //  If its not one, then we do the normal case processing, which
        //  assumes that we've hit an attribute value, Otherwise, we do all
        //  the special case checks.
        if (!fReaderMgr.getCurrentReader()->isSpecialStartTagChar(nextCh))
        {
            //  Assume its going to be an attribute, so get a name from
            //  the input.
            int colonPosition;
            if (!fReaderMgr.getQName(fAttNameBuf, &colonPosition))
            {                
                if (fAttNameBuf.isEmpty())
                    emitError(XMLErrs::ExpectedAttrName);
                else
                    emitError(XMLErrs::InvalidAttrName, fAttNameBuf.getRawBuffer()); 
                fReaderMgr.skipPastChar(chCloseAngle);
                return false;
            }

            // And next must be an equal sign
            if (!scanEq())
            {
                static const XMLCh tmpList[] =
                {
                    chSingleQuote, chDoubleQuote, chCloseAngle
                    , chOpenAngle, chForwardSlash, chNull
                };

                emitError(XMLErrs::ExpectedEqSign);

                //  Try to sync back up by skipping forward until we either
                //  hit something meaningful.
                const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);

                if ((chFound == chCloseAngle) || (chFound == chForwardSlash))
                {
                    // Jump back to top for normal processing of these
                    continue;
                }
                else if ((chFound == chSingleQuote)
                      ||  (chFound == chDoubleQuote)
                      ||  fReaderMgr.getCurrentReader()->isWhitespace(chFound))
                {
                    // Just fall through assuming that the value is to follow
                }
                else if (chFound == chOpenAngle)
                {
                    // Assume a malformed tag and that new one is starting
                    emitError(XMLErrs::UnterminatedStartTag, qnameRawBuf);
                    return false;
                }
                else
                {
                    // Something went really wrong
                    return false;
                }
            }

            //  See if this attribute is declared more than one for this element.
            const XMLCh* attNameRawBuf = fAttNameBuf.getRawBuffer();
            XMLSize_t attNameHash = XMLString::hash(attNameRawBuf, 109);
            if (attCount) {

                for (XMLSize_t k=0; k < attCount; k++) {

                    if (fAttrNameHashList->elementAt(k) == attNameHash) {
                        if (XMLString::equals(
                                fAttrList->elementAt(k)->getQName()
                                , attNameRawBuf))
                        {
                            emitError
                            (
                                XMLErrs::AttrAlreadyUsedInSTag
                                , attNameRawBuf
                                , qnameRawBuf
                            );
                            break;
                        }
                    }
                }
            }

            //  Skip any whitespace before the value and then scan the att
            //  value. This will come back normalized with entity refs and
            //  char refs expanded.
            fReaderMgr.skipPastSpaces();
            if (!scanAttValue(attNameRawBuf, fAttValueBuf))
            {
                static const XMLCh tmpList[] =
                {
                    chCloseAngle, chOpenAngle, chForwardSlash, chNull
                };

                emitError(XMLErrs::ExpectedAttrValue);

                //  It failed, so lets try to get synced back up. We skip
                //  forward until we find some whitespace or one of the
                //  chars in our list.
                const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);

                if ((chFound == chCloseAngle)
                ||  (chFound == chForwardSlash)
                ||  fReaderMgr.getCurrentReader()->isWhitespace(chFound))
                {
                    //  Just fall through and process this attribute, though
                    //  the value will be "".
                }
                else if (chFound == chOpenAngle)
                {
                    // Assume a malformed tag and that new one is starting
                    emitError(XMLErrs::UnterminatedStartTag, qnameRawBuf);
                    return false;
                }
                else
                {
                    // Something went really wrong
                    return false;
                }
            }

            //  Add this attribute to the attribute list that we use to
            //  pass them to the handler. We reuse its existing elements
            //  but expand it as required.
            const XMLCh* attValueRawBuf = fAttValueBuf.getRawBuffer();
            XMLAttr* curAtt = 0;
            if (attCount >= curAttListSize)
            {
                curAtt = new (fMemoryManager) XMLAttr
                (
                    fEmptyNamespaceId
                    , attNameRawBuf
                    , attValueRawBuf
                    , XMLAttDef::CData
                    , true
                    , fMemoryManager
                );
                fAttrList->addElement(curAtt);
                fAttrNameHashList->addElement(attNameHash);
            }
            else
            {
                curAtt = fAttrList->elementAt(attCount);
                curAtt->set
                (
                    fEmptyNamespaceId
                    , attNameRawBuf
                    , attValueRawBuf
                );
                curAtt->setSpecified(true);
                fAttrNameHashList->setElementAt(attNameHash, attCount);
            }

            // Map prefix to namespace
            const XMLCh* attPrefix = curAtt->getPrefix();
            const XMLCh* attLocalName = curAtt->getName();
            const XMLCh* namespaceURI = fAttValueBuf.getRawBuffer();

            if (attPrefix && *attPrefix) {
                if (XMLString::equals(attPrefix, XMLUni::fgXMLString)) {
                    curAtt->setURIId(fXMLNamespaceId);
                }
                else if (XMLString::equals(attPrefix, XMLUni::fgXMLNSString)) {

                    if (XMLString::equals(attLocalName, XMLUni::fgXMLNSString))
                        emitError(XMLErrs::NoUseOfxmlnsAsPrefix);
                    else if (XMLString::equals(attLocalName, XMLUni::fgXMLString)) {
                        if (!XMLString::equals(namespaceURI, XMLUni::fgXMLURIName))
                            emitError(XMLErrs::PrefixXMLNotMatchXMLURI);
                    }

                    if (!namespaceURI)
                        emitError(XMLErrs::NoEmptyStrNamespace, attNameRawBuf);
                    else if(!*namespaceURI && fXMLVersion == XMLReader::XMLV1_0)
                        emitError(XMLErrs::NoEmptyStrNamespace, attNameRawBuf);

                    fElemStack.addPrefix
                    (
                        attLocalName
                        , fURIStringPool->addOrFind(namespaceURI)
                    );
                    curAtt->setURIId(fXMLNSNamespaceId);
                }
                else {
                    fAttrNSList->addElement(curAtt);
                }
            }
            else {
                if (XMLString::equals(XMLUni::fgXMLNSString, attLocalName)) {

                    if (XMLString::equals(namespaceURI, XMLUni::fgXMLNSURIName))
                        emitError(XMLErrs::NoUseOfxmlnsURI);
                    else if (XMLString::equals(namespaceURI, XMLUni::fgXMLURIName))
                        emitError(XMLErrs::XMLURINotMatchXMLPrefix);

                    fElemStack.addPrefix
                    (
                        XMLUni::fgZeroLenString
                        , fURIStringPool->addOrFind(namespaceURI)
                    );
                }
            }

            // increment attribute count
            attCount++;

            // And jump back to the top of the loop
            continue;
        }

        //  It was some special case character so do all of the checks and
        //  deal with it.
        if (!nextCh)
            ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);

        if (nextCh == chForwardSlash)
        {
            fReaderMgr.getNextChar();
            isEmpty = true;
            if (!fReaderMgr.skippedChar(chCloseAngle))
                emitError(XMLErrs::UnterminatedStartTag, qnameRawBuf);
            break;
        }
        else if (nextCh == chCloseAngle)
        {
            fReaderMgr.getNextChar();
            break;
        }
        else if (nextCh == chOpenAngle)
        {
            //  Check for this one specially, since its going to be common
            //  and it is kind of auto-recovering since we've already hit the
            //  next open bracket, which is what we would have seeked to (and
            //  skipped this whole tag.)
            emitError(XMLErrs::UnterminatedStartTag, qnameRawBuf);
            break;
        }
        else if ((nextCh == chSingleQuote) || (nextCh == chDoubleQuote))
        {
            //  Check for this one specially, which is probably a missing
            //  attribute name, e.g. ="value". Just issue expected name
            //  error and eat the quoted string, then jump back to the
            //  top again.
            emitError(XMLErrs::ExpectedAttrName);
            fReaderMgr.getNextChar();
            fReaderMgr.skipQuotedString(nextCh);
            fReaderMgr.skipPastSpaces();
            continue;
        }
    }

    // Handle provided attributes that we did not map their prefixes
    for (unsigned int i=0; i < fAttrNSList->size(); i++) {

        XMLAttr* providedAttr = fAttrNSList->elementAt(i);

        providedAttr->setURIId
        (
	        resolvePrefix
            (
                providedAttr->getPrefix(),
                ElemStack::Mode_Attribute
            )
        );
    }

    if(attCount) {

        //
        // Decide if to use hash table to do duplicate checking
        //
        bool toUseHashTable = false;
        setAttrDupChkRegistry(attCount, toUseHashTable);

        // check for duplicate namespace attributes:
        // by checking for qualified names with the same local part and with prefixes 
        // which have been bound to namespace names that are identical. 
        XMLAttr* loopAttr;
        XMLAttr* curAtt;
        for (unsigned int attrIndex=0; attrIndex < attCount-1; attrIndex++) {
            loopAttr = fAttrList->elementAt(attrIndex);

            if (!toUseHashTable)
            {
                for (unsigned int curAttrIndex = attrIndex+1; curAttrIndex < attCount; curAttrIndex++) {
                    curAtt = fAttrList->elementAt(curAttrIndex);
                    if (curAtt->getURIId() == loopAttr->getURIId() &&
                        XMLString::equals(curAtt->getName(), loopAttr->getName())) {
                        emitError
                            ( 
                            XMLErrs::AttrAlreadyUsedInSTag
                            , curAtt->getName()
                            , elemDecl->getFullName()
                            );
                    }
                }
            }
            else 
            {
                if (fAttrDupChkRegistry->containsKey((void*)loopAttr->getName(), loopAttr->getURIId()))
                {
                    emitError
                    ( 
                    XMLErrs::AttrAlreadyUsedInSTag
                    , loopAttr->getName()
                    , elemDecl->getFullName()
                    );
                }

                fAttrDupChkRegistry->put((void*)loopAttr->getName(), loopAttr->getURIId(), loopAttr);
            }
        }  
    }

    // Resolve the qualified name to a URI.
    unsigned int uriId = resolvePrefix
    (
        elemDecl->getElementName()->getPrefix()
        , ElemStack::Mode_Element
    );

    // Now we can update the element stack
    fElemStack.setCurrentURI(uriId);

    // Tell the document handler about this start tag
    if (fDocHandler)
    {
        fDocHandler->startElement
        (
            *elemDecl
            , uriId
            , elemDecl->getElementName()->getPrefix()
            , *fAttrList
            , attCount
            , isEmpty
            , isRoot
        );
    }

    //  If empty, validate content right now if we are validating and then
    //  pop the element stack top. Else, we have to update the current stack
    //  top's namespace mapping elements.
    if (isEmpty)
    {
        // Pop the element stack back off since it'll never be used now
        fElemStack.popTop();

        // If the elem stack is empty, then it was an empty root
        if (isRoot)
            gotData = false;
    }

    return true;
}