void IGXMLScanner::scanCDSection()

in src/xercesc/internal/IGXMLScanner2.cpp [2630:2839]


void IGXMLScanner::scanCDSection()
{
    static const XMLCh CDataClose[] =
    {
            chCloseSquare, chCloseAngle, chNull
    };

    //  The next character should be the opening square bracket. If not
    //  issue an error, but then try to recover by skipping any whitespace
    //  and checking again.
    if (!fReaderMgr.skippedChar(chOpenSquare))
    {
        emitError(XMLErrs::ExpectedOpenSquareBracket);
        fReaderMgr.skipPastSpaces();

        // If we still don't find it, then give up, else keep going
        if (!fReaderMgr.skippedChar(chOpenSquare))
            return;
    }

    // Get a buffer for this
    XMLBufBid bbCData(&fBufMgr);

    //  We just scan forward until we hit the end of CDATA section sequence.
    //  CDATA is effectively a big escape mechanism so we don't treat markup
    //  characters specially here.
    bool            emittedError = false;
    bool    gotLeadingSurrogate = false;
    const ElemStack::StackElem* topElem = fElemStack.topElement();

    // Get the character data opts for the current element
    XMLElementDecl::CharDataOpts charOpts = XMLElementDecl::AllCharData;
    if(fGrammar->getGrammarType() == Grammar::SchemaGrammarType)
    {
        // And see if the current element is a 'Children' style content model
        ComplexTypeInfo *currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo();
        if(currType)
        {
            SchemaElementDecl::ModelTypes modelType = (SchemaElementDecl::ModelTypes) currType->getContentType();
            if(modelType == SchemaElementDecl::Children ||
               modelType == SchemaElementDecl::ElementOnlyEmpty)
                charOpts = XMLElementDecl::SpacesOk;
            else if(modelType == SchemaElementDecl::Empty)
                charOpts = XMLElementDecl::NoCharData;
        }
    } else // DTD grammar
        charOpts = topElem->fThisElement->getCharDataOpts();

    while (true)
    {
        const XMLCh nextCh = fReaderMgr.getNextChar();

        // Watch for unexpected end of file
        if (!nextCh)
        {
            emitError(XMLErrs::UnterminatedCDATASection);
            ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
        }

        if (fValidate && fStandalone && (fReaderMgr.getCurrentReader()->isWhitespace(nextCh)))
        {
            // This document is standalone; this ignorable CDATA whitespace is forbidden.
            // XML 1.0, Section 2.9
            // And see if the current element is a 'Children' style content model
            if (topElem->fThisElement->isExternal()) {

                if (charOpts == XMLElementDecl::SpacesOk) // Element Content
                {
                    // Error - standalone should have a value of "no" as whitespace detected in an
                    // element type with element content whose element declaration was external
                    fValidator->emitError(XMLValid::NoWSForStandalone);
                    if(fGrammarType == Grammar::SchemaGrammarType)
                    {
                        if (getPSVIHandler())
                        {
                            // REVISIT:
                            // PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID);
                        }
                    }
                }
            }
        }

        //  If this is a close square bracket it could be our closing
        //  sequence.
        if (nextCh == chCloseSquare && fReaderMgr.skippedString(CDataClose))
        {
            //  make sure we were not expecting a trailing surrogate.
            if (gotLeadingSurrogate)
                emitError(XMLErrs::Expected2ndSurrogateChar);

            if (fGrammarType == Grammar::SchemaGrammarType) {

                XMLSize_t xsLen = bbCData.getLen();
                const XMLCh* xsNormalized = bbCData.getRawBuffer();
                DatatypeValidator* tempDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
                if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE)
                {
                    // normalize the character according to schema whitespace facet
                    ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, xsNormalized, fWSNormalizeBuf);
                    xsNormalized = fWSNormalizeBuf.getRawBuffer();
                    xsLen = fWSNormalizeBuf.getLen();
                    if (fNormalizeData && fValidate) {
                        bbCData.set(xsNormalized);
                    }
                }

                if (fValidate) {

                    // tell the schema validation about the character data for checkContent later
                    ((SchemaValidator*)fValidator)->setDatatypeBuffer(xsNormalized);

                    if (charOpts != XMLElementDecl::AllCharData)
                    {
                        // They definitely cannot handle any type of char data
                        fValidator->emitError(XMLValid::NoCharDataInCM);
                        if (getPSVIHandler())
                        {
                            // REVISIT:
                            // PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID);
                        }
                    }
                }

                // call all active identity constraints
                if (toCheckIdentityConstraint() && fICHandler->getMatcherCount()) {
                    fContent.append(xsNormalized, xsLen);
                }
            }
            else {
                if (fValidate) {

                    if (charOpts != XMLElementDecl::AllCharData)
                    {
                        // They definitely cannot handle any type of char data
                        fValidator->emitError(XMLValid::NoCharDataInCM);
                    }
                }
            }

            // If we have a doc handler, call it
            if (fDocHandler)
            {
                fDocHandler->docCharacters(
                    bbCData.getRawBuffer(), bbCData.getLen(), true
                );
            }

            // And we are done
            break;
        }

        //  Make sure its a valid character. But if we've emitted an error
        //  already, don't bother with the overhead since we've already told
        //  them about it.
        if (!emittedError)
        {
            // Deal with surrogate pairs
            if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
            {
                //  Its a leading surrogate. If we already got one, then
                //  issue an error, else set leading flag to make sure that
                //  we look for a trailing next time.
                if (gotLeadingSurrogate)
                    emitError(XMLErrs::Expected2ndSurrogateChar);
                else
                    gotLeadingSurrogate = true;
            }
            else
            {
                //  If its a trailing surrogate, make sure that we are
                //  prepared for that. Else, its just a regular char so make
                //  sure that we were not expected a trailing surrogate.
                if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF))
                {
                    // Its trailing, so make sure we were expecting it
                    if (!gotLeadingSurrogate)
                        emitError(XMLErrs::Unexpected2ndSurrogateChar);
                }
                else
                {
                    //  Its just a char, so make sure we were not expecting a
                    //  trailing surrogate.
                    if (gotLeadingSurrogate)
                        emitError(XMLErrs::Expected2ndSurrogateChar);

                    // Its got to at least be a valid XML character
                    else if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
                    {
                        XMLCh tmpBuf[9];
                        XMLString::binToText
                        (
                            nextCh
                            , tmpBuf
                            , 8
                            , 16
                            , fMemoryManager
                        );
                        emitError(XMLErrs::InvalidCharacter, tmpBuf);
                        emittedError = true;
                    }
                }
                gotLeadingSurrogate = false;
            }
        }

        // Add it to the buffer
        bbCData.append(nextCh);
    }
}