void DTDScanner::scanExtSubsetDecl()

in src/xercesc/validators/DTD/DTDScanner.cpp [2460:2692]


void DTDScanner::scanExtSubsetDecl(const bool inIncludeSect, const bool isDTD)
{
    // Indicate we are in the external subset now
    FlagJanitor<bool> janContentFlag(&fInternalSubset, false);


    bool bAcceptDecl = !inIncludeSect;

    // Get a buffer for whitespace
    XMLBufBid bbSpace(fBufMgr);

    //
    //  If we have a doc type handler and we are not being called recursively
    //  to handle an include section, tell it the ext subset starts
    //
    if (fDocTypeHandler && isDTD && !inIncludeSect)
        fDocTypeHandler->startExtSubset();

    //
    //  We have to play a trick here if the current entity we are parsing
    //  is a PE. Because the spooling code will put out a whitespace before
    //  and after an expanded PE if its being scanned outside the context of
    //  a literal entity, this will confuse this external subset code.
    //
    //  So, we see if that is what is happening and, if so, eat the single
    //  space, a check for the <?xml string. If we find it, we parse that
    //  markup right now and put the space back.
    //
    if (fReaderMgr->isScanningPERefOutOfLiteral())
    {
        if (fReaderMgr->skippedSpace())
        {
            if (fScanner->checkXMLDecl(true))
            {
                scanTextDecl();
                bAcceptDecl = false;

                // <TBD> Figure out how to do this
                // fReaderMgr->unGet(chSpace);
            }
        }
    }

    // Get the current reader number
    const XMLSize_t orgReader = fReaderMgr->getCurrentReaderNum();

    //
    //  Loop until we hit the end of the external subset entity. Note that
    //  we use a double loop here in order to avoid the overhead of doing
    //  the exception setup/teardown work on every loop.
    //
    bool inMarkup = false;
    bool inCharData = false;
    while (true)
    {
        bool bDoBreak=false;    // workaround for Borland bug with 'break' in 'catch'
        try
        {
            while (true)
            {
                XMLCh nextCh;
                
                try {
                    nextCh = fReaderMgr->peekNextChar();
                }
                catch (XMLException& ex) {
                    fScanner->emitError(XMLErrs::XMLException_Fatal, ex.getCode(), ex.getMessage(), NULL, NULL);
                    nextCh = chNull;
                }

                if (!nextCh)
                {
                    return; // nothing left
                }
                else if (nextCh == chOpenAngle)
                {
                    // Get the reader we started this on
                    // XML 1.0 P28a Well-formedness constraint: PE Between Declarations
                    const XMLSize_t orgReader = fReaderMgr->getCurrentReaderNum();
                    bool wasInPE = (fReaderMgr->getCurrentReader()->getType() == XMLReader::Type_PE);

                    //
                    //  Now scan the markup. Set the flag so that we will know that
                    //  we were in markup if an end of entity exception occurs.
                    //
                    fReaderMgr->getNextChar();
                    inMarkup = true;
                    scanMarkupDecl(bAcceptDecl);
                    inMarkup = false;

                    //
                    //  And see if we got back to the same level. If not, then its
                    //  a partial markup error.
                    //
                    if (fReaderMgr->getCurrentReaderNum() != orgReader){
                        if (wasInPE)
                            fScanner->emitError(XMLErrs::PEBetweenDecl);
                        else if (fScanner->getValidationScheme() == XMLScanner::Val_Always)
                            fScanner->getValidator()->emitError(XMLValid::PartialMarkupInPE);
                    }

                }
                else if (fReaderMgr->getCurrentReader()->isWhitespace(nextCh))
                {
                    //
                    //  If we have a doc type handler, and advanced callbacks are
                    //  enabled, then gather up whitespace and call back. Otherwise
                    //  just skip whitespaces.
                    //
                    if (fDocTypeHandler)
                    {
                        inCharData = true;
                        fReaderMgr->getSpaces(bbSpace.getBuffer());
                        inCharData = false;

                        fDocTypeHandler->doctypeWhitespace
                        (
                            bbSpace.getRawBuffer()
                            , bbSpace.getLen()
                        );
                    }
                    else
                    {
                        //
                        //  If we hit an end of entity in the middle of white
                        //  space, that's fine. We'll just come back in here
                        //  again on the next round and skip some more.
                        //
                        fReaderMgr->skipPastSpaces();
                    }
                }
                else if (nextCh == chPercent)
                {
                    //
                    //  Expand (and scan if external) the reference value. Tell
                    //  it to throw an end of entity exception at the end of the
                    //  entity.
                    //
                    fReaderMgr->getNextChar();
                    expandPERef(true, false, false, true);
                }
                else if (inIncludeSect && (nextCh == chCloseSquare))
                {
                    //
                    //  Its the end of a conditional include section. So scan it and
                    //  decrement the include depth counter.
                    //
                    fReaderMgr->getNextChar();
                    if (!fReaderMgr->skippedChar(chCloseSquare))
                    {
                        fScanner->emitError(XMLErrs::ExpectedEndOfConditional);
                        fReaderMgr->skipPastChar(chCloseAngle);
                    }
                    else if (!fReaderMgr->skippedChar(chCloseAngle))
                    {
                        fScanner->emitError(XMLErrs::ExpectedEndOfConditional);
                        fReaderMgr->skipPastChar(chCloseAngle);
                    }
                    return;
                }
                else
                {
                    fReaderMgr->getNextChar();
                    if (!fReaderMgr->getCurrentReader()->isXMLChar(nextCh))
                    {
                        XMLCh tmpBuf[9];
                        XMLString::binToText
                        (
                            nextCh
                            , tmpBuf
                            , 8
                            , 16
                            , fMemoryManager
                        );
                        fScanner->emitError(XMLErrs::InvalidCharacter, tmpBuf);
                    }
                    else
                    {
                        fScanner->emitError(XMLErrs::InvalidDocumentStructure);
                    }

                    // Try to get realigned
                    static const XMLCh toSkip[] =
                    {
                        chPercent, chCloseSquare, chOpenAngle, chNull
                    };
                    fReaderMgr->skipUntilInOrWS(toSkip);
                }
                bAcceptDecl = false;
            }
        }
        catch(const EndOfEntityException& toCatch)
        {
            //
            //  If the external entity ended while we were in markup, then that's
            //  a partial markup error.
            //
            if (inMarkup)
            {
                fScanner->emitError(XMLErrs::PartialMarkupInEntity);
                inMarkup = false;
            }

            // If we were in char data, then send what we got
            if (inCharData)
            {
                // Send what we got, then rethrow
                if (fDocTypeHandler)
                {
                    fDocTypeHandler->doctypeWhitespace
                    (
                        bbSpace.getRawBuffer()
                        , bbSpace.getLen()
                    );
                }
                inCharData = false;
            }

            //
            //  If the entity that just ended was the entity that we started
            //  on, then this is the end of the external subset.
            //
            if (orgReader == toCatch.getReaderNum())
                bDoBreak=true;
        }
        if(bDoBreak)
            break;
    }

    // If we have a doc type handler, tell it the ext subset ends
    if (fDocTypeHandler && isDTD && !inIncludeSect)
        fDocTypeHandler->endExtSubset();
}