void IGXMLScanner::scanDocTypeDecl()

in src/xercesc/internal/IGXMLScanner.cpp [1234:1549]


void IGXMLScanner::scanDocTypeDecl()
{
    //  We have a doc type. So, switch the Grammar.
    switchGrammar(XMLUni::fgDTDEntityString);

    if (fDocTypeHandler)
        fDocTypeHandler->resetDocType();

    // There must be some space after DOCTYPE
    bool skippedSomething;
    fReaderMgr.skipPastSpaces(skippedSomething);
    if (!skippedSomething)
    {
        emitError(XMLErrs::ExpectedWhitespace);

        // Just skip the Doctype declaration and return
        fReaderMgr.skipPastChar(chCloseAngle);
        return;
    }

    // Get a buffer for the root element
    XMLBufBid bbRootName(&fBufMgr);

    //  Get a name from the input, which should be the name of the root
    //  element of the upcoming content.
    int  colonPosition;
    bool validName = fDoNamespaces ? fReaderMgr.getQName(bbRootName.getBuffer(), &colonPosition) :
                                     fReaderMgr.getName(bbRootName.getBuffer());
    if (!validName)
    {
        if (bbRootName.isEmpty())
            emitError(XMLErrs::NoRootElemInDOCTYPE);
        else
            emitError(XMLErrs::InvalidRootElemInDOCTYPE, bbRootName.getRawBuffer());
        fReaderMgr.skipPastChar(chCloseAngle);
        return;
    }

    //  Store the root element name for later check
    setRootElemName(bbRootName.getRawBuffer());

    //  This element obviously is not going to exist in the element decl
    //  pool yet, but we need to call docTypeDecl. So force it into
    //  the element decl pool, marked as being there because it was in
    //  the DOCTYPE. Later, when its declared, the status will be updated.
    //
    //  Only do this if we are not reusing the validator! If we are reusing,
    //  then look it up instead. It has to exist!
    MemoryManager* const  rootDeclMgr =
        fUseCachedGrammar ? fMemoryManager : fGrammarPoolMemoryManager;

    DTDElementDecl* rootDecl = new (rootDeclMgr) DTDElementDecl
    (
        bbRootName.getRawBuffer()
        , fEmptyNamespaceId
        , DTDElementDecl::Any
        , rootDeclMgr
    );

    Janitor<DTDElementDecl> rootDeclJanitor(rootDecl);
    rootDecl->setCreateReason(DTDElementDecl::AsRootElem);
    rootDecl->setExternalElemDeclaration(true);
    if(!fUseCachedGrammar)
    {
        fGrammar->putElemDecl(rootDecl);
        rootDeclJanitor.release();
    } else
    {
        // attach this to the undeclared element pool so that it gets deleted
        XMLElementDecl* elemDecl = fDTDElemNonDeclPool->getByKey(bbRootName.getRawBuffer());
        if (elemDecl)
        {
            rootDecl->setId(elemDecl->getId());
        }
        else
        {
            rootDecl->setId(fDTDElemNonDeclPool->put((DTDElementDecl*)rootDecl));
            rootDeclJanitor.release();
        }
    }

    // Skip any spaces after the name
    fReaderMgr.skipPastSpaces();

    //  And now if we are looking at a >, then we are done. It is not
    //  required to have an internal or external subset, though why you
    //  would not escapes me.
    if (fReaderMgr.skippedChar(chCloseAngle)) {

        //  If we have a doc type handler and advanced callbacks are enabled,
        //  call the doctype event.
        if (fDocTypeHandler)
            fDocTypeHandler->doctypeDecl(*rootDecl, 0, 0, false);
        return;
    }

    // either internal/external subset
    if (fValScheme == Val_Auto && !fValidate)
        fValidate = true;

    bool    hasIntSubset = false;
    bool    hasExtSubset = false;
    XMLCh*  sysId = 0;
    XMLCh*  pubId = 0;

    DTDScanner dtdScanner
    (
        (DTDGrammar*) fGrammar
        , fDocTypeHandler
        , fGrammarPoolMemoryManager
        , fMemoryManager
    );
    dtdScanner.setScannerInfo(this, &fReaderMgr, &fBufMgr);

    //  If the next character is '[' then we have no external subset cause
    //  there is no system id, just the opening character of the internal
    //  subset. Else, has to be an id.
    //
    // Just look at the next char, don't eat it.
    if (fReaderMgr.peekNextChar() == chOpenSquare)
    {
        hasIntSubset = true;
    }
    else
    {
        // Indicate we have an external subset
        hasExtSubset = true;
        fHasNoDTD = false;

        // Get buffers for the ids
        XMLBufBid bbPubId(&fBufMgr);
        XMLBufBid bbSysId(&fBufMgr);

        // Get the external subset id
        if (!dtdScanner.scanId(bbPubId.getBuffer(), bbSysId.getBuffer(), DTDScanner::IDType_External))
        {
            fReaderMgr.skipPastChar(chCloseAngle);
            return;
        }

        // Get copies of the ids we got
        pubId = XMLString::replicate(bbPubId.getRawBuffer(), fMemoryManager);
        sysId = XMLString::replicate(bbSysId.getRawBuffer(), fMemoryManager);
    }

    // Insure that the ids get cleaned up, if they got allocated
    ArrayJanitor<XMLCh> janSysId(sysId, fMemoryManager);
    ArrayJanitor<XMLCh> janPubId(pubId, fMemoryManager);

    if (hasExtSubset)
    {
        // Skip spaces and check again for the opening of an internal subset
        fReaderMgr.skipPastSpaces();

        // Just look at the next char, don't eat it.
        if (fReaderMgr.peekNextChar() == chOpenSquare) {
            hasIntSubset = true;
        }
    }

    //  If we have a doc type handler and advanced callbacks are enabled,
    //  call the doctype event.
    if (fDocTypeHandler)
        fDocTypeHandler->doctypeDecl(*rootDecl, pubId, sysId, hasIntSubset, hasExtSubset);

    //  Ok, if we had an internal subset, we are just past the [ character
    //  and need to parse that first.
    if (hasIntSubset)
    {
        // Eat the opening square bracket
        fReaderMgr.getNextChar();

        checkInternalDTD(hasExtSubset, sysId, pubId);

        //  And try to scan the internal subset. If we fail, try to recover
        //  by skipping forward tot he close angle and returning.
        if (!dtdScanner.scanInternalSubset())
        {
            fReaderMgr.skipPastChar(chCloseAngle);
            return;
        }

        //  Do a sanity check that some expanded PE did not propogate out of
        //  the doctype. This could happen if it was terminated early by bad
        //  syntax.
        if (fReaderMgr.getReaderDepth() > 1)
        {
            emitError(XMLErrs::PEPropogated);

            // Ask the reader manager to pop back down to the main level
            fReaderMgr.cleanStackBackTo(1);
        }

        fReaderMgr.skipPastSpaces();
    }

    // And that should leave us at the closing > of the DOCTYPE line
    if (!fReaderMgr.skippedChar(chCloseAngle))
    {
        //  Do a special check for the common scenario of an extra ] char at
        //  the end. This is easy to recover from.
        if (fReaderMgr.skippedChar(chCloseSquare)
        &&  fReaderMgr.skippedChar(chCloseAngle))
        {
            emitError(XMLErrs::ExtraCloseSquare);
        }
         else
        {
            emitError(XMLErrs::UnterminatedDOCTYPE);
            fReaderMgr.skipPastChar(chCloseAngle);
        }
    }

    //  If we had an external subset, then we need to deal with that one
    //  next. If we are reusing the validator, then don't scan it.
    if (hasExtSubset) {

        InputSource* srcUsed=0;
        Janitor<InputSource> janSrc(srcUsed);
        // If we had an internal subset and we're using the cached grammar, it
        // means that the ignoreCachedDTD is set, so we ignore the cached
        // grammar
        if (fUseCachedGrammar && !hasIntSubset)
        {
            srcUsed = resolveSystemId(sysId, pubId);
            if (srcUsed) {
                janSrc.reset(srcUsed);
                Grammar* grammar = fGrammarResolver->getGrammar(srcUsed->getSystemId());

                if (grammar && grammar->getGrammarType() == Grammar::DTDGrammarType) {

                    fDTDGrammar = (DTDGrammar*) grammar;
                    fGrammar = fDTDGrammar;
                    fValidator->setGrammar(fGrammar);
                    // If we don't report at least the external subset boundaries,
                    // an advanced document handler cannot know when the DTD end,
                    // since we've already sent a doctype decl that indicates there's
                    // there's an external subset.
                    if (fDocTypeHandler)
                    {
                        fDocTypeHandler->startExtSubset();
                        fDocTypeHandler->endExtSubset();
                    }

                    return;
                }
            }
        }

        if (fLoadExternalDTD || fValidate)
        {
            // And now create a reader to read this entity
            XMLReader* reader;
            if (srcUsed) {
                reader = fReaderMgr.createReader
                        (
                            *srcUsed
                            , false
                            , XMLReader::RefFrom_NonLiteral
                            , XMLReader::Type_General
                            , XMLReader::Source_External
                            , fCalculateSrcOfs
                            , fLowWaterMark
                        );
            }
            else {
                reader = fReaderMgr.createReader
                        (
                            sysId
                            , pubId
                            , false
                            , XMLReader::RefFrom_NonLiteral
                            , XMLReader::Type_General
                            , XMLReader::Source_External
                            , srcUsed
                            , fCalculateSrcOfs
                            , fLowWaterMark
                            , fDisableDefaultEntityResolution
                        );
                janSrc.reset(srcUsed);
            }
            //  If it failed then throw an exception
            if (!reader)
                ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Gen_CouldNotOpenDTD, srcUsed ? srcUsed->getSystemId() : sysId, fMemoryManager);

            if (fToCacheGrammar) {

                unsigned int stringId = fGrammarResolver->getStringPool()->addOrFind(srcUsed->getSystemId());
                const XMLCh* sysIdStr = fGrammarResolver->getStringPool()->getValueForId(stringId);

                fGrammarResolver->orphanGrammar(XMLUni::fgDTDEntityString);
                ((XMLDTDDescription*) (fGrammar->getGrammarDescription()))->setSystemId(sysIdStr);
                fGrammarResolver->putGrammar(fGrammar);
            }

            //  In order to make the processing work consistently, we have to
            //  make this look like an external entity. So create an entity
            //  decl and fill it in and push it with the reader, as happens
            //  with an external entity. Put a janitor on it to insure it gets
            //  cleaned up. The reader manager does not adopt them.
            const XMLCh gDTDStr[] = { chLatin_D, chLatin_T, chLatin_D , chNull };
            DTDEntityDecl* declDTD = new (fMemoryManager) DTDEntityDecl(gDTDStr, false, fMemoryManager);
            declDTD->setSystemId(sysId);
            declDTD->setIsExternal(true);

            // Mark this one as a throw at end
            reader->setThrowAtEnd(true);

            // And push it onto the stack, with its pseudo name
            fReaderMgr.pushReaderAdoptEntity(reader, declDTD);

            // Tell it its not in an include section
            dtdScanner.scanExtSubsetDecl(false, true);
        }
    }
}