in src/xercesc/validators/DTD/DTDScanner.cpp [2460:2692]
void DTDScanner::scanExtSubsetDecl(const bool inIncludeSect, const bool isDTD)
{
// Indicate we are in the external subset now
FlagJanitor<bool> janContentFlag(&fInternalSubset, false);
bool bAcceptDecl = !inIncludeSect;
// Get a buffer for whitespace
XMLBufBid bbSpace(fBufMgr);
//
// If we have a doc type handler and we are not being called recursively
// to handle an include section, tell it the ext subset starts
//
if (fDocTypeHandler && isDTD && !inIncludeSect)
fDocTypeHandler->startExtSubset();
//
// We have to play a trick here if the current entity we are parsing
// is a PE. Because the spooling code will put out a whitespace before
// and after an expanded PE if its being scanned outside the context of
// a literal entity, this will confuse this external subset code.
//
// So, we see if that is what is happening and, if so, eat the single
// space, a check for the <?xml string. If we find it, we parse that
// markup right now and put the space back.
//
if (fReaderMgr->isScanningPERefOutOfLiteral())
{
if (fReaderMgr->skippedSpace())
{
if (fScanner->checkXMLDecl(true))
{
scanTextDecl();
bAcceptDecl = false;
// <TBD> Figure out how to do this
// fReaderMgr->unGet(chSpace);
}
}
}
// Get the current reader number
const XMLSize_t orgReader = fReaderMgr->getCurrentReaderNum();
//
// Loop until we hit the end of the external subset entity. Note that
// we use a double loop here in order to avoid the overhead of doing
// the exception setup/teardown work on every loop.
//
bool inMarkup = false;
bool inCharData = false;
while (true)
{
bool bDoBreak=false; // workaround for Borland bug with 'break' in 'catch'
try
{
while (true)
{
XMLCh nextCh;
try {
nextCh = fReaderMgr->peekNextChar();
}
catch (XMLException& ex) {
fScanner->emitError(XMLErrs::XMLException_Fatal, ex.getCode(), ex.getMessage(), NULL, NULL);
nextCh = chNull;
}
if (!nextCh)
{
return; // nothing left
}
else if (nextCh == chOpenAngle)
{
// Get the reader we started this on
// XML 1.0 P28a Well-formedness constraint: PE Between Declarations
const XMLSize_t orgReader = fReaderMgr->getCurrentReaderNum();
bool wasInPE = (fReaderMgr->getCurrentReader()->getType() == XMLReader::Type_PE);
//
// Now scan the markup. Set the flag so that we will know that
// we were in markup if an end of entity exception occurs.
//
fReaderMgr->getNextChar();
inMarkup = true;
scanMarkupDecl(bAcceptDecl);
inMarkup = false;
//
// And see if we got back to the same level. If not, then its
// a partial markup error.
//
if (fReaderMgr->getCurrentReaderNum() != orgReader){
if (wasInPE)
fScanner->emitError(XMLErrs::PEBetweenDecl);
else if (fScanner->getValidationScheme() == XMLScanner::Val_Always)
fScanner->getValidator()->emitError(XMLValid::PartialMarkupInPE);
}
}
else if (fReaderMgr->getCurrentReader()->isWhitespace(nextCh))
{
//
// If we have a doc type handler, and advanced callbacks are
// enabled, then gather up whitespace and call back. Otherwise
// just skip whitespaces.
//
if (fDocTypeHandler)
{
inCharData = true;
fReaderMgr->getSpaces(bbSpace.getBuffer());
inCharData = false;
fDocTypeHandler->doctypeWhitespace
(
bbSpace.getRawBuffer()
, bbSpace.getLen()
);
}
else
{
//
// If we hit an end of entity in the middle of white
// space, that's fine. We'll just come back in here
// again on the next round and skip some more.
//
fReaderMgr->skipPastSpaces();
}
}
else if (nextCh == chPercent)
{
//
// Expand (and scan if external) the reference value. Tell
// it to throw an end of entity exception at the end of the
// entity.
//
fReaderMgr->getNextChar();
expandPERef(true, false, false, true);
}
else if (inIncludeSect && (nextCh == chCloseSquare))
{
//
// Its the end of a conditional include section. So scan it and
// decrement the include depth counter.
//
fReaderMgr->getNextChar();
if (!fReaderMgr->skippedChar(chCloseSquare))
{
fScanner->emitError(XMLErrs::ExpectedEndOfConditional);
fReaderMgr->skipPastChar(chCloseAngle);
}
else if (!fReaderMgr->skippedChar(chCloseAngle))
{
fScanner->emitError(XMLErrs::ExpectedEndOfConditional);
fReaderMgr->skipPastChar(chCloseAngle);
}
return;
}
else
{
fReaderMgr->getNextChar();
if (!fReaderMgr->getCurrentReader()->isXMLChar(nextCh))
{
XMLCh tmpBuf[9];
XMLString::binToText
(
nextCh
, tmpBuf
, 8
, 16
, fMemoryManager
);
fScanner->emitError(XMLErrs::InvalidCharacter, tmpBuf);
}
else
{
fScanner->emitError(XMLErrs::InvalidDocumentStructure);
}
// Try to get realigned
static const XMLCh toSkip[] =
{
chPercent, chCloseSquare, chOpenAngle, chNull
};
fReaderMgr->skipUntilInOrWS(toSkip);
}
bAcceptDecl = false;
}
}
catch(const EndOfEntityException& toCatch)
{
//
// If the external entity ended while we were in markup, then that's
// a partial markup error.
//
if (inMarkup)
{
fScanner->emitError(XMLErrs::PartialMarkupInEntity);
inMarkup = false;
}
// If we were in char data, then send what we got
if (inCharData)
{
// Send what we got, then rethrow
if (fDocTypeHandler)
{
fDocTypeHandler->doctypeWhitespace
(
bbSpace.getRawBuffer()
, bbSpace.getLen()
);
}
inCharData = false;
}
//
// If the entity that just ended was the entity that we started
// on, then this is the end of the external subset.
//
if (orgReader == toCatch.getReaderNum())
bDoBreak=true;
}
if(bDoBreak)
break;
}
// If we have a doc type handler, tell it the ext subset ends
if (fDocTypeHandler && isDTD && !inIncludeSect)
fDocTypeHandler->endExtSubset();
}