in src/xercesc/internal/IGXMLScanner2.cpp [2842:3068]
void IGXMLScanner::scanCharData(XMLBuffer& toUse)
{
// We have to watch for the stupid ]]> sequence, which is illegal in
// character data. So this is a little state machine that handles that.
enum States
{
State_Waiting
, State_GotOne
, State_GotTwo
};
// Reset the buffer before we start
toUse.reset();
// Turn on the 'throw at end' flag of the reader manager
ThrowEOEJanitor jan(&fReaderMgr, true);
// In order to be more efficient we have to use kind of a deeply nested
// set of blocks here. The outer block puts on a try and catches end of
// entity exceptions. The inner loop is the per-character loop. If we
// put the try inside the inner loop, it would work but would require
// the exception handling code setup/teardown code to be invoked for
// each character.
XMLCh nextCh;
XMLCh secondCh = 0;
States curState = State_Waiting;
bool escaped = false;
bool gotLeadingSurrogate = false;
bool notDone = true;
while (notDone)
{
try
{
while (true)
{
// Eat through as many plain content characters as possible without
// needing special handling. Moving most content characters here,
// in this one call, rather than running the overall loop once
// per content character, is a speed optimization.
if (curState == State_Waiting && !gotLeadingSurrogate)
{
fReaderMgr.movePlainContentChars(toUse);
}
// Try to get another char from the source
// The code from here on down covers all contengencies,
if (!fReaderMgr.getNextCharIfNot(chOpenAngle, nextCh))
{
// If we were waiting for a trailing surrogate, its an error
if (gotLeadingSurrogate)
emitError(XMLErrs::Expected2ndSurrogateChar);
notDone = false;
break;
}
// Watch for a reference. Note that the escapement mechanism
// is ignored in this content.
escaped = false;
if (nextCh == chAmpersand)
{
sendCharData(toUse);
// Turn off the throwing at the end of entity during this
ThrowEOEJanitor jan(&fReaderMgr, false);
if (scanEntityRef(false, nextCh, secondCh, escaped) != EntityExp_Returned)
{
gotLeadingSurrogate = false;
continue;
}
else
{
if (escaped && !fElemStack.isEmpty())
fElemStack.setReferenceEscaped();
}
}
else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
{
// Deal with surrogate pairs
// Its a leading surrogate. If we already got one, then
// issue an error, else set leading flag to make sure that
// we look for a trailing next time.
if (gotLeadingSurrogate)
emitError(XMLErrs::Expected2ndSurrogateChar);
else
gotLeadingSurrogate = true;
}
else
{
// If its a trailing surrogate, make sure that we are
// prepared for that. Else, its just a regular char so make
// sure that we were not expected a trailing surrogate.
if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF))
{
// Its trailing, so make sure we were expecting it
if (!gotLeadingSurrogate)
emitError(XMLErrs::Unexpected2ndSurrogateChar);
}
else
{
// Its just a char, so make sure we were not expecting a
// trailing surrogate.
if (gotLeadingSurrogate)
emitError(XMLErrs::Expected2ndSurrogateChar);
// Make sure the returned char is a valid XML char
if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
{
XMLCh tmpBuf[9];
XMLString::binToText
(
nextCh
, tmpBuf
, 8
, 16
, fMemoryManager
);
emitError(XMLErrs::InvalidCharacter, tmpBuf);
}
}
gotLeadingSurrogate = false;
}
// Keep the state machine up to date
if (!escaped)
{
if (nextCh == chCloseSquare)
{
if (curState == State_Waiting)
curState = State_GotOne;
else if (curState == State_GotOne)
curState = State_GotTwo;
}
else if (nextCh == chCloseAngle)
{
if (curState == State_GotTwo)
emitError(XMLErrs::BadSequenceInCharData);
curState = State_Waiting;
}
else
{
curState = State_Waiting;
}
}
else
{
curState = State_Waiting;
}
// Add this char to the buffer
toUse.append(nextCh);
if (secondCh)
{
toUse.append(secondCh);
secondCh=0;
}
}
}
catch(const EndOfEntityException& toCatch)
{
// Some entity ended, so we have to send any accumulated
// chars and send an end of entity event.
sendCharData(toUse);
gotLeadingSurrogate = false;
if (fDocHandler)
fDocHandler->endEntityReference(toCatch.getEntity());
}
}
// Check the validity constraints as per XML 1.0 Section 2.9
if (fValidate && fStandalone)
{
// See if the text contains whitespace
// Get the raw data we need for the callback
const XMLCh* rawBuf = toUse.getRawBuffer();
const XMLSize_t len = toUse.getLen();
const bool isSpaces = fReaderMgr.getCurrentReader()->containsWhiteSpace(rawBuf, len);
if (isSpaces)
{
// And see if the current element is a 'Children' style content model
const ElemStack::StackElem* topElem = fElemStack.topElement();
if (topElem->fThisElement->isExternal()) {
// Get the character data opts for the current element
XMLElementDecl::CharDataOpts charOpts = XMLElementDecl::AllCharData;
if(fGrammar->getGrammarType() == Grammar::SchemaGrammarType)
{
// And see if the current element is a 'Children' style content model
ComplexTypeInfo *currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo();
if(currType)
{
SchemaElementDecl::ModelTypes modelType = (SchemaElementDecl::ModelTypes) currType->getContentType();
if(modelType == SchemaElementDecl::Children ||
modelType == SchemaElementDecl::ElementOnlyEmpty)
charOpts = XMLElementDecl::SpacesOk;
else if(modelType == SchemaElementDecl::Empty)
charOpts = XMLElementDecl::NoCharData;
}
} else // DTD grammar
charOpts = topElem->fThisElement->getCharDataOpts();
if (charOpts == XMLElementDecl::SpacesOk) // => Element Content
{
// Error - standalone should have a value of "no" as whitespace detected in an
// element type with element content whose element declaration was external
//
fValidator->emitError(XMLValid::NoWSForStandalone);
if(fGrammarType == Grammar::SchemaGrammarType)
{
if (getPSVIHandler())
{
// REVISIT:
// PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID);
}
}
}
}
}
}
// Send any char data that we accumulated into the buffer
sendCharData(toUse);
}