in src/xercesc/internal/SGXMLScanner.cpp [2852:2995]
// Normalizes the raw attribute value 'value' into 'toFill' according to the
// type reported by 'attDef' (treated as CData when 'attDef' is null).
//
//  - For CData, and for any type ordered after XMLAttDef::Notation, every
//    unescaped tab, line feed or carriage return is replaced by a single
//    space; all other characters are copied through.
//  - For the remaining types, leading and trailing whitespace is dropped and
//    each interior run of whitespace is collapsed to one space. Whitespace
//    is decided by the current reader's isWhitespace() and is tested after
//    the escape marker has been stripped.
//
// Escaped characters arrive as a 0xFFFF marker followed by the character
// itself; the marker is removed and the character is exempt from the '<'
// check.
//
//  attDef   Attribute definition, may be null.
//  attName  Attribute name, used only as the argument of emitted errors.
//  value    Null terminated source value.
//  toFill   Target buffer; it is reset before anything is appended.
//
// Returns false if an unescaped '<' was found in the value (the error
// XMLErrs::BracketInAttrValue is emitted and processing continues),
// otherwise true.
bool SGXMLScanner::normalizeAttValue( const   XMLAttDef* const    attDef
                                      , const XMLCh* const        attName
                                      , const XMLCh* const        value
                                      ,       XMLBuffer&          toFill)
{
    // A simple state value for a whitespace processing state machine
    enum States
    {
        InWhitespace
        , InContent
    };

    // Get the type; without a definition the value is handled as CData
    const XMLAttDef::AttTypes type = (attDef)
                        ?attDef->getType()
                        :XMLAttDef::CData;

    // Assume it's going to go fine, and empty the target buffer in preparation
    bool retVal = true;
    toFill.reset();

    // Check to see if it's a tokenized type that is declared externally
    bool  isAttTokenizedExternal = (attDef)
                                   ?attDef->isExternal() && (type == XMLAttDef::ID ||
                                                             type == XMLAttDef::IDRef ||
                                                             type == XMLAttDef::IDRefs ||
                                                             type == XMLAttDef::Entity ||
                                                             type == XMLAttDef::Entities ||
                                                             type == XMLAttDef::NmToken ||
                                                             type == XMLAttDef::NmTokens)
                                   :false;

    // Loop through the chars of the source value and normalize it according
    // to the type.
    States curState = InContent;
    bool firstNonWS = false;
    XMLCh nextCh;
    const XMLCh* srcPtr = value;

    if (type == XMLAttDef::CData || type > XMLAttDef::Notation) {
        while (*srcPtr) {
            // Get the next character from the source. We have to watch for
            // escaped characters (which are indicated by a 0xFFFF value followed
            // by the char that was escaped.)
            nextCh = *srcPtr;

            // Do we have an escaped character ?
            if (nextCh == 0xFFFF)
            {
                nextCh = *++srcPtr;

                // A marker with nothing after it leaves us on the terminator.
                // Stop here, otherwise the increment at the bottom of the loop
                // would step past the end of the source string.
                if (!nextCh)
                    break;
            }
            else if ( (nextCh <= 0x0D) && (nextCh == 0x09 || nextCh == 0x0A || nextCh == 0x0D) ) {
                // Check Validity Constraint for Standalone document declaration
                // XML 1.0, Section 2.9
                if (fStandalone && fValidate && isAttTokenizedExternal)
                {
                     // Can't have a standalone document declaration of "yes" if attribute
                     // values are subject to normalisation
                     fValidator->emitError(XMLValid::NoAttNormForStandalone, attName);
                }
                nextCh = chSpace;
            }
            else if (nextCh == chOpenAngle) {
                // If it's not escaped, then make sure it's not a < character, which is
                // not allowed in attribute values. The character is still copied
                // to the target below; only the return value changes.
                emitError(XMLErrs::BracketInAttrValue, attName);
                retVal = false;
            }

            // Add this char to the target buffer
            toFill.append(nextCh);

            // And move up to the next character in the source
            srcPtr++;
        }
    }
    else {
        while (*srcPtr)
        {
            // Get the next character from the source. We have to watch for
            // escaped characters (which are indicated by a 0xFFFF value followed
            // by the char that was escaped.)
            nextCh = *srcPtr;

            // Do we have an escaped character ?
            if (nextCh == 0xFFFF)
            {
                nextCh = *++srcPtr;

                // A marker with nothing after it leaves us on the terminator.
                // Stop here, otherwise the increment at the bottom of the loop
                // would step past the end of the source string.
                if (!nextCh)
                    break;
            }
            else if (nextCh == chOpenAngle) {
                // If it's not escaped, then make sure it's not a < character, which is
                // not allowed in attribute values.
                emitError(XMLErrs::BracketInAttrValue, attName);
                retVal = false;
            }

            if (curState == InWhitespace)
            {
                if (!fReaderMgr.getCurrentReader()->isWhitespace(nextCh))
                {
                    // Leaving a whitespace run. Emit the single separating
                    // space only if some content has already been seen, so
                    // that leading whitespace produces nothing.
                    if (firstNonWS)
                        toFill.append(chSpace);
                    curState = InContent;
                    firstNonWS = true;
                }
                else
                {
                    // Still inside the run, swallow the character
                    srcPtr++;
                    continue;
                }
            }
            else if (curState == InContent)
            {
                if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh))
                {
                    // Entering a whitespace run. Nothing is appended yet; the
                    // separator is written when the run ends, which is what
                    // drops trailing whitespace.
                    curState = InWhitespace;
                    srcPtr++;

                    // Check Validity Constraint for Standalone document declaration
                    // XML 1.0, Section 2.9
                    if (fStandalone && fValidate && isAttTokenizedExternal)
                    {
                        // srcPtr already points at the character after the
                        // whitespace, so *srcPtr is a look-ahead here.
                        if (!firstNonWS || (nextCh != chSpace && *srcPtr && fReaderMgr.getCurrentReader()->isWhitespace(*srcPtr)))
                        {
                            // Can't have a standalone document declaration of "yes" if attribute
                            // values are subject to normalisation
                            fValidator->emitError(XMLValid::NoAttNormForStandalone, attName);
                        }
                    }
                    continue;
                }
                firstNonWS = true;
            }

            // Add this char to the target buffer
            toFill.append(nextCh);

            // And move up to the next character in the source
            srcPtr++;
        }
    }

    return retVal;
}