bool SGXMLScanner::normalizeAttValue()

in src/xercesc/internal/SGXMLScanner.cpp [2852:2995]


bool SGXMLScanner::normalizeAttValue( const   XMLAttDef* const    attDef
                                      , const XMLCh* const        attName
                                      , const XMLCh* const        value
                                      ,       XMLBuffer&          toFill)
{
    // A simple state value for a whitespace processing state machine
    enum States
    {
        InWhitespace
        , InContent
    };

    // Get the type and name
    const XMLAttDef::AttTypes type = (attDef)
                            ?attDef->getType()
                            :XMLAttDef::CData;

    // Assume its going to go fine, and empty the target buffer in preperation
    bool retVal = true;
    toFill.reset();

    // check to see if it's a tokenized type that is declared externally 
    bool  isAttTokenizedExternal = (attDef)
                                   ?attDef->isExternal() && (type == XMLAttDef::ID || 
                                                             type == XMLAttDef::IDRef || 
                                                             type == XMLAttDef::IDRefs || 
                                                             type == XMLAttDef::Entity || 
                                                             type == XMLAttDef::Entities || 
                                                             type == XMLAttDef::NmToken || 
                                                             type == XMLAttDef::NmTokens)
                                   :false;

    //  Loop through the chars of the source value and normalize it according
    //  to the type.
    States curState = InContent;
    bool firstNonWS = false;
    XMLCh nextCh;
    const XMLCh* srcPtr = value;

    if (type == XMLAttDef::CData || type > XMLAttDef::Notation) {
        while (*srcPtr) {
            //  Get the next character from the source. We have to watch for
            //  escaped characters (which are indicated by a 0xFFFF value followed
            //  by the char that was escaped.)
            nextCh = *srcPtr;

            // Do we have an escaped character ?
            if (nextCh == 0xFFFF)
            {
                nextCh = *++srcPtr;
            }
            else if ( (nextCh <= 0x0D) && (nextCh == 0x09 || nextCh == 0x0A || nextCh == 0x0D) ) {
                // Check Validity Constraint for Standalone document declaration
                // XML 1.0, Section 2.9
                if (fStandalone && fValidate && isAttTokenizedExternal)
                {
                     // Can't have a standalone document declaration of "yes" if  attribute
                     // values are subject to normalisation
                     fValidator->emitError(XMLValid::NoAttNormForStandalone, attName);
                }
                nextCh = chSpace;
            }
            else if (nextCh == chOpenAngle) {
                //  If its not escaped, then make sure its not a < character, which is
                //  not allowed in attribute values.
                emitError(XMLErrs::BracketInAttrValue, attName);
                retVal = false;
            }

            // Add this char to the target buffer
            toFill.append(nextCh);

            // And move up to the next character in the source
            srcPtr++;
        }
    }
    else {
        while (*srcPtr)
        {
            //  Get the next character from the source. We have to watch for
            //  escaped characters (which are indicated by a 0xFFFF value followed
            //  by the char that was escaped.)
            nextCh = *srcPtr;

            // Do we have an escaped character ?
            if (nextCh == 0xFFFF)
            {
                nextCh = *++srcPtr;
            }
            else if (nextCh == chOpenAngle) {
                //  If its not escaped, then make sure its not a < character, which is
                //  not allowed in attribute values.
                emitError(XMLErrs::BracketInAttrValue, attName);
                retVal = false;
            }

            if (curState == InWhitespace)
            {
                if (!fReaderMgr.getCurrentReader()->isWhitespace(nextCh))
                {
                    if (firstNonWS)
                        toFill.append(chSpace);
                    curState = InContent;
                    firstNonWS = true;
                }
                else
                {
                    srcPtr++;
                    continue;
                }
            }
            else if (curState == InContent)
            {
                if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh))
                {
                    curState = InWhitespace;
                    srcPtr++;

                    // Check Validity Constraint for Standalone document declaration
                    // XML 1.0, Section 2.9
                    if (fStandalone && fValidate && isAttTokenizedExternal)
                    {
                        if (!firstNonWS || (nextCh != chSpace && *srcPtr && fReaderMgr.getCurrentReader()->isWhitespace(*srcPtr)))
                        {
                            // Can't have a standalone document declaration of "yes" if  attribute
                            // values are subject to normalisation
                            fValidator->emitError(XMLValid::NoAttNormForStandalone, attName);
                        }
                    }
                    continue;
                }
                firstNonWS = true;
            }

            // Add this char to the target buffer
            toFill.append(nextCh);

            // And move up to the next character in the source
            srcPtr++;
        }
    }

    return retVal;
}