bool IGXMLScanner::scanAttValue()

in src/xercesc/internal/IGXMLScanner2.cpp [2404:2623]


//  Scans a quoted attribute value from the reader manager into toFill,
//  normalizing whitespace as the characters are read.
//
//  Parameters:
//    attDef   - Declaration of the attribute, or null. When null, the value
//               is handled as CDATA and the "externally declared tokenized
//               type" checks are disabled.
//    attrName - Name of the attribute; only used as a parameter of the
//               errors emitted from here.
//    toFill   - Reset on entry, then receives the normalized value.
//
//  Returns:
//    true  - the closing quote was found while the current reader number
//            was the same as when the opening quote was read.
//    false - the next char was not a quote at all (skipIfQuote failed), or
//            the closing quote showed up with a lower current reader number
//            than the starting one (PartialMarkupInEntity is emitted first).
//
//  Raises UnexpectedEOFException (via ThrowXMLwithMemMgr) if getNextChar()
//  returns 0 before the closing quote is seen. EndOfEntityException is
//  caught and swallowed inside this method.
//
//  All other problems (bad surrogate sequences, invalid XML chars, an
//  unescaped '<', standalone normalization violations) are reported through
//  emitError and scanning continues.
bool IGXMLScanner::scanAttValue(  const   XMLAttDef* const    attDef
                                  , const XMLCh* const        attrName
                                  ,       XMLBuffer&          toFill)
{
    //  States of the non-CDATA whitespace normalization: InWhitespace means
    //  we are inside a run of whitespace that is being collapsed, InContent
    //  means the previous char was kept as content.
    enum States
    {
        InWhitespace
        , InContent
    };

    // Get the attribute type; with no declaration it is treated as CDATA
    const XMLAttDef::AttTypes type = (attDef)
                ?attDef->getType()
                :XMLAttDef::CData;

    // Reset the target buffer
    toFill.reset();

    // Get the next char which must be a single or double quote
    XMLCh quoteCh;
    if (!fReaderMgr.skipIfQuote(quoteCh))
        return false;

    //  We have to get the current reader because we have to ignore closing
    //  quotes until we hit the same reader again.
    const XMLSize_t curReader = fReaderMgr.getCurrentReaderNum();

    //  Check to see if it's a tokenized type that is declared externally.
    //  This can only be true for the seven non-CDATA types listed here.
    bool  isAttTokenizedExternal = (attDef)
                                   ?attDef->isExternal() && (type == XMLAttDef::ID || 
                                                             type == XMLAttDef::IDRef || 
                                                             type == XMLAttDef::IDRefs || 
                                                             type == XMLAttDef::Entity || 
                                                             type == XMLAttDef::Entities || 
                                                             type == XMLAttDef::NmToken || 
                                                             type == XMLAttDef::NmTokens)
                                   :false;

    //  Loop until we get the attribute value. Note that we use a double
    //  loop here to avoid the setup/teardown overhead of the exception
    //  handler on every round.
    //
    //  nextCh              - char currently being processed
    //  secondCh            - optional second char filled in by scanEntityRef;
    //                        appended right after nextCh when non-zero
    //                        (presumably the trailing half of a surrogate
    //                        pair from a char ref - confirm in scanEntityRef)
    //  curState            - whitespace normalization state (non-CDATA only)
    //  firstNonWS          - set once a non-whitespace char has been kept
    //  gotLeadingSurrogate - previous char was a leading surrogate
    //  escaped             - set by scanEntityRef; cleared before every char
    XMLCh   nextCh;
    XMLCh   secondCh = 0;
    States  curState = InContent;
    bool    firstNonWS = false;
    bool    gotLeadingSurrogate = false;
    bool    escaped;
    while (true)
    {
        try
        {
            while(true)
            {
                nextCh = fReaderMgr.getNextChar();

                // A zero char means we ran out of input before the closing quote
                if (!nextCh)
                    ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);

                // Check for our ending quote in the same entity
                if (nextCh == quoteCh)
                {
                    if (curReader == fReaderMgr.getCurrentReaderNum())
                        return true;

                    // Watch for spillover into a previous entity
                    if (curReader > fReaderMgr.getCurrentReaderNum())
                    {
                        emitError(XMLErrs::PartialMarkupInEntity);
                        return false;
                    }

                    //  Otherwise the current reader number is higher than
                    //  the starting one, and the quote char falls through
                    //  to be handled as ordinary content below.
                }

                //  Check for an entity ref now, before we let it affect our
                //  whitespace normalization logic below. We ignore the empty flag
                //  in this one.
                escaped = false;
                if (nextCh == chAmpersand)
                {
                    //  Only an EntityExp_Returned result leaves a char in
                    //  nextCh (and possibly secondCh) for us to process; for
                    //  any other result there is nothing to add on this
                    //  round, so go get the next char.
                    if (scanEntityRef(true, nextCh, secondCh, escaped) != EntityExp_Returned)
                    {
                        gotLeadingSurrogate = false;
                        continue;
                    }
                }
                else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
                {
                    // Deal with surrogate pairs
                    //  It's a leading surrogate. If we already got one, then
                    //  issue an error, else set leading flag to make sure that
                    //  we look for a trailing next time.
                    if (gotLeadingSurrogate)
                        emitError(XMLErrs::Expected2ndSurrogateChar);
                     else
                        gotLeadingSurrogate = true;
                }
                else
                {
                    //  If it's a trailing surrogate, make sure that we are
                    //  prepared for that. Else, it's just a regular char so make
                    //  sure that we were not expecting a trailing surrogate.
                    if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF))
                    {
                        // It's trailing, so make sure we were expecting it
                        if (!gotLeadingSurrogate)
                            emitError(XMLErrs::Unexpected2ndSurrogateChar);
                    }
                    else
                    {
                        //  It's just a char, so make sure we were not expecting a
                        //  trailing surrogate.
                        if (gotLeadingSurrogate)
                            emitError(XMLErrs::Expected2ndSurrogateChar);

                        // It's got to at least be a valid XML character
                        if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
                        {
                            //  Report the offending char as radix-16 text.
                            //  The buffer holds 9 XMLCh and 8 is passed as
                            //  the max - presumably leaving room for the
                            //  terminator; confirm against binToText.
                            XMLCh tmpBuf[9];
                            XMLString::binToText
                            (
                                nextCh
                                , tmpBuf
                                , 8
                                , 16
                                , fMemoryManager
                            );
                            emitError(XMLErrs::InvalidCharacterInAttrValue, attrName, tmpBuf);
                        }
                    }
                    gotLeadingSurrogate = false;
                }

                //  If it's not escaped, then make sure it's not a < character, which
                //  is not allowed in attribute values.
                if (!escaped && (nextCh == chOpenAngle))
                    emitError(XMLErrs::BracketInAttrValue, attrName);

                //  If the attribute is a CDATA type we do simple replacement of
                //  tabs and new lines with spaces, if the character is not escaped
                //  by way of a char ref.
                //
                //  Otherwise, we do the standard non-CDATA normalization of
                //  compressing whitespace to single spaces and getting rid of leading
                //  and trailing whitespace.
                if (type == XMLAttDef::CData)
                {
                    if (!escaped)
                    {
                        if ((nextCh == 0x09) || (nextCh == 0x0A) || (nextCh == 0x0D))
                        {
                            // Check Validity Constraint for Standalone document declaration
                            // XML 1.0, Section 2.9
                            //
                            //  NOTE(review): isAttTokenizedExternal is only
                            //  ever true for the non-CDATA types it lists, so
                            //  this condition looks unreachable inside the
                            //  CDATA branch - confirm before relying on it.
                            if (fStandalone && fValidate && isAttTokenizedExternal)
                            {
                                // Can't have a standalone document declaration of "yes" if  attribute
                                // values are subject to normalisation
                                fValidator->emitError(XMLValid::NoAttNormForStandalone, attrName);
                            }
                            nextCh = chSpace;
                        }
                    }
                }
                else
                {
                    if (curState == InWhitespace)
                    {
                        //  An escaped char other than a literal space, or any
                        //  non-whitespace char, ends the whitespace run.
                        if ((escaped && nextCh != chSpace) || !fReaderMgr.getCurrentReader()->isWhitespace(nextCh))
                        {
                            //  The collapsed run becomes a single space, but
                            //  only if content came before it. That is what
                            //  drops leading whitespace; trailing whitespace
                            //  is dropped because no content char follows it
                            //  to trigger this append.
                            if (firstNonWS)
                                toFill.append(chSpace);
                            curState = InContent;
                            firstNonWS = true;
                        }
                        else
                        {
                            // Still inside the whitespace run, so swallow it
                            continue;
                        }
                    }
                    else if (curState == InContent)
                    {
                        //  A space (escaped or not) or any unescaped
                        //  whitespace char starts a whitespace run. It is not
                        //  appended here; see the InWhitespace branch above.
                        if ((nextCh == chSpace) ||
                            (fReaderMgr.getCurrentReader()->isWhitespace(nextCh) && !escaped))
                        {
                            curState = InWhitespace;

                            // Check Validity Constraint for Standalone document declaration
                            // XML 1.0, Section 2.9
                            if (fStandalone && fValidate && isAttTokenizedExternal)
                            {
                                //  Flag the value when no content has been
                                //  kept yet (leading whitespace), or when this
                                //  char is not a plain space and
                                //  lookingAtSpace() reports true (presumably
                                //  meaning a space follows - confirm against
                                //  ReaderMgr).
                                if (!firstNonWS || (nextCh != chSpace && fReaderMgr.lookingAtSpace()))
                                {
                                     // Can't have a standalone document declaration of "yes" if  attribute
                                     // values are subject to normalisation
                                     fValidator->emitError(XMLValid::NoAttNormForStandalone, attrName);
                                }
                            }
                            continue;
                        }
                        firstNonWS = true;
                    }
                }

                // Else add it to the buffer
                toFill.append(nextCh);

                //  If the entity ref scan handed back a second char, it goes
                //  in right after the first one; clear it so that it is not
                //  appended again on a later round.
                if (secondCh)
                {
                    toFill.append(secondCh);
                    secondCh=0;
                }
            }
        }
        catch(const EndOfEntityException&)
        {
            //  Just eat it and continue. The per-char flags are cleared and
            //  the outer loop re-enters the try block to keep scanning.
            gotLeadingSurrogate = false;
            escaped = false;
        }
    }

    //  Not reached: the loops above have no break and are only left through
    //  a return statement or an exception that is not caught here.
    return true;
}