in src/xercesc/internal/IGXMLScanner.cpp [1552:2105]
bool IGXMLScanner::scanStartTag(bool& gotData)
{
// Assume we will still have data until proven otherwise. It will only
// ever be false if this is the root and its empty.
gotData = true;
// Get the QName. In this case, we are not doing namespaces, so we just
// use it as is and don't have to break it into parts.
if (!fReaderMgr.getName(fQNameBuf))
{
emitError(XMLErrs::ExpectedElementName);
fReaderMgr.skipToChar(chOpenAngle);
return false;
}
// Assume it won't be an empty tag
bool isEmpty = false;
// Lets try to look up the element in the validator's element decl pool
// We can pass bogus values for the URI id and the base name. We know that
// this can only be called if we are doing a DTD style validator and that
// he will only look at the QName.
//
// We tell him to fault in a decl if he does not find one.
// Actually, we *don't* tell him to fault in a decl if he does not find one- NG
bool wasAdded = false;
const XMLCh *rawQName = fQNameBuf.getRawBuffer();
XMLElementDecl* elemDecl = fGrammar->getElemDecl
(
fEmptyNamespaceId
, 0
, rawQName
, Grammar::TOP_LEVEL_SCOPE
);
// look for it in the undeclared pool:
if(!elemDecl)
{
elemDecl = fDTDElemNonDeclPool->getByKey(rawQName);
}
if(!elemDecl)
{
// we're assuming this must be a DTD element. DTD's can be
// used with or without namespaces, but schemas cannot be used without
// namespaces.
wasAdded = true;
elemDecl = new (fMemoryManager) DTDElementDecl
(
rawQName
, fEmptyNamespaceId
, DTDElementDecl::Any
, fMemoryManager
);
elemDecl->setId(fDTDElemNonDeclPool->put((DTDElementDecl*)elemDecl));
}
// We do something different here according to whether we found the
// element or not.
if (wasAdded)
{
// If validating then emit an error
if (fValidate)
{
// This is to tell the reuse Validator that this element was
// faulted-in, was not an element in the validator pool originally
elemDecl->setCreateReason(XMLElementDecl::JustFaultIn);
fValidator->emitError
(
XMLValid::ElementNotDefined
, elemDecl->getFullName()
);
}
}
else
{
// If its not marked declared and validating, then emit an error
if (fValidate && !elemDecl->isDeclared())
{
fValidator->emitError
(
XMLValid::ElementNotDefined
, elemDecl->getFullName()
);
}
}
// See if its the root element
const bool isRoot = fElemStack.isEmpty();
// Expand the element stack and add the new element
fElemStack.addLevel(elemDecl, fReaderMgr.getCurrentReaderNum());
fElemStack.setValidationFlag(fValidate);
// Validate the element
if (fValidate)
fValidator->validateElement(elemDecl);
// If this is the first element and we are validating, check the root
// element.
if (isRoot)
{
fRootGrammar = fGrammar;
if (fValidate)
{
// If a DocType exists, then check if it matches the root name there.
if (fRootElemName && !XMLString::equals(fQNameBuf.getRawBuffer(), fRootElemName))
fValidator->emitError(XMLValid::RootElemNotLikeDocType);
}
}
else
{
// If the element stack is not empty, then add this element as a
// child of the previous top element. If its empty, this is the root
// elem and is not the child of anything.
fElemStack.addChild(elemDecl->getElementName(), true);
}
// Skip any whitespace after the name
fReaderMgr.skipPastSpaces();
// We loop until we either see a /> or >, handling attribute/value
// pairs until we get there.
XMLSize_t attCount = 0;
XMLSize_t curAttListSize = fAttrList->size();
wasAdded = false;
fElemCount++;
while (true)
{
// And get the next non-space character
XMLCh nextCh = fReaderMgr.peekNextChar();
// If the next character is not a slash or closed angle bracket,
// then it must be whitespace, since whitespace is required
// between the end of the last attribute and the name of the next
// one.
if (attCount)
{
if ((nextCh != chForwardSlash) && (nextCh != chCloseAngle))
{
bool bFoundSpace;
fReaderMgr.skipPastSpaces(bFoundSpace);
if (!bFoundSpace)
{
// Emit the error but keep on going
emitError(XMLErrs::ExpectedWhitespace);
}
// Ok, peek another char
nextCh = fReaderMgr.peekNextChar();
}
}
// Ok, here we first check for any of the special case characters.
// If its not one, then we do the normal case processing, which
// assumes that we've hit an attribute value, Otherwise, we do all
// the special case checks.
if (!fReaderMgr.getCurrentReader()->isSpecialStartTagChar(nextCh))
{
// Assume its going to be an attribute, so get a name from
// the input.
if (!fReaderMgr.getName(fAttNameBuf))
{
emitError(XMLErrs::ExpectedAttrName);
fReaderMgr.skipPastChar(chCloseAngle);
return false;
}
// And next must be an equal sign
if (!scanEq())
{
static const XMLCh tmpList[] =
{
chSingleQuote, chDoubleQuote, chCloseAngle
, chOpenAngle, chForwardSlash, chNull
};
emitError(XMLErrs::ExpectedEqSign);
// Try to sync back up by skipping forward until we either
// hit something meaningful.
const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
if ((chFound == chCloseAngle) || (chFound == chForwardSlash))
{
// Jump back to top for normal processing of these
continue;
}
else if ((chFound == chSingleQuote)
|| (chFound == chDoubleQuote)
|| fReaderMgr.getCurrentReader()->isWhitespace(chFound))
{
// Just fall through assuming that the value is to follow
}
else if (chFound == chOpenAngle)
{
// Assume a malformed tag and that new one is starting
emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
return false;
}
else
{
// Something went really wrong
return false;
}
}
// See if this attribute is declared for this element. If we are
// not validating of course it will not be at first, but we will
// fault it into the pool (to avoid lots of redundant errors.)
XMLCh * namePtr = fAttNameBuf.getRawBuffer();
XMLAttDef* attDef = ((DTDElementDecl *)elemDecl)->getAttDef(namePtr);
// Add this attribute to the attribute list that we use to
// pass them to the handler. We reuse its existing elements
// but expand it as required.
// Note that we want to this first since this will
// make a copy of the namePtr; we can then make use of
// that copy in the hashtable lookup that checks
// for duplicates. This will mean we may have to update
// the type of the XMLAttr later.
XMLAttr* curAtt;
if (attCount >= curAttListSize)
{
curAtt = new (fMemoryManager) XMLAttr
(
0
, namePtr
, XMLUni::fgZeroLenString
, XMLUni::fgZeroLenString
, (attDef)?attDef->getType():XMLAttDef::CData
, true
, fMemoryManager
);
fAttrList->addElement(curAtt);
}
else
{
curAtt = fAttrList->elementAt(attCount);
curAtt->set
(
0
, namePtr
, XMLUni::fgZeroLenString
, XMLUni::fgZeroLenString
, (attDef)?attDef->getType():XMLAttDef::CData
);
curAtt->setSpecified(true);
}
// reset namePtr so it refers to newly-allocated memory
namePtr = (XMLCh *)curAtt->getName();
if (!attDef)
{
// If there is a validation handler, then we are validating
// so emit an error.
if (fValidate)
{
fValidator->emitError
(
XMLValid::AttNotDefinedForElement
, fAttNameBuf.getRawBuffer()
, elemDecl->getFullName()
);
}
if(!fUndeclaredAttrRegistry->putIfNotPresent(namePtr, 0))
{
emitError
(
XMLErrs::AttrAlreadyUsedInSTag
, namePtr
, elemDecl->getFullName()
);
}
}
else
{
// prepare for duplicate detection
unsigned int *curCountPtr = fAttDefRegistry->get(attDef);
if(!curCountPtr)
{
curCountPtr = getNewUIntPtr();
*curCountPtr = fElemCount;
fAttDefRegistry->put(attDef, curCountPtr);
}
else if(*curCountPtr < fElemCount)
*curCountPtr = fElemCount;
else
{
emitError
(
XMLErrs::AttrAlreadyUsedInSTag
, attDef->getFullName()
, elemDecl->getFullName()
);
}
}
// Skip any whitespace before the value and then scan the att
// value. This will come back normalized with entity refs and
// char refs expanded.
fReaderMgr.skipPastSpaces();
if (!scanAttValue(attDef, namePtr, fAttValueBuf))
{
static const XMLCh tmpList[] =
{
chCloseAngle, chOpenAngle, chForwardSlash, chNull
};
emitError(XMLErrs::ExpectedAttrValue);
// It failed, so lets try to get synced back up. We skip
// forward until we find some whitespace or one of the
// chars in our list.
const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
if ((chFound == chCloseAngle)
|| (chFound == chForwardSlash)
|| fReaderMgr.getCurrentReader()->isWhitespace(chFound))
{
// Just fall through and process this attribute, though
// the value will be "".
}
else if (chFound == chOpenAngle)
{
// Assume a malformed tag and that new one is starting
emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
return false;
}
else
{
// Something went really wrong
return false;
}
}
// must set the newly-minted value on the XMLAttr:
curAtt->setValue(fAttValueBuf.getRawBuffer());
// Now that its all stretched out, lets look at its type and
// determine if it has a valid value. It will output any needed
// errors, but we just keep going. We only need to do this if
// we are validating.
if (attDef)
{
// Let the validator pass judgement on the attribute value
if (fValidate)
{
fValidator->validateAttrValue
(
attDef
, fAttValueBuf.getRawBuffer()
, false
, elemDecl
);
}
}
attCount++;
// And jump back to the top of the loop
continue;
}
// It was some special case character so do all of the checks and
// deal with it.
if (!nextCh)
ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
if (nextCh == chForwardSlash)
{
fReaderMgr.getNextChar();
isEmpty = true;
if (!fReaderMgr.skippedChar(chCloseAngle))
emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
break;
}
else if (nextCh == chCloseAngle)
{
fReaderMgr.getNextChar();
break;
}
else if (nextCh == chOpenAngle)
{
// Check for this one specially, since its going to be common
// and it is kind of auto-recovering since we've already hit the
// next open bracket, which is what we would have seeked to (and
// skipped this whole tag.)
emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
break;
}
else if ((nextCh == chSingleQuote) || (nextCh == chDoubleQuote))
{
// Check for this one specially, which is probably a missing
// attribute name, e.g. ="value". Just issue expected name
// error and eat the quoted string, then jump back to the
// top again.
emitError(XMLErrs::ExpectedAttrName);
fReaderMgr.getNextChar();
fReaderMgr.skipQuotedString(nextCh);
fReaderMgr.skipPastSpaces();
continue;
}
}
if(attCount)
{
// clean up after ourselves:
// clear the map used to detect duplicate attributes
fUndeclaredAttrRegistry->removeAll();
}
// Ok, so lets get an enumerator for the attributes of this element
// and run through them for well formedness and validity checks. But
// make sure that we had any attributes before we do it, since the list
// would have have gotten faulted in anyway.
if (elemDecl->hasAttDefs())
{
// N.B.: this assumes DTD validation.
XMLAttDefList& attDefList = elemDecl->getAttDefList();
for(XMLSize_t i=0; i<attDefList.getAttDefCount(); i++)
{
// Get the current att def, for convenience and its def type
const XMLAttDef& curDef = attDefList.getAttDef(i);
const XMLAttDef::DefAttTypes defType = curDef.getDefaultType();
unsigned int *attCountPtr = fAttDefRegistry->get(&curDef);
if (!attCountPtr || *attCountPtr < fElemCount)
{ // did not occur
if (fValidate)
{
// If we are validating and its required, then an error
if (defType == XMLAttDef::Required)
{
fValidator->emitError
(
XMLValid::RequiredAttrNotProvided
, curDef.getFullName()
);
}
else if ((defType == XMLAttDef::Default) ||
(defType == XMLAttDef::Fixed) )
{
if (fStandalone && curDef.isExternal())
{
// XML 1.0 Section 2.9
// Document is standalone, so attributes must not be defaulted.
fValidator->emitError(XMLValid::NoDefAttForStandalone, curDef.getFullName(), elemDecl->getFullName());
}
}
}
// Fault in the value if needed, and bump the att count
if ((defType == XMLAttDef::Default)
|| (defType == XMLAttDef::Fixed))
{
// Let the validator pass judgement on the attribute value
if (fValidate)
{
fValidator->validateAttrValue
(
&curDef
, curDef.getValue()
, false
, elemDecl
);
}
XMLAttr* curAtt;
if (attCount >= curAttListSize)
{
curAtt = new (fMemoryManager) XMLAttr
(
0
, curDef.getFullName()
, XMLUni::fgZeroLenString
, curDef.getValue()
, curDef.getType()
, false
, fMemoryManager
);
fAttrList->addElement(curAtt);
curAttListSize++;
}
else
{
curAtt = fAttrList->elementAt(attCount);
curAtt->set
(
0
, curDef.getFullName()
, XMLUni::fgZeroLenString
, curDef.getValue()
, curDef.getType()
);
curAtt->setSpecified(false);
}
attCount++;
}
}
}
}
// If empty, validate content right now if we are validating and then
// pop the element stack top. Else, we have to update the current stack
// top's namespace mapping elements.
if (isEmpty)
{
// If validating, then insure that its legal to have no content
if (fValidate)
{
XMLSize_t failure;
bool res = fValidator->checkContent(elemDecl, 0, 0, &failure);
if (!res)
{
fValidator->emitError
(
XMLValid::ElementNotValidForContent
, elemDecl->getFullName()
, elemDecl->getFormattedContentModel()
);
}
}
// Pop the element stack back off since it'll never be used now
fElemStack.popTop();
// If the elem stack is empty, then it was an empty root
if (isRoot)
gotData = false;
else {
// Restore the validation flag
fValidate = fElemStack.getValidationFlag();
}
}
// If we have a document handler, then tell it about this start tag. We
// don't have any URI id to send along, so send fEmptyNamespaceId. We also do not send
// any prefix since its just one big name if we are not doing namespaces.
if (fDocHandler)
{
fDocHandler->startElement
(
*elemDecl
, fEmptyNamespaceId
, 0
, *fAttrList
, attCount
, isEmpty
, isRoot
);
}
return true;
}