in src/xercesc/internal/WFXMLScanner.cpp [744:1053]
bool WFXMLScanner::scanStartTag(bool& gotData)
{
// Assume we will still have data until proven otherwise. It will only
// ever be false if this is the root and its empty.
gotData = true;
// Get the QName. In this case, we are not doing namespaces, so we just
// use it as is and don't have to break it into parts.
if (!fReaderMgr.getName(fQNameBuf))
{
emitError(XMLErrs::ExpectedElementName);
fReaderMgr.skipToChar(chOpenAngle);
return false;
}
// Assume it won't be an empty tag
bool isEmpty = false;
// See if its the root element
const bool isRoot = fElemStack.isEmpty();
// Lets try to look up the element
const XMLCh* qnameRawBuf = fQNameBuf.getRawBuffer();
XMLElementDecl* elemDecl = fElementLookup->get(qnameRawBuf);
if (!elemDecl) {
if (fElementIndex < fElements->size()) {
elemDecl = fElements->elementAt(fElementIndex);
}
else {
elemDecl = new (fGrammarPoolMemoryManager) DTDElementDecl
(
fGrammarPoolMemoryManager
);
fElements->addElement(elemDecl);
}
elemDecl->setElementName(XMLUni::fgZeroLenString, qnameRawBuf, fEmptyNamespaceId);
fElementLookup->put((void*)elemDecl->getFullName(), elemDecl);
fElementIndex++;
}
// Expand the element stack and add the new element
fElemStack.addLevel(elemDecl, fReaderMgr.getCurrentReaderNum());
// Skip any whitespace after the name
fReaderMgr.skipPastSpaces();
// We loop until we either see a /> or >, handling attribute/value
// pairs until we get there.
XMLSize_t attCount = 0;
XMLSize_t curAttListSize = fAttrList->size();
while (true)
{
// And get the next non-space character
XMLCh nextCh = fReaderMgr.peekNextChar();
// If the next character is not a slash or closed angle bracket,
// then it must be whitespace, since whitespace is required
// between the end of the last attribute and the name of the next
// one.
if (attCount)
{
if ((nextCh != chForwardSlash) && (nextCh != chCloseAngle))
{
bool bFoundSpace;
fReaderMgr.skipPastSpaces(bFoundSpace);
if (!bFoundSpace)
{
// Emit the error but keep on going
emitError(XMLErrs::ExpectedWhitespace);
}
// Ok, peek another char
nextCh = fReaderMgr.peekNextChar();
}
}
// Ok, here we first check for any of the special case characters.
// If its not one, then we do the normal case processing, which
// assumes that we've hit an attribute value, Otherwise, we do all
// the special case checks.
if (!fReaderMgr.getCurrentReader()->isSpecialStartTagChar(nextCh))
{
// Assume its going to be an attribute, so get a name from
// the input.
if (!fReaderMgr.getName(fAttNameBuf))
{
emitError(XMLErrs::ExpectedAttrName);
fReaderMgr.skipPastChar(chCloseAngle);
return false;
}
// And next must be an equal sign
if (!scanEq())
{
static const XMLCh tmpList[] =
{
chSingleQuote, chDoubleQuote, chCloseAngle
, chOpenAngle, chForwardSlash, chNull
};
emitError(XMLErrs::ExpectedEqSign);
// Try to sync back up by skipping forward until we either
// hit something meaningful.
const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
if ((chFound == chCloseAngle) || (chFound == chForwardSlash))
{
// Jump back to top for normal processing of these
continue;
}
else if ((chFound == chSingleQuote)
|| (chFound == chDoubleQuote)
|| fReaderMgr.getCurrentReader()->isWhitespace(chFound))
{
// Just fall through assuming that the value is to follow
}
else if (chFound == chOpenAngle)
{
// Assume a malformed tag and that new one is starting
emitError(XMLErrs::UnterminatedStartTag, qnameRawBuf);
return false;
}
else
{
// Something went really wrong
return false;
}
}
// See if this attribute is declared more than one for this element.
const XMLCh* attNameRawBuf = fAttNameBuf.getRawBuffer();
XMLSize_t attNameHash = XMLString::hash(attNameRawBuf, 109);
if (attCount) {
for (XMLSize_t k=0; k < attCount; k++) {
if (fAttrNameHashList->elementAt(k) == attNameHash) {
if (
XMLString::equals
(
fAttrList->elementAt(k)->getName()
, attNameRawBuf
)
)
{
emitError
(
XMLErrs::AttrAlreadyUsedInSTag
, attNameRawBuf
, qnameRawBuf
);
break;
}
}
}
}
// Skip any whitespace before the value and then scan the att
// value. This will come back normalized with entity refs and
// char refs expanded.
fReaderMgr.skipPastSpaces();
if (!scanAttValue(attNameRawBuf, fAttValueBuf))
{
static const XMLCh tmpList[] =
{
chCloseAngle, chOpenAngle, chForwardSlash, chNull
};
emitError(XMLErrs::ExpectedAttrValue);
// It failed, so lets try to get synced back up. We skip
// forward until we find some whitespace or one of the
// chars in our list.
const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
if ((chFound == chCloseAngle)
|| (chFound == chForwardSlash)
|| fReaderMgr.getCurrentReader()->isWhitespace(chFound))
{
// Just fall through and process this attribute, though
// the value will be "".
}
else if (chFound == chOpenAngle)
{
// Assume a malformed tag and that new one is starting
emitError(XMLErrs::UnterminatedStartTag, qnameRawBuf);
return false;
}
else
{
// Something went really wrong
return false;
}
}
// Add this attribute to the attribute list that we use to
// pass them to the handler. We reuse its existing elements
// but expand it as required.
XMLAttr* curAtt;
if (attCount >= curAttListSize)
{
curAtt = new (fMemoryManager) XMLAttr
(
0
, attNameRawBuf
, XMLUni::fgZeroLenString
, fAttValueBuf.getRawBuffer()
, XMLAttDef::CData
, true
, fMemoryManager
);
fAttrList->addElement(curAtt);
fAttrNameHashList->addElement(attNameHash);
}
else
{
curAtt = fAttrList->elementAt(attCount);
curAtt->set
(
0
, attNameRawBuf
, XMLUni::fgZeroLenString
, fAttValueBuf.getRawBuffer()
);
curAtt->setSpecified(true);
fAttrNameHashList->setElementAt(attNameHash, attCount);
}
attCount++;
// And jump back to the top of the loop
continue;
}
// It was some special case character so do all of the checks and
// deal with it.
if (!nextCh)
ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
if (nextCh == chForwardSlash)
{
fReaderMgr.getNextChar();
isEmpty = true;
if (!fReaderMgr.skippedChar(chCloseAngle))
emitError(XMLErrs::UnterminatedStartTag, qnameRawBuf);
break;
}
else if (nextCh == chCloseAngle)
{
fReaderMgr.getNextChar();
break;
}
else if (nextCh == chOpenAngle)
{
// Check for this one specially, since its going to be common
// and it is kind of auto-recovering since we've already hit the
// next open bracket, which is what we would have seeked to (and
// skipped this whole tag.)
emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
break;
}
else if ((nextCh == chSingleQuote) || (nextCh == chDoubleQuote))
{
// Check for this one specially, which is probably a missing
// attribute name, e.g. ="value". Just issue expected name
// error and eat the quoted string, then jump back to the
// top again.
emitError(XMLErrs::ExpectedAttrName);
fReaderMgr.getNextChar();
fReaderMgr.skipQuotedString(nextCh);
fReaderMgr.skipPastSpaces();
continue;
}
}
// If empty, validate content right now if we are validating and then
// pop the element stack top. Else, we have to update the current stack
// top's namespace mapping elements.
if (isEmpty)
{
// Pop the element stack back off since it'll never be used now
fElemStack.popTop();
// If the elem stack is empty, then it was an empty root
if (isRoot)
gotData = false;
}
// If we have a document handler, then tell it about this start tag. We
// don't have any URI id to send along, so send fEmptyNamespaceId. We also do not send
// any prefix since its just one big name if we are not doing namespaces.
if (fDocHandler)
{
fDocHandler->startElement
(
*elemDecl
, fEmptyNamespaceId
, 0
, *fAttrList
, attCount
, isEmpty
, isRoot
);
}
return true;
}