in src/xercesc/util/XMLURL.cpp [1227:1499]
bool XMLURL::parse(const XMLCh* const urlText, XMLURL& xmlURL)
{
// Simplify things by checking for the psycho scenarios first
if (!*urlText)
return false;
// Before we start, check if this urlText contains valid uri characters
if (!XMLUri::isURIString(urlText))
xmlURL.fHasInvalidChar = true;
else
xmlURL.fHasInvalidChar = false;
//
// The first thing we will do is to check for a file name, so that
// we don't waste time thinking its a URL. If its in the form x:\ or x:/
// and x is an ASCII letter, then assume that's the deal.
//
if (((*urlText >= chLatin_A) && (*urlText <= chLatin_Z))
|| ((*urlText >= chLatin_a) && (*urlText <= chLatin_z)))
{
if (*(urlText + 1) == chColon)
{
if ((*(urlText + 2) == chForwardSlash)
|| (*(urlText + 2) == chBackSlash))
{
return false;
}
}
}
// Get a copy of the URL that we can modify
XMLCh* srcCpy = XMLString::replicate(urlText, xmlURL.fMemoryManager);
ArrayJanitor<XMLCh> janSrcCopy(srcCpy, xmlURL.fMemoryManager);
//
// Get a pointer now that we can run up thrown the source as we parse
// bits and pieces out of it.
//
XMLCh* srcPtr = srcCpy;
// Run up past any spaces
while (*srcPtr)
{
if (!XMLChar1_0::isWhitespace(*srcPtr))
break;
srcPtr++;
}
// Make sure it wasn't all space
if (!*srcPtr)
return false;
//
// Ok, the next thing we have to do is to find either a / or : character.
// If the : is first, we assume we have a protocol. If the / is first,
// then we skip to the host processing.
//
XMLCh* ptr1 = XMLString::findAny(srcPtr, gListOne);
XMLCh* ptr2;
// If we found a protocol, then deal with it
if (ptr1)
{
if (*ptr1 == chColon)
{
// Cap the string at the colon
*ptr1 = 0;
// And try to find it in our list of protocols
xmlURL.fProtocol = lookupByName(srcPtr);
if (xmlURL.fProtocol == Unknown)
return false;
// And move our source pointer up past what we've processed
srcPtr = (ptr1 + 1);
}
}
//
// Ok, next we need to see if we have any host part. If the next
// two characters are //, then we need to check, else move on.
//
if ((*srcPtr == chForwardSlash) && (*(srcPtr + 1) == chForwardSlash))
{
// Move up past the slashes
srcPtr += 2;
//
// If we aren't at the end of the string, then there has to be a
// host part at this point. we will just look for the next / char
// or end of string and make all of that the host for now.
//
if (*srcPtr)
{
// Search from here for a / character
ptr1 = XMLString::findAny(srcPtr, gListFour);
//
// If we found something, then the host is between where
// we are and what we found. Else the host is the rest of
// the content and we are done. If its empty, leave it null.
//
if (ptr1)
{
if (ptr1 != srcPtr)
{
xmlURL.fHost = (XMLCh*) xmlURL.fMemoryManager->allocate
(
((ptr1 - srcPtr) + 1) * sizeof(XMLCh)
);//new XMLCh[(ptr1 - srcPtr) + 1];
ptr2 = xmlURL.fHost;
while (srcPtr < ptr1)
*ptr2++ = *srcPtr++;
*ptr2 = 0;
}
}
else
{
xmlURL.fHost = XMLString::replicate(srcPtr, xmlURL.fMemoryManager);
// Update source pointer to the end
srcPtr += XMLString::stringLen(xmlURL.fHost);
}
}
}
else
{
//
// http protocol requires two forward slashes
// we didn't get them, so throw an exception
//
if (xmlURL.fProtocol == HTTP)
return false;
}
//
// If there was a host part, then we have to grovel through it for
// all the bits and pieces it can hold.
//
if (xmlURL.fHost)
{
//
// Look for a '@' character, which indicates a user name. If we
// find one, then everything between the start of the host data
// and the character is the user name.
//
ptr1 = XMLString::findAny(xmlURL.fHost, gListTwo);
if (ptr1)
{
// Get this info out as the user name
*ptr1 = 0;
xmlURL.fUser = XMLString::replicate(xmlURL.fHost, xmlURL.fMemoryManager);
ptr1++;
// And now cut these chars from the host string
XMLString::cut(xmlURL.fHost, ptr1 - xmlURL.fHost);
// Is there a password inside the user string?
ptr2 = XMLString::findAny(xmlURL.fUser, gListThree);
if (ptr2)
{
// Remove it from the user name string
*ptr2 = 0;
// And copy out the remainder to the password field
ptr2++;
xmlURL.fPassword = XMLString::replicate(ptr2, xmlURL.fMemoryManager);
}
}
//
// Ok, so now we are at the actual host name, if any. If we are
// not at the end of the host data, then lets see if we have a
// port trailing the
//
ptr1 = XMLString::findAny(xmlURL.fHost, gListThree);
if (ptr1)
{
// Remove it from the host name
*ptr1 = 0;
// Try to convert it to a numeric port value and store it
ptr1++;
if (!XMLString::textToBin(ptr1, xmlURL.fPortNum, xmlURL.fMemoryManager))
return false;
}
// If the host ended up empty, then toss is
if (!*(xmlURL.fHost))
{
xmlURL.fMemoryManager->deallocate(xmlURL.fHost);//delete[] fHost;
xmlURL.fHost = 0;
}
}
// If we are at the end, then we are done now
if (!*srcPtr) {
if(xmlURL.fHost) {
static const XMLCh slash[] = { chForwardSlash, chNull };
xmlURL.fPath = XMLString::replicate(slash, xmlURL.fMemoryManager);
}
return true;
}
//
// Next is the path part. It can be absolute, i.e. starting with a
// forward slash character, or relative. Its basically everything up
// to the end of the string or to any trailing query or fragment.
//
ptr1 = XMLString::findAny(srcPtr, gListFive);
if (!ptr1)
{
xmlURL.fPath = XMLString::replicate(srcPtr, xmlURL.fMemoryManager);
return true;
}
// Everything from where we are to what we found is the path
if (ptr1 > srcPtr)
{
xmlURL.fPath = (XMLCh*) xmlURL.fMemoryManager->allocate
(
((ptr1 - srcPtr) + 1) * sizeof(XMLCh)
);//new XMLCh[(ptr1 - srcPtr) + 1];
ptr2 = xmlURL.fPath;
while (srcPtr < ptr1)
*ptr2++ = *srcPtr++;
*ptr2 = 0;
}
//
// If we found a fragment, then it is the rest of the string and we
// are done.
//
if (*srcPtr == chPound)
{
srcPtr++;
xmlURL.fFragment = XMLString::replicate(srcPtr, xmlURL.fMemoryManager);
return true;
}
//
// The query is either the rest of the string, or up to the fragment
// separator.
//
srcPtr++;
ptr1 = XMLString::findAny(srcPtr, gListSix);
if (!ptr1)
{
xmlURL.fQuery = XMLString::replicate(srcPtr, xmlURL.fMemoryManager);
return true;
}
else
{
xmlURL.fQuery = (XMLCh*) xmlURL.fMemoryManager->allocate
(
((ptr1 - srcPtr) + 1) * sizeof(XMLCh)
);//new XMLCh[(ptr1 - srcPtr) + 1];
ptr2 = xmlURL.fQuery;
while (srcPtr < ptr1)
*ptr2++ = *srcPtr++;
*ptr2 = 0;
}
// If we are not at the end now, then everything else is the fragment
if (*srcPtr == chPound)
{
srcPtr++;
xmlURL.fFragment = XMLString::replicate(srcPtr, xmlURL.fMemoryManager);
}
return true;
}