void XMLURL::parse()

in src/xercesc/util/XMLURL.cpp [932:1225]


void XMLURL::parse(const XMLCh* const urlText)
{
    // Simplify things by checking for the psycho scenarios first
    if (!*urlText)
        ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_NoProtocolPresent, fMemoryManager);

    // Before we start, check if this urlText contains valid uri characters
    if (!XMLUri::isURIString(urlText))
        fHasInvalidChar = true;
    else
        fHasInvalidChar = false;

    //
    //  The first thing we will do is to check for a file name, so that
    //  we don't waste time thinking its a URL. If its in the form x:\ or x:/
    //  and x is an ASCII letter, then assume that's the deal.
    //
    if (((*urlText >= chLatin_A) && (*urlText <= chLatin_Z))
    ||  ((*urlText >= chLatin_a) && (*urlText <= chLatin_z)))
    {
        if (*(urlText + 1) == chColon)
        {
            if ((*(urlText + 2) == chForwardSlash)
            ||  (*(urlText + 2) == chBackSlash))
            {
                ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_NoProtocolPresent, fMemoryManager);
            }
        }
    }

    // Get a copy of the URL that we can modify
    XMLCh* srcCpy = XMLString::replicate(urlText, fMemoryManager);
    ArrayJanitor<XMLCh> janSrcCopy(srcCpy, fMemoryManager);

    //
    //  Get a pointer now that we can run up thrown the source as we parse
    //  bits and pieces out of it.
    //
    XMLCh* srcPtr = srcCpy;

    // Run up past any spaces
    while (*srcPtr)
    {
        if (!XMLChar1_0::isWhitespace(*srcPtr))
            break;
        srcPtr++;
    }

    // Make sure it wasn't all space
    if (!*srcPtr)
        ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_NoProtocolPresent, fMemoryManager);

    //
    //  Ok, the next thing we have to do is to find either a / or : character.
    //  If the : is first, we assume we have a protocol. If the / is first,
    //  then we skip to the host processing.
    //
    XMLCh* ptr1 = XMLString::findAny(srcPtr, gListOne);
    XMLCh* ptr2;

    // If we found a protocol, then deal with it
    if (ptr1)
    {
        if (*ptr1 == chColon)
        {
            // Cap the string at the colon
            *ptr1 = 0;

            // And try to find it in our list of protocols
            fProtocol = lookupByName(srcPtr);

            if (fProtocol == Unknown)
            {
                ThrowXMLwithMemMgr1
                (
                    MalformedURLException
                    , XMLExcepts::URL_UnsupportedProto1
                    , srcPtr
                    , fMemoryManager
                );
            }

            // And move our source pointer up past what we've processed
            srcPtr = (ptr1 + 1);
        }
    }

    //
    //  Ok, next we need to see if we have any host part. If the next
    //  two characters are //, then we need to check, else move on.
    //
    if ((*srcPtr == chForwardSlash) && (*(srcPtr + 1) == chForwardSlash))
    {
        // Move up past the slashes
        srcPtr += 2;

        //
        //  If we aren't at the end of the string, then there has to be a
        //  host part at this point. we will just look for the next / char
        //  or end of string and make all of that the host for now.
        //
        if (*srcPtr)
        {
            // Search from here for a / character
            ptr1 = XMLString::findAny(srcPtr, gListFour);

            //
            //  If we found something, then the host is between where
            //  we are and what we found. Else the host is the rest of
            //  the content and we are done. If its empty, leave it null.
            //
            if (ptr1)
            {
                if (ptr1 != srcPtr)
                {
                    fMemoryManager->deallocate(fHost);//delete [] fHost;
                    fHost = (XMLCh*) fMemoryManager->allocate
                    (
                        ((ptr1 - srcPtr) + 1) * sizeof(XMLCh)
                    );//new XMLCh[(ptr1 - srcPtr) + 1];
                    ptr2 = fHost;
                    while (srcPtr < ptr1)
                        *ptr2++ = *srcPtr++;
                    *ptr2 = 0;
                }
            }
             else
            {
                fMemoryManager->deallocate(fHost);//delete [] fHost;
                fHost = XMLString::replicate(srcPtr, fMemoryManager);

                // Update source pointer to the end
                srcPtr += XMLString::stringLen(fHost);
            }
        }
    }
    else
    {
        //
        // http protocol requires two forward slashes
        // we didn't get them, so throw an exception
        //
        if (fProtocol == HTTP) {
            ThrowXMLwithMemMgr
                (
                    MalformedURLException
                    , XMLExcepts::URL_ExpectingTwoSlashes
                    , fMemoryManager
                );
        }
    }

    //
    //  If there was a host part, then we have to grovel through it for
    //  all the bits and pieces it can hold.
    //
    if (fHost)
    {
        //
        //  Look for a '@' character, which indicates a user name. If we
        //  find one, then everything between the start of the host data
        //  and the character is the user name.
        //
        ptr1 = XMLString::findAny(fHost, gListTwo);
        if (ptr1)
        {
            // Get this info out as the user name
            *ptr1 = 0;
            fMemoryManager->deallocate(fUser);//delete [] fUser;
            fUser = XMLString::replicate(fHost, fMemoryManager);
            ptr1++;

            // And now cut these chars from the host string
            XMLString::cut(fHost, ptr1 - fHost);

            // Is there a password inside the user string?
            ptr2 = XMLString::findAny(fUser, gListThree);
            if (ptr2)
            {
                // Remove it from the user name string
                *ptr2 = 0;

                // And copy out the remainder to the password field
                ptr2++;
                fMemoryManager->deallocate(fPassword);//delete [] fPassword;
                fPassword = XMLString::replicate(ptr2, fMemoryManager);
            }
        }

        //
        //  Ok, so now we are at the actual host name, if any. If we are
        //  not at the end of the host data, then lets see if we have a
        //  port trailing the
        //
        ptr1 = XMLString::findAny(fHost, gListThree);
        if (ptr1)
        {
            // Remove it from the host name
            *ptr1 = 0;

            // Try to convert it to a numeric port value and store it
            ptr1++;
            if (!XMLString::textToBin(ptr1, fPortNum, fMemoryManager))
                ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_BadPortField, fMemoryManager);
        }

        // If the host ended up empty, then toss is
        if (!*fHost)
        {
            fMemoryManager->deallocate(fHost);//delete[] fHost;
            fHost = 0;
        }
    }

    // If we are at the end, then we are done now
    if (!*srcPtr) {
        if(fHost) {
            static const XMLCh slash[] = { chForwardSlash, chNull };
            fPath = XMLString::replicate(slash, fMemoryManager);
        }
        return;
    }

    //
    //  Next is the path part. It can be absolute, i.e. starting with a
    //  forward slash character, or relative. Its basically everything up
    //  to the end of the string or to any trailing query or fragment.
    //
    ptr1 = XMLString::findAny(srcPtr, gListFive);
    if (!ptr1)
    {
        fMemoryManager->deallocate(fPath);//delete [] fPath;
        fPath = XMLString::replicate(srcPtr, fMemoryManager);
        return;
    }

    // Everything from where we are to what we found is the path
    if (ptr1 > srcPtr)
    {
        fMemoryManager->deallocate(fPath);//delete [] fPath;
        fPath = (XMLCh*) fMemoryManager->allocate
        (
            ((ptr1 - srcPtr) + 1) * sizeof(XMLCh)
        );//new XMLCh[(ptr1 - srcPtr) + 1];
        ptr2 = fPath;
        while (srcPtr < ptr1)
            *ptr2++ = *srcPtr++;
        *ptr2 = 0;
    }

    //
    //  If we found a fragment, then it is the rest of the string and we
    //  are done.
    //
    if (*srcPtr == chPound)
    {
        srcPtr++;
        fMemoryManager->deallocate(fFragment);//delete [] fFragment;
        fFragment = XMLString::replicate(srcPtr, fMemoryManager);
        return;
    }

    //
    //  The query is either the rest of the string, or up to the fragment
    //  separator.
    //
    srcPtr++;
    ptr1 = XMLString::findAny(srcPtr, gListSix);
    fMemoryManager->deallocate(fQuery);//delete [] fQuery;
    if (!ptr1)
    {
        fQuery = XMLString::replicate(srcPtr, fMemoryManager);
        return;
    }
     else
    {
        fQuery = (XMLCh*) fMemoryManager->allocate
        (
            ((ptr1 - srcPtr) + 1) * sizeof(XMLCh)
        );//new XMLCh[(ptr1 - srcPtr) + 1];
        ptr2 = fQuery;
        while (srcPtr < ptr1)
            *ptr2++ = *srcPtr++;
        *ptr2 = 0;
    }

    // If we are not at the end now, then everything else is the fragment
    if (*srcPtr == chPound)
    {
        srcPtr++;
        fMemoryManager->deallocate(fFragment);//delete [] fFragment;
        fFragment = XMLString::replicate(srcPtr, fMemoryManager);
    }
}