void XMLUri::initialize()

in src/xercesc/util/XMLUri.cpp [383:678]


void XMLUri::initialize(const XMLUri* const baseURI
                      , const XMLCh*  const uriSpec)
{

    // get a trimmed version of uriSpec
    // uriSpec will NO LONGER be used in this function.
    //
    XMLCh* trimmedUriSpec = XMLString::replicate(uriSpec, fMemoryManager);
    XMLString::trim(trimmedUriSpec);
    ArrayJanitor<XMLCh> janName(trimmedUriSpec, fMemoryManager);
    XMLSize_t trimmedUriSpecLen = XMLString::stringLen(trimmedUriSpec);

    if ( !baseURI &&
        (!trimmedUriSpec || trimmedUriSpecLen == 0))
    {
        ThrowXMLwithMemMgr1(MalformedURLException
               , XMLExcepts::XMLNUM_URI_Component_Empty
               , errMsg_PARAMS
               , fMemoryManager);
    }

	// just make a copy of the base if spec is empty
	if (!trimmedUriSpec || trimmedUriSpecLen == 0)
    {
        initialize(*baseURI);
        return;
	}

	XMLSize_t index = 0;
	bool foundScheme = false;

	// Check for scheme, which must be before `/', '?' or '#'.
        int colonIdx = XMLString::indexOf(trimmedUriSpec, chColon);
        int slashIdx = XMLString::indexOf(trimmedUriSpec, chForwardSlash);
        int queryIdx = XMLString::indexOf(trimmedUriSpec, chQuestion);
        int fragmentIdx = XMLString::indexOf(trimmedUriSpec, chPound);

        if ((colonIdx <= 0) ||
            (colonIdx > slashIdx && slashIdx != -1) ||
            (colonIdx > queryIdx && queryIdx != -1) ||
            (colonIdx > fragmentIdx && fragmentIdx != -1))
        {
            // A standalone base is a valid URI according to spec
            if ( colonIdx == 0 || (!baseURI && fragmentIdx != 0) )
            {
                ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::XMLNUM_URI_No_Scheme, fMemoryManager);
            }
        }
        else
        {
            foundScheme = true;
            initializeScheme(trimmedUriSpec);
            index = XMLString::stringLen(fScheme)+1;
        }

    // It's an error if we stop here
    if (index == trimmedUriSpecLen || (foundScheme && (trimmedUriSpec[index] == chPound)))
    {
        ThrowXMLwithMemMgr1(MalformedURLException
                , XMLExcepts::XMLNUM_URI_Component_Empty
                , errMsg_PATH
                , fMemoryManager);
    }

	// two slashes means generic URI syntax, so we get the authority
    XMLCh* authUriSpec = (XMLCh*) fMemoryManager->allocate
    (
        (trimmedUriSpecLen+1) * sizeof(XMLCh)
    );//new XMLCh[trimmedUriSpecLen+1];
    ArrayJanitor<XMLCh> authName(authUriSpec, fMemoryManager);
    XMLString::subString(authUriSpec, trimmedUriSpec, index, trimmedUriSpecLen, fMemoryManager);

    if (((index+1) < trimmedUriSpecLen) &&
        XMLString::startsWith(authUriSpec, DOUBLE_SLASH))
    {
        index += 2;
        XMLSize_t startPos = index;

        // get authority - everything up to path, query or fragment
        XMLCh testChar;
        while (index < trimmedUriSpecLen)
        {
            testChar = trimmedUriSpec[index];
            if (testChar == chForwardSlash ||
                testChar == chQuestion     ||
                testChar == chPound         )
            {
                break;
            }

            index++;
        }

        // if we found authority, parse it out, otherwise we set the
        // host to empty string
        if (index > startPos)
        {
            XMLString::subString(authUriSpec, trimmedUriSpec, startPos, index, fMemoryManager);
            initializeAuthority(authUriSpec);
        }
        else
        {
            //fHost = 0;
            setHost(XMLUni::fgZeroLenString);
        }
    }

    // we need to check if index has exceed the lenght or not
    if (index >= trimmedUriSpecLen)
        return;

    XMLCh* pathUriSpec = (XMLCh*) fMemoryManager->allocate
    (
        (trimmedUriSpecLen+1) * sizeof(XMLCh)
    );//new XMLCh[trimmedUriSpecLen+1];
    ArrayJanitor<XMLCh> pathUriSpecName(pathUriSpec, fMemoryManager);
    XMLString::subString(pathUriSpec, trimmedUriSpec, index, trimmedUriSpecLen, fMemoryManager);

	initializePath(pathUriSpec);

	// Resolve relative URI to base URI - see RFC 2396 Section 5.2
	// In some cases, it might make more sense to throw an exception
	// (when scheme is specified is the string spec and the base URI
	// is also specified, for example), but we're just following the
	// RFC specifications
	if ( baseURI )
    {
        // check to see if this is the current doc - RFC 2396 5.2 #2
        // note that this is slightly different from the RFC spec in that
        // we don't include the check for query string being null
        // - this handles cases where the urispec is just a query
        // string or a fragment (e.g. "?y" or "#s") -
        // see <http://www.ics.uci.edu/~fielding/url/test1.html> which
        // identified this as a bug in the RFC
        if ((!fPath || !*fPath) &&
            fScheme == 0 &&
            fHost == 0 && fRegAuth == 0)
        {
            fScheme = XMLString::replicate(baseURI->getScheme(), fMemoryManager);
            fMemoryManager->deallocate(fUserInfo);//delete [] fUserInfo;
            fUserInfo = XMLString::replicate(baseURI->getUserInfo(), fMemoryManager);
            fHost = XMLString::replicate(baseURI->getHost(), fMemoryManager);
            fPort = baseURI->getPort();
            fRegAuth = XMLString::replicate(baseURI->getRegBasedAuthority(), fMemoryManager);
            fMemoryManager->deallocate(fPath);//delete [] fPath;
            fPath = XMLString::replicate(baseURI->getPath(), fMemoryManager);

            if ( !fQueryString )
            {
                fQueryString = XMLString::replicate(baseURI->getQueryString(), fMemoryManager);
            }
            return;
        }

        // check for scheme - RFC 2396 5.2 #3
        // if we found a scheme, it means absolute URI, so we're done
        if (fScheme == 0)
        {
            fScheme = XMLString::replicate(baseURI->getScheme(), fMemoryManager);
        }
        else
        {
            return;
        }

        // check for authority - RFC 2396 5.2 #4
        // if we found a host, then we've got a network path, so we're done
        if (fHost == 0 && fRegAuth == 0)
        {
            fMemoryManager->deallocate(fUserInfo);//delete [] fUserInfo;
            fUserInfo = XMLString::replicate(baseURI->getUserInfo(), fMemoryManager);
            fHost = XMLString::replicate(baseURI->getHost(), fMemoryManager);
            fPort = baseURI->getPort();
            fRegAuth = XMLString::replicate(baseURI->getRegBasedAuthority(), fMemoryManager);
        }
        else
        {
            return;
        }

        // check for absolute path - RFC 2396 5.2 #5
        if ((fPath && *fPath) &&
            XMLString::startsWith(fPath, SINGLE_SLASH))
        {
            return;
        }

        // if we get to this point, we need to resolve relative path
        // RFC 2396 5.2 #6

        XMLCh* basePath = XMLString::replicate(baseURI->getPath(), fMemoryManager);
        ArrayJanitor<XMLCh> basePathName(basePath, fMemoryManager);

        XMLSize_t bufLen = trimmedUriSpecLen+XMLString::stringLen(fPath)+XMLString::stringLen(basePath)+1;
        XMLCh* path = (XMLCh*) fMemoryManager->allocate(bufLen * sizeof(XMLCh));//new XMLCh[bufLen];
        ArrayJanitor<XMLCh> pathName(path, fMemoryManager);
        path[0] = 0;

        XMLCh* tmp1 = (XMLCh*) fMemoryManager->allocate(bufLen * sizeof(XMLCh));//new XMLCh[bufLen];
        ArrayJanitor<XMLCh> tmp1Name(tmp1, fMemoryManager);
        XMLCh* tmp2 = (XMLCh*) fMemoryManager->allocate(bufLen * sizeof(XMLCh));//new XMLCh[bufLen];
        ArrayJanitor<XMLCh> tmp2Name(tmp2, fMemoryManager);

        // 6a - get all but the last segment of the base URI path
        if (basePath)
        {
            int lastSlash = XMLString::lastIndexOf(basePath, chForwardSlash);
            if (lastSlash != -1)
            {
                XMLString::subString(path, basePath, 0, lastSlash+1, fMemoryManager);
            }
        }

        // 6b - append the relative URI path
        XMLString::catString(path, fPath);

        // 6c - remove all "./" where "." is a complete path segment
        int iIndex = -1;
        while ((iIndex = XMLString::patternMatch(path, SLASH_DOT_SLASH)) != -1)
        {
            XMLString::subString(tmp1, path, 0, iIndex, fMemoryManager);
            XMLString::subString(tmp2, path, iIndex+2, XMLString::stringLen(path), fMemoryManager);

            path[0] = 0;
            XMLString::catString(path, tmp1);
            XMLString::catString(path, tmp2);
        }

        // 6d - remove "." if path ends with "." as a complete path segment
        if (XMLString::endsWith(path, SLASH_DOT))
        {
            path[XMLString::stringLen(path) - 1] = chNull;
        }

        // 6e - remove all "<segment>/../" where "<segment>" is a complete
        // path segment not equal to ".."
        iIndex = -1;
        int segIndex = -1;
        int offset = 1;

        while ((iIndex = XMLString::patternMatch(&(path[offset]), SLASH_DOTDOT_SLASH)) != -1)
        {
			// Undo offset
			iIndex += offset;

			// Find start of <segment> within substring ending at found point.
			XMLString::subString(tmp1, path, 0, iIndex-1, fMemoryManager);
			segIndex = XMLString::lastIndexOf(tmp1, chForwardSlash);

			// Ensure <segment> exists and != ".."
            if (segIndex != -1                &&
                (path[segIndex+1] != chPeriod ||
                 path[segIndex+2] != chPeriod ||
				 segIndex + 3 != iIndex))
            {

                XMLString::subString(tmp1, path, 0, segIndex, fMemoryManager);
                XMLString::subString(tmp2, path, iIndex+3, XMLString::stringLen(path), fMemoryManager);

                path[0] = 0;
                XMLString::catString(path, tmp1);
                XMLString::catString(path, tmp2);

                offset = (segIndex == 0 ? 1 : segIndex);
            }
            else
            {
                offset += 4;
            }
        }// while

        // 6f - remove ending "<segment>/.." where "<segment>" is a
        // complete path segment
        if (XMLString::endsWith(path, SLASH_DOTDOT))
        {
			// Find start of <segment> within substring ending at found point.
            index = XMLString::stringLen(path) - 3;
			XMLString::subString(tmp1, path, 0, index-1, fMemoryManager);
			segIndex = XMLString::lastIndexOf(tmp1, chForwardSlash);

            if (segIndex != -1                &&
                (path[segIndex+1] != chPeriod ||
                 path[segIndex+2] != chPeriod ||
				 segIndex + 3 != (int)index))
            {
                path[segIndex+1] = chNull;
            }
        }

        if (getPath())
            fMemoryManager->deallocate(fPath);//delete [] fPath;

        fPath = XMLString::replicate(path, fMemoryManager);

    }
}