private void initialize()

in src/org/apache/xml/utils/URI.java [365:590]


  /**
   * Initializes this URI from a base URI and a URI specification string.
   * <p>
   * If the spec is null or blank, the base is handed to
   * {@code initialize(URI)} (presumably copying it). Otherwise the spec is
   * split into scheme, authority and path via the {@code initializeScheme},
   * {@code initializeAuthority} and {@code initializePath} helpers and, when
   * a usable base is present, resolved against it following
   * RFC 2396 Section 5.2.
   *
   * @param p_base the base URI; may be null only if {@code p_uriSpec}
   *               carries a scheme
   * @param p_uriSpec the URI specification string; may be null or blank
   *                  only if {@code p_base} is not null
   *
   * @throws MalformedURIException if both arguments are empty, or if the
   *         spec contains no ':' and no base is given. The parsing helpers
   *         called here may presumably raise it as well.
   */
  private void initialize(URI p_base, String p_uriSpec)
          throws MalformedURIException
  {

    boolean specIsEmpty = (p_uriSpec == null
                           || p_uriSpec.trim().length() == 0);

    if (p_base == null && specIsEmpty)
    {
      throw new MalformedURIException(
        XMLMessages.createXMLMessage(XMLErrorResources.ER_CANNOT_INIT_URI_EMPTY_PARMS, null)); //"Cannot initialize URI with empty parameters.");
    }

    // just make a copy of the base if spec is empty
    if (specIsEmpty)
    {
      initialize(p_base);

      return;
    }

    String uriSpec = p_uriSpec.trim();
    int uriSpecLen = uriSpec.length();
    int index = 0;

    // check for scheme
    int colonIndex = uriSpec.indexOf(':');
    if (colonIndex < 0)
    {
      if (p_base == null)
      {
        throw new MalformedURIException(XMLMessages.createXMLMessage(XMLErrorResources.ER_NO_SCHEME_IN_URI, new Object[]{uriSpec})); //"No scheme found in URI: "+uriSpec);
      }
    }
    else
    {
      initializeScheme(uriSpec);

      // everything after the first ':' is parsed as authority/path below
      uriSpec = uriSpec.substring(colonIndex+1);
      // This is a fix for XALANJ-2059.
      if(m_scheme != null && p_base != null)
      {
        // a) If <uriSpec> starts with a slash (/), it means <uriSpec> is absolute 
        //    and p_base can be ignored.
        //    For example,
        //    uriSpec = file:/myDIR/myXSLFile.xsl
        //    p_base = file:/myWork/
        //
        //    Here, uriSpec has absolute path after scheme file and :
        //    Hence p_base can be ignored.
        // 
        // b) Similarly, according to RFC 2396, uri is resolved for <uriSpec> relative to <p_base>
        //    if scheme in <uriSpec> is same as scheme in <p_base>, else p_base can be ignored.
        // 
        // c) if <p_base> is not hierarchical, it can be ignored.
        //
        if(uriSpec.startsWith("/") || !m_scheme.equals(p_base.m_scheme) || !p_base.getSchemeSpecificPart().startsWith("/"))
        {
          p_base = null;
        }
      }
      // Fix for XALANJ-2059  
      uriSpecLen = uriSpec.length();
    }

    // two slashes means generic URI syntax, so we get the authority
    if (uriSpec.startsWith("//"))
    {
      index += 2;

      int startPos = index;

      // get authority - everything up to path, query or fragment
      char testChar = '\0';

      while (index < uriSpecLen)
      {
        testChar = uriSpec.charAt(index);

        if (testChar == '/' || testChar == '?' || testChar == '#')
        {
          break;
        }

        index++;
      }

      // if we found authority, parse it out, otherwise we set the
      // host to empty string
      if (index > startPos)
      {
        initializeAuthority(uriSpec.substring(startPos, index));
      }
      else
      {
        m_host = "";
      }
    }

    initializePath(uriSpec.substring(index));

    // Resolve relative URI to base URI - see RFC 2396 Section 5.2
    // In some cases, it might make more sense to throw an exception
    // (when scheme is specified is the string spec and the base URI
    // is also specified, for example), but we're just following the
    // RFC specifications 
    if (p_base != null)
    {

      // check to see if this is the current doc - RFC 2396 5.2 #2
      // note that this is slightly different from the RFC spec in that
      // we don't include the check for query string being null
      // - this handles cases where the urispec is just a query
      // string or a fragment (e.g. "?y" or "#s") - 
      // see <http://www.ics.uci.edu/~fielding/url/test1.html> which
      // identified this as a bug in the RFC
      if (m_path.length() == 0 && m_scheme == null && m_host == null)
      {
        m_scheme = p_base.getScheme();
        m_userinfo = p_base.getUserinfo();
        m_host = p_base.getHost();
        m_port = p_base.getPort();
        m_path = p_base.getPath();

        if (m_queryString == null)
        {
          m_queryString = p_base.getQueryString();
        }

        return;
      }

      // check for scheme - RFC 2396 5.2 #3
      // if we found a scheme, it means absolute URI, so we're done
      if (m_scheme == null)
      {
        m_scheme = p_base.getScheme();
      }

      // check for authority - RFC 2396 5.2 #4
      // if we found a host, then we've got a network path, so we're done
      if (m_host == null)
      {
        m_userinfo = p_base.getUserinfo();
        m_host = p_base.getHost();
        m_port = p_base.getPort();
      }
      else
      {
        return;
      }

      // check for absolute path - RFC 2396 5.2 #5
      if (m_path.length() > 0 && m_path.startsWith("/"))
      {
        return;
      }

      // if we get to this point, we need to resolve relative path
      // RFC 2396 5.2 #6
      String path = "";
      String basePath = p_base.getPath();

      // 6a - get all but the last segment of the base URI path
      if (basePath != null)
      {
        int lastSlash = basePath.lastIndexOf('/');

        if (lastSlash != -1)
        {
          path = basePath.substring(0, lastSlash + 1);
        }
      }

      // 6b - append the relative URI path
      path = path.concat(m_path);

      // 6c - remove all "./" where "." is a complete path segment
      while ((index = path.indexOf("/./")) != -1)
      {
        path = path.substring(0, index + 1).concat(path.substring(index + 3));
      }

      // 6d - remove "." if path ends with "." as a complete path segment
      if (path.endsWith("/."))
      {
        path = path.substring(0, path.length() - 1);
      }

      // 6e - remove all "<segment>/../" where "<segment>" is a complete 
      // path segment not equal to ".."
      //
      // The scan moves forward with an explicit search offset so that an
      // occurrence which cannot be collapsed (its segment is ".." or has
      // no preceding '/') is skipped instead of being found again forever.
      // It starts at 1 because a "/../" at offset 0 has no segment in
      // front of it to remove.
      int segIndex = -1;
      int searchFrom = 1;

      while ((index = path.indexOf("/../", searchFrom)) > 0)
      {
        // slash that opens the segment sitting right before this "/../"
        segIndex = path.lastIndexOf('/', index - 1);

        if (segIndex != -1
                && !path.substring(segIndex + 1, index).equals(".."))
        {
          // drop "<segment>/../" but keep the slash in front of it
          path = path.substring(0, segIndex + 1).concat(path.substring(index
                  + 4));

          // the kept slash may now start a new "/../", so resume there
          searchFrom = Math.max(segIndex, 1);
        }
        else
        {
          // cannot collapse here; continue from the trailing slash so an
          // overlapping "/../" right behind it is still seen
          searchFrom = index + 3;
        }
      }

      // 6f - remove ending "<segment>/.." where "<segment>" is a 
      // complete path segment
      if (path.endsWith("/.."))
      {
        String tempString = path.substring(0, path.length() - 3);

        segIndex = tempString.lastIndexOf('/');

        if (segIndex != -1)
        {
          path = path.substring(0, segIndex + 1);
        }
      }

      m_path = path;
    }
  }