private boolean isValidAuthority()

in src/plugin/urlfilter-validator/src/java/org/apache/nutch/urlfilter/validator/UrlValidator.java [221:311]


  private boolean isValidAuthority(String authority) {
    if (authority == null) {
      return false;
    }

    Matcher authorityMatcher = AUTHORITY_PATTERN.matcher(authority);
    if (!authorityMatcher.matches()) {
      return false;
    }

    boolean ipV4Address = false;
    boolean hostname = false;
    // check if authority is IP address or hostname
    String hostIP = authorityMatcher.group(PARSE_AUTHORITY_HOST_IP);
    Matcher matchIPV4Pat = IP_V4_DOMAIN_PATTERN.matcher(hostIP);
    ipV4Address = matchIPV4Pat.matches();

    if (ipV4Address) {
      // this is an IP address so check components
      for (int i = 1; i <= 4; i++) {
        String ipSegment = matchIPV4Pat.group(i);
        if (ipSegment == null || ipSegment.length() <= 0) {
          return false;
        }

        try {
          if (Integer.parseInt(ipSegment) > 255) {
            return false;
          }
        } catch (NumberFormatException e) {
          return false;
        }

      }
    } else {
      // Domain is hostname name
      hostname = DOMAIN_PATTERN.matcher(hostIP).matches();
    }

    // rightmost hostname will never start with a digit.
    if (hostname) {
      // LOW-TECH FIX FOR VALIDATOR-202
      // TODO: Rewrite to use ArrayList and .add semantics: see VALIDATOR-203
      char[] chars = hostIP.toCharArray();
      int size = 1;
      for (int i = 0; i < chars.length; i++) {
        if (chars[i] == '.') {
          size++;
        }
      }
      String[] domainSegment = new String[size];
      int segCount = 0;
      int segLen = 0;
      Matcher atomMatcher = ATOM_PATTERN.matcher(hostIP);

      while (atomMatcher.find()) {
        domainSegment[segCount] = atomMatcher.group();
        segLen = domainSegment[segCount].length() + 1;
        hostIP = (segLen >= hostIP.length()) ? "" : hostIP.substring(segLen);
        segCount++;
      }
      String topLevel = domainSegment[segCount - 1];
      if (topLevel.length() < 2) {
        return false;
      }

      // First letter of top level must be a alpha
      if (!ALPHA_PATTERN.matcher(topLevel.substring(0, 1)).matches()) {
        return false;
      }

      // Make sure there's a host name preceding the authority.
      if (segCount < 2) {
        return false;
      }
    }

    if (!hostname && !ipV4Address) {
      return false;
    }

    String port = authorityMatcher.group(PARSE_AUTHORITY_PORT);
    if (port != null) {
      if (!PORT_PATTERN.matcher(port).matches()) {
        return false;
      }
    }

    String extra = authorityMatcher.group(PARSE_AUTHORITY_EXTRA);
    return isBlankOrNull(extra);
  }