public static String getPartition()

in core/src/main/java/org/apache/stormcrawler/util/URLPartitioner.java [44:93]


    public static String getPartition(
            final String url, final Metadata metadata, final String partitionMode) {
        String partitionKey = null;
        String host = "";

        // IP in metadata?
        if (partitionMode.equalsIgnoreCase(Constants.PARTITION_MODE_IP)) {
            String ip_provided = metadata.getFirstValue("ip");
            if (StringUtils.isNotBlank(ip_provided)) {
                partitionKey = ip_provided;
            }
        }

        if (partitionKey == null) {
            URL u;
            try {
                u = new URL(url);
                host = u.getHost();
            } catch (MalformedURLException e1) {
                LOG.warn("Invalid URL: {}", url);
                return null;
            }
        }

        // partition by hostname
        if (partitionMode.equalsIgnoreCase(Constants.PARTITION_MODE_HOST)) partitionKey = host;

        // partition by domain : needs fixing
        else if (partitionMode.equalsIgnoreCase(Constants.PARTITION_MODE_DOMAIN)) {
            partitionKey = PaidLevelDomain.getPLD(host);
        }

        // partition by IP
        if (partitionMode.equalsIgnoreCase(Constants.PARTITION_MODE_IP) && partitionKey == null) {
            try {
                long start = System.currentTimeMillis();
                final InetAddress addr = InetAddress.getByName(host);
                partitionKey = addr.getHostAddress();
                long end = System.currentTimeMillis();
                LOG.debug("Resolved IP {} in {} msec for : {}", partitionKey, end - start, url);
            } catch (final Exception e) {
                LOG.warn("Unable to resolve IP for: {}", host);
                return null;
            }
        }

        LOG.debug("Partition Key for: {} > {}", url, partitionKey);

        return partitionKey;
    }