in core/src/main/java/org/apache/stormcrawler/bolt/FetcherBolt.java [143:183]
/**
 * Builds a {@link FetchItem} for a URL, deriving the ID of the queue it belongs to.
 *
 * <p>The queue ID is chosen as follows, and is always lower-cased with {@link Locale#ROOT}:
 *
 * <ol>
 *   <li>a non-blank value of the tuple field {@code "key"}, if the tuple has one;
 *   <li>otherwise, depending on {@code queueMode}: the resolved IP address of the host
 *       (falling back to the hostname if resolution fails), the value returned by
 *       {@code PaidLevelDomain.getPLD} for the host (falling back to the hostname if that
 *       returns {@code null}), or the hostname for any other mode;
 *   <li>otherwise, when the URL has no usable host or the derived key is blank, the
 *       external form of the URL itself.
 * </ol>
 *
 * @param u the parsed URL the key is derived from
 * @param url the URL string stored in the returned item and used in log messages
 * @param t the tuple carried by the item; may hold a precomputed {@code "key"} field
 * @param queueMode the queue partitioning mode, compared case-insensitively against
 *     {@code FetchItemQueues.QUEUE_MODE_IP} and {@code FetchItemQueues.QUEUE_MODE_DOMAIN}
 * @return a new item for {@code url} with its queue ID set
 */
public static FetchItem create(URL u, String url, Tuple t, String queueMode) {
    String key = null;
    // reuse any key that might have been given
    // be it the hostname, domain or IP
    if (t.contains("key")) {
        key = t.getStringByField("key");
    }
    if (StringUtils.isNotBlank(key)) {
        return new FetchItem(url, t, key.toLowerCase(Locale.ROOT));
    }

    final String host = u.getHost();
    if (StringUtils.isBlank(host)) {
        // URL.getHost() yields an empty string for hostless URLs. Do not try to
        // resolve it: InetAddress.getByName maps a null or empty host to the
        // loopback address, which would key unrelated URLs to the local machine.
        key = null;
    } else if (FetchItemQueues.QUEUE_MODE_IP.equalsIgnoreCase(queueMode)) {
        try {
            final InetAddress addr = InetAddress.getByName(host);
            key = addr.getHostAddress();
        } catch (final UnknownHostException e) {
            LOG.warn("Unable to resolve IP for {}, using hostname as key.", host);
            key = host;
        }
    } else if (FetchItemQueues.QUEUE_MODE_DOMAIN.equalsIgnoreCase(queueMode)) {
        key = PaidLevelDomain.getPLD(host);
        if (key == null) {
            LOG.warn("Unknown domain for url: {}, using hostname as key", url);
            key = host;
        }
    } else {
        key = host;
    }

    // A blank key (not only a null one) would lump every affected URL into a
    // single queue with an empty ID, so fall back to the URL string instead.
    if (StringUtils.isBlank(key)) {
        LOG.warn("Unknown host for url: {}, using URL string as key", url);
        key = u.toExternalForm();
    }

    return new FetchItem(url, t, key.toLowerCase(Locale.ROOT));
}