in core/src/main/java/org/apache/stormcrawler/bolt/FetcherBolt.java [330:383]
/**
 * Returns the fetch queue registered under {@code id}, creating and registering a new one when
 * no queue exists for that key yet.
 *
 * <p>The crawl delay and minimum crawl delay start from the bolt-level values and can each be
 * overridden by the metadata of the URL being queued. When a queue is created, its thread
 * limit is taken from the default, then from the first custom pattern matching {@code id},
 * and finally from the metadata, with later sources overriding earlier ones. Whether the
 * queue is new or pre-existing, its delays are only ever raised by this method, never
 * lowered, so that URLs sharing a queue end up with the least aggressive setting requested.
 *
 * @param id key of the queue; also matched against the custom max-thread patterns
 * @param metadata metadata of the URL being queued, may be {@code null}
 * @return the existing or newly created queue for {@code id}
 * @throws NumberFormatException if one of the metadata overrides is present but is not a
 *     parsable number
 */
public synchronized FetchItemQueue getFetchItemQueue(String id, Metadata metadata) {
    FetchItemQueue queue = queues.get(id);

    // Begin with the bolt-wide settings, then apply any per-URL overrides.
    long requestedDelay = crawlDelay;
    long requestedMinDelay = minCrawlDelay;
    if (metadata != null) {
        final String delayOverride = metadata.getFirstValue(CRAWL_DELAY_KEY_NAME);
        if (delayOverride != null) {
            requestedDelay = Long.parseLong(delayOverride);
        }
        final String minDelayOverride = metadata.getFirstValue(CRAWL_MIN_DELAY_KEY_NAME);
        if (minDelayOverride != null) {
            requestedMinDelay = Long.parseLong(minDelayOverride);
        }
    }

    final boolean queueMissing = (queue == null);
    if (queueMissing) {
        // Thread limit precedence: default < first matching pattern < URL metadata.
        int maxThreads = defaultMaxThread;
        for (Entry<Pattern, Integer> rule : customMaxThreads.entrySet()) {
            if (!rule.getKey().matcher(id).matches()) {
                continue;
            }
            maxThreads = rule.getValue();
            break;
        }
        if (metadata != null) {
            final String threadOverride = metadata.getFirstValue(CRAWL_MAX_THREAD_KEY_NAME);
            if (threadOverride != null) {
                maxThreads = Integer.parseInt(threadOverride);
            }
        }

        // Create the queue and make it visible to subsequent lookups.
        queue = new FetchItemQueue(maxThreads, requestedDelay, requestedMinDelay, maxQueueSize);
        queues.put(id, queue);
    }

    // Several pages can share a queue while each carrying its own custom delays; keep
    // whichever value is the least aggressive (i.e. the largest) seen so far.
    if (requestedMinDelay > queue.minCrawlDelay) {
        queue.minCrawlDelay = requestedMinDelay;
    }
    if (requestedDelay > queue.crawlDelay) {
        queue.crawlDelay = requestedDelay;
    }

    return queue;
}