in src/main/java/org/apache/sling/commons/scheduler/impl/SchedulerHealthCheck.java [89:182]
public Result execute() {
    // Builds a health-check result from the scheduler metrics:
    //  - the counter named METRICS_NAME_RUNNING_JOBS supplies the number of running jobs,
    //  - the gauge named METRICS_NAME_OLDEST_RUNNING_JOB_MILLIS supplies the age of the oldest job,
    //  - gauges prefixed with "<oldest gauge name>.slow." supply per-job details.
    // A critical entry is logged when the oldest job runs longer than
    // maxQuartzJobDurationAcceptable. Any exception raised while reading the metrics is
    // reported as a health-check error instead of being propagated to the caller.
    final FormattingResultLog resultLog = new FormattingResultLog();
    try {
        long runningCount = 0;
        final SortedMap<String, Counter> runningCntSet = metricRegistry.getCounters(new MetricFilter() {
            @Override
            public boolean matches(String name, Metric metric) {
                return name.equals(QuartzScheduler.METRICS_NAME_RUNNING_JOBS);
            }
        });
        if (runningCntSet != null) {
            final Iterator<Counter> counters = runningCntSet.values().iterator();
            if (counters.hasNext()) {
                runningCount = counters.next().getCount();
            }
            // never report a negative number of running jobs
            runningCount = Math.max(0, runningCount);
        }

        final SortedMap<String, Gauge> oldestGaugeSet = metricRegistry.getGauges(new MetricFilter() {
            @Override
            public boolean matches(String name, Metric metric) {
                return name.equals(QuartzScheduler.METRICS_NAME_OLDEST_RUNNING_JOB_MILLIS);
            }
        });
        if (oldestGaugeSet.isEmpty()) {
            resultLog.warn("Sling Scheduler cannot find any metrics gauge starting with {}",
                    QuartzScheduler.METRICS_NAME_OLDEST_RUNNING_JOB_MILLIS);
        } else {
            // Read the value as a Number so that any numeric gauge type is accepted;
            // a null or non-numeric value still fails and is handled by the catch below.
            final long oldestRunningJobInMillis =
                    ((Number) oldestGaugeSet.values().iterator().next().getValue()).longValue();
            if (oldestRunningJobInMillis <= -1) {
                resultLog.info("Sling Scheduler has no long-running Quartz-Jobs at this moment.");
            } else if (oldestRunningJobInMillis > maxQuartzJobDurationAcceptable) {
                final String slowPrefix = QuartzScheduler.METRICS_NAME_OLDEST_RUNNING_JOB_MILLIS + ".slow.";
                final SortedMap<String, Gauge> allGaugeSet = metricRegistry.getGauges(new MetricFilter() {
                    @Override
                    public boolean matches(String name, Metric metric) {
                        return name.startsWith(slowPrefix);
                    }
                });

                // Collect "<name>=<millis>ms" for every slow-job gauge that still reports a job.
                final StringBuilder slowNames = new StringBuilder();
                int numSlow = 0;
                for (final Entry<String, Gauge> slowEntry : allGaugeSet.entrySet()) {
                    final long millis = ((Number) slowEntry.getValue().getValue()).longValue();
                    if (millis < 0) {
                        // skip - this job is no longer running
                        continue;
                    }
                    if (numSlow++ > 0) {
                        slowNames.append(", ");
                    }
                    slowNames.append(slowEntry.getKey().substring(slowPrefix.length()));
                    slowNames.append("=").append(millis).append("ms");
                }

                if (numSlow == 0) {
                    // Either no slow-job gauges are registered, or all of them reported a
                    // negative value by the time they were read. The oldest-job gauge still
                    // exceeded the limit, so report that without claiming "0 long-running jobs".
                    resultLog.critical(
                            "Sling Scheduler has at least one long-running Quartz-Job with the oldest running for {}ms.",
                            oldestRunningJobInMillis);
                } else if (numSlow == 1) {
                    resultLog.critical(
                            "Sling Scheduler has 1 long-running Quartz-Job which is already running for {}ms: {}.",
                            oldestRunningJobInMillis, slowNames.toString());
                } else {
                    resultLog.critical(
                            "Sling Scheduler has {} long-running Quartz-Jobs with the oldest running for {}ms: {}.",
                            numSlow, oldestRunningJobInMillis, slowNames.toString());
                }
                resultLog.info("More details are exposed in metrics including gauges for slow Quartz-Jobs containing shortened job names.");
                resultLog.info("Furthermore, thread-dumps can also help narrow down slow Quartz-Jobs.");
            } else {
                resultLog.info(
                        "Sling Scheduler has no long-running Quartz-Jobs at this moment, the oldest current Quartz-Job is {}ms.",
                        oldestRunningJobInMillis);
            }
            resultLog.info("The total number of currently runnning Quartz-Jobs is {}.", runningCount);
            resultLog.info("[The maximum acceptable duration a Quartz-Job should run for is configured to {}ms. "
                    + "This duration can be changed in the QuartzScheduler via the configuration manager]({})",
                    maxQuartzJobDurationAcceptable,
                    "/system/console/configMgr/org.apache.sling.commons.scheduler.impl.QuartzScheduler");
        }
    } catch (final Exception e) {
        // NOTE(review): assumes SLF4J-style formatting, where a trailing Throwable is attached
        // as the exception rather than substituted into "{}". The text for the placeholder is
        // therefore passed explicitly, followed by the exception itself.
        logger.warn("execute: metrics invocation failed with exception: {}", e.toString(), e);
        resultLog.healthCheckError("execute: metrics invocation failed with exception: {}", e.toString(), e);
    }
    return new Result(resultLog);
}