in cloudflare_exporter.go [142:206]
func (e *exporter) scrapeCloudflare(ctx context.Context) error {
	if *initialScrapeImmediately {
		// Initial scrape; the ticker below won't fire straight away.
		// Risks double counting on restart. Only useful for development.
		if err := e.scrapeCloudflareOnce(ctx); err != nil {
			level.Error(e.logger).Log("error", err)
			cfScrapeErrs.Inc()
		}
	}
	ticker := time.Tick(e.scrapeInterval / 2)
	for {
		select {
		case <-ticker:
			// To make sure we are not indefinitely backlogged, we still need to
			// tick the bucket on every pass. If we skipped scrapes, or they
			// failed, we have effectively dropped the metrics we would otherwise
			// have scraped.
			// To counter this, we tick at twice the scrape rate (every
			// scrapeInterval/2), but make sure we only actually scrape once per
			// scrapeInterval when everything is fine.
			// If a scrape errors, we untick and retry scrapeInterval/2 later.
			// This lets us gradually recover from failures without backlogging
			// indefinitely.
			//
			// tick only returns true if we are keeping pace (we are at least one
			// interval behind), so if it returns false we are too early and just
			// break.
			// If tick returns true, we are either exactly on time, or we are
			// allowed to catch up on missed scrapes.
			if !e.scrapeBucket.tick() {
				break
			}
			if e.skipNextScrapes > 0 {
				e.logger.Log("msg", fmt.Sprintf("rate limited, will skip next %d scrapes", e.skipNextScrapes))
				e.skipNextScrapes--
				e.scrapeBucket.untick()
				break
			}
			if err := e.scrapeCloudflareOnce(ctx); err != nil {
				e.scrapeBucket.untick()
				// Returning an error here would cause the exporter to crash. If
				// it crashloops but Prometheus manages to scrape it in between
				// crashes, we might never notice that we are not updating our
				// cached metrics. Instead, we should alert on the
				// exporter_cloudflare_scrape_errors metric.
				level.Error(e.logger).Log("error", err)
				cfScrapeErrs.Inc()
				// We've observed two error messages relating to rate limits in
				// the wild:
				//   - "rate limiter budget depleted, please try again later"
				//   - "graphql: limit reached, please try again later"
				// We crudely check for the substring "limit", and err on the
				// side of applying backoff on errors containing it.
				if strings.Contains(err.Error(), "limit") {
					// Keep track of consecutive rate limit errors seen, and back
					// off one extra scrape per consecutive error.
					e.consecutiveRateLimitErrs++
					e.skipNextScrapes = e.consecutiveRateLimitErrs
				}
				break
			}
			e.skipNextScrapes = 0
			e.consecutiveRateLimitErrs = 0
		case <-ctx.Done():
			return nil
		}
	}
}
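
The scrapeBucket used above is defined elsewhere in the file, and only its tick/untick semantics are described by the comments. As a rough illustration, here is a minimal, hypothetical sketch of a deadline-based bucket consistent with those comments; the type, field names, and backlog cap below are invented for this sketch, not the actual implementation, and the code assumes the file's existing time import.

// scrapeBucket (hypothetical sketch): tracks the next time a scrape is owed.
type scrapeBucket struct {
	interval time.Duration // one permitted scrape per interval
	next     time.Time     // deadline for the next permitted scrape
}

// tick reports whether a scrape is due, advancing the deadline by one
// interval on success. Because the deadline is also clamped here, calling
// tick on every ticker fire keeps the backlog bounded even when scrapes are
// being skipped.
func (b *scrapeBucket) tick() bool {
	now := time.Now()
	if b.next.After(now) {
		return false // too early, not yet due for a scrape
	}
	// Clamp the backlog so we never owe more than a few catch-up scrapes.
	if now.Sub(b.next) > 3*b.interval {
		b.next = now.Add(-3 * b.interval)
	}
	b.next = b.next.Add(b.interval)
	return true
}

// untick returns a consumed tick after a failed or skipped scrape, so the
// next ticker fire (scrapeInterval/2 later) can try again.
func (b *scrapeBucket) untick() {
	b.next = b.next.Add(-b.interval)
}

With the ticker firing every scrapeInterval/2, a healthy bucket grants one tick per scrapeInterval, while a failed scrape that unticks becomes eligible again on the very next fire, matching the retry-in-half-an-interval behaviour the comments describe.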