in notify/notify.go [773:878]
// exec notifies r.integration about alerts, retrying on the schedule of an
// exponential backoff ticker until the notification succeeds, the integration
// reports that the error must not be retried, or ctx is done.
//
// When the integration does not want resolved alerts, only the non-resolved
// alerts are passed to Notify. If in that case the context carries no firing
// alerts, nothing is sent and all alerts are returned as if notified.
//
// Return values:
//   - success: ctx, all of alerts (not only the ones sent), nil error.
//   - unrecoverable error: ctx, all of alerts, and the wrapped Notify error.
//   - ctx done: ctx, nil, and an error wrapping the last retryable Notify
//     error recorded while ctx was still live, or, if there was none,
//     ctx.Err() tagged with the matching context reason.
//
// The attempt count in log lines and error messages is the number of Notify
// calls that were actually made.
func (r RetryStage) exec(ctx context.Context, l log.Logger, alerts ...*types.Alert) (context.Context, []*types.Alert, error) {
	var sent []*types.Alert

	// If we shouldn't send notifications for resolved alerts, but there are only
	// resolved alerts, report them all as successfully notified (we still want the
	// notification log to log them for the next run of DedupStage).
	if !r.integration.SendResolved() {
		firing, ok := FiringAlerts(ctx)
		if !ok {
			return ctx, nil, errors.New("firing alerts missing")
		}
		if len(firing) == 0 {
			return ctx, alerts, nil
		}
		for _, a := range alerts {
			if a.Status() != model.AlertResolved {
				sent = append(sent, a)
			}
		}
	} else {
		sent = alerts
	}

	b := backoff.NewExponentialBackOff()
	b.MaxElapsedTime = 0 // Always retry.

	tick := backoff.NewTicker(b)
	defer tick.Stop()

	var (
		attempts int   // Number of Notify calls made so far.
		iErr     error // Last retryable error returned by the integration.
	)

	l = log.With(l, "receiver", r.groupName, "integration", r.integration.String())
	if groupKey, ok := GroupKey(ctx); ok {
		l = log.With(l, "aggrGroup", groupKey)
	}

	// canceled builds the result returned once ctx is done. It prefers the last
	// error seen from the integration and falls back to the context error,
	// tagged with a reason, when no attempt has failed yet.
	canceled := func() (context.Context, []*types.Alert, error) {
		if iErr == nil {
			iErr = ctx.Err()
			switch {
			case errors.Is(iErr, context.Canceled):
				iErr = NewErrorWithReason(ContextCanceledReason, iErr)
			case errors.Is(iErr, context.DeadlineExceeded):
				iErr = NewErrorWithReason(ContextDeadlineExceededReason, iErr)
			}
		}
		if iErr != nil {
			return ctx, nil, fmt.Errorf("%s/%s: notify retry canceled after %d attempts: %w", r.groupName, r.integration.String(), attempts, iErr)
		}
		// Kept for contexts whose Err() is still nil even though Done fired.
		return ctx, nil, nil
	}

	for {
		// Always check the context first to not notify again.
		select {
		case <-ctx.Done():
			return canceled()
		default:
		}

		select {
		case <-tick.C:
			// Count the attempt only when Notify is really invoked, so that the
			// cancellation paths do not report an attempt that never happened.
			attempts++

			now := time.Now()
			retry, err := r.integration.Notify(ctx, sent...)
			dur := time.Since(now)
			r.metrics.notificationLatencySeconds.WithLabelValues(r.labelValues...).Observe(dur.Seconds())
			r.metrics.numNotificationRequestsTotal.WithLabelValues(r.labelValues...).Inc()

			if err == nil {
				lvl := level.Info(l)
				if attempts <= 1 {
					// First-try success is logged at debug level, with the alerts attached.
					lvl = level.Debug(log.With(l, "alerts", fmt.Sprintf("%v", alerts)))
				}
				lvl.Log("msg", "Notify success", "attempts", attempts, "duration", dur)
				return ctx, alerts, nil
			}

			r.metrics.numNotificationRequestsFailedTotal.WithLabelValues(r.labelValues...).Inc()
			if !retry {
				return ctx, alerts, fmt.Errorf("%s/%s: notify retry canceled due to unrecoverable error after %d attempts: %w", r.groupName, r.integration.String(), attempts, err)
			}
			if ctx.Err() == nil {
				if iErr == nil || err.Error() != iErr.Error() {
					// Log the error if the context isn't done and the error isn't the same as before.
					level.Warn(l).Log("msg", "Notify attempt failed, will retry later", "attempts", attempts, "err", err)
				}
				// Save this error to be able to return the last seen error by an
				// integration upon context timeout.
				iErr = err
			}
		case <-ctx.Done():
			return canceled()
		}
	}
}