in alerter/engine/worker.go [69:170]
// ExecuteQuery runs the worker's rule once against its Kusto database.
//
// The rule is skipped when it declares criteria and none of them match the
// worker's tags: each criteria key is lower-cased before being looked up in
// e.tags, and values are compared case-insensitively. Otherwise the call waits
// for a slot in queue.Workers (giving up if ctx ends first), runs the query
// under a maxQueryTime timeout, and reports the outcome through logs, metrics
// and, for some failures, an alert sent via e.AlertCli:
//
//   - alert.ErrTooManyRequests: an alert about the throttling is sent.
//   - errors for which isUserError returns false: QueryHealth is set to 0.
//   - errors for which isUserError returns true: a failure alert containing the
//     query links is sent; QueryHealth is set to 1 once the alert is delivered.
//
// Nothing is returned; callers observe results through logs and metrics only.
func (e *worker) ExecuteQuery(ctx context.Context) {
	endpoint := e.kustoClient.Endpoint(e.rule.Database)

	// Check if the rule is enabled for this instance by matching any of the alert criteria tags.
	var matched bool
criteria:
	for k, values := range e.rule.Criteria {
		tag, ok := e.tags[strings.ToLower(k)]
		if !ok {
			continue
		}
		for _, value := range values {
			if strings.EqualFold(tag, value) {
				matched = true
				break criteria
			}
		}
	}

	// If tags are specified, but none of them matched, skip the query.
	if len(e.rule.Criteria) > 0 && !matched {
		logger.Infof("Skipping %s/%s on %s/%s because none of the tags matched: %v", e.rule.Namespace, e.rule.Name, endpoint, e.rule.Database, e.tags)
		return
	}

	// Try to acquire a worker slot, but do not wait past the caller's context:
	// a cancelled caller must not stay parked here until a slot frees up.
	select {
	case queue.Workers <- struct{}{}:
	case <-ctx.Done():
		logger.Infof("Skipping %s/%s on %s/%s because the context ended while waiting for a worker slot: %s", e.rule.Namespace, e.rule.Name, endpoint, e.rule.Database, ctx.Err())
		return
	}
	// Release the worker slot.
	defer func() { <-queue.Workers }()

	// The timeout starts only after a slot is held, so time spent queued does
	// not count against the query budget.
	ctx, cancel := context.WithTimeout(ctx, maxQueryTime)
	defer cancel()

	start := time.Now().UTC()
	queryContext, err := NewQueryContext(e.rule, start, e.Region)
	if err != nil {
		logger.Errorf("Failed to wrap query=%s/%s on %s/%s: %s", e.rule.Namespace, e.rule.Name, endpoint, e.rule.Database, err)
		return
	}

	logger.Infof("Executing %s/%s on %s/%s", e.rule.Namespace, e.rule.Name, endpoint, e.rule.Database)
	err, rows := e.kustoClient.Query(ctx, queryContext, e.HandlerFn)
	if err != nil {
		// This failed because we sent too many notifications.
		if errors.Is(err, alert.ErrTooManyRequests) {
			throttleID := fmt.Sprintf("notification-failure/%s/%s", e.rule.Namespace, e.rule.Name)
			if err := e.AlertCli.Create(ctx, e.AlertAddr, alert.Alert{
				Destination:   e.rule.Destination,
				Title:         fmt.Sprintf("Alert %s/%s has too many notifications in %s", e.rule.Namespace, e.rule.Name, e.Region),
				Summary:       "This alert has been throttled by ICM due to too many notifications. Please reduce the number of notifications for this alert.",
				Severity:      3,
				Source:        throttleID,
				CorrelationID: throttleID,
			}); err != nil {
				logger.Errorf("Failed to send alert for throttled notification for %s/%s: %s", e.rule.Namespace, e.rule.Name, err)
			}
			return
		}

		// This failed because the query failed.
		logger.Errorf("Failed to execute query=%s/%s on %s/%s: %s", e.rule.Namespace, e.rule.Name, endpoint, e.rule.Database, err)
		if !isUserError(err) {
			metrics.QueryHealth.WithLabelValues(e.rule.Namespace, e.rule.Name).Set(0)
			return
		}

		// User error: tell the rule's destination that the query itself is broken.
		summary, linkErr := KustoQueryLinks(fmt.Sprintf("This query is failing to execute:<br/><br/><pre>%s</pre><br/><br/>", err.Error()), queryContext.Query, endpoint, e.rule.Database)
		if linkErr != nil {
			// No alert was attempted; the summary could not be built, so the
			// failure notification cannot go out.
			logger.Errorf("Failed to build failure alert summary for %s/%s: %s", e.rule.Namespace, e.rule.Name, linkErr)
			metrics.NotificationUnhealthy.WithLabelValues(e.rule.Namespace, e.rule.Name).Set(1)
			return
		}

		endpointBaseName := strings.TrimPrefix(endpoint, "https://")
		if err := e.AlertCli.Create(ctx, e.AlertAddr, alert.Alert{
			Destination:   e.rule.Destination,
			Title:         fmt.Sprintf("Alert %s/%s has query errors on %s", e.rule.Namespace, e.rule.Name, endpoint),
			Summary:       summary,
			Severity:      3,
			Source:        fmt.Sprintf("%s/%s", e.rule.Namespace, e.rule.Name),
			CorrelationID: fmt.Sprintf("alert-failure/%s/%s/%s", endpointBaseName, e.rule.Namespace, e.rule.Name),
		}); err != nil {
			logger.Errorf("Failed to send failure alert for %s/%s/%s: %s", endpointBaseName, e.rule.Namespace, e.rule.Name, err)
			// Only set the notification as failed if we are not able to send a failure alert directly.
			metrics.NotificationUnhealthy.WithLabelValues(e.rule.Namespace, e.rule.Name).Set(1)
			return
		}
		metrics.NotificationUnhealthy.WithLabelValues(e.rule.Namespace, e.rule.Name).Set(0)

		// Query failed due to user error, so return the query to healthy.
		metrics.QueryHealth.WithLabelValues(e.rule.Namespace, e.rule.Name).Set(1)
		return
	}

	metrics.QueryHealth.WithLabelValues(e.rule.Namespace, e.rule.Name).Set(1)
	metrics.QueriesRunTotal.WithLabelValues().Inc()
	logger.Infof("Completed %s/%s in %s", e.rule.Namespace, e.rule.Name, time.Since(start))
	logger.Infof("Query for %s/%s completed with %d entries found", e.rule.Namespace, e.rule.Name, rows)
}