prometheus/alert/client.go (325 lines of code) (raw):
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
package alert
import (
"bytes"
"fmt"
"io/ioutil"
"net/http"
"sort"
"strings"
"github.com/golang/glog"
"github.com/pkg/errors"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/promql/parser"
"github.com/thoas/go-funk"
"gopkg.in/yaml.v3"
"github.com/facebookincubator/prometheus-configmanager/fsclient"
"github.com/prometheus/prometheus/pkg/rulefmt"
)
// rulesFilePostfix is the suffix appended to a file prefix to form the name
// of that prefix's rules file.
const rulesFilePostfix = "_rules.yml"
// PrometheusAlertClient provides thread-safe methods for writing, reading,
// and modifying alert configuration files. Every method that takes a
// filePrefix operates on the rules file named after that prefix.
type PrometheusAlertClient interface {
// RuleExists reports whether a rule with the given name is present in the
// rules file for filePrefix.
RuleExists(filePrefix, rulename string) bool
// WriteRule adds rule to the rules file for filePrefix.
WriteRule(filePrefix string, rule rulefmt.Rule) error
// UpdateRule replaces an existing rule in the rules file for filePrefix.
UpdateRule(filePrefix string, rule rulefmt.Rule) error
// ReadRules returns the rules stored for filePrefix; an empty ruleName
// selects all of them, otherwise only the named rule is returned.
ReadRules(filePrefix, ruleName string) ([]rulefmt.Rule, error)
// DeleteRule removes the named rule from the rules file for filePrefix.
DeleteRule(filePrefix, ruleName string) error
// BulkUpdateRules creates or updates each of the given rules and reports
// a per-rule status or error.
BulkUpdateRules(filePrefix string, rules []rulefmt.Rule) (BulkUpdateResults, error)
// ReloadPrometheus asks the Prometheus server to reload its configuration.
ReloadPrometheus() error
// Tenancy returns the tenancy configuration the client was created with.
Tenancy() TenancyConfig
}
// TenancyConfig holds the multi-tenancy settings that the client passes to
// SecureRule whenever a rule is written or updated.
type TenancyConfig struct {
// RestrictorLabel is handed to SecureRule as the restrictor label.
// NOTE(review): presumably the label name used to scope a rule to a
// tenant — confirm against SecureRule.
RestrictorLabel string `json:"restrictor_label"`
// RestrictQueries is handed to SecureRule as its first argument.
// NOTE(review): presumably toggles query restriction — confirm against
// SecureRule.
RestrictQueries bool `json:"restrict_queries"`
}
// client is the PrometheusAlertClient implementation returned by NewClient.
type client struct {
// fileLocks is locked per rules filename around every file operation.
fileLocks *FileLocker
// prometheusURL is the host part used to build the reload URL
// ("http://<prometheusURL>/-/reload").
prometheusURL string
// fsClient performs the Stat/ReadFile/WriteFile calls on rules files.
fsClient fsclient.FSClient
// tenancy is returned by Tenancy and forwarded to SecureRule.
tenancy TenancyConfig
}
// NewClient builds a PrometheusAlertClient from the given file locker,
// Prometheus address, filesystem client and tenancy configuration. The
// arguments are stored as-is; no I/O is performed here.
func NewClient(fileLocks *FileLocker, prometheusURL string, fsClient fsclient.FSClient, tenancy TenancyConfig) PrometheusAlertClient {
	c := new(client)
	c.fileLocks = fileLocks
	c.prometheusURL = prometheusURL
	c.fsClient = fsClient
	c.tenancy = tenancy
	return c
}
// ValidateRule checks that a new alert rule is a valid specification.
//
// The rule is converted to a rulefmt.RuleNode and checked with the package's
// own Validate method. If that reports any problem, validateRuleImpl is used
// to build a descriptive error, which is logged and returned. A nil return
// means the rule passed validation.
func ValidateRule(rule rulefmt.Rule) error {
	// convert to RuleNode for validation
	node := rulefmt.RuleNode{
		Record: yaml.Node{Value: rule.Record},
		Alert:  yaml.Node{Value: rule.Alert},
		Expr:   yaml.Node{Value: rule.Expr},
		// Forward the rule's own duration. With a hard-coded 0 the
		// "invalid field 'for' in recording rule" check in validateRuleImpl
		// could never trigger.
		For:         rule.For,
		Labels:      rule.Labels,
		Annotations: rule.Annotations,
	}

	if len(node.Validate()) == 0 {
		return nil
	}

	err := validateRuleImpl(node)
	glog.Errorf("Invalid rule: %v", err)
	return err
}
// validateRuleImpl determines the actual causes of the rule validation error.
// Due to how the underlying prometheus types are made (unexported), we have to copy this code
// and run it here to make it work. The actual validation is done with the package
// code.
//
// Every problem found is appended to a single error message, separated by
// "; ". The returned error is never nil: it always carries the
// "Rule Validation Error" prefix, so this must only be called for a rule that
// is already known to be invalid.
func validateRuleImpl(r rulefmt.RuleNode) error {
	err := errors.New("Rule Validation Error")

	// Exactly one of 'record' and 'alert' has to be present.
	if r.Record.Value != "" && r.Alert.Value != "" {
		err = fmt.Errorf("%v; only one of 'record' and 'alert' must be set", err)
	}
	if r.Record.Value == "" && r.Alert.Value == "" {
		err = fmt.Errorf("%v; one of 'record' or 'alert' must be set", err)
	}

	// The expression is mandatory and must parse as PromQL.
	if r.Expr.Value == "" {
		err = fmt.Errorf("%v; field 'expr' must be set in rule", err)
	} else if _, e := parser.ParseExpr(r.Expr.Value); e != nil {
		err = fmt.Errorf("%v; could not parse expression: %v", err, e)
	}

	// Recording rules accept neither annotations nor 'for', and their name
	// must be a valid metric name.
	if r.Record.Value != "" {
		if len(r.Annotations) > 0 {
			err = fmt.Errorf("%v; invalid field 'annotations' in recording rule", err)
		}
		if r.For != 0 {
			err = fmt.Errorf("%v; invalid field 'for' in recording rule", err)
		}
		if !model.IsValidMetricName(model.LabelValue(r.Record.Value)) {
			err = fmt.Errorf("%v; invalid recording rule name: %s", err, r.Record.Value)
		}
	}

	for k, v := range r.Labels {
		// The metric name label is rejected as a rule label.
		if !model.LabelName(k).IsValid() || k == model.MetricNameLabel {
			err = fmt.Errorf("%v; invalid label name: %s", err, k)
		}
		if !model.LabelValue(v).IsValid() {
			err = fmt.Errorf("%v; invalid label value: %s", err, v)
		}
	}

	for k := range r.Annotations {
		if !model.LabelName(k).IsValid() {
			err = fmt.Errorf("%v; invalid annotation name: %s", err, k)
		}
	}

	return err
}
// RuleExists reports whether the rules file for filePrefix contains a rule
// named rulename. It returns false when the file is missing or cannot be
// read. The file lock is held for the duration of the call.
func (c *client) RuleExists(filePrefix, rulename string) bool {
	filename := makeFilename(filePrefix)
	c.fileLocks.Lock(filename)
	defer c.fileLocks.Unlock(filename)

	if exists := c.ruleFileExists(filename); !exists {
		return false
	}

	ruleFile, err := c.readRuleFile(filename)
	return err == nil && ruleFile.GetRule(rulename) != nil
}
// WriteRule adds an alerting rule to the rules file for the given filePrefix,
// starting from a fresh file when none exists yet. The rule is passed through
// SecureRule with the client's tenancy settings before being added, and the
// file is then written back. The file lock is held throughout.
func (c *client) WriteRule(filePrefix string, rule rulefmt.Rule) error {
	filename := makeFilename(filePrefix)
	c.fileLocks.Lock(filename)
	defer c.fileLocks.Unlock(filename)

	ruleFile, loadErr := c.readOrInitializeRuleFile(filePrefix, filename)
	if loadErr != nil {
		return loadErr
	}

	if secureErr := SecureRule(c.tenancy.RestrictQueries, c.tenancy.RestrictorLabel, filePrefix, &rule); secureErr != nil {
		return secureErr
	}

	ruleFile.AddRule(rule)
	return c.writeRuleFile(ruleFile, filename)
}
// UpdateRule replaces a rule in the rules file for filePrefix. The file must
// already be readable; the rule is passed through SecureRule with the client's
// tenancy settings, swapped in via ReplaceRule, and the file is written back.
// The file lock is held throughout.
func (c *client) UpdateRule(filePrefix string, rule rulefmt.Rule) error {
	filename := makeFilename(filePrefix)
	c.fileLocks.Lock(filename)
	defer c.fileLocks.Unlock(filename)

	ruleFile, readErr := c.readRuleFile(filename)
	if readErr != nil {
		return fmt.Errorf("rule file %s does not exist: %v", filename, readErr)
	}

	if secureErr := SecureRule(c.tenancy.RestrictQueries, c.tenancy.RestrictorLabel, filePrefix, &rule); secureErr != nil {
		return fmt.Errorf("cannot parse expression: \"%s\", %v", rule.Expr, secureErr)
	}

	if replaceErr := ruleFile.ReplaceRule(rule); replaceErr != nil {
		return replaceErr
	}

	return c.writeRuleFile(ruleFile, filename)
}
// ReadRules returns rules from the rules file for filePrefix while holding a
// read lock on that file.
//
// A missing file yields an empty slice and no error. With an empty ruleName
// all rules in the file are returned; otherwise only the named rule is
// returned, or an error if the file does not contain it.
func (c *client) ReadRules(filePrefix, ruleName string) ([]rulefmt.Rule, error) {
	filename := makeFilename(filePrefix)
	c.fileLocks.RLock(filename)
	defer c.fileLocks.RUnlock(filename)

	if !c.ruleFileExists(filename) {
		return []rulefmt.Rule{}, nil
	}

	ruleFile, err := c.readRuleFile(filename)
	if err != nil {
		return []rulefmt.Rule{}, err
	}

	if ruleName != "" {
		match := ruleFile.GetRule(ruleName)
		if match == nil {
			return nil, fmt.Errorf("rule %s not found", ruleName)
		}
		return []rulefmt.Rule{*match}, nil
	}

	return ruleFile.Rules(), nil
}
// DeleteRule removes the rule named ruleName from the rules file for
// filePrefix and writes the file back. It returns the first error hit while
// reading the file, deleting the rule, or writing the result. The file lock is
// held throughout.
func (c *client) DeleteRule(filePrefix, ruleName string) error {
	filename := makeFilename(filePrefix)
	c.fileLocks.Lock(filename)
	defer c.fileLocks.Unlock(filename)

	ruleFile, readErr := c.readRuleFile(filename)
	if readErr != nil {
		return readErr
	}

	if deleteErr := ruleFile.DeleteRule(ruleName); deleteErr != nil {
		return deleteErr
	}

	return c.writeRuleFile(ruleFile, filename)
}
// BulkUpdateRules creates or updates every rule in rules within the rules
// file for filePrefix, starting from a fresh file when none exists yet.
//
// Each rule is handled independently and keyed in the results by its Alert
// name: a rule that fails SecureRule or ReplaceRule is recorded under Errors,
// a rule that already existed is recorded as "updated", and a new one as
// "created". The file is written once after all rules are processed; if that
// write fails, the collected results are returned together with the error.
// The file lock is held throughout.
func (c *client) BulkUpdateRules(filePrefix string, rules []rulefmt.Rule) (BulkUpdateResults, error) {
	filename := makeFilename(filePrefix)
	c.fileLocks.Lock(filename)
	defer c.fileLocks.Unlock(filename)

	ruleFile, loadErr := c.readOrInitializeRuleFile(filePrefix, filename)
	if loadErr != nil {
		return BulkUpdateResults{}, loadErr
	}

	results := NewBulkUpdateResults()
	for i := range rules {
		// Work on a copy so SecureRule does not modify the caller's slice element.
		candidate := rules[i]
		name := candidate.Alert

		if secureErr := SecureRule(c.tenancy.RestrictQueries, c.tenancy.RestrictorLabel, filePrefix, &candidate); secureErr != nil {
			results.Errors[name] = secureErr
			continue
		}

		if ruleFile.GetRule(name) == nil {
			ruleFile.AddRule(candidate)
			results.Statuses[name] = "created"
			continue
		}

		if replaceErr := ruleFile.ReplaceRule(candidate); replaceErr != nil {
			results.Errors[name] = replaceErr
			continue
		}
		results.Statuses[name] = "updated"
	}

	if writeErr := c.writeRuleFile(ruleFile, filename); writeErr != nil {
		return results, writeErr
	}
	return results, nil
}
// Tenancy returns a copy of the tenancy configuration held by the client.
func (c *client) Tenancy() TenancyConfig {
	tenancy := c.tenancy
	return tenancy
}
// ReloadPrometheus asks the Prometheus server at c.prometheusURL to reload its
// configuration by sending an empty POST to its /-/reload endpoint.
//
// It returns an error when the request cannot be sent or when the server
// answers with any status other than 200 OK; in the latter case the response
// body is included in the error message. Failures are also logged.
func (c *client) ReloadPrometheus() error {
	reloadURL := fmt.Sprintf("http://%s%s", c.prometheusURL, "/-/reload")
	resp, err := http.Post(reloadURL, "text/plain", &bytes.Buffer{})
	if err != nil {
		glog.Errorf("error reloading prometheus: %v", err)
		return fmt.Errorf("error reloading prometheus: %v", err)
	}
	// The response body must be closed on every path, otherwise the
	// underlying connection is leaked.
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best effort: the body is only used to enrich the error message, so
		// a failed read simply leaves it empty or truncated.
		body, _ := ioutil.ReadAll(resp.Body)
		glog.Errorf("error reloading prometheus (status %d): %s", resp.StatusCode, string(body))
		return fmt.Errorf("error reloading prometheus (status %d): %s", resp.StatusCode, string(body))
	}
	return nil
}
// writeRuleFile serializes ruleFile to YAML and writes it to filename through
// the filesystem client with mode 0666. Marshalling and write failures are
// logged and returned with the same "error writing rules file" prefix.
func (c *client) writeRuleFile(ruleFile *File, filename string) error {
	contents, marshalErr := yaml.Marshal(ruleFile)
	if marshalErr != nil {
		glog.Errorf("error writing rules file: %v", marshalErr)
		return fmt.Errorf("error writing rules file: %v", marshalErr)
	}

	if writeErr := c.fsClient.WriteFile(filename, contents, 0666); writeErr != nil {
		glog.Errorf("error writing rules file: %v", writeErr)
		return fmt.Errorf("error writing rules file: %v", writeErr)
	}
	return nil
}
// readOrInitializeRuleFile loads the rules file at filename when it exists
// and otherwise falls back to initializeRuleFile for filePrefix.
func (c *client) readOrInitializeRuleFile(filePrefix, filename string) (*File, error) {
	if !c.ruleFileExists(filename) {
		return c.initializeRuleFile(filePrefix, filename)
	}
	return c.readRuleFile(filename)
}
// initializeRuleFile returns a new, unsaved File for filePrefix when filename
// cannot be stat'ed. If the file does turn out to exist it is read instead,
// and a read failure yields a nil File together with the error.
func (c *client) initializeRuleFile(filePrefix, filename string) (*File, error) {
	if _, statErr := c.fsClient.Stat(filename); statErr != nil {
		return NewFile(filePrefix), nil
	}

	existing, readErr := c.readRuleFile(filename)
	if readErr != nil {
		return nil, readErr
	}
	return existing, nil
}
// ruleFileExists reports whether Stat on filename succeeds. Any Stat error,
// not only "not found", is treated as the file being absent.
func (c *client) ruleFileExists(filename string) bool {
	if _, statErr := c.fsClient.Stat(filename); statErr != nil {
		return false
	}
	return true
}
// readRuleFile reads requestedFile through the filesystem client and decodes
// its YAML content into a File.
//
// A read failure is logged and returned alongside an empty, non-nil File. A
// decode failure is returned as-is together with whatever was decoded so far.
func (c *client) readRuleFile(requestedFile string) (*File, error) {
	raw, readErr := c.fsClient.ReadFile(requestedFile)
	if readErr != nil {
		glog.Errorf("error reading rules file: %v", readErr)
		return &File{}, fmt.Errorf("error reading rules file: %v", readErr)
	}

	parsed := new(File)
	decodeErr := yaml.Unmarshal(raw, parsed)
	return parsed, decodeErr
}
// BulkUpdateResults reports the per-rule outcome of a BulkUpdateRules call.
// Both maps are keyed by rule name.
type BulkUpdateResults struct {
// Errors holds the error for each rule that could not be created or updated.
Errors map[string]error
// Statuses holds "created" or "updated" for each rule that was applied.
Statuses map[string]string
}
// NewBulkUpdateResults returns a BulkUpdateResults whose Errors and Statuses
// maps are allocated and empty, so entries can be assigned immediately.
func NewBulkUpdateResults() BulkUpdateResults {
	var results BulkUpdateResults
	results.Errors = map[string]error{}
	results.Statuses = map[string]string{}
	return results
}
// String renders the results as text: an "Errors" section followed by a
// "Statuses" section, each omitted when empty, with one tab-indented
// "name: value" line per rule sorted by rule name.
func (r BulkUpdateResults) String() string {
	var out strings.Builder

	if len(r.Errors) != 0 {
		out.WriteString("Errors: \n")
		failed := funk.Keys(r.Errors).([]string)
		sort.Strings(failed)
		for i := range failed {
			fmt.Fprintf(&out, "\t%s: %s\n", failed[i], r.Errors[failed[i]])
		}
	}

	if len(r.Statuses) != 0 {
		out.WriteString("Statuses: \n")
		applied := funk.Keys(r.Statuses).([]string)
		sort.Strings(applied)
		for i := range applied {
			fmt.Fprintf(&out, "\t%s: %s\n", applied[i], r.Statuses[applied[i]])
		}
	}

	return out.String()
}
// makeFilename returns the rules filename for filePrefix, formed by appending
// rulesFilePostfix to the prefix.
func makeFilename(filePrefix string) string {
	filename := filePrefix
	filename += rulesFilePostfix
	return filename
}