tooling/prometheus-rules/main.go (367 lines of code) (raw):
// Copyright 2025 Microsoft Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"errors"
"flag"
"fmt"
"io"
"io/fs"
"os"
"os/exec"
"path"
"path/filepath"
"strings"
"text/template"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/alertsmanagement/armalertsmanagement"
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
"github.com/prometheus/common/model"
"github.com/sirupsen/logrus"
"k8s.io/utils/ptr"
"sigs.k8s.io/yaml"
)
// alertingRuleFile is one loaded Prometheus rule file plus, when it has one,
// the unit-test file that belongs to it.
type alertingRuleFile struct {
	// folderName is the directory the rule file was found in. It is only set
	// for files discovered by walking a rules folder.
	folderName string
	// fileBaseName is the rule file's base name for files discovered by walking
	// a rules folder; for untested rules it holds the full joined path instead.
	fileBaseName string

	// rules is the decoded content of the rule file.
	rules monitoringv1.PrometheusRule

	// testFileBaseName is the base name of the accompanying test file. It is
	// empty for untested rules, which makes runTests skip the entry.
	testFileBaseName string
	// testFileContent is the raw content of the accompanying test file.
	testFileContent []byte
}
// options holds the command-line input of the tool and the state derived from
// it by complete.
type options struct {
	// configFile is the value of the --config-file flag.
	configFile string

	// outputBicep is the path of the Bicep file to generate; set by complete.
	outputBicep string
	// ruleFiles are all rule files loaded by complete.
	ruleFiles []alertingRuleFile
}
// prometheusRulesConfig is the PrometheusRules section of the configuration
// file. All paths in it are resolved relative to the directory that contains
// the configuration file.
type prometheusRulesConfig struct {
	// RulesFolders lists directories that are walked recursively; every rule
	// file found there must have an accompanying test file.
	RulesFolders []string
	// UntestedRules lists individual rule files that are loaded without a test
	// file.
	UntestedRules []string
	// OutputBicep is the path of the Bicep file to generate.
	OutputBicep string
}
// cliConfig is the top-level document decoded from the configuration file.
type cliConfig struct {
	// PrometheusRules configures which rule files are read and where the
	// generated Bicep is written.
	PrometheusRules prometheusRulesConfig
}
// newOptions returns an empty options value ready to have flags bound to it.
func newOptions() *options {
	return new(options)
}
// addFlags registers the tool's command-line flags on flagSet so that parsing
// the flag set stores their values in o.
func (o *options) addFlags(flagSet *flag.FlagSet) {
	flagSet.StringVar(&o.configFile, "config-file", "", "Path to the configuration file")
}
// readRulesFile reads filename and decodes its YAML content into a
// PrometheusRule.
//
// It returns the decoded object, or an error wrapping the underlying read or
// parse failure so callers can inspect it with errors.Is / errors.As.
func readRulesFile(filename string) (*monitoringv1.PrometheusRule, error) {
	rawRules, err := os.ReadFile(filename)
	if err != nil {
		// The *fs.PathError returned by os.ReadFile already names the file.
		return nil, fmt.Errorf("failed to read input rules: %w", err)
	}

	var rules monitoringv1.PrometheusRule
	if err := yaml.Unmarshal(rawRules, &rules); err != nil {
		return nil, fmt.Errorf("failed to parse input rules %s: %w", filename, err)
	}
	return &rules, nil
}
// complete reads the configuration file named by o.configFile and fills in
// o.outputBicep and o.ruleFiles from it.
//
// Every path in the configuration is resolved relative to the directory that
// contains the configuration file. Files listed under UntestedRules are loaded
// without a test file. Each directory under RulesFolders is walked recursively:
// regular files whose name contains "_test" are treated as test files and
// skipped, every other regular file is loaded as a rule file and must have a
// "<name>_test.<ext>" file next to it.
//
// It returns an error if the configuration, a rule file or a required test
// file cannot be read or parsed.
func (o *options) complete() error {
	o.ruleFiles = make([]alertingRuleFile, 0)

	cfgRaw, err := os.ReadFile(o.configFile)
	if err != nil {
		return fmt.Errorf("error reading configuration file %w", err)
	}

	config := &cliConfig{}
	if err := yaml.Unmarshal(cfgRaw, config); err != nil {
		return fmt.Errorf("error unmarshaling configFile %s file %w", o.configFile, err)
	}

	// NOTE(review): these joins use the slash-only "path" package rather than
	// "path/filepath". Switching is only possible together with removing the
	// "path" import, which would otherwise become unused.
	baseDirectory := path.Dir(o.configFile)
	o.outputBicep = path.Join(baseDirectory, config.PrometheusRules.OutputBicep)

	for _, untestedRules := range config.PrometheusRules.UntestedRules {
		filePath := path.Join(baseDirectory, untestedRules)
		rules, err := readRulesFile(filePath)
		if err != nil {
			return fmt.Errorf("error reading rules file %w", err)
		}
		o.ruleFiles = append(o.ruleFiles, alertingRuleFile{
			fileBaseName: filePath,
			rules:        *rules,
		})
	}

	for _, rulesDir := range config.PrometheusRules.RulesFolders {
		root := path.Join(baseDirectory, rulesDir)
		err := filepath.WalkDir(root, func(entryPath string, d fs.DirEntry, walkErr error) error {
			if walkErr != nil {
				return fmt.Errorf("error reading rules directory %s, %w", entryPath, walkErr)
			}
			if !d.Type().IsRegular() {
				return nil
			}
			// Decide on the file name only: matching against the whole path
			// would skip every rule file as soon as any parent directory
			// happens to contain "_test".
			if strings.Contains(filepath.Base(entryPath), "_test") {
				return nil
			}
			ruleFile, err := loadTestedRuleFile(entryPath)
			if err != nil {
				return err
			}
			o.ruleFiles = append(o.ruleFiles, ruleFile)
			return nil
		})
		if err != nil {
			return fmt.Errorf("error reading rules dir %w", err)
		}
	}
	return nil
}

// loadTestedRuleFile loads the rule file at rulePath together with the
// "<name>_test.<ext>" file expected in the same directory.
func loadTestedRuleFile(rulePath string) (alertingRuleFile, error) {
	folderName := filepath.Dir(rulePath)
	fileBaseName := filepath.Base(rulePath)

	rules, err := readRulesFile(rulePath)
	if err != nil {
		return alertingRuleFile{}, fmt.Errorf("error reading rules file %w", err)
	}

	// The test file name is derived from "<name>.<ext>", so exactly one dot is
	// required in the rule file name.
	fileNameParts := strings.Split(fileBaseName, ".")
	if len(fileNameParts) != 2 {
		return alertingRuleFile{}, fmt.Errorf("missing filename extension or using '.' in filename %s", rulePath)
	}
	testFile := filepath.Join(folderName, fmt.Sprintf("%s_test.%s", fileNameParts[0], fileNameParts[1]))

	// Read directly instead of Stat-then-Read: one syscall less and no window
	// in which the file can disappear between the two calls.
	testFileContent, err := os.ReadFile(testFile)
	if errors.Is(err, fs.ErrNotExist) {
		return alertingRuleFile{}, fmt.Errorf("missing testfile %s for rule file %s", testFile, rulePath)
	}
	if err != nil {
		return alertingRuleFile{}, fmt.Errorf("error reading testfile %s: %w", testFile, err)
	}

	return alertingRuleFile{
		folderName:       folderName,
		fileBaseName:     fileBaseName,
		testFileBaseName: filepath.Base(testFile),
		testFileContent:  testFileContent,
		rules:            *rules,
	}, nil
}
// validate checks the parsed command line. args are the remaining positional
// arguments; none are accepted, and --config-file must have been given.
func (o *options) validate(args []string) error {
	switch {
	case len(args) > 0:
		return errors.New("no arguments are supported")
	case o.configFile == "":
		return errors.New("--config-file is required")
	default:
		return nil
	}
}
// main parses and validates the command line, loads the configured rule files,
// runs their tests and finally writes the generated Bicep to the configured
// output file. Any failure is logged and terminates the process.
func main() {
	if debug := os.Getenv("DEBUG"); debug == "true" {
		logrus.SetLevel(logrus.DebugLevel)
	}

	opts := newOptions()
	opts.addFlags(flag.CommandLine)
	flag.Parse()

	err := opts.validate(flag.Args())
	if err != nil {
		logrus.WithError(err).Fatal("invalid options")
	}

	err = opts.complete()
	if err != nil {
		logrus.WithError(err).Fatal("could not complete options")
	}

	err = runTests(opts.ruleFiles)
	if err != nil {
		logrus.WithError(err).Fatal("testing rules failed")
	}

	// generate closes the file once it is done with it.
	bicepFile, err := os.Create(opts.outputBicep)
	if err != nil {
		logrus.WithError(err).Fatal("failed to create output file")
	}

	err = generate(opts.ruleFiles, bicepFile)
	if err != nil {
		logrus.WithError(err).Fatal("failed to generate bicep")
	}
}
// runTests runs "promtool test rules" for every entry of inputRules that has a
// test file; entries without one are skipped.
//
// For each tested entry the rule groups and the test file are written side by
// side into a temporary directory, which is removed again before returning.
// It returns an error as soon as a file cannot be written or promtool fails;
// promtool's combined output is logged in the latter case.
func runTests(inputRules []alertingRuleFile) error {
	// An empty dir argument selects the default temporary directory instead of
	// a hard-coded /tmp.
	dir, err := os.MkdirTemp("", "prom-rule-test")
	if err != nil {
		return fmt.Errorf("error creating tempdir %w", err)
	}
	defer func() {
		// Cleanup is best effort; a leftover directory must not fail the run.
		if err := os.RemoveAll(dir); err != nil {
			logrus.WithError(err).Warnf("could not remove tempdir %s", dir)
		}
	}()
	logrus.Debugf("Created tempdir %s", dir)

	for _, irf := range inputRules {
		if irf.testFileBaseName == "" {
			continue
		}
		// Reject unexpected file names before anything is written to disk.
		if strings.Count(irf.fileBaseName, ".") != 1 {
			return fmt.Errorf("missing filename extension or using '.' in filename %s", irf.fileBaseName)
		}

		// Only the Spec (the rule groups) is written, not the surrounding
		// PrometheusRule object.
		ruleGroups, err := yaml.Marshal(irf.rules.Spec)
		if err != nil {
			return fmt.Errorf("error Marshalling rule groups %w", err)
		}
		ruleFile := filepath.Join(dir, irf.fileBaseName)
		if err := os.WriteFile(ruleFile, ruleGroups, 0o644); err != nil {
			return fmt.Errorf("error writing rule groups file %w", err)
		}

		testFile := filepath.Join(dir, irf.testFileBaseName)
		if err := os.WriteFile(testFile, irf.testFileContent, 0o644); err != nil {
			return fmt.Errorf("error writing rule groups test file %w", err)
		}

		logrus.Debugf("running test %s", irf.testFileBaseName)
		output, err := exec.Command("promtool", "test", "rules", testFile).CombinedOutput()
		if err != nil {
			logrus.Error(string(output))
			return fmt.Errorf("error running promtool %w", err)
		}
	}
	return nil
}
// generate writes the Bicep representation of all alerting rules in inputRules
// to output and closes output when done.
//
// The output starts with the azureMonitoring parameter declaration, followed
// by one prometheusRuleGroups resource per rule group that contains at least
// one alerting rule. Rules without an alert name (e.g. recording rules) are
// left out. Group labels are applied to every rule of the group and can be
// overridden by the rule's own labels.
//
// It returns the first write or render error. A failure to close output is
// only logged.
func generate(inputRules []alertingRuleFile, output io.WriteCloser) error {
	defer func() {
		if err := output.Close(); err != nil {
			logrus.WithError(err).Error("failed to close output file")
		}
	}()

	if _, err := output.Write([]byte("param azureMonitoring string\n")); err != nil {
		return err
	}

	for _, irf := range inputRules {
		for _, group := range irf.rules.Spec.Groups {
			logger := logrus.WithFields(logrus.Fields{
				"group": group.Name,
			})
			if group.QueryOffset != nil {
				logger.Warn("query offset is not supported in Microsoft.AlertsManagement/prometheusRuleGroups")
			}
			if group.Limit != nil {
				logger.Warn("alert limit is not supported in Microsoft.AlertsManagement/prometheusRuleGroups")
			}

			armGroup := armalertsmanagement.PrometheusRuleGroupResource{
				Name: ptr.To(group.Name),
				Properties: &armalertsmanagement.PrometheusRuleGroupProperties{
					Interval: formatDuration(group.Interval),
					Enabled:  ptr.To(true),
				},
			}

			for _, rule := range group.Rules {
				if rule.Alert == "" {
					continue
				}

				labels := make(map[string]*string, len(group.Labels)+len(rule.Labels))
				for k, v := range group.Labels {
					labels[k] = ptr.To(escapeBicepString(v))
				}
				for k, v := range rule.Labels {
					labels[k] = ptr.To(escapeBicepString(v))
				}
				annotations := make(map[string]*string, len(rule.Annotations))
				for k, v := range rule.Annotations {
					annotations[k] = ptr.To(escapeBicepString(v))
				}

				// The expression is emitted as a one-line single-quoted string,
				// so newlines are folded and quotes escaped just like for
				// labels and annotations.
				expression := strings.TrimSpace(strings.ReplaceAll(rule.Expr.String(), "\n", " "))

				armGroup.Properties.Rules = append(armGroup.Properties.Rules, &armalertsmanagement.PrometheusRule{
					Alert:       ptr.To(escapeBicepString(rule.Alert)),
					Enabled:     ptr.To(true),
					Labels:      labels,
					Annotations: annotations,
					For:         formatDuration(rule.For),
					Expression:  ptr.To(escapeBicepString(expression)),
					Severity:    severityFor(labels),
				})
			}

			if len(armGroup.Properties.Rules) == 0 {
				continue
			}
			if err := writeGroups(armGroup, output); err != nil {
				return err
			}
		}
	}
	return nil
}

// escapeBicepString backslash-escapes single quotes so that s can be placed
// inside a single-quoted Bicep string literal.
func escapeBicepString(s string) string {
	return strings.ReplaceAll(s, "'", "\\'")
}
// prometheusRuleGroupTemplate renders one rule group as a Bicep
// Microsoft.AlertsManagement/prometheusRuleGroups resource. It is parsed once
// at start-up because its text is constant. The optional labels, annotations,
// for and severity properties are only emitted when they are set.
var prometheusRuleGroupTemplate = template.Must(template.New("prometheusRuleGroup").Parse(`
resource {{.name}} 'Microsoft.AlertsManagement/prometheusRuleGroups@2023-03-01' = {
name: '{{.groups.Name}}'
location: resourceGroup().location
properties: {
rules: [
{{- range .groups.Properties.Rules}}
{
alert: '{{.Alert}}'
enabled: {{.Enabled}}
{{- if .Labels}}
labels: {
{{- range $key, $value := .Labels}}
{{$key}}: '{{$value}}'
{{- end }}
}
{{- end -}}
{{- if .Annotations}}
annotations: {
{{- range $key, $value := .Annotations}}
{{$key}}: '{{$value}}'
{{- end }}
}
{{- end }}
expression: '{{.Expression}}'
{{- if .For }}
for: '{{.For}}'
{{- end }}
{{- if .Severity }}
severity: {{.Severity}}
{{- end }}
}
{{- end -}}
]
scopes: [
azureMonitoring
]
}
}
`))

// writeGroups renders groups as a Bicep resource and writes it to into.
//
// The Bicep symbolic name of the resource is derived from the group name via
// bicepName. A rule without a severity gets no severity property; previously
// the nil pointer was rendered as the literal text "<nil>".
//
// It returns any error reported while executing the template.
func writeGroups(groups armalertsmanagement.PrometheusRuleGroupResource, into io.Writer) error {
	return prometheusRuleGroupTemplate.Execute(into, map[string]any{
		"name":   bicepName(groups.Name),
		"groups": groups,
	})
}
// bicepName converts a rule group name into a camelCase identifier by dropping
// every '-', '.' and '_' and upper-casing the character that follows it.
//
// A run of several separators counts as one, so "a--b" becomes "aB"; leading
// separators capitalise the first character and trailing ones are dropped.
// A nil name yields the placeholder "FIXME-NAME-NIL".
func bicepName(name *string) string {
	if name == nil {
		return "FIXME-NAME-NIL"
	}

	var out strings.Builder
	upperNext := false
	for _, c := range *name {
		switch {
		case c == '-' || c == '.' || c == '_':
			// Checked first so that a separator following another separator
			// is dropped as well instead of being written through.
			upperNext = true
		case upperNext:
			out.WriteString(strings.ToUpper(string(c)))
			upperNext = false
		default:
			out.WriteRune(c)
		}
	}
	return out.String()
}
// formatDuration converts a Prometheus duration such as "5m" or "1h30m" into
// an ISO 8601 duration string such as "PT5M" or "PT1H30M".
//
// A nil d yields nil. Durations shorter than one minute are raised to "PT1M"
// and a warning is logged. A value that cannot be parsed as a Prometheus
// duration terminates the process.
func formatDuration(d *monitoringv1.Duration) *string {
	if d == nil {
		return nil
	}
	parsedDuration, err := model.ParseDuration(string(*d))
	if err != nil {
		logrus.Fatalf("Invalid duration %s: %v", string(*d), err)
	}
	minduration, err := model.ParseDuration("1m")
	if err != nil {
		logrus.Fatalf("Invalid minimum duration 1m: %v", err)
	}
	if parsedDuration < minduration {
		logrus.Warningf("Duration '%s' is too short, setting default of 1M", parsedDuration.String())
		return ptr.To("PT1M")
	}
	return ptr.To(iso8601Duration(parsedDuration, minduration))
}

// iso8601Duration renders d using only the time components of ISO 8601
// (hours, minutes, seconds), which are the only ones allowed after the "PT"
// prefix. Upper-casing the Prometheus notation instead would turn days, weeks
// and milliseconds into invalid values such as "PT1D". minute is the value of
// one minute in the unit of d; the other units are derived from it.
func iso8601Duration(d, minute model.Duration) string {
	hour := 60 * minute
	second := minute / 60
	millisecond := second / 1000

	var b strings.Builder
	b.WriteString("PT")
	if hours := d / hour; hours > 0 {
		fmt.Fprintf(&b, "%dH", int64(hours))
		d -= hours * hour
	}
	if minutes := d / minute; minutes > 0 {
		fmt.Fprintf(&b, "%dM", int64(minutes))
		d -= minutes * minute
	}
	if d > 0 {
		seconds := d / second
		if millis := (d - seconds*second) / millisecond; millis > 0 {
			fmt.Fprintf(&b, "%d.%03dS", int64(seconds), int64(millis))
		} else {
			fmt.Fprintf(&b, "%dS", int64(seconds))
		}
	}
	return b.String()
}
// severityFor translates the "severity" label of a rule into a numeric
// severity: "critical" is 2, "warning" is 3 and "info" is 4. Any other value
// is reported with a warning and mapped to 5. It returns nil when the label is
// missing or has no value.
func severityFor(labels map[string]*string) *int32 {
	label := labels["severity"]
	if label == nil {
		return nil
	}

	known := map[string]int32{
		"critical": 2,
		"warning":  3,
		"info":     4,
	}
	if level, found := known[*label]; found {
		return ptr.To(level)
	}

	logrus.Warnf("unknown severity label %q", *label)
	return ptr.To(int32(5))
}