# services/ContainerService/managedClusters/alerts.yaml
# Metric alert for node_cpu_usage_percentage (namespace
# Microsoft.ContainerService/managedClusters). Tagged auto-generated and
# marked unverified (verified: false).
- name: node_cpu_usage_percentage
description: Aggregated average CPU utilization measured in percentage across the
cluster
type: Metric
verified: false
visible: true
tags:
- auto-generated
- agc-58888
# Static-threshold criterion as configured: Average over a PT5M (ISO 8601,
# 5 minutes) window, evaluationFrequency PT5M, GreaterThan 95.0, severity 3.
properties:
metricName: node_cpu_usage_percentage
metricNamespace: Microsoft.ContainerService/managedClusters
severity: 3
windowSize: PT5M
evaluationFrequency: PT5M
timeAggregation: Average
operator: GreaterThan
criterionType: StaticThresholdCriterion
threshold: 95.0
enabled: true
# Background reading attached to this alert.
references:
- name: Recommended alert rules
url: https://learn.microsoft.com/azure/azure-monitor/containers/container-insights-metric-alerts?tabs=arm-template%2Cazure-portal#recommended-alert-rules
guid: 1303e91d-bc80-4ec2-937e-1d179fc32b43
# Metric alert for node_memory_working_set_percentage (namespace
# Microsoft.ContainerService/managedClusters). Tagged auto-generated and
# marked unverified (verified: false).
- name: node_memory_working_set_percentage
description: Container working set memory used in percent
type: Metric
verified: false
visible: true
tags:
- auto-generated
- agc-56669
# Static-threshold criterion as configured: Average over a PT5M (ISO 8601,
# 5 minutes) window, evaluationFrequency PT5M, GreaterThan 100.0, severity 3.
# NOTE(review): the description says this metric is a percentage; if its
# value cannot exceed 100, "GreaterThan 100.0" can never be satisfied.
# Confirm the intended threshold.
properties:
metricName: node_memory_working_set_percentage
metricNamespace: Microsoft.ContainerService/managedClusters
severity: 3
windowSize: PT5M
evaluationFrequency: PT5M
timeAggregation: Average
operator: GreaterThan
criterionType: StaticThresholdCriterion
threshold: 100.0
enabled: true
# Background reading attached to this alert.
references:
- name: Recommended alert rules
url: https://learn.microsoft.com/azure/azure-monitor/containers/container-insights-metric-alerts?tabs=arm-template%2Cazure-portal#recommended-alert-rules
guid: 9ae2dfbf-d69b-4802-8497-8b7836bef5e9
# Metric alert for node_disk_usage_percentage (namespace
# Microsoft.ContainerService/managedClusters). Tagged auto-generated and
# marked unverified (verified: false).
- name: node_disk_usage_percentage
description: Disk space used in percent by device
type: Metric
verified: false
visible: true
tags:
- auto-generated
- agc-3850
# Static-threshold criterion as configured: Average over a PT5M (ISO 8601,
# 5 minutes) window, evaluationFrequency PT1M (1 minute), GreaterThan 80.0,
# severity 2.
properties:
metricName: node_disk_usage_percentage
metricNamespace: Microsoft.ContainerService/managedClusters
severity: 2
windowSize: PT5M
evaluationFrequency: PT1M
timeAggregation: Average
operator: GreaterThan
criterionType: StaticThresholdCriterion
threshold: 80.0
enabled: true
# Background reading attached to this alert.
references:
- name: Recommended alert rules
url: https://learn.microsoft.com/azure/azure-monitor/containers/container-insights-metric-alerts?tabs=arm-template%2Cazure-portal#recommended-alert-rules
guid: aa3c5697-5cca-4c16-a37e-94f1d580701e
# Metric alert for kube_pod_status_phase (namespace
# Microsoft.ContainerService/managedClusters), restricted by a dimension
# filter to the "failed" phase. Tagged auto-generated and marked unverified
# (verified: false).
- name: kube_pod_status_phase
description: Number of pods by phase
type: Metric
verified: false
visible: true
tags:
- auto-generated
- agc-2360
# Static-threshold criterion as configured: Average over a PT5M (ISO 8601,
# 5 minutes) window, evaluationFrequency PT1M (1 minute), GreaterThan 0.0,
# severity 3.
properties:
metricName: kube_pod_status_phase
metricNamespace: Microsoft.ContainerService/managedClusters
severity: 3
windowSize: PT5M
evaluationFrequency: PT1M
timeAggregation: Average
operator: GreaterThan
criterionType: StaticThresholdCriterion
# Dimension filter: include only the `phase` value listed below.
# NOTE(review): the value is lowercase ("failed"); confirm the consumer
# matches dimension values case-insensitively.
dimensions:
- operator: include
name: phase
values:
- failed
threshold: 0.0
enabled: true
# Background reading attached to this alert.
references:
- name: Recommended alert rules
url: https://learn.microsoft.com/azure/azure-monitor/containers/container-insights-metric-alerts?tabs=arm-template%2Cazure-portal#recommended-alert-rules
guid: e38e6e11-ac88-4014-98c8-8e64f70b832a
# Metric alert for node_memory_rss_percentage (namespace
# Microsoft.ContainerService/managedClusters). Tagged auto-generated and
# marked unverified (verified: false). No references entry is attached.
- name: node_memory_rss_percentage
description: Container RSS memory used in percent
type: Metric
verified: false
visible: true
tags:
- auto-generated
- agc-2198
# Static-threshold criterion as configured: Average over a PT5M (ISO 8601,
# 5 minutes) window, evaluationFrequency PT1M (1 minute), GreaterThan 90.0,
# severity 3.
properties:
metricName: node_memory_rss_percentage
metricNamespace: Microsoft.ContainerService/managedClusters
severity: 3
windowSize: PT5M
evaluationFrequency: PT1M
timeAggregation: Average
operator: GreaterThan
criterionType: StaticThresholdCriterion
threshold: 90.0
enabled: true
guid: e89e023e-117b-4db4-bfb2-853849e273f5
# Metric alert for cluster_autoscaler_unschedulable_pods_count (namespace
# Microsoft.ContainerService/managedClusters). Tagged auto-generated and
# marked unverified (verified: false). No references entry is attached.
- name: cluster_autoscaler_unschedulable_pods_count
description: Number of pods that are currently unschedulable in the cluster
type: Metric
verified: false
visible: true
tags:
- auto-generated
- agc-1383
# Static-threshold criterion as configured: Average over a PT5M (ISO 8601,
# 5 minutes) window, evaluationFrequency PT1M (1 minute), GreaterThan 0.0,
# severity 3.
properties:
metricName: cluster_autoscaler_unschedulable_pods_count
metricNamespace: Microsoft.ContainerService/managedClusters
severity: 3
windowSize: PT5M
evaluationFrequency: PT1M
timeAggregation: Average
operator: GreaterThan
criterionType: StaticThresholdCriterion
threshold: 0.0
enabled: true
guid: 301ca0e3-fc88-4285-be99-a2a587c412f5
# Metric alert for cluster_autoscaler_cluster_safe_to_autoscale (namespace
# Microsoft.ContainerService/managedClusters). Tagged auto-generated and
# marked unverified (verified: false). No references entry is attached.
- name: cluster_autoscaler_cluster_safe_to_autoscale
description: Determines whether or not cluster autoscaler will take action on the
cluster
type: Metric
verified: false
visible: true
tags:
- auto-generated
- agc-1311
# Static-threshold criterion as configured: Average over a PT5M (ISO 8601,
# 5 minutes) window, evaluationFrequency PT1M (1 minute), LessThan 1.0,
# severity 3.
# NOTE(review): presumably the metric reports 1 when autoscaling is safe and
# 0 otherwise, so "LessThan 1.0" flags the unsafe state - confirm against the
# metric's documentation.
properties:
metricName: cluster_autoscaler_cluster_safe_to_autoscale
metricNamespace: Microsoft.ContainerService/managedClusters
severity: 3
windowSize: PT5M
evaluationFrequency: PT1M
timeAggregation: Average
operator: LessThan
criterionType: StaticThresholdCriterion
threshold: 1.0
enabled: true
guid: 49fd8bac-d061-459d-8d80-a048c4c8ba56
# Metric alert for kube_node_status_condition (namespace
# Microsoft.ContainerService/managedClusters), restricted by a dimension
# filter to the "notready" and "unknown" values. Tagged auto-generated and
# marked unverified (verified: false).
- name: kube_node_status_condition
description: Statuses for various node conditions
type: Metric
verified: false
visible: true
tags:
- auto-generated
- agc-1128
# Static-threshold criterion as configured: Total (not Average) over a PT5M
# (ISO 8601, 5 minutes) window, evaluationFrequency PT1M (1 minute),
# GreaterThan 0.0, severity 3.
properties:
metricName: kube_node_status_condition
metricNamespace: Microsoft.ContainerService/managedClusters
severity: 3
windowSize: PT5M
evaluationFrequency: PT1M
timeAggregation: Total
operator: GreaterThan
criterionType: StaticThresholdCriterion
# Dimension filter: include only the `status2` values listed below.
# NOTE(review): the dimension name `status2` looks like a typo but may be the
# metric's real dimension name - verify against the supported-metrics
# reference before renaming it. The values are lowercase; confirm the
# consumer matches dimension values case-insensitively.
dimensions:
- operator: include
name: status2
values:
- notready
- unknown
threshold: 0.0
enabled: true
# Background reading attached to this alert.
references:
- name: Recommended alert rules
url: https://learn.microsoft.com/azure/azure-monitor/containers/container-insights-metric-alerts?tabs=arm-template%2Cazure-portal#recommended-alert-rules
guid: ebbbbb92-5208-4d52-b407-6bea8e4473b9
# Metric alert for kube_pod_status_ready (namespace
# Microsoft.ContainerService/managedClusters). Tagged auto-generated and
# marked unverified (verified: false).
- name: kube_pod_status_ready
description: Number of pods in Ready state
type: Metric
verified: false
visible: true
tags:
- auto-generated
- agc-564
# Static-threshold criterion as configured: Average over a PT5M (ISO 8601,
# 5 minutes) window, evaluationFrequency PT1M (1 minute), LessThan 1.0,
# severity 2.
properties:
metricName: kube_pod_status_ready
metricNamespace: Microsoft.ContainerService/managedClusters
severity: 2
windowSize: PT5M
evaluationFrequency: PT1M
timeAggregation: Average
operator: LessThan
criterionType: StaticThresholdCriterion
threshold: 1.0
enabled: true
# Background reading attached to this alert.
references:
- name: Recommended alert rules
url: https://learn.microsoft.com/azure/azure-monitor/containers/container-insights-metric-alerts?tabs=arm-template%2Cazure-portal#recommended-alert-rules
guid: cebf00b7-7294-4cf7-bc50-108f999d0c67
# Metric alert for kube_node_status_allocatable_memory_bytes (namespace
# Microsoft.ContainerService/managedClusters). Tagged auto-generated and
# marked unverified (verified: false). No references entry is attached.
- name: kube_node_status_allocatable_memory_bytes
description: Total amount of available memory in a managed cluster
type: Metric
verified: false
visible: true
tags:
- auto-generated
- agc-460
# Static-threshold criterion as configured: Average over a PT5M (ISO 8601,
# 5 minutes) window, evaluationFrequency PT1M (1 minute), LessThan
# 2147483648.0, severity 3.
# The threshold equals 2^31; presumably 2 GiB, given the "_bytes" suffix of
# the metric name - confirm the unit.
properties:
metricName: kube_node_status_allocatable_memory_bytes
metricNamespace: Microsoft.ContainerService/managedClusters
severity: 3
windowSize: PT5M
evaluationFrequency: PT1M
timeAggregation: Average
operator: LessThan
criterionType: StaticThresholdCriterion
threshold: 2147483648.0
enabled: true
guid: 14fa63fb-7da9-4d2e-9de9-203c4a3e0401
# Metric alert for kube_node_status_allocatable_cpu_cores (namespace
# Microsoft.ContainerService/managedClusters). Tagged auto-generated and
# marked unverified (verified: false). No references entry is attached.
- name: kube_node_status_allocatable_cpu_cores
description: Total number of available cpu cores in a managed cluster
type: Metric
verified: false
visible: true
tags:
- auto-generated
- agc-314
# Static-threshold criterion as configured: Average over a PT5M (ISO 8601,
# 5 minutes) window, evaluationFrequency PT1M (1 minute), LessThan 2.0,
# severity 3.
properties:
metricName: kube_node_status_allocatable_cpu_cores
metricNamespace: Microsoft.ContainerService/managedClusters
severity: 3
windowSize: PT5M
evaluationFrequency: PT1M
timeAggregation: Average
operator: LessThan
criterionType: StaticThresholdCriterion
threshold: 2.0
enabled: true
guid: 64a872f9-5ec6-4121-acad-edd12f4c3466