services/ContainerService/managedClusters/alerts.yaml (251 lines of code) (raw):

# Metric alert definitions for Microsoft.ContainerService/managedClusters.
#
# Each entry describes one static-threshold metric alert: the metric to watch,
# how it is aggregated (timeAggregation over windowSize), how often it is
# evaluated (evaluationFrequency), and the comparison (operator + threshold).
#
# NOTE(review): the block structure below was reconstructed from flattened
# text. `enabled` is assumed to belong to `properties`, and `guid` to the
# entry itself -- confirm against the consuming schema.

# Average > 95.0 over PT5M, evaluated every PT5M; severity 3.
- name: node_cpu_usage_percentage
  description: Aggregated average CPU utilization measured in percentage across the cluster
  type: Metric
  verified: false
  visible: true
  tags:
    - auto-generated
    - agc-58888
  properties:
    metricName: node_cpu_usage_percentage
    metricNamespace: Microsoft.ContainerService/managedClusters
    severity: 3
    windowSize: PT5M
    evaluationFrequency: PT5M
    timeAggregation: Average
    operator: GreaterThan
    criterionType: StaticThresholdCriterion
    threshold: 95.0
    enabled: true
  references:
    - name: Recommended alert rules
      url: 'https://learn.microsoft.com/azure/azure-monitor/containers/container-insights-metric-alerts?tabs=arm-template%2Cazure-portal#recommended-alert-rules'
  guid: 1303e91d-bc80-4ec2-937e-1d179fc32b43

# Average > 100.0 over PT5M, evaluated every PT5M; severity 3.
- name: node_memory_working_set_percentage
  description: Container working set memory used in percent
  type: Metric
  verified: false
  visible: true
  tags:
    - auto-generated
    - agc-56669
  properties:
    metricName: node_memory_working_set_percentage
    metricNamespace: Microsoft.ContainerService/managedClusters
    severity: 3
    windowSize: PT5M
    evaluationFrequency: PT5M
    timeAggregation: Average
    operator: GreaterThan
    criterionType: StaticThresholdCriterion
    threshold: 100.0
    enabled: true
  references:
    - name: Recommended alert rules
      url: 'https://learn.microsoft.com/azure/azure-monitor/containers/container-insights-metric-alerts?tabs=arm-template%2Cazure-portal#recommended-alert-rules'
  guid: 9ae2dfbf-d69b-4802-8497-8b7836bef5e9

# Average > 80.0 over PT5M, evaluated every PT1M; severity 2.
- name: node_disk_usage_percentage
  description: Disk space used in percent by device
  type: Metric
  verified: false
  visible: true
  tags:
    - auto-generated
    - agc-3850
  properties:
    metricName: node_disk_usage_percentage
    metricNamespace: Microsoft.ContainerService/managedClusters
    severity: 2
    windowSize: PT5M
    evaluationFrequency: PT1M
    timeAggregation: Average
    operator: GreaterThan
    criterionType: StaticThresholdCriterion
    threshold: 80.0
    enabled: true
  references:
    - name: Recommended alert rules
      url: 'https://learn.microsoft.com/azure/azure-monitor/containers/container-insights-metric-alerts?tabs=arm-template%2Cazure-portal#recommended-alert-rules'
  guid: aa3c5697-5cca-4c16-a37e-94f1d580701e

# Average > 0.0 over PT5M, evaluated every PT1M; severity 3.
# Restricted to the `phase` dimension value `failed`.
- name: kube_pod_status_phase
  description: Number of pods by phase
  type: Metric
  verified: false
  visible: true
  tags:
    - auto-generated
    - agc-2360
  properties:
    metricName: kube_pod_status_phase
    metricNamespace: Microsoft.ContainerService/managedClusters
    severity: 3
    windowSize: PT5M
    evaluationFrequency: PT1M
    timeAggregation: Average
    operator: GreaterThan
    criterionType: StaticThresholdCriterion
    dimensions:
      - operator: include
        name: phase
        values:
          - failed
    threshold: 0.0
    enabled: true
  references:
    - name: Recommended alert rules
      url: 'https://learn.microsoft.com/azure/azure-monitor/containers/container-insights-metric-alerts?tabs=arm-template%2Cazure-portal#recommended-alert-rules'
  guid: e38e6e11-ac88-4014-98c8-8e64f70b832a

# Average > 90.0 over PT5M, evaluated every PT1M; severity 3.
- name: node_memory_rss_percentage
  description: Container RSS memory used in percent
  type: Metric
  verified: false
  visible: true
  tags:
    - auto-generated
    - agc-2198
  properties:
    metricName: node_memory_rss_percentage
    metricNamespace: Microsoft.ContainerService/managedClusters
    severity: 3
    windowSize: PT5M
    evaluationFrequency: PT1M
    timeAggregation: Average
    operator: GreaterThan
    criterionType: StaticThresholdCriterion
    threshold: 90.0
    enabled: true
  guid: e89e023e-117b-4db4-bfb2-853849e273f5

# Average > 0.0 over PT5M, evaluated every PT1M; severity 3.
- name: cluster_autoscaler_unschedulable_pods_count
  description: Number of pods that are currently unschedulable in the cluster
  type: Metric
  verified: false
  visible: true
  tags:
    - auto-generated
    - agc-1383
  properties:
    metricName: cluster_autoscaler_unschedulable_pods_count
    metricNamespace: Microsoft.ContainerService/managedClusters
    severity: 3
    windowSize: PT5M
    evaluationFrequency: PT1M
    timeAggregation: Average
    operator: GreaterThan
    criterionType: StaticThresholdCriterion
    threshold: 0.0
    enabled: true
  guid: 301ca0e3-fc88-4285-be99-a2a587c412f5

# Average < 1.0 over PT5M, evaluated every PT1M; severity 3.
- name: cluster_autoscaler_cluster_safe_to_autoscale
  description: Determines whether or not cluster autoscaler will take action on the cluster
  type: Metric
  verified: false
  visible: true
  tags:
    - auto-generated
    - agc-1311
  properties:
    metricName: cluster_autoscaler_cluster_safe_to_autoscale
    metricNamespace: Microsoft.ContainerService/managedClusters
    severity: 3
    windowSize: PT5M
    evaluationFrequency: PT1M
    timeAggregation: Average
    operator: LessThan
    criterionType: StaticThresholdCriterion
    threshold: 1.0
    enabled: true
  guid: 49fd8bac-d061-459d-8d80-a048c4c8ba56

# Total > 0.0 over PT5M, evaluated every PT1M; severity 3.
# Restricted to the `status2` dimension values `notready` and `unknown`.
- name: kube_node_status_condition
  description: Statuses for various node conditions
  type: Metric
  verified: false
  visible: true
  tags:
    - auto-generated
    - agc-1128
  properties:
    metricName: kube_node_status_condition
    metricNamespace: Microsoft.ContainerService/managedClusters
    severity: 3
    windowSize: PT5M
    evaluationFrequency: PT1M
    timeAggregation: Total
    operator: GreaterThan
    criterionType: StaticThresholdCriterion
    dimensions:
      - operator: include
        name: status2
        values:
          - notready
          - unknown
    threshold: 0.0
    enabled: true
  references:
    - name: Recommended alert rules
      url: 'https://learn.microsoft.com/azure/azure-monitor/containers/container-insights-metric-alerts?tabs=arm-template%2Cazure-portal#recommended-alert-rules'
  guid: ebbbbb92-5208-4d52-b407-6bea8e4473b9

# Average < 1.0 over PT5M, evaluated every PT1M; severity 2.
- name: kube_pod_status_ready
  description: Number of pods in Ready state
  type: Metric
  verified: false
  visible: true
  tags:
    - auto-generated
    - agc-564
  properties:
    metricName: kube_pod_status_ready
    metricNamespace: Microsoft.ContainerService/managedClusters
    severity: 2
    windowSize: PT5M
    evaluationFrequency: PT1M
    timeAggregation: Average
    operator: LessThan
    criterionType: StaticThresholdCriterion
    threshold: 1.0
    enabled: true
  references:
    - name: Recommended alert rules
      url: 'https://learn.microsoft.com/azure/azure-monitor/containers/container-insights-metric-alerts?tabs=arm-template%2Cazure-portal#recommended-alert-rules'
  guid: cebf00b7-7294-4cf7-bc50-108f999d0c67

# Average < 2147483648.0 over PT5M, evaluated every PT1M; severity 3.
# 2147483648 = 2 * 1024^3, i.e. 2 GiB if the metric is in bytes as its name
# suggests.
- name: kube_node_status_allocatable_memory_bytes
  description: Total amount of available memory in a managed cluster
  type: Metric
  verified: false
  visible: true
  tags:
    - auto-generated
    - agc-460
  properties:
    metricName: kube_node_status_allocatable_memory_bytes
    metricNamespace: Microsoft.ContainerService/managedClusters
    severity: 3
    windowSize: PT5M
    evaluationFrequency: PT1M
    timeAggregation: Average
    operator: LessThan
    criterionType: StaticThresholdCriterion
    threshold: 2147483648.0
    enabled: true
  guid: 14fa63fb-7da9-4d2e-9de9-203c4a3e0401

# Average < 2.0 over PT5M, evaluated every PT1M; severity 3.
- name: kube_node_status_allocatable_cpu_cores
  description: Total number of available cpu cores in a managed cluster
  type: Metric
  verified: false
  visible: true
  tags:
    - auto-generated
    - agc-314
  properties:
    metricName: kube_node_status_allocatable_cpu_cores
    metricNamespace: Microsoft.ContainerService/managedClusters
    severity: 3
    windowSize: PT5M
    evaluationFrequency: PT1M
    timeAggregation: Average
    operator: LessThan
    criterionType: StaticThresholdCriterion
    threshold: 2.0
    enabled: true
  guid: 64a872f9-5ec6-4121-acad-edd12f4c3466