terraform/modules/monitoring/main.tf (426 lines of code) (raw):
# Cloud Monitoring dashboard "Performance Dashboard".
#
# The dashboard body is handed to the provider as a raw JSON document (heredoc),
# so it cannot contain comments; notes about the widgets are kept here instead.
#
# Layout: one gridLayout with 18 xyChart widgets. Every widget has a single data
# set built from a timeSeriesFilter aligned on a 60s period:
#   - GKE scheduler (Prometheus metrics): scheduling attempts, pending pods
#   - GKE containers: restart count, CPU/memory request and limit utilization,
#     uptime
#   - GCE instances: disk bytes written/read, disk read/write operations
#   - GKE nodes: CPU core usage, memory used, allocatable memory/CPU utilization
#   - nginx ingress (Prometheus metrics): request count and request latency
#
# Aggregation notes:
#   - groupByFields is only honoured together with a crossSeriesReducer, so
#     every widget that groups series also sets a reducer (the restart-count
#     widget uses REDUCE_SUM to total restarts per container name).
#   - "GKE scheduler pending pods" is a gauge: it is aligned with ALIGN_MEAN and
#     reduced with REDUCE_SUM so the chart shows pod counts rather than the
#     number of samples or series.
#     NOTE(review): grouping assumes the metric exposes a "queue" label --
#     confirm in Metrics Explorer.
#   - "Total number of requests per GKE service" sums the per-series deltas
#     (REDUCE_SUM) for each service/status/method combination.
resource "google_monitoring_dashboard" "performance_dashboard" {
  dashboard_json = <<EOF
{
  "displayName": "Performance Dashboard",
  "gridLayout": {
    "widgets": [
      {
        "title": "GKE scheduler scheduling attempts",
        "xyChart": {
          "dataSets": [
            {
              "minAlignmentPeriod": "60s",
              "timeSeriesQuery": {
                "timeSeriesFilter": {
                  "filter": "metric.type=\"prometheus.googleapis.com/scheduler_schedule_attempts_total/counter\"",
                  "aggregation": {
                    "alignmentPeriod": "60s",
                    "perSeriesAligner": "ALIGN_DELTA"
                  }
                }
              }
            }
          ]
        }
      },
      {
        "title": "GKE scheduler pending pods",
        "xyChart": {
          "dataSets": [
            {
              "minAlignmentPeriod": "60s",
              "timeSeriesQuery": {
                "timeSeriesFilter": {
                  "filter": "metric.type=\"prometheus.googleapis.com/scheduler_pending_pods/gauge\"",
                  "aggregation": {
                    "alignmentPeriod": "60s",
                    "perSeriesAligner": "ALIGN_MEAN",
                    "crossSeriesReducer": "REDUCE_SUM",
                    "groupByFields": [
                      "metric.label.\"queue\""
                    ]
                  }
                }
              }
            }
          ]
        }
      },
      {
        "title": "GKE container restart count",
        "xyChart": {
          "dataSets": [
            {
              "minAlignmentPeriod": "60s",
              "timeSeriesQuery": {
                "timeSeriesFilter": {
                  "filter": "metric.type=\"kubernetes.io/container/restart_count\" resource.type=\"k8s_container\"",
                  "aggregation": {
                    "alignmentPeriod": "60s",
                    "perSeriesAligner": "ALIGN_DELTA",
                    "crossSeriesReducer": "REDUCE_SUM",
                    "groupByFields": [
                      "resource.label.\"container_name\""
                    ]
                  }
                }
              }
            }
          ]
        }
      },
      {
        "title": "GKE container CPU request utilization",
        "xyChart": {
          "dataSets": [
            {
              "minAlignmentPeriod": "60s",
              "timeSeriesQuery": {
                "timeSeriesFilter": {
                  "filter": "metric.type=\"kubernetes.io/container/cpu/request_utilization\" resource.type=\"k8s_container\"",
                  "aggregation": {
                    "alignmentPeriod": "60s",
                    "perSeriesAligner": "ALIGN_MAX",
                    "crossSeriesReducer": "REDUCE_MAX",
                    "groupByFields": [
                      "resource.label.\"container_name\""
                    ]
                  }
                }
              }
            }
          ]
        }
      },
      {
        "title": "GKE container CPU limit utilization",
        "xyChart": {
          "dataSets": [
            {
              "minAlignmentPeriod": "60s",
              "timeSeriesQuery": {
                "timeSeriesFilter": {
                  "filter": "metric.type=\"kubernetes.io/container/cpu/limit_utilization\" resource.type=\"k8s_container\"",
                  "aggregation": {
                    "alignmentPeriod": "60s",
                    "perSeriesAligner": "ALIGN_MAX",
                    "crossSeriesReducer": "REDUCE_MAX",
                    "groupByFields": [
                      "resource.label.\"container_name\""
                    ]
                  }
                }
              }
            }
          ]
        }
      },
      {
        "title": "GKE container memory request utilization",
        "xyChart": {
          "dataSets": [
            {
              "minAlignmentPeriod": "60s",
              "timeSeriesQuery": {
                "timeSeriesFilter": {
                  "filter": "metric.type=\"kubernetes.io/container/memory/request_utilization\" resource.type=\"k8s_container\"",
                  "aggregation": {
                    "alignmentPeriod": "60s",
                    "perSeriesAligner": "ALIGN_MAX",
                    "crossSeriesReducer": "REDUCE_MAX",
                    "groupByFields": [
                      "resource.label.\"container_name\""
                    ]
                  }
                }
              }
            }
          ]
        }
      },
      {
        "title": "GKE container memory limit utilization",
        "xyChart": {
          "dataSets": [
            {
              "minAlignmentPeriod": "60s",
              "timeSeriesQuery": {
                "timeSeriesFilter": {
                  "filter": "metric.type=\"kubernetes.io/container/memory/limit_utilization\" resource.type=\"k8s_container\"",
                  "aggregation": {
                    "alignmentPeriod": "60s",
                    "perSeriesAligner": "ALIGN_MAX",
                    "crossSeriesReducer": "REDUCE_MAX",
                    "groupByFields": [
                      "resource.label.\"container_name\""
                    ]
                  }
                }
              }
            }
          ]
        }
      },
      {
        "title": "GCE instance bytes written",
        "xyChart": {
          "dataSets": [
            {
              "minAlignmentPeriod": "60s",
              "timeSeriesQuery": {
                "timeSeriesFilter": {
                  "filter": "metric.type=\"compute.googleapis.com/instance/disk/write_bytes_count\"",
                  "aggregation": {
                    "alignmentPeriod": "60s",
                    "perSeriesAligner": "ALIGN_SUM",
                    "crossSeriesReducer": "REDUCE_SUM",
                    "groupByFields": [
                      "resource.label.\"instance_name\""
                    ]
                  }
                }
              }
            }
          ]
        }
      },
      {
        "title": "GCE instance bytes read",
        "xyChart": {
          "dataSets": [
            {
              "minAlignmentPeriod": "60s",
              "timeSeriesQuery": {
                "timeSeriesFilter": {
                  "filter": "metric.type=\"compute.googleapis.com/instance/disk/read_bytes_count\"",
                  "aggregation": {
                    "alignmentPeriod": "60s",
                    "perSeriesAligner": "ALIGN_SUM",
                    "crossSeriesReducer": "REDUCE_SUM",
                    "groupByFields": [
                      "resource.label.\"instance_name\""
                    ]
                  }
                }
              }
            }
          ]
        }
      },
      {
        "title": "GCE instance disk read operations",
        "xyChart": {
          "dataSets": [
            {
              "minAlignmentPeriod": "60s",
              "timeSeriesQuery": {
                "timeSeriesFilter": {
                  "filter": "metric.type=\"compute.googleapis.com/instance/disk/read_ops_count\"",
                  "aggregation": {
                    "alignmentPeriod": "60s",
                    "perSeriesAligner": "ALIGN_SUM",
                    "crossSeriesReducer": "REDUCE_SUM",
                    "groupByFields": [
                      "resource.label.\"instance_name\""
                    ]
                  }
                }
              }
            }
          ]
        }
      },
      {
        "title": "GCE instance disk write operations",
        "xyChart": {
          "dataSets": [
            {
              "minAlignmentPeriod": "60s",
              "timeSeriesQuery": {
                "timeSeriesFilter": {
                  "filter": "metric.type=\"compute.googleapis.com/instance/disk/write_ops_count\"",
                  "aggregation": {
                    "alignmentPeriod": "60s",
                    "perSeriesAligner": "ALIGN_SUM",
                    "crossSeriesReducer": "REDUCE_SUM",
                    "groupByFields": [
                      "resource.label.\"instance_name\""
                    ]
                  }
                }
              }
            }
          ]
        }
      },
      {
        "title": "Cumulative CPU usage used by cores on GKE node",
        "xyChart": {
          "dataSets": [
            {
              "minAlignmentPeriod": "60s",
              "timeSeriesQuery": {
                "timeSeriesFilter": {
                  "filter": "metric.type=\"kubernetes.io/node/cpu/core_usage_time\"",
                  "aggregation": {
                    "alignmentPeriod": "60s",
                    "perSeriesAligner": "ALIGN_RATE",
                    "crossSeriesReducer": "REDUCE_MAX",
                    "groupByFields": [
                      "resource.label.\"node_name\""
                    ]
                  }
                }
              }
            }
          ]
        }
      },
      {
        "title": "Cumulative memory bytes used by GKE node",
        "xyChart": {
          "dataSets": [
            {
              "minAlignmentPeriod": "60s",
              "timeSeriesQuery": {
                "timeSeriesFilter": {
                  "filter": "metric.type=\"kubernetes.io/node/memory/used_bytes\"",
                  "aggregation": {
                    "alignmentPeriod": "60s",
                    "perSeriesAligner": "ALIGN_SUM",
                    "crossSeriesReducer": "REDUCE_SUM",
                    "groupByFields": [
                      "resource.label.\"node_name\""
                    ]
                  }
                }
              }
            }
          ]
        }
      },
      {
        "title": "GKE node % allocated memory utilization",
        "xyChart": {
          "dataSets": [
            {
              "minAlignmentPeriod": "60s",
              "timeSeriesQuery": {
                "timeSeriesFilter": {
                  "filter": "metric.type=\"kubernetes.io/node/memory/allocatable_utilization\"",
                  "aggregation": {
                    "alignmentPeriod": "60s",
                    "perSeriesAligner": "ALIGN_MAX",
                    "crossSeriesReducer": "REDUCE_MAX",
                    "groupByFields": [
                      "resource.label.\"node_name\""
                    ]
                  }
                }
              }
            }
          ]
        }
      },
      {
        "title": "GKE node % allocated CPU utilization",
        "xyChart": {
          "dataSets": [
            {
              "minAlignmentPeriod": "60s",
              "timeSeriesQuery": {
                "timeSeriesFilter": {
                  "filter": "metric.type=\"kubernetes.io/node/cpu/allocatable_utilization\"",
                  "aggregation": {
                    "alignmentPeriod": "60s",
                    "perSeriesAligner": "ALIGN_MAX",
                    "crossSeriesReducer": "REDUCE_MAX",
                    "groupByFields": [
                      "resource.label.\"node_name\""
                    ]
                  }
                }
              }
            }
          ]
        }
      },
      {
        "title": "GKE Container Uptime",
        "xyChart": {
          "dataSets": [
            {
              "minAlignmentPeriod": "60s",
              "timeSeriesQuery": {
                "timeSeriesFilter": {
                  "filter": "metric.type=\"kubernetes.io/container/uptime\" resource.type=\"k8s_container\"",
                  "aggregation": {
                    "alignmentPeriod": "60s",
                    "perSeriesAligner": "ALIGN_SUM",
                    "crossSeriesReducer": "REDUCE_SUM",
                    "groupByFields": [
                      "resource.label.\"container_name\""
                    ]
                  }
                }
              }
            }
          ]
        }
      },
      {
        "title": "Total number of requests per GKE service",
        "xyChart": {
          "dataSets": [
            {
              "minAlignmentPeriod": "60s",
              "plotType": "LINE",
              "targetAxis": "Y1",
              "timeSeriesQuery": {
                "timeSeriesFilter": {
                  "filter": "metric.type=\"prometheus.googleapis.com/nginx_ingress_controller_requests/counter\"",
                  "aggregation": {
                    "alignmentPeriod": "60s",
                    "perSeriesAligner": "ALIGN_DELTA",
                    "crossSeriesReducer": "REDUCE_SUM",
                    "groupByFields": [
                      "metric.label.\"service\"",
                      "metric.label.\"status\"",
                      "metric.label.\"method\""
                    ]
                  }
                }
              }
            }
          ]
        }
      },
      {
        "title": "Request latency per GKE service",
        "xyChart": {
          "dataSets": [
            {
              "minAlignmentPeriod": "60s",
              "timeSeriesQuery": {
                "timeSeriesFilter": {
                  "filter": "metric.type=\"prometheus.googleapis.com/nginx_ingress_controller_request_duration_seconds/histogram\"",
                  "aggregation": {
                    "alignmentPeriod": "60s",
                    "perSeriesAligner": "ALIGN_DELTA",
                    "crossSeriesReducer": "REDUCE_PERCENTILE_95",
                    "groupByFields": [
                      "metric.label.\"service\"",
                      "metric.label.\"status\"",
                      "metric.label.\"method\""
                    ]
                  }
                }
              }
            }
          ]
        }
      }
    ]
  }
}
EOF
}