modules/monitoring/dashboard/dashboards/HPC.json.tpl (595 lines of code) (raw):
{
"displayName": "${title}: ${deployment_name}",
"labels": ${jsonencode(labels)},
"gridLayout": {
"columns": 2,
"widgets": [
{
"text": {
"content": "HPC metrics from the ${deployment_name} deployment of the Cluster Toolkit.",
"format": "MARKDOWN"
},
"title": "${title}"
},
{
"title": "VM Instance - Memory utilization",
"xyChart": {
"chartOptions": {
"mode": "COLOR"
},
"dataSets": [
{
"minAlignmentPeriod": "60s",
"plotType": "LINE",
"targetAxis": "Y1",
"timeSeriesQuery": {
"apiSource": "DEFAULT_CLOUD",
"timeSeriesFilter": {
"aggregation": {
"alignmentPeriod": "60s",
"crossSeriesReducer": "REDUCE_NONE",
"perSeriesAligner": "ALIGN_MEAN"
},
"filter": "metric.type=\"agent.googleapis.com/memory/percent_used\" resource.type=\"gce_instance\" metadata.user_labels.\"ghpc_deployment\"=\"${deployment_name}\""
}
}
}
],
"timeshiftDuration": "0s",
"yAxis": {
"label": "y1Axis",
"scale": "LINEAR"
}
}
},
{
"title": "GCE VM Instance - CPU Utilization",
"xyChart": {
"chartOptions": {
"mode": "COLOR"
},
"dataSets": [
{
"minAlignmentPeriod": "60s",
"plotType": "LINE",
"targetAxis": "Y1",
"timeSeriesQuery": {
"apiSource": "DEFAULT_CLOUD",
"timeSeriesFilter": {
"aggregation": {
"alignmentPeriod": "60s",
"crossSeriesReducer": "REDUCE_NONE",
"perSeriesAligner": "ALIGN_MEAN"
},
"filter": "metric.type=\"compute.googleapis.com/instance/cpu/utilization\" resource.type=\"gce_instance\" metadata.user_labels.\"ghpc_deployment\"=\"${deployment_name}\"",
"pickTimeSeriesFilter": {
"direction": "TOP",
"numTimeSeries": 20,
"rankingMethod": "METHOD_MEAN"
}
}
}
}
],
"timeshiftDuration": "0s",
"yAxis": {
"label": "y1Axis",
"scale": "LINEAR"
}
}
},
{
"title": "GCE VM Instance - CPU utilization (agent)",
"xyChart": {
"chartOptions": {
"mode": "COLOR"
},
"dataSets": [
{
"minAlignmentPeriod": "60s",
"plotType": "LINE",
"targetAxis": "Y1",
"timeSeriesQuery": {
"apiSource": "DEFAULT_CLOUD",
"timeSeriesFilter": {
"aggregation": {
"alignmentPeriod": "60s",
"crossSeriesReducer": "REDUCE_NONE",
"perSeriesAligner": "ALIGN_MEAN"
},
"filter": "metric.type=\"agent.googleapis.com/cpu/utilization\" resource.type=\"gce_instance\" metadata.user_labels.\"ghpc_deployment\"=\"${deployment_name}\""
},
"unitOverride": "%"
}
}
],
"timeshiftDuration": "0s",
"yAxis": {
"label": "y1Axis",
"scale": "LINEAR"
}
}
},
{
"title": "GCE VM Instance - Disk read operations",
"xyChart": {
"chartOptions": {
"mode": "COLOR"
},
"dataSets": [
{
"minAlignmentPeriod": "60s",
"plotType": "LINE",
"targetAxis": "Y1",
"timeSeriesQuery": {
"apiSource": "DEFAULT_CLOUD",
"timeSeriesFilter": {
"aggregation": {
"alignmentPeriod": "60s",
"crossSeriesReducer": "REDUCE_NONE",
"perSeriesAligner": "ALIGN_RATE"
},
"filter": "metric.type=\"compute.googleapis.com/instance/disk/read_ops_count\" resource.type=\"gce_instance\" metadata.user_labels.\"ghpc_deployment\"=\"${deployment_name}\""
}
}
}
],
"timeshiftDuration": "0s",
"yAxis": {
"label": "y1Axis",
"scale": "LINEAR"
}
}
},
{
"title": "GCE VM Instance - Disk write operations",
"xyChart": {
"chartOptions": {
"mode": "COLOR"
},
"dataSets": [
{
"minAlignmentPeriod": "60s",
"plotType": "LINE",
"targetAxis": "Y1",
"timeSeriesQuery": {
"apiSource": "DEFAULT_CLOUD",
"timeSeriesFilter": {
"aggregation": {
"alignmentPeriod": "60s",
"crossSeriesReducer": "REDUCE_NONE",
"perSeriesAligner": "ALIGN_RATE"
},
"filter": "metric.type=\"compute.googleapis.com/instance/disk/write_ops_count\" resource.type=\"gce_instance\" metadata.user_labels.\"ghpc_deployment\"=\"${deployment_name}\""
}
}
}
],
"timeshiftDuration": "0s",
"yAxis": {
"label": "y1Axis",
"scale": "LINEAR"
}
}
},
{
"title": "GCE VM Instance - Disk Read Bytes",
"xyChart": {
"chartOptions": {
"mode": "COLOR"
},
"dataSets": [
{
"minAlignmentPeriod": "60s",
"plotType": "LINE",
"targetAxis": "Y1",
"timeSeriesQuery": {
"apiSource": "DEFAULT_CLOUD",
"timeSeriesFilter": {
"aggregation": {
"alignmentPeriod": "60s",
"crossSeriesReducer": "REDUCE_NONE",
"perSeriesAligner": "ALIGN_RATE"
},
"filter": "metric.type=\"agent.googleapis.com/disk/read_bytes_count\" resource.type=\"gce_instance\" metadata.user_labels.\"ghpc_deployment\"=\"${deployment_name}\""
}
}
}
],
"timeshiftDuration": "0s",
"yAxis": {
"label": "y1Axis",
"scale": "LINEAR"
}
}
},
{
"title": "GCE VM Instance - Disk Write Bytes",
"xyChart": {
"chartOptions": {
"mode": "COLOR"
},
"dataSets": [
{
"minAlignmentPeriod": "60s",
"plotType": "LINE",
"targetAxis": "Y1",
"timeSeriesQuery": {
"apiSource": "DEFAULT_CLOUD",
"timeSeriesFilter": {
"aggregation": {
"alignmentPeriod": "60s",
"crossSeriesReducer": "REDUCE_NONE",
"perSeriesAligner": "ALIGN_RATE"
},
"filter": "metric.type=\"agent.googleapis.com/disk/write_bytes_count\" resource.type=\"gce_instance\" metadata.user_labels.\"ghpc_deployment\"=\"${deployment_name}\""
}
}
}
],
"timeshiftDuration": "0s",
"yAxis": {
"label": "y1Axis",
"scale": "LINEAR"
}
}
},
{
"title": "Throttled read bytes",
"xyChart": {
"chartOptions": {
"mode": "COLOR"
},
"dataSets": [
{
"minAlignmentPeriod": "60s",
"plotType": "LINE",
"targetAxis": "Y1",
"timeSeriesQuery": {
"apiSource": "DEFAULT_CLOUD",
"timeSeriesFilter": {
"aggregation": {
"alignmentPeriod": "60s",
"crossSeriesReducer": "REDUCE_NONE",
"perSeriesAligner": "ALIGN_RATE"
},
"filter": "metric.type=\"compute.googleapis.com/instance/disk/throttled_read_bytes_count\" resource.type=\"gce_instance\" metadata.user_labels.\"ghpc_deployment\"=\"${deployment_name}\""
}
}
}
],
"timeshiftDuration": "0s",
"yAxis": {
"label": "y1Axis",
"scale": "LINEAR"
}
}
},
{
"title": "Throttled write bytes",
"xyChart": {
"chartOptions": {
"mode": "COLOR"
},
"dataSets": [
{
"minAlignmentPeriod": "60s",
"plotType": "LINE",
"targetAxis": "Y1",
"timeSeriesQuery": {
"apiSource": "DEFAULT_CLOUD",
"timeSeriesFilter": {
"aggregation": {
"alignmentPeriod": "60s",
"crossSeriesReducer": "REDUCE_NONE",
"perSeriesAligner": "ALIGN_RATE"
},
"filter": "metric.type=\"compute.googleapis.com/instance/disk/throttled_write_bytes_count\" resource.type=\"gce_instance\" metadata.user_labels.\"ghpc_deployment\"=\"${deployment_name}\""
}
}
}
],
"timeshiftDuration": "0s",
"yAxis": {
"label": "y1Axis",
"scale": "LINEAR"
}
}
},
{
"title": "GCE VM Instance - Received packets",
"xyChart": {
"chartOptions": {
"mode": "COLOR"
},
"dataSets": [
{
"minAlignmentPeriod": "60s",
"plotType": "LINE",
"targetAxis": "Y1",
"timeSeriesQuery": {
"apiSource": "DEFAULT_CLOUD",
"timeSeriesFilter": {
"aggregation": {
"alignmentPeriod": "60s",
"crossSeriesReducer": "REDUCE_NONE",
"perSeriesAligner": "ALIGN_RATE"
},
"filter": "metric.type=\"compute.googleapis.com/instance/network/received_packets_count\" resource.type=\"gce_instance\" metadata.user_labels.\"ghpc_deployment\"=\"${deployment_name}\""
}
}
}
],
"timeshiftDuration": "0s",
"yAxis": {
"label": "y1Axis",
"scale": "LINEAR"
}
}
},
{
"title": "VM Instance - Sent packets",
"xyChart": {
"chartOptions": {
"mode": "COLOR"
},
"dataSets": [
{
"minAlignmentPeriod": "60s",
"plotType": "LINE",
"targetAxis": "Y1",
"timeSeriesQuery": {
"apiSource": "DEFAULT_CLOUD",
"timeSeriesFilter": {
"aggregation": {
"alignmentPeriod": "60s",
"crossSeriesReducer": "REDUCE_NONE",
"perSeriesAligner": "ALIGN_RATE"
},
"filter": "metric.type=\"compute.googleapis.com/instance/network/sent_packets_count\" resource.type=\"gce_instance\" metadata.user_labels.\"ghpc_deployment\"=\"${deployment_name}\""
}
}
}
],
"timeshiftDuration": "0s",
"yAxis": {
"label": "y1Axis",
"scale": "LINEAR"
}
}
},
{
"title": "VM Instance - Received bytes",
"xyChart": {
"chartOptions": {
"mode": "COLOR"
},
"dataSets": [
{
"minAlignmentPeriod": "60s",
"plotType": "LINE",
"targetAxis": "Y1",
"timeSeriesQuery": {
"apiSource": "DEFAULT_CLOUD",
"timeSeriesFilter": {
"aggregation": {
"alignmentPeriod": "60s",
"crossSeriesReducer": "REDUCE_NONE",
"perSeriesAligner": "ALIGN_RATE"
},
"filter": "metric.type=\"compute.googleapis.com/instance/network/received_bytes_count\" resource.type=\"gce_instance\" metadata.user_labels.\"ghpc_deployment\"=\"${deployment_name}\""
}
}
}
],
"timeshiftDuration": "0s",
"yAxis": {
"label": "y1Axis",
"scale": "LINEAR"
}
}
},
{
"title": "GCE VM Instance - Sent bytes",
"xyChart": {
"chartOptions": {
"mode": "COLOR"
},
"dataSets": [
{
"minAlignmentPeriod": "60s",
"plotType": "LINE",
"targetAxis": "Y1",
"timeSeriesQuery": {
"apiSource": "DEFAULT_CLOUD",
"timeSeriesFilter": {
"aggregation": {
"alignmentPeriod": "60s",
"crossSeriesReducer": "REDUCE_MEAN",
"groupByFields": [
"metric.label.\"instance_name\"",
"metric.label.\"loadbalanced\"",
"resource.label.\"project_id\"",
"resource.label.\"instance_id\"",
"resource.label.\"zone\""
],
"perSeriesAligner": "ALIGN_RATE"
},
"filter": "metric.type=\"compute.googleapis.com/instance/network/sent_bytes_count\" resource.type=\"gce_instance\" metadata.user_labels.\"ghpc_deployment\"=\"${deployment_name}\"",
"secondaryAggregation": {
"alignmentPeriod": "60s",
"crossSeriesReducer": "REDUCE_NONE",
"perSeriesAligner": "ALIGN_MEAN"
}
}
}
}
],
"timeshiftDuration": "0s",
"yAxis": {
"label": "y1Axis",
"scale": "LINEAR"
}
}
},
{
"title": "GCE VM Instance - Network Traffic Bytes (agent)",
"xyChart": {
"chartOptions": {
"mode": "COLOR"
},
"dataSets": [
{
"minAlignmentPeriod": "60s",
"plotType": "LINE",
"targetAxis": "Y1",
"timeSeriesQuery": {
"apiSource": "DEFAULT_CLOUD",
"timeSeriesFilter": {
"aggregation": {
"alignmentPeriod": "60s",
"crossSeriesReducer": "REDUCE_NONE",
"perSeriesAligner": "ALIGN_RATE"
},
"filter": "metric.type=\"agent.googleapis.com/interface/traffic\" resource.type=\"gce_instance\" metadata.user_labels.\"ghpc_deployment\"=\"${deployment_name}\""
}
}
}
],
"timeshiftDuration": "0s",
"yAxis": {
"label": "y1Axis",
"scale": "LINEAR"
}
}
},
{
"title": "Network Packets (agent)",
"xyChart": {
"chartOptions": {
"mode": "COLOR"
},
"dataSets": [
{
"minAlignmentPeriod": "60s",
"plotType": "LINE",
"targetAxis": "Y1",
"timeSeriesQuery": {
"apiSource": "DEFAULT_CLOUD",
"timeSeriesFilter": {
"aggregation": {
"alignmentPeriod": "60s",
"crossSeriesReducer": "REDUCE_NONE",
"perSeriesAligner": "ALIGN_RATE"
},
"filter": "metric.type=\"agent.googleapis.com/interface/packets\" resource.type=\"gce_instance\" metadata.user_labels.\"ghpc_deployment\"=\"${deployment_name}\""
}
}
}
],
"timeshiftDuration": "0s",
"yAxis": {
"label": "y1Axis",
"scale": "LINEAR"
}
}
},
{
"title": "TCP connections",
"xyChart": {
"chartOptions": {
"mode": "COLOR"
},
"dataSets": [
{
"minAlignmentPeriod": "60s",
"plotType": "LINE",
"targetAxis": "Y1",
"timeSeriesQuery": {
"apiSource": "DEFAULT_CLOUD",
"timeSeriesFilter": {
"aggregation": {
"alignmentPeriod": "60s",
"crossSeriesReducer": "REDUCE_NONE",
"perSeriesAligner": "ALIGN_MEAN"
},
"filter": "metric.type=\"agent.googleapis.com/network/tcp_connections\" metadata.user_labels.\"ghpc_deployment\"=\"${deployment_name}\""
},
"unitOverride": "1"
}
}
],
"timeshiftDuration": "0s",
"yAxis": {
"label": "y1Axis",
"scale": "LINEAR"
}
}
},
{
"title": "VM Instance - CPU utilization for steal",
"xyChart": {
"chartOptions": {
"mode": "COLOR"
},
"dataSets": [
{
"minAlignmentPeriod": "60s",
"plotType": "STACKED_BAR",
"targetAxis": "Y1",
"timeSeriesQuery": {
"apiSource": "DEFAULT_CLOUD",
"timeSeriesFilter": {
"aggregation": {
"alignmentPeriod": "60s",
"crossSeriesReducer": "REDUCE_NONE",
"perSeriesAligner": "ALIGN_MAX"
},
"filter": "metric.type=\"agent.googleapis.com/cpu/utilization\" resource.type=\"gce_instance\" metadata.user_labels.\"ghpc_deployment\"=\"${deployment_name}\" metric.label.\"cpu_state\"=\"steal\""
}
}
}
],
"timeshiftDuration": "0s",
"yAxis": {
"label": "y1Axis",
"scale": "LINEAR"
}
}
},
{
"title": "VM Instance - CPU utilization [MEAN]",
"xyChart": {
"chartOptions": {
"mode": "COLOR"
},
"dataSets": [
{
"minAlignmentPeriod": "60s",
"plotType": "LINE",
"targetAxis": "Y1",
"timeSeriesQuery": {
"apiSource": "DEFAULT_CLOUD",
"timeSeriesFilter": {
"aggregation": {
"alignmentPeriod": "60s",
"crossSeriesReducer": "REDUCE_NONE",
"perSeriesAligner": "ALIGN_MEAN"
},
"filter": "metric.type=\"compute.googleapis.com/instance/cpu/utilization\" resource.type=\"gce_instance\" metadata.user_labels.\"ghpc_deployment\"=\"${deployment_name}\""
}
}
}
],
"timeshiftDuration": "0s",
"yAxis": {
"label": "y1Axis",
"scale": "LINEAR"
}
}
}%{ for widget in widgets ~},
${widget}
%{endfor ~}
]
}
}