in server/src/main/java/org/apache/uniffle/server/ShuffleServerMetrics.java [234:318]
/**
 * Registers the shuffle server's counters and gauges with {@code metricsManager} and stores each
 * returned handle in the corresponding static field of this class.
 *
 * <p>Three registration forms are used:
 *
 * <ul>
 *   <li>{@code addLabeledCounter(name)} / {@code addLabeledGauge(name)} for most metrics;
 *   <li>{@code addCounter(name, Constants.METRICS_TAG_LABEL_NAME, STORAGE_HOST_LABEL)} for the
 *       four remote-storage write counters, which therefore carry two explicit label names;
 *   <li>{@code addCounter(name)} for the two event-flush counters.
 * </ul>
 *
 * <p>The registration order is kept exactly as-is.
 */
private static void setUpMetrics() {
  // ---- Counters: received / written data ----
  counterTotalReceivedDataSize = metricsManager.addLabeledCounter(TOTAL_RECEIVED_DATA);
  counterTotalWriteDataSize = metricsManager.addLabeledCounter(TOTAL_WRITE_DATA);
  counterTotalWriteBlockSize = metricsManager.addLabeledCounter(TOTAL_WRITE_BLOCK);
  counterTotalWriteTime = metricsManager.addLabeledCounter(TOTAL_WRITE_TIME);
  counterWriteException = metricsManager.addLabeledCounter(TOTAL_WRITE_EXCEPTION);
  counterWriteSlow = metricsManager.addLabeledCounter(TOTAL_WRITE_SLOW);
  counterWriteTotal = metricsManager.addLabeledCounter(TOTAL_WRITE_NUM);

  // ---- Counters: event size threshold levels 1..4 ----
  counterEventSizeThresholdLevel1 = metricsManager.addLabeledCounter(EVENT_SIZE_THRESHOLD_LEVEL1);
  counterEventSizeThresholdLevel2 = metricsManager.addLabeledCounter(EVENT_SIZE_THRESHOLD_LEVEL2);
  counterEventSizeThresholdLevel3 = metricsManager.addLabeledCounter(EVENT_SIZE_THRESHOLD_LEVEL3);
  counterEventSizeThresholdLevel4 = metricsManager.addLabeledCounter(EVENT_SIZE_THRESHOLD_LEVEL4);

  // ---- Counters: read data ----
  counterTotalReadDataSize = metricsManager.addLabeledCounter(TOTAL_READ_DATA);
  counterTotalReadLocalDataFileSize =
      metricsManager.addLabeledCounter(TOTAL_READ_LOCAL_DATA_FILE);
  counterTotalReadLocalIndexFileSize =
      metricsManager.addLabeledCounter(TOTAL_READ_LOCAL_INDEX_FILE);
  counterTotalReadMemoryDataSize = metricsManager.addLabeledCounter(TOTAL_READ_MEMORY_DATA);
  counterTotalReadTime = metricsManager.addLabeledCounter(TOTAL_READ_TIME);

  // ---- Counters: dropped / failed events and per-storage written data ----
  counterTotalDroppedEventNum = metricsManager.addLabeledCounter(TOTAL_DROPPED_EVENT_NUM);
  counterTotalFailedWrittenEventNum =
      metricsManager.addLabeledCounter(TOTAL_FAILED_WRITTEN_EVENT_NUM);
  counterTotalHadoopWriteDataSize = metricsManager.addLabeledCounter(TOTAL_HADOOP_WRITE_DATA);
  counterTotalLocalFileWriteDataSize =
      metricsManager.addLabeledCounter(TOTAL_LOCALFILE_WRITE_DATA);

  // ---- Counters: require-buffer failures ----
  counterTotalRequireBufferFailed = metricsManager.addLabeledCounter(TOTAL_REQUIRE_BUFFER_FAILED);
  counterTotalRequireBufferFailedForRegularPartition =
      metricsManager.addLabeledCounter(TOTAL_REQUIRE_BUFFER_FAILED_FOR_REGULAR_PARTITION);
  counterTotalRequireBufferFailedForHugePartition =
      metricsManager.addLabeledCounter(TOTAL_REQUIRE_BUFFER_FAILED_FOR_HUGE_PARTITION);

  // ---- Counters: local storage writes ----
  counterLocalStorageTotalWrite = metricsManager.addLabeledCounter(STORAGE_TOTAL_WRITE_LOCAL);
  counterLocalStorageRetryWrite = metricsManager.addLabeledCounter(STORAGE_RETRY_WRITE_LOCAL);
  counterLocalStorageFailedWrite = metricsManager.addLabeledCounter(STORAGE_FAILED_WRITE_LOCAL);
  counterLocalStorageSuccessWrite = metricsManager.addLabeledCounter(STORAGE_SUCCESS_WRITE_LOCAL);

  // ---- Counters: remote storage writes ----
  // These are the only metrics here registered with explicit label names: the shared tag label
  // followed by STORAGE_HOST_LABEL.
  final String tagLabel = Constants.METRICS_TAG_LABEL_NAME;
  counterRemoteStorageTotalWrite =
      metricsManager.addCounter(STORAGE_TOTAL_WRITE_REMOTE, tagLabel, STORAGE_HOST_LABEL);
  counterRemoteStorageRetryWrite =
      metricsManager.addCounter(STORAGE_RETRY_WRITE_REMOTE, tagLabel, STORAGE_HOST_LABEL);
  counterRemoteStorageFailedWrite =
      metricsManager.addCounter(STORAGE_FAILED_WRITE_REMOTE, tagLabel, STORAGE_HOST_LABEL);
  counterRemoteStorageSuccessWrite =
      metricsManager.addCounter(STORAGE_SUCCESS_WRITE_REMOTE, tagLabel, STORAGE_HOST_LABEL);

  // ---- Counters: require-read-memory requests ----
  counterTotalRequireReadMemoryNum = metricsManager.addLabeledCounter(TOTAL_REQUIRE_READ_MEMORY);
  counterTotalRequireReadMemoryRetryNum =
      metricsManager.addLabeledCounter(TOTAL_REQUIRE_READ_MEMORY_RETRY);
  counterTotalRequireReadMemoryFailedNum =
      metricsManager.addLabeledCounter(TOTAL_REQUIRE_READ_MEMORY_FAILED);

  // ---- Counters: app / partition totals ----
  counterTotalAppNum = metricsManager.addLabeledCounter(TOTAL_APP_NUM);
  counterTotalAppWithHugePartitionNum =
      metricsManager.addLabeledCounter(TOTAL_APP_WITH_HUGE_PARTITION_NUM);
  counterTotalPartitionNum = metricsManager.addLabeledCounter(TOTAL_PARTITION_NUM);
  counterTotalHugePartitionNum = metricsManager.addLabeledCounter(TOTAL_HUGE_PARTITION_NUM);

  // ---- Gauges: local storage ----
  gaugeLocalStorageTotalDirsNum = metricsManager.addLabeledGauge(LOCAL_STORAGE_TOTAL_DIRS_NUM);
  gaugeLocalStorageCorruptedDirsNum =
      metricsManager.addLabeledGauge(LOCAL_STORAGE_CORRUPTED_DIRS_NUM);
  gaugeLocalStorageTotalSpace = metricsManager.addLabeledGauge(LOCAL_STORAGE_TOTAL_SPACE);
  gaugeLocalStorageWholeDiskUsedSpace =
      metricsManager.addLabeledGauge(LOCAL_STORAGE_WHOLE_DISK_USED_SPACE);
  gaugeLocalStorageServiceUsedSpace =
      metricsManager.addLabeledGauge(LOCAL_STORAGE_SERVICE_USED_SPACE);
  gaugeLocalStorageUsedSpaceRatio =
      metricsManager.addLabeledGauge(LOCAL_STORAGE_USED_SPACE_RATIO);

  // ---- Gauges: health, buffers, handlers and queue ----
  gaugeIsHealthy = metricsManager.addLabeledGauge(IS_HEALTHY);
  gaugeAllocatedBufferSize = metricsManager.addLabeledGauge(ALLOCATED_BUFFER_SIZE);
  gaugeInFlushBufferSize = metricsManager.addLabeledGauge(IN_FLUSH_BUFFER_SIZE);
  gaugeUsedBufferSize = metricsManager.addLabeledGauge(USED_BUFFER_SIZE);
  gaugeReadBufferUsedSize = metricsManager.addLabeledGauge(READ_USED_BUFFER_SIZE);
  gaugeWriteHandler = metricsManager.addLabeledGauge(TOTAL_WRITE_HANDLER);
  gaugeEventQueueSize = metricsManager.addLabeledGauge(EVENT_QUEUE_SIZE);

  // ---- Gauges: app / partition numbers ----
  gaugeAppNum = metricsManager.addLabeledGauge(APP_NUM_WITH_NODE);
  gaugeTotalPartitionNum = metricsManager.addLabeledGauge(PARTITION_NUM_WITH_NODE);
  gaugeHugePartitionNum = metricsManager.addLabeledGauge(HUGE_PARTITION_NUM);
  gaugeAppWithHugePartitionNum = metricsManager.addLabeledGauge(APP_WITH_HUGE_PARTITION_NUM);

  // ---- Counters: event flushes ----
  // NOTE(review): these two go through addCounter(name) with no label arguments, whereas every
  // other non-remote counter above uses addLabeledCounter — confirm the difference is intended.
  counterLocalFileEventFlush = metricsManager.addCounter(LOCAL_FILE_EVENT_FLUSH_NUM);
  counterHadoopEventFlush = metricsManager.addCounter(HADOOP_EVENT_FLUSH_NUM);
}