private static void setUpMetrics()

in server/src/main/java/org/apache/uniffle/server/ShuffleServerMetrics.java [368:535]


  /**
   * Creates every shuffle-server metric and stores the returned handle in the matching static
   * field of this class.
   *
   * <p>Most metrics are created through {@code metricsManager}; the four per-application "top N"
   * gauges at the end are built directly and registered with the manager's collector registry.
   * Statements are kept in a fixed order so that registration order does not change.
   *
   * @param serverConf server configuration; only read here to obtain the bucket boundaries of the
   *     app-level write-block-size histogram
   */
  private static void setUpMetrics(ShuffleServerConf serverConf) {
    // ---- Data volume, flush and delete bookkeeping ----
    counterTotalReceivedDataSize = metricsManager.addLabeledCounter(TOTAL_RECEIVED_DATA);
    counterTotalWriteDataSize = metricsManager.addLabeledCounter(TOTAL_WRITE_DATA);
    counterTotalDeleteDataSize = metricsManager.addLabeledCounter(TOTAL_DELETE_DATA);
    counterTotalFlushFileNum = metricsManager.addLabeledCounter(TOTAL_FLUSH_FILE_NUM);
    counterTotalDeleteFileNum = metricsManager.addLabeledCounter(TOTAL_DELETE_FILE_NUM);
    gaugeStorageUsedBytes = metricsManager.addLabeledGauge(STORAGE_USED_BYTES);
    gaugeFlushFileNum = metricsManager.addLabeledGauge(FLUSH_FILE_NUM);
    counterTotalWriteBlockSize = metricsManager.addLabeledCounter(TOTAL_WRITE_BLOCK);

    // Histogram buckets come from configuration; the histogram is labeled per application.
    appHistogramWriteBlockSize =
        metricsManager.addHistogram(
            WRITE_BLOCK_SIZE,
            ConfigUtils.convertBytesStringToDoubleArray(
                serverConf.get(ShuffleServerConf.APP_LEVEL_SHUFFLE_BLOCK_SIZE_METRIC_BUCKETS)),
            METRICS_APP_LABEL_NAME);

    // ---- Write timing, outcome and event-size threshold counters ----
    counterTotalWriteTime = metricsManager.addLabeledCounter(TOTAL_WRITE_TIME);
    counterWriteException = metricsManager.addLabeledCounter(TOTAL_WRITE_EXCEPTION);
    counterWriteSlow = metricsManager.addLabeledCounter(TOTAL_WRITE_SLOW);
    counterWriteTotal = metricsManager.addLabeledCounter(TOTAL_WRITE_NUM);
    counterEventSizeThresholdLevel1 = metricsManager.addLabeledCounter(EVENT_SIZE_THRESHOLD_LEVEL1);
    counterEventSizeThresholdLevel2 = metricsManager.addLabeledCounter(EVENT_SIZE_THRESHOLD_LEVEL2);
    counterEventSizeThresholdLevel3 = metricsManager.addLabeledCounter(EVENT_SIZE_THRESHOLD_LEVEL3);
    counterEventSizeThresholdLevel4 = metricsManager.addLabeledCounter(EVENT_SIZE_THRESHOLD_LEVEL4);

    // ---- Read counters ----
    counterTotalReadDataSize = metricsManager.addLabeledCounter(TOTAL_READ_DATA);
    counterTotalReadLocalDataFileSize =
        metricsManager.addLabeledCounter(TOTAL_READ_LOCAL_DATA_FILE);
    counterTotalReadLocalIndexFileSize =
        metricsManager.addLabeledCounter(TOTAL_READ_LOCAL_INDEX_FILE);
    counterTotalReadMemoryDataSize = metricsManager.addLabeledCounter(TOTAL_READ_MEMORY_DATA);
    counterTotalReadTime = metricsManager.addLabeledCounter(TOTAL_READ_TIME);

    // ---- Dropped / failed events ----
    counterTotalDroppedEventNum = metricsManager.addLabeledCounter(TOTAL_DROPPED_EVENT_NUM);
    counterTotalFailedWrittenEventNum =
        metricsManager.addLabeledCounter(TOTAL_FAILED_WRITTEN_EVENT_NUM);

    // ---- Written data size, with explicit label names (tag + storage host, or disk path) ----
    counterTotalHadoopWriteDataSize =
        metricsManager.addCounter(
            TOTAL_HADOOP_WRITE_DATA, Constants.METRICS_TAG_LABEL_NAME, STORAGE_HOST_LABEL);
    counterTotalHadoopWriteDataSizeForHugePartition =
        metricsManager.addCounter(
            TOTAL_HADOOP_WRITE_DATA_FOR_HUGE_PARTITION,
            Constants.METRICS_TAG_LABEL_NAME,
            STORAGE_HOST_LABEL);
    counterTotalLocalFileWriteDataSize =
        metricsManager.addCounter(TOTAL_LOCALFILE_WRITE_DATA, LOCAL_DISK_PATH_LABEL);

    // ---- Failed buffer requirements ----
    counterTotalRequireBufferFailed = metricsManager.addLabeledCounter(TOTAL_REQUIRE_BUFFER_FAILED);
    counterTotalRequireBufferFailedForRegularPartition =
        metricsManager.addLabeledCounter(TOTAL_REQUIRE_BUFFER_FAILED_FOR_REGULAR_PARTITION);
    counterTotalRequireBufferFailedForHugePartition =
        metricsManager.addLabeledCounter(TOTAL_REQUIRE_BUFFER_FAILED_FOR_HUGE_PARTITION);

    // ---- Storage write attempts: local counters first, then remote ones ----
    counterLocalStorageTotalWrite = metricsManager.addLabeledCounter(STORAGE_TOTAL_WRITE_LOCAL);
    counterLocalStorageRetryWrite = metricsManager.addLabeledCounter(STORAGE_RETRY_WRITE_LOCAL);
    counterLocalStorageFailedWrite = metricsManager.addLabeledCounter(STORAGE_FAILED_WRITE_LOCAL);
    counterLocalStorageSuccessWrite = metricsManager.addLabeledCounter(STORAGE_SUCCESS_WRITE_LOCAL);
    counterRemoteStorageTotalWrite =
        metricsManager.addCounter(
            STORAGE_TOTAL_WRITE_REMOTE, Constants.METRICS_TAG_LABEL_NAME, STORAGE_HOST_LABEL);
    counterRemoteStorageRetryWrite =
        metricsManager.addCounter(
            STORAGE_RETRY_WRITE_REMOTE, Constants.METRICS_TAG_LABEL_NAME, STORAGE_HOST_LABEL);
    counterRemoteStorageFailedWrite =
        metricsManager.addCounter(
            STORAGE_FAILED_WRITE_REMOTE, Constants.METRICS_TAG_LABEL_NAME, STORAGE_HOST_LABEL);
    counterRemoteStorageSuccessWrite =
        metricsManager.addCounter(
            STORAGE_SUCCESS_WRITE_REMOTE, Constants.METRICS_TAG_LABEL_NAME, STORAGE_HOST_LABEL);

    // ---- Read-memory requirements ----
    counterTotalRequireReadMemoryNum = metricsManager.addLabeledCounter(TOTAL_REQUIRE_READ_MEMORY);
    counterTotalRequireReadMemoryRetryNum =
        metricsManager.addLabeledCounter(TOTAL_REQUIRE_READ_MEMORY_RETRY);
    counterTotalRequireReadMemoryFailedNum =
        metricsManager.addLabeledCounter(TOTAL_REQUIRE_READ_MEMORY_FAILED);

    // ---- Cumulative application / partition counters ----
    counterTotalAppNum = metricsManager.addLabeledCounter(TOTAL_APP_NUM);
    counterTotalAppWithHugePartitionNum =
        metricsManager.addLabeledCounter(TOTAL_APP_WITH_HUGE_PARTITION_NUM);
    counterTotalPartitionNum = metricsManager.addLabeledCounter(TOTAL_PARTITION_NUM);
    counterTotalHugePartitionNum = metricsManager.addLabeledCounter(TOTAL_HUGE_PARTITION_NUM);
    counterTotalHugePartitionExceedHardLimitNum =
        metricsManager.addLabeledCounter(TOTAL_HUGE_PARTITION_EXCEED_HARD_LIMIT_NUM);
    counterLocalRenameAndDeletionFaileTd =
        metricsManager.addLabeledCounter(TOTAL_LOCAL_RENAME_AND_DELETION_FAILED);

    // ---- Local storage gauges (the first two take a disk-path label name) ----
    gaugeLocalStorageIsWritable =
        metricsManager.addGauge(LOCAL_STORAGE_IS_WRITABLE, LOCAL_DISK_PATH_LABEL);
    gaugeLocalStorageIsTimeout =
        metricsManager.addGauge(LOCAL_STORAGE_IS_TIMEOUT, LOCAL_DISK_PATH_LABEL);
    gaugeLocalStorageTotalDirsNum = metricsManager.addLabeledGauge(LOCAL_STORAGE_TOTAL_DIRS_NUM);
    gaugeLocalStorageCorruptedDirsNum =
        metricsManager.addLabeledGauge(LOCAL_STORAGE_CORRUPTED_DIRS_NUM);
    gaugeLocalStorageTotalSpace = metricsManager.addLabeledGauge(LOCAL_STORAGE_TOTAL_SPACE);
    gaugeLocalStorageWholeDiskUsedSpace =
        metricsManager.addLabeledGauge(LOCAL_STORAGE_WHOLE_DISK_USED_SPACE);
    gaugeLocalStorageServiceUsedSpace =
        metricsManager.addLabeledGauge(LOCAL_STORAGE_SERVICE_USED_SPACE);
    gaugeLocalStorageUsedSpaceRatio =
        metricsManager.addLabeledGauge(LOCAL_STORAGE_USED_SPACE_RATIO);

    // ---- Health, buffer and queue-size gauges ----
    gaugeIsHealthy = metricsManager.addLabeledGauge(IS_HEALTHY);
    gaugeAllocatedBufferSize = metricsManager.addLabeledGauge(ALLOCATED_BUFFER_SIZE);
    gaugeInFlushBufferSize = metricsManager.addLabeledGauge(IN_FLUSH_BUFFER_SIZE);
    gaugeUsedBufferSize = metricsManager.addLabeledGauge(USED_BUFFER_SIZE);
    gaugeReadBufferUsedSize = metricsManager.addLabeledGauge(READ_USED_BUFFER_SIZE);
    gaugeWriteHandler = metricsManager.addLabeledGauge(TOTAL_WRITE_HANDLER);
    gaugeMergeEventQueueSize = metricsManager.addLabeledGauge(MERGE_EVENT_QUEUE_SIZE);
    gaugeHadoopFlushThreadPoolQueueSize =
        metricsManager.addLabeledGauge(HADOOP_FLUSH_THREAD_POOL_QUEUE_SIZE);
    gaugeLocalfileFlushThreadPoolQueueSize =
        metricsManager.addLabeledGauge(LOCALFILE_FLUSH_THREAD_POOL_QUEUE_SIZE);
    gaugeFallbackFlushThreadPoolQueueSize =
        metricsManager.addLabeledGauge(FALLBACK_FLUSH_THREAD_POOL_QUEUE_SIZE);

    // ---- Application / partition gauges ----
    gaugeAppNum = metricsManager.addLabeledGauge(APP_NUM_WITH_NODE);
    gaugeTotalPartitionNum = metricsManager.addLabeledGauge(PARTITION_NUM_WITH_NODE);

    // ---- Read thread-count and read buffer-size gauges ----
    gaugeReadLocalDataFileThreadNum =
        metricsManager.addLabeledGauge(READ_LOCAL_DATA_FILE_THREAD_NUM);
    gaugeReadLocalIndexFileThreadNum =
        metricsManager.addLabeledGauge(READ_LOCAL_INDEX_FILE_THREAD_NUM);
    gaugeReadMemoryDataThreadNum = metricsManager.addLabeledGauge(READ_MEMORY_DATA_THREAD_NUM);
    gaugeReadLocalDataFileBufferSize =
        metricsManager.addLabeledGauge(READ_LOCAL_DATA_FILE_BUFFER_SIZE);
    gaugeReadLocalIndexFileBufferSize =
        metricsManager.addLabeledGauge(READ_LOCAL_INDEX_FILE_BUFFER_SIZE);
    gaugeReadMemoryDataBufferSize = metricsManager.addLabeledGauge(READ_MEMORY_DATA_BUFFER_SIZE);

    // ---- Huge partition gauges ----
    gaugeHugePartitionNum = metricsManager.addLabeledGauge(HUGE_PARTITION_NUM);
    gaugeAppWithHugePartitionNum = metricsManager.addLabeledGauge(APP_WITH_HUGE_PARTITION_NUM);

    // ---- Counters created without extra label names ----
    counterLocalFileEventFlush = metricsManager.addCounter(LOCAL_FILE_EVENT_FLUSH_NUM);
    counterHadoopEventFlush = metricsManager.addCounter(HADOOP_EVENT_FLUSH_NUM);
    counterPreAllocatedBufferExpired =
        metricsManager.addCounter(TOTAL_EXPIRED_PRE_ALLOCATED_BUFFER_NUM);
    counterAppNotFound = metricsManager.addCounter(TOTAL_APP_NOT_FOUND_NUM);

    // ---- Resource-removal summaries ----
    summaryTotalRemoveResourceTime = metricsManager.addSummary(TOTAL_REMOVE_RESOURCE_TIME);
    summaryTotalRemoveResourceByShuffleIdsTime =
        metricsManager.addSummary(TOTAL_REMOVE_RESOURCE_BY_SHUFFLE_IDS_TIME);

    // ---- Per-application "top N" gauges, registered directly on the collector registry ----
    gaugeTotalDataSizeUsage =
        registerAppIdGauge(
            TOPN_OF_TOTAL_DATA_SIZE_FOR_APP, "top N of total shuffle data size for app level");
    gaugeInMemoryDataSizeUsage =
        registerAppIdGauge(
            TOPN_OF_IN_MEMORY_DATA_SIZE_FOR_APP,
            "top N of in memory shuffle data size for app level");
    gaugeOnDiskDataSizeUsage =
        registerAppIdGauge(
            TOPN_OF_ON_LOCALFILE_DATA_SIZE_FOR_APP,
            "top N of on disk shuffle data size for app level");
    gaugeOnHadoopDataSizeUsage =
        registerAppIdGauge(
            TOPN_OF_ON_HADOOP_DATA_SIZE_FOR_APP,
            "top N of on hadoop shuffle data size for app level");
  }

  /**
   * Builds a gauge with a single {@code app_id} label name and registers it with the collector
   * registry obtained from {@code metricsManager}.
   *
   * @param metricName name given to the gauge
   * @param helpText help string attached to the gauge
   * @return the registered gauge
   */
  private static Gauge registerAppIdGauge(String metricName, String helpText) {
    return Gauge.build()
        .name(metricName)
        .help(helpText)
        .labelNames("app_id")
        .register(metricsManager.getCollectorRegistry());
  }