static long calculateDetectorRequirementBytes(Detector detector, long bucketSpanSeconds, Map<String, Long> overallCardinality)

in x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportEstimateModelMemoryAction.java [83:203]


    static long calculateDetectorRequirementBytes(Detector detector, long bucketSpanSeconds, Map<String, Long> overallCardinality) {

        long answer = 0;
        boolean addFieldValueWorkspace = false;

        // These values for detectors assume splitting is via a partition field
        switch (detector.getFunction()) {
            case DISTINCT_COUNT:
            case LOW_DISTINCT_COUNT:
            case HIGH_DISTINCT_COUNT:
                addFieldValueWorkspace = true;
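                // fall through: the distinct count functions also need the count baseline below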
            case COUNT:
            case LOW_COUNT:
            case HIGH_COUNT:
            case NON_ZERO_COUNT:
            case LOW_NON_ZERO_COUNT:
            case HIGH_NON_ZERO_COUNT:
                answer = ByteSizeValue.ofKb(32).getBytes();
                break;
            case RARE:
            case FREQ_RARE:
                answer = ByteSizeValue.ofKb(2).getBytes();
                break;
            case INFO_CONTENT:
            case LOW_INFO_CONTENT:
            case HIGH_INFO_CONTENT:
                addFieldValueWorkspace = true;
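                // fall through: the info content functions also need the metric function baseline below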
            case MEAN:
            case LOW_MEAN:
            case HIGH_MEAN:
            case AVG:
            case LOW_AVG:
            case HIGH_AVG:
            case MIN:
            case MAX:
            case SUM:
            case LOW_SUM:
            case HIGH_SUM:
            case NON_NULL_SUM:
            case LOW_NON_NULL_SUM:
            case HIGH_NON_NULL_SUM:
            case VARP:
            case LOW_VARP:
            case HIGH_VARP:
                answer = ByteSizeValue.ofKb(48).getBytes();
                break;
            case METRIC:
                // metric analyses mean, min and max simultaneously, and uses about 2.5 times the memory of one of these
                answer = ByteSizeValue.ofKb(120).getBytes();
                break;
            case MEDIAN:
            case LOW_MEDIAN:
            case HIGH_MEDIAN:
                answer = ByteSizeValue.ofKb(64).getBytes();
                break;
            case TIME_OF_DAY:
            case TIME_OF_WEEK:
                answer = ByteSizeValue.ofKb(10).getBytes();
                break;
            case LAT_LONG:
                answer = ByteSizeValue.ofKb(64).getBytes();
                break;
            default:
                assert false : "unhandled detector function: " + detector.getFunction().getFullName();
        }
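        // At this point answer holds the base cost of modelling a single time series
        // for the chosen function; the cardinality handling below scales it up.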

        long partitionFieldCardinalityEstimate = 1;
        String partitionFieldName = detector.getPartitionFieldName();
        if (partitionFieldName != null) {
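            // Clamp to at least 1 so the final multiplication by partition cardinality cannot zero the estimate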
            partitionFieldCardinalityEstimate = Math.max(
                1,
                cardinalityEstimate(Detector.PARTITION_FIELD_NAME_FIELD.getPreferredName(), partitionFieldName, overallCardinality, true)
            );
        }

        String byFieldName = detector.getByFieldName();
        if (byFieldName != null) {
            long byFieldCardinalityEstimate = cardinalityEstimate(
                Detector.BY_FIELD_NAME_FIELD.getPreferredName(),
                byFieldName,
                overallCardinality,
                true
            );
            // Assume the number of by field values in each partition is reduced if the cardinality of both by and partition fields is high.
            // The memory cost of a by field is about two thirds that of a partition field.
            double multiplier = Math.ceil(
                reducedCardinality(byFieldCardinalityEstimate, partitionFieldCardinalityEstimate, bucketSpanSeconds) * 2.0 / 3.0
            );
            answer = multiplyNonNegativeLongsWithMaxValueCap(answer, (long) multiplier);
        }

        String overFieldName = detector.getOverFieldName();
        if (overFieldName != null) {
            long overFieldCardinalityEstimate = cardinalityEstimate(
                Detector.OVER_FIELD_NAME_FIELD.getPreferredName(),
                overFieldName,
                overallCardinality,
                true
            );
            // Assume the number of over field values in each partition is reduced if the cardinality of both over and partition fields is high
            double multiplier = Math.ceil(
                reducedCardinality(overFieldCardinalityEstimate, partitionFieldCardinalityEstimate, bucketSpanSeconds)
            );
            // Over fields don't multiply the whole estimate, just add a small amount (estimate 768 bytes) per value
            answer = addNonNegativeLongsWithMaxValueCap(answer, multiplyNonNegativeLongsWithMaxValueCap(768, (long) multiplier));
        }

        if (partitionFieldName != null) {
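            // The by and over field costs above were computed per partition, so scale by the partition cardinality last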
            answer = multiplyNonNegativeLongsWithMaxValueCap(answer, partitionFieldCardinalityEstimate);
        }

        if (addFieldValueWorkspace) {
            // The field value workspace should really be the maximum over all buckets of the
            // length of all the distinct values of the function field concatenated in the bucket.
            // However, that would be very expensive and complex for the caller to calculate so
            // we just allow a fixed amount.
            answer = addNonNegativeLongsWithMaxValueCap(answer, ByteSizeValue.ofMb(5).getBytes());
        }

        return answer;
    }
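
The overflow-safe helpers addNonNegativeLongsWithMaxValueCap and multiplyNonNegativeLongsWithMaxValueCap are defined elsewhere in the same file. A minimal sketch of the saturating behaviour their names imply, written as an assumption about the real implementations rather than a copy of them:

    // Hypothetical sketches, not the actual Elasticsearch implementations:
    // both helpers are assumed to clamp at Long.MAX_VALUE rather than overflow.
    static long addNonNegativeLongsWithMaxValueCap(long a, long b) {
        assert a >= 0 && b >= 0;
        // a + b overflows exactly when b exceeds the headroom above a
        return (Long.MAX_VALUE - a < b) ? Long.MAX_VALUE : a + b;
    }

    static long multiplyNonNegativeLongsWithMaxValueCap(long a, long b) {
        assert a >= 0 && b >= 0;
        // for a > 0, a * b overflows exactly when b > Long.MAX_VALUE / a
        return (a != 0 && Long.MAX_VALUE / a < b) ? Long.MAX_VALUE : a * b;
    }

Note the order of operations in the method above: the over field bytes are added before the partition multiplication, so they are scaled by the partition cardinality as well; the 5MB field value workspace is added last and is not.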