in x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportEstimateModelMemoryAction.java [83:203]
    static long calculateDetectorRequirementBytes(Detector detector, long bucketSpanSeconds, Map<String, Long> overallCardinality) {

        long answer = 0;
        boolean addFieldValueWorkspace = false;

        // These values for detectors assume splitting is via a partition field
        switch (detector.getFunction()) {
            case DISTINCT_COUNT:
            case LOW_DISTINCT_COUNT:
            case HIGH_DISTINCT_COUNT:
                addFieldValueWorkspace = true;
                // fall through: the distinct count functions also need the count base cost below
            case COUNT:
            case LOW_COUNT:
            case HIGH_COUNT:
            case NON_ZERO_COUNT:
            case LOW_NON_ZERO_COUNT:
            case HIGH_NON_ZERO_COUNT:
                answer = ByteSizeValue.ofKb(32).getBytes();
                break;
            case RARE:
            case FREQ_RARE:
                answer = ByteSizeValue.ofKb(2).getBytes();
                break;
            case INFO_CONTENT:
            case LOW_INFO_CONTENT:
            case HIGH_INFO_CONTENT:
                addFieldValueWorkspace = true;
                // fall through: the info content functions also need the base cost of the simple metric functions below
            case MEAN:
            case LOW_MEAN:
            case HIGH_MEAN:
            case AVG:
            case LOW_AVG:
            case HIGH_AVG:
            case MIN:
            case MAX:
            case SUM:
            case LOW_SUM:
            case HIGH_SUM:
            case NON_NULL_SUM:
            case LOW_NON_NULL_SUM:
            case HIGH_NON_NULL_SUM:
            case VARP:
            case LOW_VARP:
            case HIGH_VARP:
                answer = ByteSizeValue.ofKb(48).getBytes();
                break;
            case METRIC:
                // The metric function analyses mean, min and max simultaneously, so uses about 2.5 times the memory of one of them
                answer = ByteSizeValue.ofKb(120).getBytes();
                break;
            case MEDIAN:
            case LOW_MEDIAN:
            case HIGH_MEDIAN:
                answer = ByteSizeValue.ofKb(64).getBytes();
                break;
            case TIME_OF_DAY:
            case TIME_OF_WEEK:
                answer = ByteSizeValue.ofKb(10).getBytes();
                break;
            case LAT_LONG:
                answer = ByteSizeValue.ofKb(64).getBytes();
                break;
            default:
                assert false : "unhandled detector function: " + detector.getFunction().getFullName();
        }
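        // Illustrative note: the value chosen above is a per-detector base cost; the by, over and
        // partition field adjustments below scale or add to it, so a plain mean detector with no
        // splits would contribute just the 48 KB base.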
        long partitionFieldCardinalityEstimate = 1;
        String partitionFieldName = detector.getPartitionFieldName();
        if (partitionFieldName != null) {
            partitionFieldCardinalityEstimate = Math.max(
                1,
                cardinalityEstimate(Detector.PARTITION_FIELD_NAME_FIELD.getPreferredName(), partitionFieldName, overallCardinality, true)
            );
        }
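        // Note: the Math.max(1, ...) guard means a missing or zero cardinality estimate for the
        // partition field cannot zero out the whole detector estimate when it is multiplied in below.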
        String byFieldName = detector.getByFieldName();
        if (byFieldName != null) {
            long byFieldCardinalityEstimate = cardinalityEstimate(
                Detector.BY_FIELD_NAME_FIELD.getPreferredName(),
                byFieldName,
                overallCardinality,
                true
            );
            // Assume the number of by field values in each partition is reduced if the cardinality of both by and partition fields is high
            // The memory cost of a by field is about 2/3rds that of a partition field
            double multiplier = Math.ceil(
                reducedCardinality(byFieldCardinalityEstimate, partitionFieldCardinalityEstimate, bucketSpanSeconds) * 2.0 / 3.0
            );
            answer = multiplyNonNegativeLongsWithMaxValueCap(answer, (long) multiplier);
        }
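        // Illustrative arithmetic (hypothetical numbers): if reducedCardinality(...) returned 300 for
        // the by field, the multiplier would be ceil(300 * 2.0 / 3.0) = 200, scaling the base cost 200x.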
        String overFieldName = detector.getOverFieldName();
        if (overFieldName != null) {
            long overFieldCardinalityEstimate = cardinalityEstimate(
                Detector.OVER_FIELD_NAME_FIELD.getPreferredName(),
                overFieldName,
                overallCardinality,
                true
            );
            // Assume the number of over field values in each partition is reduced if the cardinality of both over and partition fields is high
            double multiplier = Math.ceil(
                reducedCardinality(overFieldCardinalityEstimate, partitionFieldCardinalityEstimate, bucketSpanSeconds)
            );
            // Over fields don't multiply the whole estimate, they just add a small amount (an estimated 768 bytes) per value
            answer = addNonNegativeLongsWithMaxValueCap(answer, multiplyNonNegativeLongsWithMaxValueCap(768, (long) multiplier));
        }
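        // Illustrative arithmetic (hypothetical numbers): an effective over field cardinality of 100,000
        // adds 768 * 100,000 bytes, roughly 73 MiB, independent of the detector's base cost.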
        if (partitionFieldName != null) {
            answer = multiplyNonNegativeLongsWithMaxValueCap(answer, partitionFieldCardinalityEstimate);
        }

        if (addFieldValueWorkspace) {
            // The field value workspace should really be the maximum over all buckets of the
            // length of all the distinct values of the function field concatenated in the bucket.
            // However, that would be very expensive and complex for the caller to calculate so
            // we just allow a fixed amount.
            answer = addNonNegativeLongsWithMaxValueCap(answer, ByteSizeValue.ofMb(5).getBytes());
        }

        return answer;
    }
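
The capping helpers referenced above (addNonNegativeLongsWithMaxValueCap and multiplyNonNegativeLongsWithMaxValueCap) are defined elsewhere in this class. As a minimal sketch of the behaviour their names imply (saturating at Long.MAX_VALUE instead of overflowing), something like the following would do; the *Sketch names are hypothetical stand-ins, not the real implementations:

    // Hypothetical stand-ins for the capping helpers used by calculateDetectorRequirementBytes;
    // assumes the contract is simply "saturate at Long.MAX_VALUE rather than wrap on overflow".
    static long addNonNegativeLongsWithMaxValueCapSketch(long a, long b) {
        assert a >= 0 && b >= 0;
        try {
            return Math.addExact(a, b); // throws ArithmeticException on long overflow
        } catch (ArithmeticException e) {
            return Long.MAX_VALUE;
        }
    }

    static long multiplyNonNegativeLongsWithMaxValueCapSketch(long a, long b) {
        assert a >= 0 && b >= 0;
        try {
            return Math.multiplyExact(a, b); // throws ArithmeticException on long overflow
        } catch (ArithmeticException e) {
            return Long.MAX_VALUE;
        }
    }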