export function basicJobValidation()

in x-pack/platform/plugins/shared/ml/common/util/job_utils.ts [445:658]


/**
 * Runs the basic, synchronous checks on an anomaly detection job configuration
 * and collects one message per check that was evaluated.
 *
 * Checks performed, in order: job ID, group names, categorization filters,
 * detectors (presence, function, per-partition categorization, duplicates),
 * bucket span, loaded index fields and, unless skipped, model memory limit.
 *
 * @param job - The job configuration to validate. A falsy value yields an
 *   invalid result with no messages.
 * @param fields - Loaded index fields. When not `undefined`, it must have at
 *   least one key to be reported as valid; when `undefined` the check is skipped.
 * @param limits - Server limits, forwarded to `validateModelMemoryLimit`.
 * @param skipMmlChecks - The model memory limit checks run only when this is
 *   exactly `false` (the default).
 * @returns The collected `messages`, the overall `valid` flag, and the
 *   `contains` / `find` helpers that look messages up by `id`.
 */
export function basicJobValidation(
  job: Job,
  fields: object | undefined,
  limits: MlServerLimits,
  skipMmlChecks = false
): ValidationResults {
  const messages: ValidationResults['messages'] = [];
  let valid = true;

  if (!job) {
    // Nothing to inspect: report an invalid result without any messages.
    valid = false;
  } else {
    // ---- Job ID ----
    const jobId = job.job_id;
    if (isEmpty(jobId)) {
      valid = false;
      messages.push({ id: 'job_id_empty' });
    } else if (isJobIdValid(jobId) === false) {
      valid = false;
      messages.push({ id: 'job_id_invalid' });
    } else if (maxLengthValidator(JOB_ID_MAX_LENGTH)(jobId)) {
      // A truthy validator result means the ID exceeds the maximum length.
      valid = false;
      messages.push({ id: 'job_id_invalid_max_length', maxLength: JOB_ID_MAX_LENGTH });
    } else {
      messages.push({ id: 'job_id_valid' });
    }

    // ---- Group names ----
    const groupNamesResult = validateGroupNames(job);
    messages.push(...groupNamesResult.messages);
    valid = valid && groupNamesResult.valid;

    // ---- Analysis configuration: categorization filters ----
    const analysisConfig = job.analysis_config;
    const categorizationFilters = analysisConfig.categorization_filters;
    if (categorizationFilters) {
      let filtersAreValid = true;

      each(categorizationFilters, (filter) => {
        // Every filter has to compile as a regular expression.
        try {
          new RegExp(filter);
        } catch (error) {
          filtersAreValid = false;
        }

        // Filters need a categorization field to apply to, and must not be blank.
        const categorizationFieldName = analysisConfig.categorization_field_name;
        if (
          categorizationFieldName === undefined ||
          categorizationFieldName === '' ||
          filter === ''
        ) {
          filtersAreValid = false;
        }
      });

      if (!filtersAreValid) {
        messages.push({ id: 'categorization_filters_invalid' });
        valid = false;
      } else {
        messages.push({ id: 'categorization_filters_valid' });
      }
    }

    // ---- Analysis configuration: detectors ----
    const detectors = analysisConfig.detectors;
    if (detectors.length === 0) {
      messages.push({ id: 'detectors_empty' });
      valid = false;
    } else {
      let everyDetectorHasFunction = true;
      let mlCategoryDetectorLacksPartition = false;
      const perPartitionEnabled = analysisConfig.per_partition_categorization?.enabled === true;

      each(detectors, (detector) => {
        if (isEmpty(detector.function)) {
          everyDetectorHasFunction = false;
        }

        // With per-partition categorization enabled, a detector that uses the
        // ml category as by/over field must also define a partition field.
        const usesMlCategory =
          detector.by_field_name === MLCATEGORY || detector.over_field_name === MLCATEGORY;
        if (
          perPartitionEnabled &&
          usesMlCategory &&
          detector.partition_field_name === undefined
        ) {
          mlCategoryDetectorLacksPartition = true;
        }
      });

      if (!everyDetectorHasFunction) {
        messages.push({ id: 'detectors_function_empty' });
        valid = false;
      } else {
        messages.push({ id: 'detectors_function_not_empty' });
      }

      if (mlCategoryDetectorLacksPartition) {
        messages.push({ id: 'categorizer_detector_missing_per_partition_field' });
        valid = false;
      }
    }

    // ---- Checks that only make sense with two or more detectors ----
    if (detectors.length >= 2) {
      // When per-partition categorization is configured, all detectors that
      // reference the ml category must agree on a single partition field.
      const perPartitionCategorization = analysisConfig.per_partition_categorization;
      if (
        perPartitionCategorization !== undefined &&
        (perPartitionCategorization.enabled ||
          (perPartitionCategorization.stop_on_warn &&
            Array.isArray(detectors) &&
            detectors.length >= 2))
      ) {
        const mlCategoryDetectors = detectors.filter(
          (detector) =>
            detector.by_field_name === MLCATEGORY ||
            detector.over_field_name === MLCATEGORY ||
            detector.partition_field_name === MLCATEGORY
        );
        const definedPartitionNames = mlCategoryDetectors
          .map((detector) => detector.partition_field_name)
          .filter((name) => name !== undefined);
        const uniqPartitions = Array.from(new Set(definedPartitionNames));

        if (uniqPartitions.length > 1) {
          messages.push({
            id: 'categorizer_varying_per_partition_fields',
            fields: uniqPartitions.join(', '),
          });
          valid = false;
        }
      }

      // Duplicate detection: two detectors count as duplicates when the
      // attributes picked below are all equal.
      const detectorSubsets = detectors.map((detector) =>
        pick(detector, [
          'function',
          'field_name',
          'by_field_name',
          'over_field_name',
          'partition_field_name',
        ])
      );
      const distinctSubsets = uniqWithIsEqual(detectorSubsets);

      if (distinctSubsets.length !== detectorSubsets.length) {
        messages.push({ id: 'detectors_duplicates' });
        valid = false;
      }
    }

    // The influencer check below is intentionally disabled: the client side
    // form check ignores it and the server side tests have their own
    // influencer test.
    // TODO: clarify if this is still needed or can be deleted
    /*
    if (job.analysis_config.influencers &&
      job.analysis_config.influencers.length === 0) {
      messages.push({ id: 'influencers_low' });
      valid = false;
    } else {
      messages.push({ id: 'success_influencers' });
    }
    */

    // ---- Bucket span ----
    const bucketSpan = analysisConfig.bucket_span;
    if (bucketSpan === undefined || bucketSpan === '') {
      messages.push({ id: 'bucket_span_empty' });
      valid = false;
    } else if (isValidTimeInterval(bucketSpan)) {
      messages.push({ id: 'bucket_span_valid', bucketSpan });
    } else {
      messages.push({ id: 'bucket_span_invalid' });
      valid = false;
    }

    // ---- Datafeed: loaded index fields ----
    if (fields !== undefined) {
      const hasLoadedFields = Object.keys(fields).length > 0;
      if (hasLoadedFields) {
        messages.push({ id: 'index_fields_valid' });
      } else {
        messages.push({ id: 'index_fields_invalid' });
        valid = false;
      }
    }

    // ---- Model memory limit ----
    if (skipMmlChecks === false) {
      const mml = job.analysis_limits && job.analysis_limits.model_memory_limit;
      const mmlUnitsResult = validateModelMemoryLimitUnits(mml as string | undefined);

      messages.push(...mmlUnitsResult.messages);
      valid = valid && mmlUnitsResult.valid;

      // Only compare against the maximum once the value is known to be well formed.
      if (mmlUnitsResult.valid) {
        const mmlLimitResult = validateModelMemoryLimit(job, limits);

        messages.push(...mmlLimitResult.messages);
        valid = valid && mmlLimitResult.valid;
      }
    }
  }

  return {
    messages,
    valid,
    contains: (id) => messages.some((message) => message.id === id),
    find: (id) => messages.find((message) => message.id === id),
  };
}