specification/ml/put_data_frame_analytics/MlPutDataFrameAnalyticsRequest.ts (33 lines of code) (raw):
/*
* Licensed to Elasticsearch B.V. under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch B.V. licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
import {
DataframeAnalysisAnalyzedFields,
DataframeAnalysisContainer,
DataframeAnalyticsDestination,
DataframeAnalyticsSource
} from '@ml/_types/DataframeAnalytics'
import { RequestBase } from '@_types/Base'
import { HttpHeaders, Id, Metadata, VersionString } from '@_types/common'
import { integer } from '@_types/Numeric'
/**
* Create a data frame analytics job.
* This API creates a data frame analytics job that performs an analysis on the
* source indices and stores the outcome in a destination index.
* By default, the query used in the source configuration is `{"match_all": {}}`.
*
* If the destination index does not exist, it is created automatically when you start the job.
*
* If you supply only a subset of the regression or classification parameters, hyperparameter optimization occurs. It determines a value for each of the undefined parameters.
* @rest_spec_name ml.put_data_frame_analytics
* @availability stack since=7.3.0 stability=stable
* @availability serverless stability=stable visibility=public
* @cluster_privileges manage_ml
* @index_privileges create_index, index, manage, read, view_index_metadata
* @doc_id put-dfanalytics
* @doc_tag ml data frame
*/
export interface Request extends RequestBase {
urls: [
{
path: '/_ml/data_frame/analytics/{id}'
methods: ['PUT']
}
]
path_parts: {
/**
* Identifier for the data frame analytics job. This identifier can contain
* lowercase alphanumeric characters (a-z and 0-9), hyphens, and
* underscores. It must start and end with alphanumeric characters.
*/
id: Id
}
body: {
/**
* Specifies whether this job can start when there is insufficient machine
* learning node capacity for it to be immediately assigned to a node. If
* set to `false` and a machine learning node with capacity to run the job
* cannot be immediately found, the API returns an error. If set to `true`,
* the API does not return an error; the job waits in the `starting` state
* until sufficient machine learning node capacity is available. This
* behavior is also affected by the cluster-wide
* `xpack.ml.max_lazy_ml_nodes` setting.
* @server_default false
* @doc_id ml-settings
*/
allow_lazy_start?: boolean
/**
* The analysis configuration, which contains the information necessary to
* perform one of the following types of analysis: classification, outlier
* detection, or regression.
*/
analysis: DataframeAnalysisContainer
/**
* Specifies `includes` and/or `excludes` patterns to select which fields
* will be included in the analysis. The patterns specified in `excludes`
* are applied last, therefore `excludes` takes precedence. In other words,
* if the same field is specified in both `includes` and `excludes`, then
* the field will not be included in the analysis. If `analyzed_fields` is
* not set, only the relevant fields will be included. For example, all the
* numeric fields for outlier detection.
* The supported fields vary for each type of analysis. Outlier detection
* requires numeric or `boolean` data to analyze. The algorithms don’t
* support missing values therefore fields that have data types other than
* numeric or boolean are ignored. Documents where included fields contain
* missing values, null values, or an array are also ignored. Therefore the
* `dest` index may contain documents that don’t have an outlier score.
* Regression supports fields that are numeric, `boolean`, `text`,
* `keyword`, and `ip` data types. It is also tolerant of missing values.
* Fields that are supported are included in the analysis, other fields are
* ignored. Documents where included fields contain an array with two or
* more values are also ignored. Documents in the `dest` index that don’t
* contain a results field are not included in the regression analysis.
* Classification supports fields that are numeric, `boolean`, `text`,
* `keyword`, and `ip` data types. It is also tolerant of missing values.
* Fields that are supported are included in the analysis, other fields are
* ignored. Documents where included fields contain an array with two or
* more values are also ignored. Documents in the `dest` index that don’t
* contain a results field are not included in the classification analysis.
* Classification analysis can be improved by mapping ordinal variable
* values to a single number. For example, in case of age ranges, you can
* model the values as `0-14 = 0`, `15-24 = 1`, `25-34 = 2`, and so on.
*/
analyzed_fields?: DataframeAnalysisAnalyzedFields
/**
* A description of the job.
*/
description?: string
/**
* The destination configuration.
*/
dest: DataframeAnalyticsDestination
/**
* The maximum number of threads to be used by the analysis. Using more
* threads may decrease the time necessary to complete the analysis at the
* cost of using more CPU. Note that the process may use additional threads
* for operational functionality other than the analysis itself.
* @server_default 1
*/
max_num_threads?: integer
_meta?: Metadata
/**
* The approximate maximum amount of memory resources that are permitted for
* analytical processing. If your `elasticsearch.yml` file contains an
* `xpack.ml.max_model_memory_limit` setting, an error occurs when you try
* to create data frame analytics jobs that have `model_memory_limit` values
* greater than that setting.
* @server_default 1gb
*/
model_memory_limit?: string
/**
* The configuration of how to source the analysis data.
*/
source: DataframeAnalyticsSource
/**
* @availability stack since=8.0.0
* @availability serverless
*/
headers?: HttpHeaders
/**
* @availability stack since=7.16.0
* @availability serverless
*/
version?: VersionString
}
}