specification/ml/put_datafeed/MlPutDatafeedRequest.ts

/*
 * Licensed to Elasticsearch B.V. under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch B.V. licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

import { ChunkingConfig, DelayedDataCheckConfig } from '@ml/_types/Datafeed'
import { Dictionary } from '@spec_utils/Dictionary'
import { AggregationContainer } from '@_types/aggregations/AggregationContainer'
import { RequestBase } from '@_types/Base'
import {
  ExpandWildcards,
  HttpHeaders,
  Id,
  Indices,
  IndicesOptions
} from '@_types/common'
import { RuntimeFields } from '@_types/mapping/RuntimeFields'
import { integer } from '@_types/Numeric'
import { QueryContainer } from '@_types/query_dsl/abstractions'
import { ScriptField } from '@_types/Scripting'
import { Duration } from '@_types/Time'

/**
 * Create a datafeed.
 * Datafeeds retrieve data from Elasticsearch for analysis by an anomaly detection job.
 * You can associate only one datafeed with each anomaly detection job.
 * The datafeed contains a query that runs at a defined interval (`frequency`).
 * If you are concerned about delayed data, you can add a delay (`query_delay`) at each interval.
 * By default, the datafeed uses the following query: `{"match_all": {"boost": 1}}`.
 *
 * When Elasticsearch security features are enabled, your datafeed remembers which roles the user who created it had
 * at the time of creation and runs the query using those same roles. If you provide secondary authorization headers,
 * those credentials are used instead.
 *
 * You must use Kibana, this API, or the create anomaly detection jobs API to create a datafeed. Do not add a datafeed
 * directly to the `.ml-config` index. Do not give users `write` privileges on the `.ml-config` index.
 * @rest_spec_name ml.put_datafeed
 * @availability stack since=5.4.0 stability=stable
 * @availability serverless stability=stable visibility=public
 * @index_privileges read
 * @cluster_privileges manage_ml
 * @doc_tag ml anomaly
 * @doc_id ml-put-datafeed
 */
export interface Request extends RequestBase {
  urls: [
    {
      path: '/_ml/datafeeds/{datafeed_id}'
      methods: ['PUT']
    }
  ]
  path_parts: {
    /**
     * A numerical character string that uniquely identifies the datafeed.
     * This identifier can contain lowercase alphanumeric characters (a-z and 0-9), hyphens, and underscores.
     * It must start and end with alphanumeric characters.
     */
    datafeed_id: Id
  }
  query_parameters: {
    /**
     * If true, wildcard indices expressions that resolve into no concrete indices are ignored. This includes the `_all`
     * string or when no indices are specified.
     * @server_default true
     */
    allow_no_indices?: boolean
    /**
     * Type of index that wildcard patterns can match. If the request can target data streams, this argument determines
     * whether wildcard expressions match hidden data streams. Supports comma-separated values.
     * @server_default open
     */
    expand_wildcards?: ExpandWildcards
    /**
     * If true, concrete, expanded, or aliased indices are ignored when frozen.
     * @server_default true
     * @deprecated 7.16.0
     */
    ignore_throttled?: boolean
    /**
     * If true, unavailable indices (missing or closed) are ignored.
     * @server_default false
     */
    ignore_unavailable?: boolean
  }
  body: {
    /**
     * If set, the datafeed performs aggregation searches.
     * Support for aggregations is limited and should be used only with low cardinality data.
     * @aliases aggs
     */
    aggregations?: Dictionary<string, AggregationContainer>
    /**
     * Datafeeds might be required to search over long time periods, for several months or years.
     * This search is split into time chunks in order to ensure the load on Elasticsearch is managed.
     * Chunking configuration controls how the size of these time chunks is calculated;
     * it is an advanced configuration option.
     */
    chunking_config?: ChunkingConfig
    /**
     * Specifies whether the datafeed checks for missing data and the size of the window.
     * The datafeed can optionally search over indices that have already been read in an effort to determine whether
     * any data has subsequently been added to the index. If missing data is found, it is a good indication that the
     * `query_delay` is set too low and the data is being indexed after the datafeed has passed that moment in time.
     * This check runs only on real-time datafeeds.
     */
    delayed_data_check_config?: DelayedDataCheckConfig
    /**
     * The interval at which scheduled queries are made while the datafeed runs in real time.
     * The default value is either the bucket span for short bucket spans, or, for longer bucket spans, a sensible
     * fraction of the bucket span. When `frequency` is shorter than the bucket span, interim results for the last
     * (partial) bucket are written then eventually overwritten by the full bucket results. If the datafeed uses
     * aggregations, this value must be divisible by the interval of the date histogram aggregation.
     */
    frequency?: Duration
    /**
     * An array of index names. Wildcards are supported. If any of the indices are in remote clusters, the master
     * nodes and the machine learning nodes must have the `remote_cluster_client` role.
     * @aliases indexes
     */
    indices?: Indices
    /**
     * Specifies index expansion options that are used during search.
     */
    indices_options?: IndicesOptions
    /**
     * Identifier for the anomaly detection job.
     */
    job_id?: Id
    /**
     * If a real-time datafeed has never seen any data (including during any initial training period), it automatically
     * stops and closes the associated job after this many real-time searches return no documents. In other words,
     * it stops after `frequency` times `max_empty_searches` of real-time operation. If not set, a datafeed with no
     * end time that sees no data remains started until it is explicitly stopped. By default, it is not set.
     */
    max_empty_searches?: integer
    /**
     * The Elasticsearch query domain-specific language (DSL). This value corresponds to the query object in an
     * Elasticsearch search POST body. All the options that are supported by Elasticsearch can be used, as this
     * object is passed verbatim to Elasticsearch.
     * @server_default {"match_all": {"boost": 1}}
     */
    query?: QueryContainer
    /**
     * The number of seconds behind real time that data is queried. For example, if data from 10:04 a.m. might
     * not be searchable in Elasticsearch until 10:06 a.m., set this property to 120 seconds. The default
     * value is randomly selected between `60s` and `120s`. This randomness improves the query performance
     * when there are multiple jobs running on the same node.
     */
    query_delay?: Duration
    /**
     * Specifies runtime fields for the datafeed search.
     */
    runtime_mappings?: RuntimeFields
    /**
     * Specifies scripts that evaluate custom expressions and return script fields to the datafeed.
     * The detector configuration objects in a job can contain functions that use these script fields.
     */
    script_fields?: Dictionary<string, ScriptField>
    /**
     * The size parameter that is used in Elasticsearch searches when the datafeed does not use aggregations.
     * The maximum value is the value of `index.max_result_window`, which is 10,000 by default.
     * @server_default 1000
     */
    scroll_size?: integer
    // NOTE(review): this property carries availability tags but no description;
    // its intended semantics are not stated in this file — confirm and document.
    /**
     * @availability stack since=8.0.0
     * @availability serverless
     */
    headers?: HttpHeaders
  }
}

specification/ml/put_datafeed/MlPutDatafeedRequest.ts (49 lines of code) (raw):