specification/inference/put_elser/PutElserRequest.ts

/*
 * Licensed to Elasticsearch B.V. under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch B.V. licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

import {
  ElserServiceSettings,
  ElserServiceType,
  ElserTaskType
} from '@inference/_types/CommonTypes'
import { InferenceChunkingSettings } from '@inference/_types/Services'
import { RequestBase } from '@_types/Base'
import { Id } from '@_types/common'

/**
 * Create an ELSER inference endpoint.
 *
 * Create an inference endpoint to perform an inference task with the `elser` service.
 * You can also deploy ELSER by using the Elasticsearch inference integration.
 *
 * > info
 * > Your Elasticsearch deployment contains a preconfigured ELSER inference endpoint; you only need to create an endpoint by using the API if you want to customize the settings.
 *
 * The API request will automatically download and deploy the ELSER model if it isn't already downloaded.
 *
 * > info
 * > You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout, while the model downloads in the background. You can check the download progress in the Machine Learning UI. If using the Python client, you can set the timeout parameter to a higher value.
 *
 * After creating the endpoint, wait for the model deployment to complete before using it.
 * To verify the deployment status, use the get trained model statistics API.
 * Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`.
 * Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.
 * @rest_spec_name inference.put_elser
 * @availability stack since=8.11.0 stability=stable visibility=public
 * @availability serverless stability=stable visibility=public
 * @deprecated 8.16.0 The elser service is deprecated and will be removed in a future release. Use the Elasticsearch inference integration instead, with model_id included in the service_settings.
 * @cluster_privileges manage_inference
 * @doc_id inference-api-put-elser
 */
export interface Request extends RequestBase {
  urls: [
    {
      path: '/_inference/{task_type}/{elser_inference_id}'
      methods: ['PUT']
    }
  ]
  path_parts: {
    /**
     * The type of the inference task that the model will perform.
     */
    task_type: ElserTaskType
    /**
     * The unique identifier of the inference endpoint.
     */
    elser_inference_id: Id
  }
  body: {
    /**
     * The chunking configuration object.
     * @ext_doc_id inference-chunking
     */
    chunking_settings?: InferenceChunkingSettings
    /**
     * The type of service supported for the specified task type. In this case, `elser`.
     */
    service: ElserServiceType
    /**
     * Settings used to install the inference model. These settings are specific to the `elser` service.
     */
    service_settings: ElserServiceSettings
  }
}
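
// A minimal usage sketch, not part of the specification itself: assuming the
// `elser` service accepts `num_allocations` and `num_threads` in its service
// settings, a request matching this interface could look like the following.
// The task type `sparse_embedding`, the endpoint identifier `my-elser-model`,
// and the numeric values are illustrative examples only.
//
//   PUT /_inference/sparse_embedding/my-elser-model
//   {
//     "service": "elser",
//     "service_settings": {
//       "num_allocations": 1,
//       "num_threads": 1
//     }
//   }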