diff --git a/elasticsearch/_async/client/__init__.py b/elasticsearch/_async/client/__init__.py index b5a5d9d71..20fc9cfb7 100644 --- a/elasticsearch/_async/client/__init__.py +++ b/elasticsearch/_async/client/__init__.py @@ -1121,12 +1121,17 @@ async def create( error_trace: t.Optional[bool] = None, filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, human: t.Optional[bool] = None, + if_primary_term: t.Optional[int] = None, + if_seq_no: t.Optional[int] = None, include_source_on_error: t.Optional[bool] = None, + op_type: t.Optional[t.Union[str, t.Literal["create", "index"]]] = None, pipeline: t.Optional[str] = None, pretty: t.Optional[bool] = None, refresh: t.Optional[ t.Union[bool, str, t.Literal["false", "true", "wait_for"]] ] = None, + require_alias: t.Optional[bool] = None, + require_data_stream: t.Optional[bool] = None, routing: t.Optional[str] = None, timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None, version: t.Optional[int] = None, @@ -1204,8 +1209,18 @@ async def create( :param id: A unique identifier for the document. To automatically generate a document ID, use the `POST //_doc/` request format. :param document: + :param if_primary_term: Only perform the operation if the document has this primary + term. + :param if_seq_no: Only perform the operation if the document has this sequence + number. :param include_source_on_error: True or false if to include the document source in the error message in case of parsing errors. + :param op_type: Set to `create` to only index the document if it does not already + exist (put if absent). If a document with the specified `_id` already exists, + the indexing operation will fail. The behavior is the same as using the `/_create` + endpoint. If a document ID is specified, this parameter defaults to `index`. + Otherwise, it defaults to `create`. If the request targets a data stream, + an `op_type` of `create` is required. 
:param pipeline: The ID of the pipeline to use to preprocess incoming documents. If the index has a default ingest pipeline specified, setting the value to `_none` turns off the default ingest pipeline for this request. If a final @@ -1214,6 +1229,9 @@ async def create( :param refresh: If `true`, Elasticsearch refreshes the affected shards to make this operation visible to search. If `wait_for`, it waits for a refresh to make this operation visible to search. If `false`, it does nothing with refreshes. + :param require_alias: If `true`, the destination must be an index alias. + :param require_data_stream: If `true`, the request's actions must target a data + stream (existing or to be created). :param routing: A custom value that is used to route operations to a specific shard. :param timeout: The period the request waits for the following operations: automatic @@ -1254,14 +1272,24 @@ async def create( __query["filter_path"] = filter_path if human is not None: __query["human"] = human + if if_primary_term is not None: + __query["if_primary_term"] = if_primary_term + if if_seq_no is not None: + __query["if_seq_no"] = if_seq_no if include_source_on_error is not None: __query["include_source_on_error"] = include_source_on_error + if op_type is not None: + __query["op_type"] = op_type if pipeline is not None: __query["pipeline"] = pipeline if pretty is not None: __query["pretty"] = pretty if refresh is not None: __query["refresh"] = refresh + if require_alias is not None: + __query["require_alias"] = require_alias + if require_data_stream is not None: + __query["require_data_stream"] = require_data_stream if routing is not None: __query["routing"] = routing if timeout is not None: @@ -1553,7 +1581,7 @@ async def delete_by_query( If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. It supports comma-separated values, such as `open,hidden`. 
- :param from_: Starting offset (default: 0) + :param from_: Skips the specified number of documents. :param ignore_unavailable: If `false`, the request returns an error if it targets a missing or closed index. :param lenient: If `true`, format-based query failures (such as providing text @@ -3720,6 +3748,7 @@ async def open_point_in_time( human: t.Optional[bool] = None, ignore_unavailable: t.Optional[bool] = None, index_filter: t.Optional[t.Mapping[str, t.Any]] = None, + max_concurrent_shard_requests: t.Optional[int] = None, preference: t.Optional[str] = None, pretty: t.Optional[bool] = None, routing: t.Optional[str] = None, @@ -3775,6 +3804,8 @@ async def open_point_in_time( a missing or closed index. :param index_filter: Filter indices if the provided query rewrites to `match_none` on every shard. + :param max_concurrent_shard_requests: Maximum number of concurrent shard requests + that each sub-search request executes per node. :param preference: The node or shard the operation should be performed on. By default, it is random. :param routing: A custom value that is used to route operations to a specific @@ -3802,6 +3833,8 @@ async def open_point_in_time( __query["human"] = human if ignore_unavailable is not None: __query["ignore_unavailable"] = ignore_unavailable + if max_concurrent_shard_requests is not None: + __query["max_concurrent_shard_requests"] = max_concurrent_shard_requests if preference is not None: __query["preference"] = preference if pretty is not None: @@ -5973,7 +6006,20 @@ async def terms_enum( ) @_rewrite_parameters( - body_fields=("doc", "filter", "per_field_analyzer"), + body_fields=( + "doc", + "field_statistics", + "fields", + "filter", + "offsets", + "payloads", + "per_field_analyzer", + "positions", + "routing", + "term_statistics", + "version", + "version_type", + ), ) async def termvectors( self, @@ -6050,9 +6096,9 @@ async def termvectors( (the sum of document frequencies for all terms in this field). 
* The sum of total term frequencies (the sum of total term frequencies of each term in this field). - :param fields: A comma-separated list or wildcard expressions of fields to include - in the statistics. It is used as the default list unless a specific field - list is provided in the `completion_fields` or `fielddata_fields` parameters. + :param fields: A list of fields to include in the statistics. It is used as the + default list unless a specific field list is provided in the `completion_fields` + or `fielddata_fields` parameters. :param filter: Filter terms based on their tf-idf scores. This could be useful in order find out a good characteristic vector of a document. This feature works in a similar manner to the second phase of the More Like This Query. @@ -6090,41 +6136,41 @@ async def termvectors( __body: t.Dict[str, t.Any] = body if body is not None else {} if error_trace is not None: __query["error_trace"] = error_trace - if field_statistics is not None: - __query["field_statistics"] = field_statistics - if fields is not None: - __query["fields"] = fields if filter_path is not None: __query["filter_path"] = filter_path if human is not None: __query["human"] = human - if offsets is not None: - __query["offsets"] = offsets - if payloads is not None: - __query["payloads"] = payloads - if positions is not None: - __query["positions"] = positions if preference is not None: __query["preference"] = preference if pretty is not None: __query["pretty"] = pretty if realtime is not None: __query["realtime"] = realtime - if routing is not None: - __query["routing"] = routing - if term_statistics is not None: - __query["term_statistics"] = term_statistics - if version is not None: - __query["version"] = version - if version_type is not None: - __query["version_type"] = version_type if not __body: if doc is not None: __body["doc"] = doc + if field_statistics is not None: + __body["field_statistics"] = field_statistics + if fields is not None: + __body["fields"] = 
fields if filter is not None: __body["filter"] = filter + if offsets is not None: + __body["offsets"] = offsets + if payloads is not None: + __body["payloads"] = payloads if per_field_analyzer is not None: __body["per_field_analyzer"] = per_field_analyzer + if positions is not None: + __body["positions"] = positions + if routing is not None: + __body["routing"] = routing + if term_statistics is not None: + __body["term_statistics"] = term_statistics + if version is not None: + __body["version"] = version + if version_type is not None: + __body["version_type"] = version_type if not __body: __body = None # type: ignore[assignment] __headers = {"accept": "application/json"} @@ -6475,7 +6521,7 @@ async def update_by_query( wildcard expressions match hidden data streams. It supports comma-separated values, such as `open,hidden`. Valid values are: `all`, `open`, `closed`, `hidden`, `none`. - :param from_: Starting offset (default: 0) + :param from_: Skips the specified number of documents. :param ignore_unavailable: If `false`, the request returns an error if it targets a missing or closed index. 
:param lenient: If `true`, format-based query failures (such as providing text diff --git a/elasticsearch/_async/client/esql.py b/elasticsearch/_async/client/esql.py index fc4404a17..6e3e00524 100644 --- a/elasticsearch/_async/client/esql.py +++ b/elasticsearch/_async/client/esql.py @@ -35,6 +35,7 @@ class EsqlClient(NamespacedClient): "params", "profile", "tables", + "wait_for_completion_timeout", ), ignore_deprecated_options={"params"}, ) @@ -152,8 +153,6 @@ async def async_query( __query["keep_on_completion"] = keep_on_completion if pretty is not None: __query["pretty"] = pretty - if wait_for_completion_timeout is not None: - __query["wait_for_completion_timeout"] = wait_for_completion_timeout if not __body: if query is not None: __body["query"] = query @@ -171,6 +170,8 @@ async def async_query( __body["profile"] = profile if tables is not None: __body["tables"] = tables + if wait_for_completion_timeout is not None: + __body["wait_for_completion_timeout"] = wait_for_completion_timeout __headers = {"accept": "application/json", "content-type": "application/json"} return await self.perform_request( # type: ignore[return-value] "POST", diff --git a/elasticsearch/_async/client/indices.py b/elasticsearch/_async/client/indices.py index 22699684b..be758d06f 100644 --- a/elasticsearch/_async/client/indices.py +++ b/elasticsearch/_async/client/indices.py @@ -1621,7 +1621,9 @@ async def exists_index_template( name: str, error_trace: t.Optional[bool] = None, filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + flat_settings: t.Optional[bool] = None, human: t.Optional[bool] = None, + local: t.Optional[bool] = None, master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None, pretty: t.Optional[bool] = None, ) -> HeadApiResponse: @@ -1636,6 +1638,10 @@ async def exists_index_template( :param name: Comma-separated list of index template names used to limit the request. Wildcard (*) expressions are supported. 
+ :param flat_settings: If true, returns settings in flat format. + :param local: If true, the request retrieves information from the local node + only. Defaults to false, which means information is retrieved from the master + node. :param master_timeout: Period to wait for a connection to the master node. If no response is received before the timeout expires, the request fails and returns an error. @@ -1649,8 +1655,12 @@ async def exists_index_template( __query["error_trace"] = error_trace if filter_path is not None: __query["filter_path"] = filter_path + if flat_settings is not None: + __query["flat_settings"] = flat_settings if human is not None: __query["human"] = human + if local is not None: + __query["local"] = local if master_timeout is not None: __query["master_timeout"] = master_timeout if pretty is not None: @@ -1800,9 +1810,6 @@ async def field_usage_stats( human: t.Optional[bool] = None, ignore_unavailable: t.Optional[bool] = None, pretty: t.Optional[bool] = None, - wait_for_active_shards: t.Optional[ - t.Union[int, t.Union[str, t.Literal["all", "index-setting"]]] - ] = None, ) -> ObjectApiResponse[t.Any]: """ .. raw:: html @@ -1832,9 +1839,6 @@ async def field_usage_stats( in the statistics. :param ignore_unavailable: If `true`, missing or closed indices are not included in the response. - :param wait_for_active_shards: The number of shard copies that must be active - before proceeding with the operation. Set to all or any positive integer - up to the total number of shards in the index (`number_of_replicas+1`). 
""" if index in SKIP_IN_PATH: raise ValueError("Empty value passed for parameter 'index'") @@ -1857,8 +1861,6 @@ async def field_usage_stats( __query["ignore_unavailable"] = ignore_unavailable if pretty is not None: __query["pretty"] = pretty - if wait_for_active_shards is not None: - __query["wait_for_active_shards"] = wait_for_active_shards __headers = {"accept": "application/json"} return await self.perform_request( # type: ignore[return-value] "GET", @@ -3838,6 +3840,7 @@ async def put_settings( master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None, preserve_existing: t.Optional[bool] = None, pretty: t.Optional[bool] = None, + reopen: t.Optional[bool] = None, timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None, ) -> ObjectApiResponse[t.Any]: """ @@ -3880,6 +3883,9 @@ async def put_settings( no response is received before the timeout expires, the request fails and returns an error. :param preserve_existing: If `true`, existing index settings remain unchanged. + :param reopen: Whether to close and reopen the index to apply non-dynamic settings. + If set to `true` the indices to which the settings are being applied will + be closed temporarily and then reopened in order to apply the changes. :param timeout: Period to wait for a response. If no response is received before the timeout expires, the request fails and returns an error. """ @@ -3917,6 +3923,8 @@ async def put_settings( __query["preserve_existing"] = preserve_existing if pretty is not None: __query["pretty"] = pretty + if reopen is not None: + __query["reopen"] = reopen if timeout is not None: __query["timeout"] = timeout __body = settings if settings is not None else body @@ -3984,7 +3992,7 @@ async def put_template( :param name: The name of the template :param aliases: Aliases for the index. 
- :param cause: + :param cause: User defined reason for creating/updating the index template :param create: If true, this request cannot replace or update existing index templates. :param index_patterns: Array of wildcard expressions used to match the names @@ -4222,6 +4230,7 @@ async def reload_search_analyzers( human: t.Optional[bool] = None, ignore_unavailable: t.Optional[bool] = None, pretty: t.Optional[bool] = None, + resource: t.Optional[str] = None, ) -> ObjectApiResponse[t.Any]: """ .. raw:: html @@ -4249,6 +4258,7 @@ async def reload_search_analyzers( that are open, closed or both. :param ignore_unavailable: Whether specified concrete indices should be ignored when unavailable (missing or closed) + :param resource: Changed resource to reload analyzers from if applicable """ if index in SKIP_IN_PATH: raise ValueError("Empty value passed for parameter 'index'") @@ -4269,6 +4279,8 @@ async def reload_search_analyzers( __query["ignore_unavailable"] = ignore_unavailable if pretty is not None: __query["pretty"] = pretty + if resource is not None: + __query["resource"] = resource __headers = {"accept": "application/json"} return await self.perform_request( # type: ignore[return-value] "POST", @@ -4505,6 +4517,7 @@ async def rollover( error_trace: t.Optional[bool] = None, filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, human: t.Optional[bool] = None, + lazy: t.Optional[bool] = None, mappings: t.Optional[t.Mapping[str, t.Any]] = None, master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None, pretty: t.Optional[bool] = None, @@ -4561,6 +4574,9 @@ async def rollover( conditions are satisfied. :param dry_run: If `true`, checks whether the current index satisfies the specified conditions but does not perform a rollover. + :param lazy: If set to true, the rollover action will only mark a data stream + to signal that it needs to be rolled over at the next write. Only allowed + on data streams. 
:param mappings: Mapping for fields in the index. If specified, this mapping can include field names, field data types, and mapping paramaters. :param master_timeout: Period to wait for a connection to the master node. If @@ -4595,6 +4611,8 @@ async def rollover( __query["filter_path"] = filter_path if human is not None: __query["human"] = human + if lazy is not None: + __query["lazy"] = lazy if master_timeout is not None: __query["master_timeout"] = master_timeout if pretty is not None: @@ -4915,6 +4933,8 @@ async def simulate_index_template( self, *, name: str, + cause: t.Optional[str] = None, + create: t.Optional[bool] = None, error_trace: t.Optional[bool] = None, filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, human: t.Optional[bool] = None, @@ -4932,6 +4952,10 @@ async def simulate_index_template( ``_ :param name: Name of the index to simulate + :param cause: User defined reason for dry-run creating the new template for simulation + purposes + :param create: Whether the index template we optionally defined in the body should + only be dry-run added if new or can also replace an existing one :param include_defaults: If true, returns all relevant default configurations for the index template. :param master_timeout: Period to wait for a connection to the master node. 
If @@ -4943,6 +4967,10 @@ async def simulate_index_template( __path_parts: t.Dict[str, str] = {"name": _quote(name)} __path = f'/_index_template/_simulate_index/{__path_parts["name"]}' __query: t.Dict[str, t.Any] = {} + if cause is not None: + __query["cause"] = cause + if create is not None: + __query["create"] = create if error_trace is not None: __query["error_trace"] = error_trace if filter_path is not None: @@ -4985,6 +5013,7 @@ async def simulate_template( *, name: t.Optional[str] = None, allow_auto_create: t.Optional[bool] = None, + cause: t.Optional[str] = None, composed_of: t.Optional[t.Sequence[str]] = None, create: t.Optional[bool] = None, data_stream: t.Optional[t.Mapping[str, t.Any]] = None, @@ -5021,6 +5050,8 @@ async def simulate_template( via `actions.auto_create_index`. If set to `false`, then indices or data streams matching the template must always be explicitly created, and may never be automatically created. + :param cause: User defined reason for dry-run creating the new template for simulation + purposes :param composed_of: An ordered list of component template names. Component templates are merged in the order specified, meaning that the last component template specified has the highest precedence. 
@@ -5065,6 +5096,8 @@ async def simulate_template( __path = "/_index_template/_simulate" __query: t.Dict[str, t.Any] = {} __body: t.Dict[str, t.Any] = body if body is not None else {} + if cause is not None: + __query["cause"] = cause if create is not None: __query["create"] = create if error_trace is not None: diff --git a/elasticsearch/_async/client/inference.py b/elasticsearch/_async/client/inference.py index 7c7218558..7b5a8906d 100644 --- a/elasticsearch/_async/client/inference.py +++ b/elasticsearch/_async/client/inference.py @@ -234,6 +234,67 @@ async def get( path_parts=__path_parts, ) + @_rewrite_parameters( + body_name="chat_completion_request", + ) + async def post_eis_chat_completion( + self, + *, + eis_inference_id: str, + chat_completion_request: t.Optional[t.Mapping[str, t.Any]] = None, + body: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Perform a chat completion task through the Elastic Inference Service (EIS).

+

Perform a chat completion inference task with the elastic service.

+ + + ``_ + + :param eis_inference_id: The unique identifier of the inference endpoint. + :param chat_completion_request: + """ + if eis_inference_id in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'eis_inference_id'") + if chat_completion_request is None and body is None: + raise ValueError( + "Empty value passed for parameters 'chat_completion_request' and 'body', one of them should be set." + ) + elif chat_completion_request is not None and body is not None: + raise ValueError("Cannot set both 'chat_completion_request' and 'body'") + __path_parts: t.Dict[str, str] = {"eis_inference_id": _quote(eis_inference_id)} + __path = ( + f'/_inference/chat_completion/{__path_parts["eis_inference_id"]}/_stream' + ) + __query: t.Dict[str, t.Any] = {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + __body = ( + chat_completion_request if chat_completion_request is not None else body + ) + __headers = {"accept": "application/json", "content-type": "application/json"} + return await self.perform_request( # type: ignore[return-value] + "POST", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.post_eis_chat_completion", + path_parts=__path_parts, + ) + @_rewrite_parameters( body_name="inference_config", ) @@ -321,6 +382,1522 @@ async def put( path_parts=__path_parts, ) + @_rewrite_parameters( + body_fields=( + "service", + "service_settings", + "chunking_settings", + "task_settings", + ), + ) + async def put_alibabacloud( + self, + *, + task_type: t.Union[ + str, t.Literal["completion", "rerank", "space_embedding", "text_embedding"] + ], + alibabacloud_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["alibabacloud-ai-search"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + 
chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + task_settings: t.Optional[t.Mapping[str, t.Any]] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create an AlibabaCloud AI Search inference endpoint.

+

Create an inference endpoint to perform an inference task with the alibabacloud-ai-search service.

+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + :param alibabacloud_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `alibabacloud-ai-search`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `alibabacloud-ai-search` service. + :param chunking_settings: The chunking configuration object. + :param task_settings: Settings to configure the inference task. These settings + are specific to the task type you specified. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if alibabacloud_inference_id in SKIP_IN_PATH: + raise ValueError( + "Empty value passed for parameter 'alibabacloud_inference_id'" + ) + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "alibabacloud_inference_id": _quote(alibabacloud_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["alibabacloud_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if task_settings is not None: + __body["task_settings"] = task_settings + 
if not __body: + __body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return await self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_alibabacloud", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=( + "service", + "service_settings", + "chunking_settings", + "task_settings", + ), + ) + async def put_amazonbedrock( + self, + *, + task_type: t.Union[str, t.Literal["completion", "text_embedding"]], + amazonbedrock_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["amazonbedrock"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + task_settings: t.Optional[t.Mapping[str, t.Any]] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create an Amazon Bedrock inference endpoint.

+

Creates an inference endpoint to perform an inference task with the amazonbedrock service.

+
+

info + You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.

+
+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + :param amazonbedrock_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `amazonbedrock`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `amazonbedrock` service. + :param chunking_settings: The chunking configuration object. + :param task_settings: Settings to configure the inference task. These settings + are specific to the task type you specified. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if amazonbedrock_inference_id in SKIP_IN_PATH: + raise ValueError( + "Empty value passed for parameter 'amazonbedrock_inference_id'" + ) + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "amazonbedrock_inference_id": _quote(amazonbedrock_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["amazonbedrock_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if task_settings is not None: + __body["task_settings"] = task_settings + if not 
__body: + __body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return await self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_amazonbedrock", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=( + "service", + "service_settings", + "chunking_settings", + "task_settings", + ), + ) + async def put_anthropic( + self, + *, + task_type: t.Union[str, t.Literal["completion"]], + anthropic_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["anthropic"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + task_settings: t.Optional[t.Mapping[str, t.Any]] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create an Anthropic inference endpoint.

+

Create an inference endpoint to perform an inference task with the anthropic service.

+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The task type. The only valid task type for the model to perform + is `completion`. + :param anthropic_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `anthropic`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `anthropic` service. + :param chunking_settings: The chunking configuration object. + :param task_settings: Settings to configure the inference task. These settings + are specific to the task type you specified. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if anthropic_inference_id in SKIP_IN_PATH: + raise ValueError( + "Empty value passed for parameter 'anthropic_inference_id'" + ) + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "anthropic_inference_id": _quote(anthropic_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["anthropic_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if task_settings is not None: + __body["task_settings"] = task_settings + if not __body: + 
__body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return await self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_anthropic", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=( + "service", + "service_settings", + "chunking_settings", + "task_settings", + ), + ) + async def put_azureaistudio( + self, + *, + task_type: t.Union[str, t.Literal["completion", "text_embedding"]], + azureaistudio_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["azureaistudio"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + task_settings: t.Optional[t.Mapping[str, t.Any]] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create an Azure AI Studio inference endpoint.

+

Create an inference endpoint to perform an inference task with the azureaistudio service.

+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + :param azureaistudio_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `azureaistudio`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `openai` service. + :param chunking_settings: The chunking configuration object. + :param task_settings: Settings to configure the inference task. These settings + are specific to the task type you specified. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if azureaistudio_inference_id in SKIP_IN_PATH: + raise ValueError( + "Empty value passed for parameter 'azureaistudio_inference_id'" + ) + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "azureaistudio_inference_id": _quote(azureaistudio_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["azureaistudio_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if task_settings is not None: + __body["task_settings"] = task_settings + if not __body: + 
__body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return await self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_azureaistudio", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=( + "service", + "service_settings", + "chunking_settings", + "task_settings", + ), + ) + async def put_azureopenai( + self, + *, + task_type: t.Union[str, t.Literal["completion", "text_embedding"]], + azureopenai_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["azureopenai"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + task_settings: t.Optional[t.Mapping[str, t.Any]] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create an Azure OpenAI inference endpoint.

+

Create an inference endpoint to perform an inference task with the azureopenai service.

+

The chat completion models that you can choose from in your Azure OpenAI deployment include:

+ +

The list of embeddings models that you can choose from in your deployment can be found in the Azure models documentation.

+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + NOTE: The `chat_completion` task type only supports streaming and only through + the _stream API. + :param azureopenai_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `azureopenai`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `azureopenai` service. + :param chunking_settings: The chunking configuration object. + :param task_settings: Settings to configure the inference task. These settings + are specific to the task type you specified. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if azureopenai_inference_id in SKIP_IN_PATH: + raise ValueError( + "Empty value passed for parameter 'azureopenai_inference_id'" + ) + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "azureopenai_inference_id": _quote(azureopenai_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["azureopenai_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if 
task_settings is not None: + __body["task_settings"] = task_settings + if not __body: + __body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return await self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_azureopenai", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=( + "service", + "service_settings", + "chunking_settings", + "task_settings", + ), + ) + async def put_cohere( + self, + *, + task_type: t.Union[str, t.Literal["completion", "rerank", "text_embedding"]], + cohere_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["cohere"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + task_settings: t.Optional[t.Mapping[str, t.Any]] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create a Cohere inference endpoint.

+

Create an inference endpoint to perform an inference task with the cohere service.

+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + :param cohere_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `cohere`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `cohere` service. + :param chunking_settings: The chunking configuration object. + :param task_settings: Settings to configure the inference task. These settings + are specific to the task type you specified. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if cohere_inference_id in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'cohere_inference_id'") + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "cohere_inference_id": _quote(cohere_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["cohere_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if task_settings is not None: + __body["task_settings"] = task_settings + if not __body: + __body = None # type: ignore[assignment] + __headers = 
{"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return await self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_cohere", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=("service", "service_settings"), + ) + async def put_eis( + self, + *, + task_type: t.Union[str, t.Literal["chat_completion"]], + eis_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["elastic"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create an Elastic Inference Service (EIS) inference endpoint.

+

Create an inference endpoint to perform an inference task through the Elastic Inference Service (EIS).

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + NOTE: The `chat_completion` task type only supports streaming and only through + the _stream API. + :param eis_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `elastic`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `elastic` service. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if eis_inference_id in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'eis_inference_id'") + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "eis_inference_id": _quote(eis_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["eis_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if not __body: + __body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return await self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + 
endpoint_id="inference.put_eis", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=( + "service", + "service_settings", + "chunking_settings", + "task_settings", + ), + ) + async def put_elasticsearch( + self, + *, + task_type: t.Union[ + str, t.Literal["rerank", "sparse_embedding", "text_embedding"] + ], + elasticsearch_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["elasticsearch"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + task_settings: t.Optional[t.Mapping[str, t.Any]] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create an Elasticsearch inference endpoint.

+

Create an inference endpoint to perform an inference task with the elasticsearch service.

+
+

info + Your Elasticsearch deployment contains preconfigured ELSER and E5 inference endpoints; you only need to create the endpoints using the API if you want to customize the settings.

+
+

If you use the ELSER or the E5 model through the elasticsearch service, the API request will automatically download and deploy the model if it isn't downloaded yet.

+
+

info + You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout, while the model downloads in the background. You can check the download progress in the Machine Learning UI. If using the Python client, you can set the timeout parameter to a higher value.

+
+

After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + :param elasticsearch_inference_id: The unique identifier of the inference endpoint. + The must not match the `model_id`. + :param service: The type of service supported for the specified task type. In + this case, `elasticsearch`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `elasticsearch` service. + :param chunking_settings: The chunking configuration object. + :param task_settings: Settings to configure the inference task. These settings + are specific to the task type you specified. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if elasticsearch_inference_id in SKIP_IN_PATH: + raise ValueError( + "Empty value passed for parameter 'elasticsearch_inference_id'" + ) + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "elasticsearch_inference_id": _quote(elasticsearch_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["elasticsearch_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if task_settings is not None: + 
__body["task_settings"] = task_settings + if not __body: + __body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return await self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_elasticsearch", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=("service", "service_settings", "chunking_settings"), + ) + async def put_elser( + self, + *, + task_type: t.Union[str, t.Literal["sparse_embedding"]], + elser_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["elser"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create an ELSER inference endpoint.

+

Create an inference endpoint to perform an inference task with the elser service. + You can also deploy ELSER by using the Elasticsearch inference integration.

+
+

info + Your Elasticsearch deployment contains a preconfigured ELSER inference endpoint; you only need to create the endpoint using the API if you want to customize the settings.

+
+

The API request will automatically download and deploy the ELSER model if it isn't already downloaded.

+
+

info + You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout, while the model downloads in the background. You can check the download progress in the Machine Learning UI. If using the Python client, you can set the timeout parameter to a higher value.

+
+

After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + :param elser_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `elser`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `elser` service. + :param chunking_settings: The chunking configuration object. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if elser_inference_id in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'elser_inference_id'") + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "elser_inference_id": _quote(elser_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["elser_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if not __body: + __body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return await self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + 
headers=__headers, + body=__body, + endpoint_id="inference.put_elser", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=("service", "service_settings", "chunking_settings"), + ) + async def put_googleaistudio( + self, + *, + task_type: t.Union[str, t.Literal["completion", "text_embedding"]], + googleaistudio_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["googleaistudio"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create a Google AI Studio inference endpoint.

+

Create an inference endpoint to perform an inference task with the googleaistudio service.

+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + :param googleaistudio_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `googleaistudio`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `googleaistudio` service. + :param chunking_settings: The chunking configuration object. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if googleaistudio_inference_id in SKIP_IN_PATH: + raise ValueError( + "Empty value passed for parameter 'googleaistudio_inference_id'" + ) + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "googleaistudio_inference_id": _quote(googleaistudio_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["googleaistudio_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if not __body: + __body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return await 
self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_googleaistudio", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=( + "service", + "service_settings", + "chunking_settings", + "task_settings", + ), + ) + async def put_googlevertexai( + self, + *, + task_type: t.Union[str, t.Literal["rerank", "text_embedding"]], + googlevertexai_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["googlevertexai"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + task_settings: t.Optional[t.Mapping[str, t.Any]] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create a Google Vertex AI inference endpoint.

+

Create an inference endpoint to perform an inference task with the googlevertexai service.

+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + :param googlevertexai_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `googlevertexai`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `googlevertexai` service. + :param chunking_settings: The chunking configuration object. + :param task_settings: Settings to configure the inference task. These settings + are specific to the task type you specified. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if googlevertexai_inference_id in SKIP_IN_PATH: + raise ValueError( + "Empty value passed for parameter 'googlevertexai_inference_id'" + ) + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "googlevertexai_inference_id": _quote(googlevertexai_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["googlevertexai_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if task_settings is not None: + __body["task_settings"] = task_settings + if 
not __body: + __body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return await self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_googlevertexai", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=("service", "service_settings", "chunking_settings"), + ) + async def put_hugging_face( + self, + *, + task_type: t.Union[str, t.Literal["text_embedding"]], + huggingface_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["hugging_face"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create a Hugging Face inference endpoint.

+

Create an inference endpoint to perform an inference task with the hugging_face service.

+

You must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL. + Select the model you want to use on the new endpoint creation page (for example intfloat/e5-small-v2), then select the sentence embeddings task under the advanced configuration section. + Create the endpoint and copy the URL after the endpoint initialization has been finished.

+

The following models are recommended for the Hugging Face service:

+
    +
  • all-MiniLM-L6-v2
  • +
  • all-MiniLM-L12-v2
  • +
  • all-mpnet-base-v2
  • +
  • e5-base-v2
  • +
  • e5-small-v2
  • +
  • multilingual-e5-base
  • +
  • multilingual-e5-small
  • +
+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + :param huggingface_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `hugging_face`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `hugging_face` service. + :param chunking_settings: The chunking configuration object. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if huggingface_inference_id in SKIP_IN_PATH: + raise ValueError( + "Empty value passed for parameter 'huggingface_inference_id'" + ) + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "huggingface_inference_id": _quote(huggingface_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["huggingface_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if not __body: + __body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return await self.perform_request( # type: 
ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_hugging_face", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=( + "service", + "service_settings", + "chunking_settings", + "task_settings", + ), + ) + async def put_jinaai( + self, + *, + task_type: t.Union[str, t.Literal["rerank", "text_embedding"]], + jinaai_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["jinaai"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + task_settings: t.Optional[t.Mapping[str, t.Any]] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create a JinaAI inference endpoint.

+

Create an inference endpoint to perform an inference task with the jinaai service.

+

To review the available rerank models, refer to https://jina.ai/reranker. + To review the available text_embedding models, refer to https://jina.ai/embeddings/.

+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + :param jinaai_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `jinaai`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `jinaai` service. + :param chunking_settings: The chunking configuration object. + :param task_settings: Settings to configure the inference task. These settings + are specific to the task type you specified. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if jinaai_inference_id in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'jinaai_inference_id'") + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "jinaai_inference_id": _quote(jinaai_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["jinaai_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if task_settings is not None: + __body["task_settings"] = task_settings + if not __body: + __body = None # type: ignore[assignment] + __headers = 
{"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return await self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_jinaai", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=("service", "service_settings", "chunking_settings"), + ) + async def put_mistral( + self, + *, + task_type: t.Union[str, t.Literal["text_embedding"]], + mistral_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["mistral"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create a Mistral inference endpoint.

+

Create an inference endpoint to perform an inference task with the mistral service.

+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The task type. The only valid task type for the model to perform + is `text_embedding`. + :param mistral_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `mistral`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `mistral` service. + :param chunking_settings: The chunking configuration object. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if mistral_inference_id in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'mistral_inference_id'") + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "mistral_inference_id": _quote(mistral_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["mistral_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if not __body: + __body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return await self.perform_request( # type: 
ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_mistral", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=( + "service", + "service_settings", + "chunking_settings", + "task_settings", + ), + ) + async def put_openai( + self, + *, + task_type: t.Union[ + str, t.Literal["chat_completion", "completion", "text_embedding"] + ], + openai_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["openai"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + task_settings: t.Optional[t.Mapping[str, t.Any]] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create an OpenAI inference endpoint.

+

Create an inference endpoint to perform an inference task with the openai service or OpenAI-compatible APIs.

+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + NOTE: The `chat_completion` task type only supports streaming and only through + the _stream API. + :param openai_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `openai`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `openai` service. + :param chunking_settings: The chunking configuration object. + :param task_settings: Settings to configure the inference task. These settings + are specific to the task type you specified. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if openai_inference_id in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'openai_inference_id'") + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "openai_inference_id": _quote(openai_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["openai_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if task_settings is not None: + 
__body["task_settings"] = task_settings + if not __body: + __body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return await self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_openai", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=( + "service", + "service_settings", + "chunking_settings", + "task_settings", + ), + ) + async def put_voyageai( + self, + *, + task_type: t.Union[str, t.Literal["rerank", "text_embedding"]], + voyageai_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["voyageai"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + task_settings: t.Optional[t.Mapping[str, t.Any]] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create a VoyageAI inference endpoint.

+

Create an inference endpoint to perform an inference task with the voyageai service.

+

Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + :param voyageai_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `voyageai`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `voyageai` service. + :param chunking_settings: The chunking configuration object. + :param task_settings: Settings to configure the inference task. These settings + are specific to the task type you specified. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if voyageai_inference_id in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'voyageai_inference_id'") + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "voyageai_inference_id": _quote(voyageai_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["voyageai_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if task_settings is not None: + __body["task_settings"] = task_settings + if not __body: + __body = None # type: ignore[assignment] + 
__headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return await self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_voyageai", + path_parts=__path_parts, + ) + @_rewrite_parameters( body_fields=("service", "service_settings"), ) @@ -341,7 +1918,7 @@ async def put_watsonx( .. raw:: html

Create a Watsonx inference endpoint.

-

Creates an inference endpoint to perform an inference task with the watsonxai service. +

Create an inference endpoint to perform an inference task with the watsonxai service. You need an IBM Cloud Databases for Elasticsearch deployment to use the watsonxai inference service. You can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform.

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. diff --git a/elasticsearch/_async/client/security.py b/elasticsearch/_async/client/security.py index a346a906b..a0a2d52d0 100644 --- a/elasticsearch/_async/client/security.py +++ b/elasticsearch/_async/client/security.py @@ -2867,12 +2867,12 @@ async def oidc_authenticate( ) @_rewrite_parameters( - body_fields=("access_token", "refresh_token"), + body_fields=("token", "refresh_token"), ) async def oidc_logout( self, *, - access_token: t.Optional[str] = None, + token: t.Optional[str] = None, error_trace: t.Optional[bool] = None, filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, human: t.Optional[bool] = None, @@ -2892,11 +2892,11 @@ async def oidc_logout( ``_ - :param access_token: The access token to be invalidated. + :param token: The access token to be invalidated. :param refresh_token: The refresh token to be invalidated. """ - if access_token is None and body is None: - raise ValueError("Empty value passed for parameter 'access_token'") + if token is None and body is None: + raise ValueError("Empty value passed for parameter 'token'") __path_parts: t.Dict[str, str] = {} __path = "/_security/oidc/logout" __query: t.Dict[str, t.Any] = {} @@ -2910,8 +2910,8 @@ async def oidc_logout( if pretty is not None: __query["pretty"] = pretty if not __body: - if access_token is not None: - __body["access_token"] = access_token + if token is not None: + __body["token"] = token if refresh_token is not None: __body["refresh_token"] = refresh_token __headers = {"accept": "application/json", "content-type": "application/json"} diff --git a/elasticsearch/_async/client/watcher.py b/elasticsearch/_async/client/watcher.py index ce90b268c..e880075ad 100644 --- a/elasticsearch/_async/client/watcher.py +++ b/elasticsearch/_async/client/watcher.py @@ -845,7 +845,10 @@ async def update_settings(

Update Watcher index settings. Update settings for the Watcher internal index (.watches). Only a subset of settings can be modified. - This includes index.auto_expand_replicas and index.number_of_replicas.

+ This includes index.auto_expand_replicas, index.number_of_replicas, index.routing.allocation.exclude.*, + index.routing.allocation.include.* and index.routing.allocation.require.*. + Modification of index.routing.allocation.include._tier_preference is an exception and is not allowed as the + Watcher shards must always be in the data_content tier.

``_ diff --git a/elasticsearch/_sync/client/__init__.py b/elasticsearch/_sync/client/__init__.py index cf46a7c8d..a95b9449c 100644 --- a/elasticsearch/_sync/client/__init__.py +++ b/elasticsearch/_sync/client/__init__.py @@ -1119,12 +1119,17 @@ def create( error_trace: t.Optional[bool] = None, filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, human: t.Optional[bool] = None, + if_primary_term: t.Optional[int] = None, + if_seq_no: t.Optional[int] = None, include_source_on_error: t.Optional[bool] = None, + op_type: t.Optional[t.Union[str, t.Literal["create", "index"]]] = None, pipeline: t.Optional[str] = None, pretty: t.Optional[bool] = None, refresh: t.Optional[ t.Union[bool, str, t.Literal["false", "true", "wait_for"]] ] = None, + require_alias: t.Optional[bool] = None, + require_data_stream: t.Optional[bool] = None, routing: t.Optional[str] = None, timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None, version: t.Optional[int] = None, @@ -1202,8 +1207,18 @@ def create( :param id: A unique identifier for the document. To automatically generate a document ID, use the `POST //_doc/` request format. :param document: + :param if_primary_term: Only perform the operation if the document has this primary + term. + :param if_seq_no: Only perform the operation if the document has this sequence + number. :param include_source_on_error: True or false if to include the document source in the error message in case of parsing errors. + :param op_type: Set to `create` to only index the document if it does not already + exist (put if absent). If a document with the specified `_id` already exists, + the indexing operation will fail. The behavior is the same as using the `/_create` + endpoint. If a document ID is specified, this paramater defaults to `index`. + Otherwise, it defaults to `create`. If the request targets a data stream, + an `op_type` of `create` is required. :param pipeline: The ID of the pipeline to use to preprocess incoming documents. 
If the index has a default ingest pipeline specified, setting the value to `_none` turns off the default ingest pipeline for this request. If a final @@ -1212,6 +1227,9 @@ def create( :param refresh: If `true`, Elasticsearch refreshes the affected shards to make this operation visible to search. If `wait_for`, it waits for a refresh to make this operation visible to search. If `false`, it does nothing with refreshes. + :param require_alias: If `true`, the destination must be an index alias. + :param require_data_stream: If `true`, the request's actions must target a data + stream (existing or to be created). :param routing: A custom value that is used to route operations to a specific shard. :param timeout: The period the request waits for the following operations: automatic @@ -1252,14 +1270,24 @@ def create( __query["filter_path"] = filter_path if human is not None: __query["human"] = human + if if_primary_term is not None: + __query["if_primary_term"] = if_primary_term + if if_seq_no is not None: + __query["if_seq_no"] = if_seq_no if include_source_on_error is not None: __query["include_source_on_error"] = include_source_on_error + if op_type is not None: + __query["op_type"] = op_type if pipeline is not None: __query["pipeline"] = pipeline if pretty is not None: __query["pretty"] = pretty if refresh is not None: __query["refresh"] = refresh + if require_alias is not None: + __query["require_alias"] = require_alias + if require_data_stream is not None: + __query["require_data_stream"] = require_data_stream if routing is not None: __query["routing"] = routing if timeout is not None: @@ -1551,7 +1579,7 @@ def delete_by_query( If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. It supports comma-separated values, such as `open,hidden`. - :param from_: Starting offset (default: 0) + :param from_: Skips the specified number of documents. 
:param ignore_unavailable: If `false`, the request returns an error if it targets a missing or closed index. :param lenient: If `true`, format-based query failures (such as providing text @@ -3718,6 +3746,7 @@ def open_point_in_time( human: t.Optional[bool] = None, ignore_unavailable: t.Optional[bool] = None, index_filter: t.Optional[t.Mapping[str, t.Any]] = None, + max_concurrent_shard_requests: t.Optional[int] = None, preference: t.Optional[str] = None, pretty: t.Optional[bool] = None, routing: t.Optional[str] = None, @@ -3773,6 +3802,8 @@ def open_point_in_time( a missing or closed index. :param index_filter: Filter indices if the provided query rewrites to `match_none` on every shard. + :param max_concurrent_shard_requests: Maximum number of concurrent shard requests + that each sub-search request executes per node. :param preference: The node or shard the operation should be performed on. By default, it is random. :param routing: A custom value that is used to route operations to a specific @@ -3800,6 +3831,8 @@ def open_point_in_time( __query["human"] = human if ignore_unavailable is not None: __query["ignore_unavailable"] = ignore_unavailable + if max_concurrent_shard_requests is not None: + __query["max_concurrent_shard_requests"] = max_concurrent_shard_requests if preference is not None: __query["preference"] = preference if pretty is not None: @@ -5971,7 +6004,20 @@ def terms_enum( ) @_rewrite_parameters( - body_fields=("doc", "filter", "per_field_analyzer"), + body_fields=( + "doc", + "field_statistics", + "fields", + "filter", + "offsets", + "payloads", + "per_field_analyzer", + "positions", + "routing", + "term_statistics", + "version", + "version_type", + ), ) def termvectors( self, @@ -6048,9 +6094,9 @@ def termvectors( (the sum of document frequencies for all terms in this field). * The sum of total term frequencies (the sum of total term frequencies of each term in this field). 
- :param fields: A comma-separated list or wildcard expressions of fields to include - in the statistics. It is used as the default list unless a specific field - list is provided in the `completion_fields` or `fielddata_fields` parameters. + :param fields: A list of fields to include in the statistics. It is used as the + default list unless a specific field list is provided in the `completion_fields` + or `fielddata_fields` parameters. :param filter: Filter terms based on their tf-idf scores. This could be useful in order find out a good characteristic vector of a document. This feature works in a similar manner to the second phase of the More Like This Query. @@ -6088,41 +6134,41 @@ def termvectors( __body: t.Dict[str, t.Any] = body if body is not None else {} if error_trace is not None: __query["error_trace"] = error_trace - if field_statistics is not None: - __query["field_statistics"] = field_statistics - if fields is not None: - __query["fields"] = fields if filter_path is not None: __query["filter_path"] = filter_path if human is not None: __query["human"] = human - if offsets is not None: - __query["offsets"] = offsets - if payloads is not None: - __query["payloads"] = payloads - if positions is not None: - __query["positions"] = positions if preference is not None: __query["preference"] = preference if pretty is not None: __query["pretty"] = pretty if realtime is not None: __query["realtime"] = realtime - if routing is not None: - __query["routing"] = routing - if term_statistics is not None: - __query["term_statistics"] = term_statistics - if version is not None: - __query["version"] = version - if version_type is not None: - __query["version_type"] = version_type if not __body: if doc is not None: __body["doc"] = doc + if field_statistics is not None: + __body["field_statistics"] = field_statistics + if fields is not None: + __body["fields"] = fields if filter is not None: __body["filter"] = filter + if offsets is not None: + __body["offsets"] = offsets 
+ if payloads is not None: + __body["payloads"] = payloads if per_field_analyzer is not None: __body["per_field_analyzer"] = per_field_analyzer + if positions is not None: + __body["positions"] = positions + if routing is not None: + __body["routing"] = routing + if term_statistics is not None: + __body["term_statistics"] = term_statistics + if version is not None: + __body["version"] = version + if version_type is not None: + __body["version_type"] = version_type if not __body: __body = None # type: ignore[assignment] __headers = {"accept": "application/json"} @@ -6473,7 +6519,7 @@ def update_by_query( wildcard expressions match hidden data streams. It supports comma-separated values, such as `open,hidden`. Valid values are: `all`, `open`, `closed`, `hidden`, `none`. - :param from_: Starting offset (default: 0) + :param from_: Skips the specified number of documents. :param ignore_unavailable: If `false`, the request returns an error if it targets a missing or closed index. :param lenient: If `true`, format-based query failures (such as providing text diff --git a/elasticsearch/_sync/client/esql.py b/elasticsearch/_sync/client/esql.py index 8f8d1d107..b4ddd2052 100644 --- a/elasticsearch/_sync/client/esql.py +++ b/elasticsearch/_sync/client/esql.py @@ -35,6 +35,7 @@ class EsqlClient(NamespacedClient): "params", "profile", "tables", + "wait_for_completion_timeout", ), ignore_deprecated_options={"params"}, ) @@ -152,8 +153,6 @@ def async_query( __query["keep_on_completion"] = keep_on_completion if pretty is not None: __query["pretty"] = pretty - if wait_for_completion_timeout is not None: - __query["wait_for_completion_timeout"] = wait_for_completion_timeout if not __body: if query is not None: __body["query"] = query @@ -171,6 +170,8 @@ def async_query( __body["profile"] = profile if tables is not None: __body["tables"] = tables + if wait_for_completion_timeout is not None: + __body["wait_for_completion_timeout"] = wait_for_completion_timeout __headers = {"accept": 
"application/json", "content-type": "application/json"} return self.perform_request( # type: ignore[return-value] "POST", diff --git a/elasticsearch/_sync/client/indices.py b/elasticsearch/_sync/client/indices.py index 31758cff9..625e4a89f 100644 --- a/elasticsearch/_sync/client/indices.py +++ b/elasticsearch/_sync/client/indices.py @@ -1621,7 +1621,9 @@ def exists_index_template( name: str, error_trace: t.Optional[bool] = None, filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + flat_settings: t.Optional[bool] = None, human: t.Optional[bool] = None, + local: t.Optional[bool] = None, master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None, pretty: t.Optional[bool] = None, ) -> HeadApiResponse: @@ -1636,6 +1638,10 @@ def exists_index_template( :param name: Comma-separated list of index template names used to limit the request. Wildcard (*) expressions are supported. + :param flat_settings: If true, returns settings in flat format. + :param local: If true, the request retrieves information from the local node + only. Defaults to false, which means information is retrieved from the master + node. :param master_timeout: Period to wait for a connection to the master node. If no response is received before the timeout expires, the request fails and returns an error. 
@@ -1649,8 +1655,12 @@ def exists_index_template( __query["error_trace"] = error_trace if filter_path is not None: __query["filter_path"] = filter_path + if flat_settings is not None: + __query["flat_settings"] = flat_settings if human is not None: __query["human"] = human + if local is not None: + __query["local"] = local if master_timeout is not None: __query["master_timeout"] = master_timeout if pretty is not None: @@ -1800,9 +1810,6 @@ def field_usage_stats( human: t.Optional[bool] = None, ignore_unavailable: t.Optional[bool] = None, pretty: t.Optional[bool] = None, - wait_for_active_shards: t.Optional[ - t.Union[int, t.Union[str, t.Literal["all", "index-setting"]]] - ] = None, ) -> ObjectApiResponse[t.Any]: """ .. raw:: html @@ -1832,9 +1839,6 @@ def field_usage_stats( in the statistics. :param ignore_unavailable: If `true`, missing or closed indices are not included in the response. - :param wait_for_active_shards: The number of shard copies that must be active - before proceeding with the operation. Set to all or any positive integer - up to the total number of shards in the index (`number_of_replicas+1`). 
""" if index in SKIP_IN_PATH: raise ValueError("Empty value passed for parameter 'index'") @@ -1857,8 +1861,6 @@ def field_usage_stats( __query["ignore_unavailable"] = ignore_unavailable if pretty is not None: __query["pretty"] = pretty - if wait_for_active_shards is not None: - __query["wait_for_active_shards"] = wait_for_active_shards __headers = {"accept": "application/json"} return self.perform_request( # type: ignore[return-value] "GET", @@ -3838,6 +3840,7 @@ def put_settings( master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None, preserve_existing: t.Optional[bool] = None, pretty: t.Optional[bool] = None, + reopen: t.Optional[bool] = None, timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None, ) -> ObjectApiResponse[t.Any]: """ @@ -3880,6 +3883,9 @@ def put_settings( no response is received before the timeout expires, the request fails and returns an error. :param preserve_existing: If `true`, existing index settings remain unchanged. + :param reopen: Whether to close and reopen the index to apply non-dynamic settings. + If set to `true` the indices to which the settings are being applied will + be closed temporarily and then reopened in order to apply the changes. :param timeout: Period to wait for a response. If no response is received before the timeout expires, the request fails and returns an error. """ @@ -3917,6 +3923,8 @@ def put_settings( __query["preserve_existing"] = preserve_existing if pretty is not None: __query["pretty"] = pretty + if reopen is not None: + __query["reopen"] = reopen if timeout is not None: __query["timeout"] = timeout __body = settings if settings is not None else body @@ -3984,7 +3992,7 @@ def put_template( :param name: The name of the template :param aliases: Aliases for the index. - :param cause: + :param cause: User defined reason for creating/updating the index template :param create: If true, this request cannot replace or update existing index templates. 
:param index_patterns: Array of wildcard expressions used to match the names @@ -4222,6 +4230,7 @@ def reload_search_analyzers( human: t.Optional[bool] = None, ignore_unavailable: t.Optional[bool] = None, pretty: t.Optional[bool] = None, + resource: t.Optional[str] = None, ) -> ObjectApiResponse[t.Any]: """ .. raw:: html @@ -4249,6 +4258,7 @@ def reload_search_analyzers( that are open, closed or both. :param ignore_unavailable: Whether specified concrete indices should be ignored when unavailable (missing or closed) + :param resource: Changed resource to reload analyzers from if applicable """ if index in SKIP_IN_PATH: raise ValueError("Empty value passed for parameter 'index'") @@ -4269,6 +4279,8 @@ def reload_search_analyzers( __query["ignore_unavailable"] = ignore_unavailable if pretty is not None: __query["pretty"] = pretty + if resource is not None: + __query["resource"] = resource __headers = {"accept": "application/json"} return self.perform_request( # type: ignore[return-value] "POST", @@ -4505,6 +4517,7 @@ def rollover( error_trace: t.Optional[bool] = None, filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, human: t.Optional[bool] = None, + lazy: t.Optional[bool] = None, mappings: t.Optional[t.Mapping[str, t.Any]] = None, master_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None, pretty: t.Optional[bool] = None, @@ -4561,6 +4574,9 @@ def rollover( conditions are satisfied. :param dry_run: If `true`, checks whether the current index satisfies the specified conditions but does not perform a rollover. + :param lazy: If set to true, the rollover action will only mark a data stream + to signal that it needs to be rolled over at the next write. Only allowed + on data streams. :param mappings: Mapping for fields in the index. If specified, this mapping can include field names, field data types, and mapping paramaters. :param master_timeout: Period to wait for a connection to the master node. 
If @@ -4595,6 +4611,8 @@ def rollover( __query["filter_path"] = filter_path if human is not None: __query["human"] = human + if lazy is not None: + __query["lazy"] = lazy if master_timeout is not None: __query["master_timeout"] = master_timeout if pretty is not None: @@ -4915,6 +4933,8 @@ def simulate_index_template( self, *, name: str, + cause: t.Optional[str] = None, + create: t.Optional[bool] = None, error_trace: t.Optional[bool] = None, filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, human: t.Optional[bool] = None, @@ -4932,6 +4952,10 @@ def simulate_index_template( ``_ :param name: Name of the index to simulate + :param cause: User defined reason for dry-run creating the new template for simulation + purposes + :param create: Whether the index template we optionally defined in the body should + only be dry-run added if new or can also replace an existing one :param include_defaults: If true, returns all relevant default configurations for the index template. :param master_timeout: Period to wait for a connection to the master node. If @@ -4943,6 +4967,10 @@ def simulate_index_template( __path_parts: t.Dict[str, str] = {"name": _quote(name)} __path = f'/_index_template/_simulate_index/{__path_parts["name"]}' __query: t.Dict[str, t.Any] = {} + if cause is not None: + __query["cause"] = cause + if create is not None: + __query["create"] = create if error_trace is not None: __query["error_trace"] = error_trace if filter_path is not None: @@ -4985,6 +5013,7 @@ def simulate_template( *, name: t.Optional[str] = None, allow_auto_create: t.Optional[bool] = None, + cause: t.Optional[str] = None, composed_of: t.Optional[t.Sequence[str]] = None, create: t.Optional[bool] = None, data_stream: t.Optional[t.Mapping[str, t.Any]] = None, @@ -5021,6 +5050,8 @@ def simulate_template( via `actions.auto_create_index`. If set to `false`, then indices or data streams matching the template must always be explicitly created, and may never be automatically created. 
+ :param cause: User defined reason for dry-run creating the new template for simulation + purposes :param composed_of: An ordered list of component template names. Component templates are merged in the order specified, meaning that the last component template specified has the highest precedence. @@ -5065,6 +5096,8 @@ def simulate_template( __path = "/_index_template/_simulate" __query: t.Dict[str, t.Any] = {} __body: t.Dict[str, t.Any] = body if body is not None else {} + if cause is not None: + __query["cause"] = cause if create is not None: __query["create"] = create if error_trace is not None: diff --git a/elasticsearch/_sync/client/inference.py b/elasticsearch/_sync/client/inference.py index eae9108b5..f64f61208 100644 --- a/elasticsearch/_sync/client/inference.py +++ b/elasticsearch/_sync/client/inference.py @@ -234,6 +234,67 @@ def get( path_parts=__path_parts, ) + @_rewrite_parameters( + body_name="chat_completion_request", + ) + def post_eis_chat_completion( + self, + *, + eis_inference_id: str, + chat_completion_request: t.Optional[t.Mapping[str, t.Any]] = None, + body: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Perform a chat completion task through the Elastic Inference Service (EIS).

+

Perform a chat completion inference task with the elastic service.

+ + + ``_ + + :param eis_inference_id: The unique identifier of the inference endpoint. + :param chat_completion_request: + """ + if eis_inference_id in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'eis_inference_id'") + if chat_completion_request is None and body is None: + raise ValueError( + "Empty value passed for parameters 'chat_completion_request' and 'body', one of them should be set." + ) + elif chat_completion_request is not None and body is not None: + raise ValueError("Cannot set both 'chat_completion_request' and 'body'") + __path_parts: t.Dict[str, str] = {"eis_inference_id": _quote(eis_inference_id)} + __path = ( + f'/_inference/chat_completion/{__path_parts["eis_inference_id"]}/_stream' + ) + __query: t.Dict[str, t.Any] = {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + __body = ( + chat_completion_request if chat_completion_request is not None else body + ) + __headers = {"accept": "application/json", "content-type": "application/json"} + return self.perform_request( # type: ignore[return-value] + "POST", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.post_eis_chat_completion", + path_parts=__path_parts, + ) + @_rewrite_parameters( body_name="inference_config", ) @@ -321,6 +382,1522 @@ def put( path_parts=__path_parts, ) + @_rewrite_parameters( + body_fields=( + "service", + "service_settings", + "chunking_settings", + "task_settings", + ), + ) + def put_alibabacloud( + self, + *, + task_type: t.Union[ + str, t.Literal["completion", "rerank", "space_embedding", "text_embedding"] + ], + alibabacloud_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["alibabacloud-ai-search"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: 
t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + task_settings: t.Optional[t.Mapping[str, t.Any]] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create an AlibabaCloud AI Search inference endpoint.

+

Create an inference endpoint to perform an inference task with the alibabacloud-ai-search service.

+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + :param alibabacloud_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `alibabacloud-ai-search`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `alibabacloud-ai-search` service. + :param chunking_settings: The chunking configuration object. + :param task_settings: Settings to configure the inference task. These settings + are specific to the task type you specified. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if alibabacloud_inference_id in SKIP_IN_PATH: + raise ValueError( + "Empty value passed for parameter 'alibabacloud_inference_id'" + ) + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "alibabacloud_inference_id": _quote(alibabacloud_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["alibabacloud_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if task_settings is not None: + __body["task_settings"] = task_settings + 
if not __body: + __body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_alibabacloud", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=( + "service", + "service_settings", + "chunking_settings", + "task_settings", + ), + ) + def put_amazonbedrock( + self, + *, + task_type: t.Union[str, t.Literal["completion", "text_embedding"]], + amazonbedrock_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["amazonbedrock"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + task_settings: t.Optional[t.Mapping[str, t.Any]] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create an Amazon Bedrock inference endpoint.

+

Create an inference endpoint to perform an inference task with the amazonbedrock service.

+
+

info + You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.

+
+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + :param amazonbedrock_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `amazonbedrock`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `amazonbedrock` service. + :param chunking_settings: The chunking configuration object. + :param task_settings: Settings to configure the inference task. These settings + are specific to the task type you specified. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if amazonbedrock_inference_id in SKIP_IN_PATH: + raise ValueError( + "Empty value passed for parameter 'amazonbedrock_inference_id'" + ) + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "amazonbedrock_inference_id": _quote(amazonbedrock_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["amazonbedrock_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if task_settings is not None: + __body["task_settings"] = task_settings + if not 
__body: + __body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_amazonbedrock", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=( + "service", + "service_settings", + "chunking_settings", + "task_settings", + ), + ) + def put_anthropic( + self, + *, + task_type: t.Union[str, t.Literal["completion"]], + anthropic_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["anthropic"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + task_settings: t.Optional[t.Mapping[str, t.Any]] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create an Anthropic inference endpoint.

+

Create an inference endpoint to perform an inference task with the anthropic service.

+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The task type. The only valid task type for the model to perform + is `completion`. + :param anthropic_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `anthropic`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `anthropic` service. + :param chunking_settings: The chunking configuration object. + :param task_settings: Settings to configure the inference task. These settings + are specific to the task type you specified. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if anthropic_inference_id in SKIP_IN_PATH: + raise ValueError( + "Empty value passed for parameter 'anthropic_inference_id'" + ) + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "anthropic_inference_id": _quote(anthropic_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["anthropic_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if task_settings is not None: + __body["task_settings"] = task_settings + if not __body: +
__body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_anthropic", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=( + "service", + "service_settings", + "chunking_settings", + "task_settings", + ), + ) + def put_azureaistudio( + self, + *, + task_type: t.Union[str, t.Literal["completion", "text_embedding"]], + azureaistudio_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["azureaistudio"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + task_settings: t.Optional[t.Mapping[str, t.Any]] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create an Azure AI Studio inference endpoint.

+

Create an inference endpoint to perform an inference task with the azureaistudio service.

+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + :param azureaistudio_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `azureaistudio`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `azureaistudio` service. + :param chunking_settings: The chunking configuration object. + :param task_settings: Settings to configure the inference task. These settings + are specific to the task type you specified. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if azureaistudio_inference_id in SKIP_IN_PATH: + raise ValueError( + "Empty value passed for parameter 'azureaistudio_inference_id'" + ) + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "azureaistudio_inference_id": _quote(azureaistudio_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["azureaistudio_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if task_settings is not None: + __body["task_settings"] = task_settings + if not __body: +
__body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_azureaistudio", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=( + "service", + "service_settings", + "chunking_settings", + "task_settings", + ), + ) + def put_azureopenai( + self, + *, + task_type: t.Union[str, t.Literal["completion", "text_embedding"]], + azureopenai_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["azureopenai"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + task_settings: t.Optional[t.Mapping[str, t.Any]] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create an Azure OpenAI inference endpoint.

+

Create an inference endpoint to perform an inference task with the azureopenai service.

+

The list of chat completion models that you can choose from in your Azure OpenAI deployment includes:

+ +

The list of embeddings models that you can choose from in your deployment can be found in the Azure models documentation.

+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + NOTE: The `chat_completion` task type only supports streaming and only through + the _stream API. + :param azureopenai_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `azureopenai`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `azureopenai` service. + :param chunking_settings: The chunking configuration object. + :param task_settings: Settings to configure the inference task. These settings + are specific to the task type you specified. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if azureopenai_inference_id in SKIP_IN_PATH: + raise ValueError( + "Empty value passed for parameter 'azureopenai_inference_id'" + ) + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "azureopenai_inference_id": _quote(azureopenai_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["azureopenai_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if 
task_settings is not None: + __body["task_settings"] = task_settings + if not __body: + __body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_azureopenai", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=( + "service", + "service_settings", + "chunking_settings", + "task_settings", + ), + ) + def put_cohere( + self, + *, + task_type: t.Union[str, t.Literal["completion", "rerank", "text_embedding"]], + cohere_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["cohere"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + task_settings: t.Optional[t.Mapping[str, t.Any]] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create a Cohere inference endpoint.

+

Create an inference endpoint to perform an inference task with the cohere service.

+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + :param cohere_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `cohere`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `cohere` service. + :param chunking_settings: The chunking configuration object. + :param task_settings: Settings to configure the inference task. These settings + are specific to the task type you specified. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if cohere_inference_id in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'cohere_inference_id'") + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "cohere_inference_id": _quote(cohere_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["cohere_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if task_settings is not None: + __body["task_settings"] = task_settings + if not __body: + __body = None # type: ignore[assignment] + __headers = 
{"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_cohere", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=("service", "service_settings"), + ) + def put_eis( + self, + *, + task_type: t.Union[str, t.Literal["chat_completion"]], + eis_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["elastic"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create an Elastic Inference Service (EIS) inference endpoint.

+

Create an inference endpoint to perform an inference task through the Elastic Inference Service (EIS).

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + NOTE: The `chat_completion` task type only supports streaming and only through + the _stream API. + :param eis_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `elastic`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `elastic` service. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if eis_inference_id in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'eis_inference_id'") + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "eis_inference_id": _quote(eis_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["eis_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if not __body: + __body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + 
endpoint_id="inference.put_eis", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=( + "service", + "service_settings", + "chunking_settings", + "task_settings", + ), + ) + def put_elasticsearch( + self, + *, + task_type: t.Union[ + str, t.Literal["rerank", "sparse_embedding", "text_embedding"] + ], + elasticsearch_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["elasticsearch"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + task_settings: t.Optional[t.Mapping[str, t.Any]] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create an Elasticsearch inference endpoint.

+

Create an inference endpoint to perform an inference task with the elasticsearch service.

+
+

info + Your Elasticsearch deployment contains preconfigured ELSER and E5 inference endpoints, you only need to create the endpoints using the API if you want to customize the settings.

+
+

If you use the ELSER or the E5 model through the elasticsearch service, the API request will automatically download and deploy the model if it isn't downloaded yet.

+
+

info + You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout, while the model downloads in the background. You can check the download progress in the Machine Learning UI. If using the Python client, you can set the timeout parameter to a higher value.

+
+

After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + :param elasticsearch_inference_id: The unique identifier of the inference endpoint. + The must not match the `model_id`. + :param service: The type of service supported for the specified task type. In + this case, `elasticsearch`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `elasticsearch` service. + :param chunking_settings: The chunking configuration object. + :param task_settings: Settings to configure the inference task. These settings + are specific to the task type you specified. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if elasticsearch_inference_id in SKIP_IN_PATH: + raise ValueError( + "Empty value passed for parameter 'elasticsearch_inference_id'" + ) + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "elasticsearch_inference_id": _quote(elasticsearch_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["elasticsearch_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if task_settings is not None: + 
__body["task_settings"] = task_settings + if not __body: + __body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_elasticsearch", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=("service", "service_settings", "chunking_settings"), + ) + def put_elser( + self, + *, + task_type: t.Union[str, t.Literal["sparse_embedding"]], + elser_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["elser"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create an ELSER inference endpoint.

+

Create an inference endpoint to perform an inference task with the elser service. + You can also deploy ELSER by using the Elasticsearch inference integration.

+
+

info + Your Elasticsearch deployment contains a preconfigured ELSER inference endpoint, you only need to create the endpoint using the API if you want to customize the settings.

+
+

The API request will automatically download and deploy the ELSER model if it isn't already downloaded.

+
+

info + You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout, while the model downloads in the background. You can check the download progress in the Machine Learning UI. If using the Python client, you can set the timeout parameter to a higher value.

+
+

After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + :param elser_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `elser`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `elser` service. + :param chunking_settings: The chunking configuration object. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if elser_inference_id in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'elser_inference_id'") + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "elser_inference_id": _quote(elser_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["elser_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if not __body: + __body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + 
headers=__headers, + body=__body, + endpoint_id="inference.put_elser", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=("service", "service_settings", "chunking_settings"), + ) + def put_googleaistudio( + self, + *, + task_type: t.Union[str, t.Literal["completion", "text_embedding"]], + googleaistudio_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["googleaistudio"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create a Google AI Studio inference endpoint.

+

Create an inference endpoint to perform an inference task with the googleaistudio service.

+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + :param googleaistudio_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `googleaistudio`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `googleaistudio` service. + :param chunking_settings: The chunking configuration object. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if googleaistudio_inference_id in SKIP_IN_PATH: + raise ValueError( + "Empty value passed for parameter 'googleaistudio_inference_id'" + ) + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "googleaistudio_inference_id": _quote(googleaistudio_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["googleaistudio_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if not __body: + __body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return self.perform_request( 
# type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_googleaistudio", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=( + "service", + "service_settings", + "chunking_settings", + "task_settings", + ), + ) + def put_googlevertexai( + self, + *, + task_type: t.Union[str, t.Literal["rerank", "text_embedding"]], + googlevertexai_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["googlevertexai"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + task_settings: t.Optional[t.Mapping[str, t.Any]] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create a Google Vertex AI inference endpoint.

+

Create an inference endpoint to perform an inference task with the googlevertexai service.

+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + :param googlevertexai_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `googlevertexai`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `googlevertexai` service. + :param chunking_settings: The chunking configuration object. + :param task_settings: Settings to configure the inference task. These settings + are specific to the task type you specified. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if googlevertexai_inference_id in SKIP_IN_PATH: + raise ValueError( + "Empty value passed for parameter 'googlevertexai_inference_id'" + ) + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "googlevertexai_inference_id": _quote(googlevertexai_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["googlevertexai_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if task_settings is not None: + __body["task_settings"] = task_settings + if 
not __body: + __body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_googlevertexai", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=("service", "service_settings", "chunking_settings"), + ) + def put_hugging_face( + self, + *, + task_type: t.Union[str, t.Literal["text_embedding"]], + huggingface_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["hugging_face"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create a Hugging Face inference endpoint.

+

Create an inference endpoint to perform an inference task with the hugging_face service.

+

You must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL. + Select the model you want to use on the new endpoint creation page (for example intfloat/e5-small-v2), then select the sentence embeddings task under the advanced configuration section. + Create the endpoint and copy the URL after the endpoint initialization has been finished.

+

The following models are recommended for the Hugging Face service:

+
    +
  • all-MiniLM-L6-v2
  • +
  • all-MiniLM-L12-v2
  • +
  • all-mpnet-base-v2
  • +
  • e5-base-v2
  • +
  • e5-small-v2
  • +
  • multilingual-e5-base
  • +
  • multilingual-e5-small
  • +
+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + :param huggingface_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `hugging_face`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `hugging_face` service. + :param chunking_settings: The chunking configuration object. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if huggingface_inference_id in SKIP_IN_PATH: + raise ValueError( + "Empty value passed for parameter 'huggingface_inference_id'" + ) + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "huggingface_inference_id": _quote(huggingface_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["huggingface_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if not __body: + __body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return self.perform_request( # type: 
ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_hugging_face", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=( + "service", + "service_settings", + "chunking_settings", + "task_settings", + ), + ) + def put_jinaai( + self, + *, + task_type: t.Union[str, t.Literal["rerank", "text_embedding"]], + jinaai_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["jinaai"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + task_settings: t.Optional[t.Mapping[str, t.Any]] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create a JinaAI inference endpoint.

+

Create an inference endpoint to perform an inference task with the jinaai service.

+

To review the available rerank models, refer to https://jina.ai/reranker. + To review the available text_embedding models, refer to https://jina.ai/embeddings/.

+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + :param jinaai_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `jinaai`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `jinaai` service. + :param chunking_settings: The chunking configuration object. + :param task_settings: Settings to configure the inference task. These settings + are specific to the task type you specified. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if jinaai_inference_id in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'jinaai_inference_id'") + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "jinaai_inference_id": _quote(jinaai_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["jinaai_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if task_settings is not None: + __body["task_settings"] = task_settings + if not __body: + __body = None # type: ignore[assignment] + __headers = 
{"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_jinaai", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=("service", "service_settings", "chunking_settings"), + ) + def put_mistral( + self, + *, + task_type: t.Union[str, t.Literal["text_embedding"]], + mistral_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["mistral"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create a Mistral inference endpoint.

+

Create an inference endpoint to perform an inference task with the mistral service.

+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The task type. The only valid task type for the model to perform + is `text_embedding`. + :param mistral_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `mistral`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `mistral` service. + :param chunking_settings: The chunking configuration object. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if mistral_inference_id in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'mistral_inference_id'") + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "mistral_inference_id": _quote(mistral_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["mistral_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if not __body: + __body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return self.perform_request( # type: ignore[return-value] + 
"PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_mistral", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=( + "service", + "service_settings", + "chunking_settings", + "task_settings", + ), + ) + def put_openai( + self, + *, + task_type: t.Union[ + str, t.Literal["chat_completion", "completion", "text_embedding"] + ], + openai_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["openai"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + task_settings: t.Optional[t.Mapping[str, t.Any]] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create an OpenAI inference endpoint.

+

Create an inference endpoint to perform an inference task with the openai service or openai compatible APIs.

+

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + After creating the endpoint, wait for the model deployment to complete before using it. + To verify the deployment status, use the get trained model statistics API. + Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". + Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + NOTE: The `chat_completion` task type only supports streaming and only through + the _stream API. + :param openai_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `openai`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `openai` service. + :param chunking_settings: The chunking configuration object. + :param task_settings: Settings to configure the inference task. These settings + are specific to the task type you specified. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if openai_inference_id in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'openai_inference_id'") + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "openai_inference_id": _quote(openai_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["openai_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if task_settings is not None: + 
__body["task_settings"] = task_settings + if not __body: + __body = None # type: ignore[assignment] + __headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_openai", + path_parts=__path_parts, + ) + + @_rewrite_parameters( + body_fields=( + "service", + "service_settings", + "chunking_settings", + "task_settings", + ), + ) + def put_voyageai( + self, + *, + task_type: t.Union[str, t.Literal["rerank", "text_embedding"]], + voyageai_inference_id: str, + service: t.Optional[t.Union[str, t.Literal["voyageai"]]] = None, + service_settings: t.Optional[t.Mapping[str, t.Any]] = None, + chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None, + error_trace: t.Optional[bool] = None, + filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, + human: t.Optional[bool] = None, + pretty: t.Optional[bool] = None, + task_settings: t.Optional[t.Mapping[str, t.Any]] = None, + body: t.Optional[t.Dict[str, t.Any]] = None, + ) -> ObjectApiResponse[t.Any]: + """ + .. raw:: html + +

Create a VoyageAI inference endpoint.

+

Create an inference endpoint to perform an inference task with the voyageai service.

+

Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

+ + + ``_ + + :param task_type: The type of the inference task that the model will perform. + :param voyageai_inference_id: The unique identifier of the inference endpoint. + :param service: The type of service supported for the specified task type. In + this case, `voyageai`. + :param service_settings: Settings used to install the inference model. These + settings are specific to the `voyageai` service. + :param chunking_settings: The chunking configuration object. + :param task_settings: Settings to configure the inference task. These settings + are specific to the task type you specified. + """ + if task_type in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'task_type'") + if voyageai_inference_id in SKIP_IN_PATH: + raise ValueError("Empty value passed for parameter 'voyageai_inference_id'") + if service is None and body is None: + raise ValueError("Empty value passed for parameter 'service'") + if service_settings is None and body is None: + raise ValueError("Empty value passed for parameter 'service_settings'") + __path_parts: t.Dict[str, str] = { + "task_type": _quote(task_type), + "voyageai_inference_id": _quote(voyageai_inference_id), + } + __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["voyageai_inference_id"]}' + __query: t.Dict[str, t.Any] = {} + __body: t.Dict[str, t.Any] = body if body is not None else {} + if error_trace is not None: + __query["error_trace"] = error_trace + if filter_path is not None: + __query["filter_path"] = filter_path + if human is not None: + __query["human"] = human + if pretty is not None: + __query["pretty"] = pretty + if not __body: + if service is not None: + __body["service"] = service + if service_settings is not None: + __body["service_settings"] = service_settings + if chunking_settings is not None: + __body["chunking_settings"] = chunking_settings + if task_settings is not None: + __body["task_settings"] = task_settings + if not __body: + __body = None # type: ignore[assignment] + 
__headers = {"accept": "application/json"} + if __body is not None: + __headers["content-type"] = "application/json" + return self.perform_request( # type: ignore[return-value] + "PUT", + __path, + params=__query, + headers=__headers, + body=__body, + endpoint_id="inference.put_voyageai", + path_parts=__path_parts, + ) + @_rewrite_parameters( body_fields=("service", "service_settings"), ) @@ -341,7 +1918,7 @@ def put_watsonx( .. raw:: html

Create a Watsonx inference endpoint.

-

Creates an inference endpoint to perform an inference task with the watsonxai service. +

Create an inference endpoint to perform an inference task with the watsonxai service. You need an IBM Cloud Databases for Elasticsearch deployment to use the watsonxai inference service. You can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform.

When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. diff --git a/elasticsearch/_sync/client/security.py b/elasticsearch/_sync/client/security.py index 9dceb7041..e9d13b797 100644 --- a/elasticsearch/_sync/client/security.py +++ b/elasticsearch/_sync/client/security.py @@ -2867,12 +2867,12 @@ def oidc_authenticate( ) @_rewrite_parameters( - body_fields=("access_token", "refresh_token"), + body_fields=("token", "refresh_token"), ) def oidc_logout( self, *, - access_token: t.Optional[str] = None, + token: t.Optional[str] = None, error_trace: t.Optional[bool] = None, filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None, human: t.Optional[bool] = None, @@ -2892,11 +2892,11 @@ def oidc_logout( ``_ - :param access_token: The access token to be invalidated. + :param token: The access token to be invalidated. :param refresh_token: The refresh token to be invalidated. """ - if access_token is None and body is None: - raise ValueError("Empty value passed for parameter 'access_token'") + if token is None and body is None: + raise ValueError("Empty value passed for parameter 'token'") __path_parts: t.Dict[str, str] = {} __path = "/_security/oidc/logout" __query: t.Dict[str, t.Any] = {} @@ -2910,8 +2910,8 @@ def oidc_logout( if pretty is not None: __query["pretty"] = pretty if not __body: - if access_token is not None: - __body["access_token"] = access_token + if token is not None: + __body["token"] = token if refresh_token is not None: __body["refresh_token"] = refresh_token __headers = {"accept": "application/json", "content-type": "application/json"} diff --git a/elasticsearch/_sync/client/watcher.py b/elasticsearch/_sync/client/watcher.py index 93d335194..88dbfb718 100644 --- a/elasticsearch/_sync/client/watcher.py +++ b/elasticsearch/_sync/client/watcher.py @@ -845,7 +845,10 @@ def update_settings(

Update Watcher index settings. Update settings for the Watcher internal index (.watches). Only a subset of settings can be modified. - This includes index.auto_expand_replicas and index.number_of_replicas.

+ This includes index.auto_expand_replicas, index.number_of_replicas, index.routing.allocation.exclude.*, + index.routing.allocation.include.* and index.routing.allocation.require.*. + Modification of index.routing.allocation.include._tier_preference is an exception and is not allowed as the + Watcher shards must always be in the data_content tier.

``_ diff --git a/elasticsearch/dsl/field.py b/elasticsearch/dsl/field.py index 50f30b405..eb61be48a 100644 --- a/elasticsearch/dsl/field.py +++ b/elasticsearch/dsl/field.py @@ -762,6 +762,11 @@ class Boolean(Field): :arg fielddata: :arg index: :arg null_value: + :arg ignore_malformed: + :arg script: + :arg on_script_error: + :arg time_series_dimension: For internal use by Elastic only. Marks + the field as a time series dimension. Defaults to false. :arg doc_values: :arg copy_to: :arg store: @@ -789,6 +794,10 @@ def __init__( ] = DEFAULT, index: Union[bool, "DefaultType"] = DEFAULT, null_value: Union[bool, "DefaultType"] = DEFAULT, + ignore_malformed: Union[bool, "DefaultType"] = DEFAULT, + script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT, + on_script_error: Union[Literal["fail", "continue"], "DefaultType"] = DEFAULT, + time_series_dimension: Union[bool, "DefaultType"] = DEFAULT, doc_values: Union[bool, "DefaultType"] = DEFAULT, copy_to: Union[ Union[str, "InstrumentedField"], @@ -816,6 +825,14 @@ def __init__( kwargs["index"] = index if null_value is not DEFAULT: kwargs["null_value"] = null_value + if ignore_malformed is not DEFAULT: + kwargs["ignore_malformed"] = ignore_malformed + if script is not DEFAULT: + kwargs["script"] = script + if on_script_error is not DEFAULT: + kwargs["on_script_error"] = on_script_error + if time_series_dimension is not DEFAULT: + kwargs["time_series_dimension"] = time_series_dimension if doc_values is not DEFAULT: kwargs["doc_values"] = doc_values if copy_to is not DEFAULT: @@ -1390,11 +1407,29 @@ def __init__( class DenseVector(Field): """ - :arg element_type: - :arg dims: - :arg similarity: - :arg index: - :arg index_options: + :arg dims: Number of vector dimensions. Can't exceed `4096`. If `dims` + is not specified, it will be set to the length of the first vector + added to the field. + :arg element_type: The data type used to encode vectors. 
The supported + data types are `float` (default), `byte`, and `bit`. Defaults to + `float` if omitted. + :arg index: If `true`, you can search this field using the kNN search + API. Defaults to `True` if omitted. + :arg index_options: An optional section that configures the kNN + indexing algorithm. The HNSW algorithm has two internal parameters + that influence how the data structure is built. These can be + adjusted to improve the accuracy of results, at the expense of + slower indexing speed. This parameter can only be specified when + `index` is `true`. + :arg similarity: The vector similarity metric to use in kNN search. + Documents are ranked by their vector field's similarity to the + query vector. The `_score` of each document will be derived from + the similarity, in a way that ensures scores are positive and that + a larger score corresponds to a higher ranking. Defaults to + `l2_norm` when `element_type` is `bit` otherwise defaults to + `cosine`. `bit` vectors only support `l2_norm` as their + similarity metric. This parameter can only be specified when + `index` is `true`. :arg meta: Metadata about the field. 
:arg properties: :arg ignore_above: @@ -1413,13 +1448,16 @@ class DenseVector(Field): def __init__( self, *args: Any, - element_type: Union[str, "DefaultType"] = DEFAULT, dims: Union[int, "DefaultType"] = DEFAULT, - similarity: Union[str, "DefaultType"] = DEFAULT, + element_type: Union[Literal["bit", "byte", "float"], "DefaultType"] = DEFAULT, index: Union[bool, "DefaultType"] = DEFAULT, index_options: Union[ "types.DenseVectorIndexOptions", Dict[str, Any], "DefaultType" ] = DEFAULT, + similarity: Union[ + Literal["cosine", "dot_product", "l2_norm", "max_inner_product"], + "DefaultType", + ] = DEFAULT, meta: Union[Mapping[str, str], "DefaultType"] = DEFAULT, properties: Union[Mapping[str, Field], "DefaultType"] = DEFAULT, ignore_above: Union[int, "DefaultType"] = DEFAULT, @@ -1432,16 +1470,16 @@ def __init__( ] = DEFAULT, **kwargs: Any, ): - if element_type is not DEFAULT: - kwargs["element_type"] = element_type if dims is not DEFAULT: kwargs["dims"] = dims - if similarity is not DEFAULT: - kwargs["similarity"] = similarity + if element_type is not DEFAULT: + kwargs["element_type"] = element_type if index is not DEFAULT: kwargs["index"] = index if index_options is not DEFAULT: kwargs["index_options"] = index_options + if similarity is not DEFAULT: + kwargs["similarity"] = similarity if meta is not DEFAULT: kwargs["meta"] = meta if properties is not DEFAULT: @@ -1905,6 +1943,7 @@ class GeoShape(Field): :arg coerce: :arg ignore_malformed: :arg ignore_z_value: + :arg index: :arg orientation: :arg strategy: :arg doc_values: @@ -1930,6 +1969,7 @@ def __init__( coerce: Union[bool, "DefaultType"] = DEFAULT, ignore_malformed: Union[bool, "DefaultType"] = DEFAULT, ignore_z_value: Union[bool, "DefaultType"] = DEFAULT, + index: Union[bool, "DefaultType"] = DEFAULT, orientation: Union[Literal["right", "left"], "DefaultType"] = DEFAULT, strategy: Union[Literal["recursive", "term"], "DefaultType"] = DEFAULT, doc_values: Union[bool, "DefaultType"] = DEFAULT, @@ -1957,6 +1997,8 @@ 
def __init__( kwargs["ignore_malformed"] = ignore_malformed if ignore_z_value is not DEFAULT: kwargs["ignore_z_value"] = ignore_z_value + if index is not DEFAULT: + kwargs["index"] = index if orientation is not DEFAULT: kwargs["orientation"] = orientation if strategy is not DEFAULT: @@ -3497,8 +3539,18 @@ def __init__( class SemanticText(Field): """ - :arg inference_id: (required) :arg meta: + :arg inference_id: Inference endpoint that will be used to generate + embeddings for the field. This parameter cannot be updated. Use + the Create inference API to create the endpoint. If + `search_inference_id` is specified, the inference endpoint will + only be used at index time. Defaults to `.elser-2-elasticsearch` + if omitted. + :arg search_inference_id: Inference endpoint that will be used to + generate embeddings at query time. You can update this parameter + by using the Update mapping API. Use the Create inference API to + create the endpoint. If not specified, the inference endpoint + defined by inference_id will be used at both index and query time. 
""" name = "semantic_text" @@ -3506,14 +3558,17 @@ class SemanticText(Field): def __init__( self, *args: Any, - inference_id: Union[str, "DefaultType"] = DEFAULT, meta: Union[Mapping[str, str], "DefaultType"] = DEFAULT, + inference_id: Union[str, "DefaultType"] = DEFAULT, + search_inference_id: Union[str, "DefaultType"] = DEFAULT, **kwargs: Any, ): - if inference_id is not DEFAULT: - kwargs["inference_id"] = inference_id if meta is not DEFAULT: kwargs["meta"] = meta + if inference_id is not DEFAULT: + kwargs["inference_id"] = inference_id + if search_inference_id is not DEFAULT: + kwargs["search_inference_id"] = search_inference_id super().__init__(*args, **kwargs) diff --git a/elasticsearch/dsl/types.py b/elasticsearch/dsl/types.py index 4ea6d8361..7474769c6 100644 --- a/elasticsearch/dsl/types.py +++ b/elasticsearch/dsl/types.py @@ -364,34 +364,57 @@ def __init__( class DenseVectorIndexOptions(AttrDict[Any]): """ - :arg type: (required) - :arg m: - :arg ef_construction: - :arg confidence_interval: - """ - - type: Union[str, DefaultType] - m: Union[int, DefaultType] - ef_construction: Union[int, DefaultType] + :arg type: (required) The type of kNN algorithm to use. + :arg confidence_interval: The confidence interval to use when + quantizing the vectors. Can be any value between and including + `0.90` and `1.0` or exactly `0`. When the value is `0`, this + indicates that dynamic quantiles should be calculated for + optimized quantization. When between `0.90` and `1.0`, this value + restricts the values used when calculating the quantization + thresholds. For example, a value of `0.95` will only use the + middle `95%` of the values when calculating the quantization + thresholds (e.g. the highest and lowest `2.5%` of values will be + ignored). Defaults to `1/(dims + 1)` for `int8` quantized vectors + and `0` for `int4` for dynamic quantile calculation. Only + applicable to `int8_hnsw`, `int4_hnsw`, `int8_flat`, and + `int4_flat` index types. 
+ :arg ef_construction: The number of candidates to track while + assembling the list of nearest neighbors for each new node. Only + applicable to `hnsw`, `int8_hnsw`, and `int4_hnsw` index types. + Defaults to `100` if omitted. + :arg m: The number of neighbors each node will be connected to in the + HNSW graph. Only applicable to `hnsw`, `int8_hnsw`, and + `int4_hnsw` index types. Defaults to `16` if omitted. + """ + + type: Union[ + Literal["flat", "hnsw", "int4_flat", "int4_hnsw", "int8_flat", "int8_hnsw"], + DefaultType, + ] confidence_interval: Union[float, DefaultType] + ef_construction: Union[int, DefaultType] + m: Union[int, DefaultType] def __init__( self, *, - type: Union[str, DefaultType] = DEFAULT, - m: Union[int, DefaultType] = DEFAULT, - ef_construction: Union[int, DefaultType] = DEFAULT, + type: Union[ + Literal["flat", "hnsw", "int4_flat", "int4_hnsw", "int8_flat", "int8_hnsw"], + DefaultType, + ] = DEFAULT, confidence_interval: Union[float, DefaultType] = DEFAULT, + ef_construction: Union[int, DefaultType] = DEFAULT, + m: Union[int, DefaultType] = DEFAULT, **kwargs: Any, ): if type is not DEFAULT: kwargs["type"] = type - if m is not DEFAULT: - kwargs["m"] = m - if ef_construction is not DEFAULT: - kwargs["ef_construction"] = ef_construction if confidence_interval is not DEFAULT: kwargs["confidence_interval"] = confidence_interval + if ef_construction is not DEFAULT: + kwargs["ef_construction"] = ef_construction + if m is not DEFAULT: + kwargs["m"] = m super().__init__(kwargs)