diff --git a/src/llama_stack_client/_client.py b/src/llama_stack_client/_client.py index 35d993d8..82353ebd 100644 --- a/src/llama_stack_client/_client.py +++ b/src/llama_stack_client/_client.py @@ -39,6 +39,7 @@ providers, telemetry, vector_io, + benchmarks, eval_tasks, toolgroups, vector_dbs, @@ -94,6 +95,7 @@ class LlamaStackClient(SyncAPIClient): scoring: scoring.ScoringResource scoring_functions: scoring_functions.ScoringFunctionsResource eval_tasks: eval_tasks.EvalTasksResource + benchmarks: benchmarks.BenchmarksResource with_raw_response: LlamaStackClientWithRawResponse with_streaming_response: LlamaStackClientWithStreamedResponse @@ -176,6 +178,7 @@ def __init__( self.scoring = scoring.ScoringResource(self) self.scoring_functions = scoring_functions.ScoringFunctionsResource(self) self.eval_tasks = eval_tasks.EvalTasksResource(self) + self.benchmarks = benchmarks.BenchmarksResource(self) self.with_raw_response = LlamaStackClientWithRawResponse(self) self.with_streaming_response = LlamaStackClientWithStreamedResponse(self) @@ -310,6 +313,7 @@ class AsyncLlamaStackClient(AsyncAPIClient): scoring: scoring.AsyncScoringResource scoring_functions: scoring_functions.AsyncScoringFunctionsResource eval_tasks: eval_tasks.AsyncEvalTasksResource + benchmarks: benchmarks.AsyncBenchmarksResource with_raw_response: AsyncLlamaStackClientWithRawResponse with_streaming_response: AsyncLlamaStackClientWithStreamedResponse @@ -392,6 +396,7 @@ def __init__( self.scoring = scoring.AsyncScoringResource(self) self.scoring_functions = scoring_functions.AsyncScoringFunctionsResource(self) self.eval_tasks = eval_tasks.AsyncEvalTasksResource(self) + self.benchmarks = benchmarks.AsyncBenchmarksResource(self) self.with_raw_response = AsyncLlamaStackClientWithRawResponse(self) self.with_streaming_response = AsyncLlamaStackClientWithStreamedResponse(self) @@ -529,6 +534,7 @@ def __init__(self, client: LlamaStackClient) -> None: self.scoring = scoring.ScoringResourceWithRawResponse(client.scoring) self.scoring_functions = scoring_functions.ScoringFunctionsResourceWithRawResponse(client.scoring_functions) self.eval_tasks = eval_tasks.EvalTasksResourceWithRawResponse(client.eval_tasks) + self.benchmarks = benchmarks.BenchmarksResourceWithRawResponse(client.benchmarks) class AsyncLlamaStackClientWithRawResponse: @@ -560,6 +566,7 @@ def __init__(self, client: AsyncLlamaStackClient) -> None: client.scoring_functions ) self.eval_tasks = eval_tasks.AsyncEvalTasksResourceWithRawResponse(client.eval_tasks) + self.benchmarks = benchmarks.AsyncBenchmarksResourceWithRawResponse(client.benchmarks) class LlamaStackClientWithStreamedResponse: @@ -591,6 +598,7 @@ def __init__(self, client: LlamaStackClient) -> None: client.scoring_functions ) self.eval_tasks = eval_tasks.EvalTasksResourceWithStreamingResponse(client.eval_tasks) + self.benchmarks = benchmarks.BenchmarksResourceWithStreamingResponse(client.benchmarks) class AsyncLlamaStackClientWithStreamedResponse: @@ -624,6 +632,7 @@ def __init__(self, client: AsyncLlamaStackClient) -> None: client.scoring_functions ) self.eval_tasks = eval_tasks.AsyncEvalTasksResourceWithStreamingResponse(client.eval_tasks) + self.benchmarks = benchmarks.AsyncBenchmarksResourceWithStreamingResponse(client.benchmarks) Client = LlamaStackClient diff --git a/src/llama_stack_client/_decoders/jsonl.py b/src/llama_stack_client/_decoders/jsonl.py index e9d29a1c..ac5ac74f 100644 --- a/src/llama_stack_client/_decoders/jsonl.py +++ b/src/llama_stack_client/_decoders/jsonl.py @@ -17,11 +17,15 @@ class 
JSONLDecoder(Generic[_T]): into a given type. """ - http_response: httpx.Response | None + http_response: httpx.Response """The HTTP response this decoder was constructed from""" def __init__( - self, *, raw_iterator: Iterator[bytes], line_type: type[_T], http_response: httpx.Response | None + self, + *, + raw_iterator: Iterator[bytes], + line_type: type[_T], + http_response: httpx.Response, ) -> None: super().__init__() self.http_response = http_response @@ -29,6 +33,13 @@ def __init__( self._line_type = line_type self._iterator = self.__decode__() + def close(self) -> None: + """Close the response body stream. + + This is called automatically if you consume the entire stream. + """ + self.http_response.close() + def __decode__(self) -> Iterator[_T]: buf = b"" for chunk in self._raw_iterator: @@ -63,10 +74,14 @@ class AsyncJSONLDecoder(Generic[_T]): into a given type. """ - http_response: httpx.Response | None + http_response: httpx.Response def __init__( - self, *, raw_iterator: AsyncIterator[bytes], line_type: type[_T], http_response: httpx.Response | None + self, + *, + raw_iterator: AsyncIterator[bytes], + line_type: type[_T], + http_response: httpx.Response, ) -> None: super().__init__() self.http_response = http_response @@ -74,6 +89,13 @@ def __init__( self._line_type = line_type self._iterator = self.__decode__() + async def close(self) -> None: + """Close the response body stream. + + This is called automatically if you consume the entire stream. + """ + await self.http_response.aclose() + async def __decode__(self) -> AsyncIterator[_T]: buf = b"" async for chunk in self._raw_iterator: diff --git a/src/llama_stack_client/_models.py b/src/llama_stack_client/_models.py index 12c34b7d..c4401ff8 100644 --- a/src/llama_stack_client/_models.py +++ b/src/llama_stack_client/_models.py @@ -426,10 +426,16 @@ def construct_type(*, value: object, type_: object) -> object: If the given value does not match the expected type then it is returned as-is. 
""" + + # store a reference to the original type we were given before we extract any inner + # types so that we can properly resolve forward references in `TypeAliasType` annotations + original_type = None + # we allow `object` as the input type because otherwise, passing things like # `Literal['value']` will be reported as a type error by type checkers type_ = cast("type[object]", type_) if is_type_alias_type(type_): + original_type = type_ # type: ignore[unreachable] type_ = type_.__value__ # type: ignore[unreachable] # unwrap `Annotated[T, ...]` -> `T` @@ -446,7 +452,7 @@ def construct_type(*, value: object, type_: object) -> object: if is_union(origin): try: - return validate_type(type_=cast("type[object]", type_), value=value) + return validate_type(type_=cast("type[object]", original_type or type_), value=value) except Exception: pass diff --git a/src/llama_stack_client/_response.py b/src/llama_stack_client/_response.py index d7e58fbe..ea35182f 100644 --- a/src/llama_stack_client/_response.py +++ b/src/llama_stack_client/_response.py @@ -144,7 +144,7 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T: return cast( R, cast("type[JSONLDecoder[Any]]", cast_to)( - raw_iterator=self.http_response.iter_bytes(chunk_size=4096), + raw_iterator=self.http_response.iter_bytes(chunk_size=64), line_type=extract_type_arg(cast_to, 0), http_response=self.http_response, ), @@ -154,7 +154,7 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T: return cast( R, cast("type[AsyncJSONLDecoder[Any]]", cast_to)( - raw_iterator=self.http_response.aiter_bytes(chunk_size=4096), + raw_iterator=self.http_response.aiter_bytes(chunk_size=64), line_type=extract_type_arg(cast_to, 0), http_response=self.http_response, ), diff --git a/src/llama_stack_client/_utils/_transform.py b/src/llama_stack_client/_utils/_transform.py index a6b62cad..18afd9d8 100644 --- a/src/llama_stack_client/_utils/_transform.py +++ b/src/llama_stack_client/_utils/_transform.py @@ -25,7 +25,7 @@ is_annotated_type, strip_annotated_type, ) -from .._compat import model_dump, is_typeddict +from .._compat import get_origin, model_dump, is_typeddict _T = TypeVar("_T") @@ -164,9 +164,14 @@ def _transform_recursive( inner_type = annotation stripped_type = strip_annotated_type(inner_type) + origin = get_origin(stripped_type) or stripped_type if is_typeddict(stripped_type) and is_mapping(data): return _transform_typeddict(data, stripped_type) + if origin == dict and is_mapping(data): + items_type = get_args(stripped_type)[1] + return {key: _transform_recursive(value, annotation=items_type) for key, value in data.items()} + if ( # List[T] (is_list_type(stripped_type) and is_list(data)) @@ -307,9 +312,14 @@ async def _async_transform_recursive( inner_type = annotation stripped_type = strip_annotated_type(inner_type) + origin = get_origin(stripped_type) or stripped_type if is_typeddict(stripped_type) and is_mapping(data): return await _async_transform_typeddict(data, stripped_type) + if origin == dict and is_mapping(data): + items_type = get_args(stripped_type)[1] + return {key: _transform_recursive(value, annotation=items_type) for key, value in data.items()} + if ( # List[T] (is_list_type(stripped_type) and is_list(data)) diff --git a/src/llama_stack_client/resources/__init__.py b/src/llama_stack_client/resources/__init__.py index 42188633..b5e449c9 100644 --- a/src/llama_stack_client/resources/__init__.py +++ b/src/llama_stack_client/resources/__init__.py @@ -120,6 +120,14 @@ VectorIoResourceWithStreamingResponse, 
AsyncVectorIoResourceWithStreamingResponse, ) +from .benchmarks import ( + BenchmarksResource, + AsyncBenchmarksResource, + BenchmarksResourceWithRawResponse, + AsyncBenchmarksResourceWithRawResponse, + BenchmarksResourceWithStreamingResponse, + AsyncBenchmarksResourceWithStreamingResponse, +) from .eval_tasks import ( EvalTasksResource, AsyncEvalTasksResource, @@ -324,4 +332,10 @@ "AsyncEvalTasksResourceWithRawResponse", "EvalTasksResourceWithStreamingResponse", "AsyncEvalTasksResourceWithStreamingResponse", + "BenchmarksResource", + "AsyncBenchmarksResource", + "BenchmarksResourceWithRawResponse", + "AsyncBenchmarksResourceWithRawResponse", + "BenchmarksResourceWithStreamingResponse", + "AsyncBenchmarksResourceWithStreamingResponse", ] diff --git a/src/llama_stack_client/resources/benchmarks.py b/src/llama_stack_client/resources/benchmarks.py new file mode 100644 index 00000000..fe05e518 --- /dev/null +++ b/src/llama_stack_client/resources/benchmarks.py @@ -0,0 +1,328 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, List, Type, Union, Iterable, Optional, cast + +import httpx + +from ..types import benchmark_register_params +from .._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven +from .._utils import ( + maybe_transform, + async_maybe_transform, +) +from .._compat import cached_property +from .._resource import SyncAPIResource, AsyncAPIResource +from .._response import ( + to_raw_response_wrapper, + to_streamed_response_wrapper, + async_to_raw_response_wrapper, + async_to_streamed_response_wrapper, +) +from .._wrappers import DataWrapper +from .._base_client import make_request_options +from ..types.benchmark import Benchmark +from ..types.benchmark_list_response import BenchmarkListResponse + +__all__ = ["BenchmarksResource", "AsyncBenchmarksResource"] + + +class BenchmarksResource(SyncAPIResource): + @cached_property + def with_raw_response(self) -> BenchmarksResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers + """ + return BenchmarksResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> BenchmarksResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response + """ + return BenchmarksResourceWithStreamingResponse(self) + + def retrieve( + self, + benchmark_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
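A short sketch of the raw-response access mentioned in the `with_raw_response` docstrings above. The client construction, server URL, and printed header usage are illustrative assumptions; only the `with_raw_response` property and `.parse()` round-trip come from this patch and the linked README section.

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed server URL

# `.with_raw_response` wraps the same benchmarks methods but returns the raw
# HTTP response; `.parse()` then yields the normally typed result
# (a BenchmarkListResponse for `.list()`).
response = client.benchmarks.with_raw_response.list()
print(response.headers)
benchmarks = response.parse()
```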
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Optional[Benchmark]: + """ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not benchmark_id: + raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}") + return self._get( + f"/v1/eval/benchmarks/{benchmark_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Benchmark, + ) + + def list( + self, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> BenchmarkListResponse: + return self._get( + "/v1/eval/benchmarks", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + post_parser=DataWrapper[BenchmarkListResponse]._unwrapper, + ), + cast_to=cast(Type[BenchmarkListResponse], DataWrapper[BenchmarkListResponse]), + ) + + def register( + self, + *, + benchmark_id: str, + dataset_id: str, + scoring_functions: List[str], + metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, + provider_benchmark_id: str | NotGiven = NOT_GIVEN, + provider_id: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> None: + """ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"Accept": "*/*", **(extra_headers or {})} + return self._post( + "/v1/eval/benchmarks", + body=maybe_transform( + { + "benchmark_id": benchmark_id, + "dataset_id": dataset_id, + "scoring_functions": scoring_functions, + "metadata": metadata, + "provider_benchmark_id": provider_benchmark_id, + "provider_id": provider_id, + }, + benchmark_register_params.BenchmarkRegisterParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=NoneType, + ) + + +class AsyncBenchmarksResource(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncBenchmarksResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
+ + For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers + """ + return AsyncBenchmarksResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncBenchmarksResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response + """ + return AsyncBenchmarksResourceWithStreamingResponse(self) + + async def retrieve( + self, + benchmark_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Optional[Benchmark]: + """ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not benchmark_id: + raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}") + return await self._get( + f"/v1/eval/benchmarks/{benchmark_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Benchmark, + ) + + async def list( + self, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> BenchmarkListResponse: + return await self._get( + "/v1/eval/benchmarks", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + post_parser=DataWrapper[BenchmarkListResponse]._unwrapper, + ), + cast_to=cast(Type[BenchmarkListResponse], DataWrapper[BenchmarkListResponse]), + ) + + async def register( + self, + *, + benchmark_id: str, + dataset_id: str, + scoring_functions: List[str], + metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, + provider_benchmark_id: str | NotGiven = NOT_GIVEN, + provider_id: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> None: + """ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"Accept": "*/*", **(extra_headers or {})} + return await self._post( + "/v1/eval/benchmarks", + body=await async_maybe_transform( + { + "benchmark_id": benchmark_id, + "dataset_id": dataset_id, + "scoring_functions": scoring_functions, + "metadata": metadata, + "provider_benchmark_id": provider_benchmark_id, + "provider_id": provider_id, + }, + benchmark_register_params.BenchmarkRegisterParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=NoneType, + ) + + +class BenchmarksResourceWithRawResponse: + def __init__(self, benchmarks: BenchmarksResource) -> None: + self._benchmarks = benchmarks + + self.retrieve = to_raw_response_wrapper( + benchmarks.retrieve, + ) + self.list = to_raw_response_wrapper( + benchmarks.list, + ) + self.register = to_raw_response_wrapper( + benchmarks.register, + ) + + +class AsyncBenchmarksResourceWithRawResponse: + def __init__(self, benchmarks: AsyncBenchmarksResource) -> None: + self._benchmarks = benchmarks + + self.retrieve = async_to_raw_response_wrapper( + benchmarks.retrieve, + ) + self.list = async_to_raw_response_wrapper( + benchmarks.list, + ) + self.register = async_to_raw_response_wrapper( + benchmarks.register, + ) + + +class BenchmarksResourceWithStreamingResponse: + def __init__(self, benchmarks: BenchmarksResource) -> None: + self._benchmarks = benchmarks + + self.retrieve = to_streamed_response_wrapper( + benchmarks.retrieve, + ) + self.list = to_streamed_response_wrapper( + benchmarks.list, + ) + self.register = to_streamed_response_wrapper( + benchmarks.register, + ) + + +class AsyncBenchmarksResourceWithStreamingResponse: + def __init__(self, benchmarks: AsyncBenchmarksResource) -> None: + self._benchmarks = benchmarks + + self.retrieve = async_to_streamed_response_wrapper( + benchmarks.retrieve, + ) + self.list = async_to_streamed_response_wrapper( + benchmarks.list, + ) + self.register = async_to_streamed_response_wrapper( + benchmarks.register, + ) diff --git a/src/llama_stack_client/resources/datasets.py b/src/llama_stack_client/resources/datasets.py index 59457a45..144769f9 100644 --- a/src/llama_stack_client/resources/datasets.py +++ b/src/llama_stack_client/resources/datasets.py @@ -22,7 +22,6 @@ ) from .._wrappers import DataWrapper from .._base_client import make_request_options -from ..types.shared_params.url import URL from ..types.dataset_list_response import DatasetListResponse from ..types.shared_params.param_type import ParamType from ..types.dataset_retrieve_response import DatasetRetrieveResponse @@ -108,7 +107,7 @@ def register( *, dataset_id: str, dataset_schema: Dict[str, ParamType], - url: URL, + url: dataset_register_params.URL, metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, provider_dataset_id: str | NotGiven = NOT_GIVEN, provider_id: str | NotGiven = NOT_GIVEN, @@ -260,7 +259,7 @@ async def register( *, dataset_id: str, dataset_schema: Dict[str, ParamType], - url: URL, + url: dataset_register_params.URL, 
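Building on the resource defined above, a minimal end-to-end sketch of registering, listing, and retrieving a benchmark. The benchmark/dataset IDs and the scoring-function name are hypothetical, and `client` is the `LlamaStackClient` from the earlier sketch.

```python
# Register a benchmark over an existing dataset (IDs are hypothetical).
client.benchmarks.register(
    benchmark_id="my-benchmark",
    dataset_id="my-dataset",
    scoring_functions=["basic::equality"],
)

# List all registered benchmarks, then fetch one by ID.
for benchmark in client.benchmarks.list():
    print(benchmark.identifier, benchmark.scoring_functions)

retrieved = client.benchmarks.retrieve("my-benchmark")
```

The async client mirrors this surface via `AsyncBenchmarksResource`, with `await` on each call.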
metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, provider_dataset_id: str | NotGiven = NOT_GIVEN, provider_id: str | NotGiven = NOT_GIVEN, diff --git a/src/llama_stack_client/resources/eval/eval.py b/src/llama_stack_client/resources/eval/eval.py index 7795064a..053d2398 100644 --- a/src/llama_stack_client/resources/eval/eval.py +++ b/src/llama_stack_client/resources/eval/eval.py @@ -14,7 +14,12 @@ JobsResourceWithStreamingResponse, AsyncJobsResourceWithStreamingResponse, ) -from ...types import eval_run_eval_params, eval_evaluate_rows_params +from ...types import ( + eval_run_eval_params, + eval_evaluate_rows_params, + eval_run_eval_alpha_params, + eval_evaluate_rows_alpha_params, +) from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven from ..._utils import ( maybe_transform, @@ -31,7 +36,7 @@ from ...types.job import Job from ..._base_client import make_request_options from ...types.evaluate_response import EvaluateResponse -from ...types.eval_task_config_param import EvalTaskConfigParam +from ...types.benchmark_config_param import BenchmarkConfigParam __all__ = ["EvalResource", "AsyncEvalResource"] @@ -66,7 +71,7 @@ def evaluate_rows( *, input_rows: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]], scoring_functions: List[str], - task_config: EvalTaskConfigParam, + task_config: BenchmarkConfigParam, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -102,11 +107,53 @@ def evaluate_rows( cast_to=EvaluateResponse, ) + def evaluate_rows_alpha( + self, + benchmark_id: str, + *, + input_rows: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]], + scoring_functions: List[str], + task_config: BenchmarkConfigParam, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> EvaluateResponse: + """ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not benchmark_id: + raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}") + return self._post( + f"/v1/eval/benchmarks/{benchmark_id}/evaluations", + body=maybe_transform( + { + "input_rows": input_rows, + "scoring_functions": scoring_functions, + "task_config": task_config, + }, + eval_evaluate_rows_alpha_params.EvalEvaluateRowsAlphaParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=EvaluateResponse, + ) + def run_eval( self, task_id: str, *, - task_config: EvalTaskConfigParam, + task_config: BenchmarkConfigParam, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
extra_headers: Headers | None = None, @@ -135,6 +182,39 @@ def run_eval( cast_to=Job, ) + def run_eval_alpha( + self, + benchmark_id: str, + *, + task_config: BenchmarkConfigParam, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Job: + """ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not benchmark_id: + raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}") + return self._post( + f"/v1/eval/benchmarks/{benchmark_id}/jobs", + body=maybe_transform({"task_config": task_config}, eval_run_eval_alpha_params.EvalRunEvalAlphaParams), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Job, + ) + class AsyncEvalResource(AsyncAPIResource): @cached_property @@ -166,7 +246,7 @@ async def evaluate_rows( *, input_rows: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]], scoring_functions: List[str], - task_config: EvalTaskConfigParam, + task_config: BenchmarkConfigParam, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -202,11 +282,53 @@ async def evaluate_rows( cast_to=EvaluateResponse, ) + async def evaluate_rows_alpha( + self, + benchmark_id: str, + *, + input_rows: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]], + scoring_functions: List[str], + task_config: BenchmarkConfigParam, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> EvaluateResponse: + """ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not benchmark_id: + raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}") + return await self._post( + f"/v1/eval/benchmarks/{benchmark_id}/evaluations", + body=await async_maybe_transform( + { + "input_rows": input_rows, + "scoring_functions": scoring_functions, + "task_config": task_config, + }, + eval_evaluate_rows_alpha_params.EvalEvaluateRowsAlphaParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=EvaluateResponse, + ) + async def run_eval( self, task_id: str, *, - task_config: EvalTaskConfigParam, + task_config: BenchmarkConfigParam, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -235,6 +357,41 @@ async def run_eval( cast_to=Job, ) + async def run_eval_alpha( + self, + benchmark_id: str, + *, + task_config: BenchmarkConfigParam, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Job: + """ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not benchmark_id: + raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}") + return await self._post( + f"/v1/eval/benchmarks/{benchmark_id}/jobs", + body=await async_maybe_transform( + {"task_config": task_config}, eval_run_eval_alpha_params.EvalRunEvalAlphaParams + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Job, + ) + class EvalResourceWithRawResponse: def __init__(self, eval: EvalResource) -> None: @@ -243,9 +400,15 @@ def __init__(self, eval: EvalResource) -> None: self.evaluate_rows = to_raw_response_wrapper( eval.evaluate_rows, ) + self.evaluate_rows_alpha = to_raw_response_wrapper( + eval.evaluate_rows_alpha, + ) self.run_eval = to_raw_response_wrapper( eval.run_eval, ) + self.run_eval_alpha = to_raw_response_wrapper( + eval.run_eval_alpha, + ) @cached_property def jobs(self) -> JobsResourceWithRawResponse: @@ -259,9 +422,15 @@ def __init__(self, eval: AsyncEvalResource) -> None: self.evaluate_rows = async_to_raw_response_wrapper( eval.evaluate_rows, ) + self.evaluate_rows_alpha = async_to_raw_response_wrapper( + eval.evaluate_rows_alpha, + ) self.run_eval = async_to_raw_response_wrapper( eval.run_eval, ) + self.run_eval_alpha = async_to_raw_response_wrapper( + eval.run_eval_alpha, + ) @cached_property def jobs(self) -> AsyncJobsResourceWithRawResponse: @@ -275,9 +444,15 @@ def __init__(self, eval: EvalResource) -> None: self.evaluate_rows = to_streamed_response_wrapper( eval.evaluate_rows, ) + self.evaluate_rows_alpha = to_streamed_response_wrapper( + eval.evaluate_rows_alpha, + ) self.run_eval = to_streamed_response_wrapper( eval.run_eval, ) + self.run_eval_alpha = to_streamed_response_wrapper( + eval.run_eval_alpha, + ) @cached_property def jobs(self) -> JobsResourceWithStreamingResponse: @@ -291,9 +466,15 @@ def __init__(self, eval: AsyncEvalResource) -> None: self.evaluate_rows = async_to_streamed_response_wrapper( eval.evaluate_rows, ) + self.evaluate_rows_alpha = async_to_streamed_response_wrapper( + eval.evaluate_rows_alpha, + ) self.run_eval = async_to_streamed_response_wrapper( eval.run_eval, ) + self.run_eval_alpha = async_to_streamed_response_wrapper( + eval.run_eval_alpha, + ) @cached_property def jobs(self) -> AsyncJobsResourceWithStreamingResponse: diff --git a/src/llama_stack_client/resources/eval/jobs.py b/src/llama_stack_client/resources/eval/jobs.py index ba3e0777..d4d13f42 100644 --- a/src/llama_stack_client/resources/eval/jobs.py +++ b/src/llama_stack_client/resources/eval/jobs.py @@ -46,7 +46,7 @@ def retrieve( self, job_id: str, *, - task_id: str, + benchmark_id: str, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
extra_headers: Headers | None = None, @@ -64,12 +64,12 @@ def retrieve( timeout: Override the client-level default timeout for this request, in seconds """ - if not task_id: - raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}") + if not benchmark_id: + raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}") if not job_id: raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}") return self._get( - f"/v1/eval/tasks/{task_id}/jobs/{job_id}/result", + f"/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -80,7 +80,7 @@ def cancel( self, job_id: str, *, - task_id: str, + benchmark_id: str, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -98,13 +98,13 @@ def cancel( timeout: Override the client-level default timeout for this request, in seconds """ - if not task_id: - raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}") + if not benchmark_id: + raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}") if not job_id: raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}") extra_headers = {"Accept": "*/*", **(extra_headers or {})} return self._delete( - f"/v1/eval/tasks/{task_id}/jobs/{job_id}", + f"/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -115,7 +115,7 @@ def status( self, job_id: str, *, - task_id: str, + benchmark_id: str, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -133,12 +133,12 @@ def status( timeout: Override the client-level default timeout for this request, in seconds """ - if not task_id: - raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}") + if not benchmark_id: + raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}") if not job_id: raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}") return self._get( - f"/v1/eval/tasks/{task_id}/jobs/{job_id}", + f"/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -170,7 +170,7 @@ async def retrieve( self, job_id: str, *, - task_id: str, + benchmark_id: str, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
extra_headers: Headers | None = None, @@ -188,12 +188,12 @@ async def retrieve( timeout: Override the client-level default timeout for this request, in seconds """ - if not task_id: - raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}") + if not benchmark_id: + raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}") if not job_id: raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}") return await self._get( - f"/v1/eval/tasks/{task_id}/jobs/{job_id}/result", + f"/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -204,7 +204,7 @@ async def cancel( self, job_id: str, *, - task_id: str, + benchmark_id: str, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -222,13 +222,13 @@ async def cancel( timeout: Override the client-level default timeout for this request, in seconds """ - if not task_id: - raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}") + if not benchmark_id: + raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}") if not job_id: raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}") extra_headers = {"Accept": "*/*", **(extra_headers or {})} return await self._delete( - f"/v1/eval/tasks/{task_id}/jobs/{job_id}", + f"/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -239,7 +239,7 @@ async def status( self, job_id: str, *, - task_id: str, + benchmark_id: str, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
extra_headers: Headers | None = None, @@ -257,12 +257,12 @@ async def status( timeout: Override the client-level default timeout for this request, in seconds """ - if not task_id: - raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}") + if not benchmark_id: + raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}") if not job_id: raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}") return await self._get( - f"/v1/eval/tasks/{task_id}/jobs/{job_id}", + f"/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), diff --git a/src/llama_stack_client/resources/eval_tasks.py b/src/llama_stack_client/resources/eval_tasks.py index 82a07839..40dbe8f2 100644 --- a/src/llama_stack_client/resources/eval_tasks.py +++ b/src/llama_stack_client/resources/eval_tasks.py @@ -22,8 +22,8 @@ ) from .._wrappers import DataWrapper from .._base_client import make_request_options -from ..types.eval_task import EvalTask -from ..types.eval_task_list_response import EvalTaskListResponse +from ..types.benchmark import Benchmark +from ..types.benchmark_list_response import BenchmarkListResponse __all__ = ["EvalTasksResource", "AsyncEvalTasksResource"] @@ -58,7 +58,7 @@ def retrieve( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Optional[EvalTask]: + ) -> Optional[Benchmark]: """ Args: extra_headers: Send extra headers @@ -76,7 +76,7 @@ def retrieve( options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=EvalTask, + cast_to=Benchmark, ) def list( @@ -88,7 +88,7 @@ def list( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> EvalTaskListResponse: + ) -> BenchmarkListResponse: return self._get( "/v1/eval-tasks", options=make_request_options( @@ -96,9 +96,9 @@ def list( extra_query=extra_query, extra_body=extra_body, timeout=timeout, - post_parser=DataWrapper[EvalTaskListResponse]._unwrapper, + post_parser=DataWrapper[BenchmarkListResponse]._unwrapper, ), - cast_to=cast(Type[EvalTaskListResponse], DataWrapper[EvalTaskListResponse]), + cast_to=cast(Type[BenchmarkListResponse], DataWrapper[BenchmarkListResponse]), ) def register( @@ -108,7 +108,7 @@ def register( eval_task_id: str, scoring_functions: List[str], metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - provider_eval_task_id: str | NotGiven = NOT_GIVEN, + provider_benchmark_id: str | NotGiven = NOT_GIVEN, provider_id: str | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
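A sketch tying together the new `*_alpha` eval methods and the benchmark-scoped job endpoints above. Only the top-level `BenchmarkConfigParam` keys (`type`, `eval_candidate`, `scoring_params`) come from this patch; the model-candidate payload, model name, and benchmark ID are assumptions.

```python
# Start an evaluation run against a registered benchmark. The eval_candidate
# body is an assumed model candidate, not something defined in this patch.
job = client.eval.run_eval_alpha(
    "my-benchmark",
    task_config={
        "type": "benchmark",
        "eval_candidate": {
            "type": "model",
            "model": "meta-llama/Llama-3.1-8B-Instruct",
            "sampling_params": {"strategy": {"type": "greedy"}},
        },
        "scoring_params": {},
    },
)

# Job endpoints are now keyed by benchmark_id instead of task_id.
# `job.job_id` is the identifier on the returned Job model.
print(client.eval.jobs.status(job.job_id, benchmark_id="my-benchmark"))
result = client.eval.jobs.retrieve(job.job_id, benchmark_id="my-benchmark")
```

`evaluate_rows_alpha` takes the same `task_config` plus `input_rows` and `scoring_functions` for row-level evaluation against the benchmark.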
@@ -136,7 +136,7 @@ def register( "eval_task_id": eval_task_id, "scoring_functions": scoring_functions, "metadata": metadata, - "provider_eval_task_id": provider_eval_task_id, + "provider_benchmark_id": provider_benchmark_id, "provider_id": provider_id, }, eval_task_register_params.EvalTaskRegisterParams, @@ -178,7 +178,7 @@ async def retrieve( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Optional[EvalTask]: + ) -> Optional[Benchmark]: """ Args: extra_headers: Send extra headers @@ -196,7 +196,7 @@ async def retrieve( options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=EvalTask, + cast_to=Benchmark, ) async def list( @@ -208,7 +208,7 @@ async def list( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> EvalTaskListResponse: + ) -> BenchmarkListResponse: return await self._get( "/v1/eval-tasks", options=make_request_options( @@ -216,9 +216,9 @@ async def list( extra_query=extra_query, extra_body=extra_body, timeout=timeout, - post_parser=DataWrapper[EvalTaskListResponse]._unwrapper, + post_parser=DataWrapper[BenchmarkListResponse]._unwrapper, ), - cast_to=cast(Type[EvalTaskListResponse], DataWrapper[EvalTaskListResponse]), + cast_to=cast(Type[BenchmarkListResponse], DataWrapper[BenchmarkListResponse]), ) async def register( @@ -228,7 +228,7 @@ async def register( eval_task_id: str, scoring_functions: List[str], metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - provider_eval_task_id: str | NotGiven = NOT_GIVEN, + provider_benchmark_id: str | NotGiven = NOT_GIVEN, provider_id: str | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -256,7 +256,7 @@ async def register( "eval_task_id": eval_task_id, "scoring_functions": scoring_functions, "metadata": metadata, - "provider_eval_task_id": provider_eval_task_id, + "provider_benchmark_id": provider_benchmark_id, "provider_id": provider_id, }, eval_task_register_params.EvalTaskRegisterParams, diff --git a/src/llama_stack_client/resources/tool_runtime/tool_runtime.py b/src/llama_stack_client/resources/tool_runtime/tool_runtime.py index f3b92a74..2bd7347b 100644 --- a/src/llama_stack_client/resources/tool_runtime/tool_runtime.py +++ b/src/llama_stack_client/resources/tool_runtime/tool_runtime.py @@ -31,7 +31,6 @@ from ..._base_client import make_request_options from ...types.tool_def import ToolDef from ..._decoders.jsonl import JSONLDecoder, AsyncJSONLDecoder -from ...types.shared_params.url import URL from ...types.tool_invocation_result import ToolInvocationResult __all__ = ["ToolRuntimeResource", "AsyncToolRuntimeResource"] @@ -103,7 +102,7 @@ def invoke_tool( def list_tools( self, *, - mcp_endpoint: URL | NotGiven = NOT_GIVEN, + mcp_endpoint: tool_runtime_list_tools_params.McpEndpoint | NotGiven = NOT_GIVEN, tool_group_id: str | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
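With the shared `URL` type removed, the MCP endpoint for `tool_runtime.list_tools` is passed inline. The sketch below assumes the new `McpEndpoint` param follows the same `{"uri": ...}` shape as the other inlined URL params in this patch; the toolgroup ID and endpoint URL are made up.

```python
# List tools exposed by an MCP server; mcp_endpoint is now a plain dict.
tools = client.tool_runtime.list_tools(
    tool_group_id="mcp::my-tools",
    mcp_endpoint={"uri": "http://localhost:8000/sse"},
)
for tool in tools:
    print(tool)
```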
@@ -209,7 +208,7 @@ async def invoke_tool( async def list_tools( self, *, - mcp_endpoint: URL | NotGiven = NOT_GIVEN, + mcp_endpoint: tool_runtime_list_tools_params.McpEndpoint | NotGiven = NOT_GIVEN, tool_group_id: str | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. diff --git a/src/llama_stack_client/resources/toolgroups.py b/src/llama_stack_client/resources/toolgroups.py index 234be628..6a9b79d0 100644 --- a/src/llama_stack_client/resources/toolgroups.py +++ b/src/llama_stack_client/resources/toolgroups.py @@ -23,7 +23,6 @@ from .._wrappers import DataWrapper from .._base_client import make_request_options from ..types.tool_group import ToolGroup -from ..types.shared_params.url import URL from ..types.toolgroup_list_response import ToolgroupListResponse __all__ = ["ToolgroupsResource", "AsyncToolgroupsResource"] @@ -109,7 +108,7 @@ def register( provider_id: str, toolgroup_id: str, args: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - mcp_endpoint: URL | NotGiven = NOT_GIVEN, + mcp_endpoint: toolgroup_register_params.McpEndpoint | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -262,7 +261,7 @@ async def register( provider_id: str, toolgroup_id: str, args: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - mcp_endpoint: URL | NotGiven = NOT_GIVEN, + mcp_endpoint: toolgroup_register_params.McpEndpoint | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
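Similarly for tool groups, a hedged sketch of registering an MCP-backed toolgroup with the inlined endpoint dict; the provider ID, toolgroup ID, and endpoint URL are illustrative, not values from this patch.

```python
# Register a toolgroup backed by an MCP server (illustrative IDs).
client.toolgroups.register(
    toolgroup_id="mcp::my-tools",
    provider_id="model-context-protocol",
    mcp_endpoint={"uri": "http://localhost:8000/sse"},
)
```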
extra_headers: Headers | None = None, diff --git a/src/llama_stack_client/types/__init__.py b/src/llama_stack_client/types/__init__.py index 45824a74..bc94eb13 100644 --- a/src/llama_stack_client/types/__init__.py +++ b/src/llama_stack_client/types/__init__.py @@ -7,7 +7,6 @@ from .model import Model as Model from .trace import Trace as Trace from .shared import ( - URL as URL, Message as Message, Document as Document, ToolCall as ToolCall, @@ -24,6 +23,7 @@ SamplingParams as SamplingParams, BatchCompletion as BatchCompletion, SafetyViolation as SafetyViolation, + ToolCallOrString as ToolCallOrString, CompletionMessage as CompletionMessage, InterleavedContent as InterleavedContent, ToolParamDefinition as ToolParamDefinition, @@ -34,7 +34,7 @@ ) from .shield import Shield as Shield from .tool_def import ToolDef as ToolDef -from .eval_task import EvalTask as EvalTask +from .benchmark import Benchmark as Benchmark from .route_info import RouteInfo as RouteInfo from .scoring_fn import ScoringFn as ScoringFn from .tool_group import ToolGroup as ToolGroup @@ -77,13 +77,14 @@ from .query_chunks_response import QueryChunksResponse as QueryChunksResponse from .query_condition_param import QueryConditionParam as QueryConditionParam from .algorithm_config_param import AlgorithmConfigParam as AlgorithmConfigParam -from .eval_task_config_param import EvalTaskConfigParam as EvalTaskConfigParam +from .benchmark_config_param import BenchmarkConfigParam as BenchmarkConfigParam from .list_datasets_response import ListDatasetsResponse as ListDatasetsResponse from .provider_list_response import ProviderListResponse as ProviderListResponse from .scoring_score_response import ScoringScoreResponse as ScoringScoreResponse from .shield_register_params import ShieldRegisterParams as ShieldRegisterParams from .tool_invocation_result import ToolInvocationResult as ToolInvocationResult from .vector_io_query_params import VectorIoQueryParams as VectorIoQueryParams +from .benchmark_list_response import BenchmarkListResponse as BenchmarkListResponse from .dataset_register_params import DatasetRegisterParams as DatasetRegisterParams from .eval_task_list_response import EvalTaskListResponse as EvalTaskListResponse from .list_providers_response import ListProvidersResponse as ListProvidersResponse @@ -91,15 +92,17 @@ from .toolgroup_list_response import ToolgroupListResponse as ToolgroupListResponse from .vector_db_list_response import VectorDBListResponse as VectorDBListResponse from .vector_io_insert_params import VectorIoInsertParams as VectorIoInsertParams -from .list_eval_tasks_response import ListEvalTasksResponse as ListEvalTasksResponse +from .list_benchmarks_response import ListBenchmarksResponse as ListBenchmarksResponse from .list_vector_dbs_response import ListVectorDBsResponse as ListVectorDBsResponse from .safety_run_shield_params import SafetyRunShieldParams as SafetyRunShieldParams +from .benchmark_register_params import BenchmarkRegisterParams as BenchmarkRegisterParams from .dataset_retrieve_response import DatasetRetrieveResponse as DatasetRetrieveResponse from .eval_evaluate_rows_params import EvalEvaluateRowsParams as EvalEvaluateRowsParams from .eval_task_register_params import EvalTaskRegisterParams as EvalTaskRegisterParams from .list_tool_groups_response import ListToolGroupsResponse as ListToolGroupsResponse from .toolgroup_register_params import ToolgroupRegisterParams as ToolgroupRegisterParams from .vector_db_register_params import VectorDBRegisterParams as VectorDBRegisterParams +from 
.eval_run_eval_alpha_params import EvalRunEvalAlphaParams as EvalRunEvalAlphaParams from .scoring_score_batch_params import ScoringScoreBatchParams as ScoringScoreBatchParams from .telemetry_log_event_params import TelemetryLogEventParams as TelemetryLogEventParams from .inference_completion_params import InferenceCompletionParams as InferenceCompletionParams @@ -115,6 +118,7 @@ from .telemetry_get_span_tree_params import TelemetryGetSpanTreeParams as TelemetryGetSpanTreeParams from .telemetry_query_spans_response import TelemetryQuerySpansResponse as TelemetryQuerySpansResponse from .tool_runtime_list_tools_params import ToolRuntimeListToolsParams as ToolRuntimeListToolsParams +from .eval_evaluate_rows_alpha_params import EvalEvaluateRowsAlphaParams as EvalEvaluateRowsAlphaParams from .list_scoring_functions_response import ListScoringFunctionsResponse as ListScoringFunctionsResponse from .telemetry_query_traces_response import TelemetryQueryTracesResponse as TelemetryQueryTracesResponse from .tool_runtime_invoke_tool_params import ToolRuntimeInvokeToolParams as ToolRuntimeInvokeToolParams diff --git a/src/llama_stack_client/types/agents/turn.py b/src/llama_stack_client/types/agents/turn.py index 2ead7bbe..408d6446 100644 --- a/src/llama_stack_client/types/agents/turn.py +++ b/src/llama_stack_client/types/agents/turn.py @@ -6,7 +6,6 @@ from ..._utils import PropertyInfo from ..._models import BaseModel -from ..shared.url import URL from ..inference_step import InferenceStep from ..shield_call_step import ShieldCallStep from ..shared.user_message import UserMessage @@ -24,7 +23,9 @@ "OutputAttachmentContent", "OutputAttachmentContentImageContentItem", "OutputAttachmentContentImageContentItemImage", + "OutputAttachmentContentImageContentItemImageURL", "OutputAttachmentContentTextContentItem", + "OutputAttachmentContentURL", ] InputMessage: TypeAlias = Union[UserMessage, ToolResponseMessage] @@ -35,11 +36,15 @@ ] +class OutputAttachmentContentImageContentItemImageURL(BaseModel): + uri: str + + class OutputAttachmentContentImageContentItemImage(BaseModel): data: Optional[str] = None """base64 encoded image data as string""" - url: Optional[URL] = None + url: Optional[OutputAttachmentContentImageContentItemImageURL] = None """A URL of the image or data URL in the format of data:image/{type};base64,{data}. Note that URL could have length limits. @@ -62,12 +67,16 @@ class OutputAttachmentContentTextContentItem(BaseModel): """Discriminator type of the content item. 
Always "text" """ +class OutputAttachmentContentURL(BaseModel): + uri: str + + OutputAttachmentContent: TypeAlias = Union[ str, OutputAttachmentContentImageContentItem, OutputAttachmentContentTextContentItem, List[InterleavedContentItem], - URL, + OutputAttachmentContentURL, ] diff --git a/src/llama_stack_client/types/agents/turn_create_params.py b/src/llama_stack_client/types/agents/turn_create_params.py index fee300dd..92040b56 100644 --- a/src/llama_stack_client/types/agents/turn_create_params.py +++ b/src/llama_stack_client/types/agents/turn_create_params.py @@ -5,7 +5,6 @@ from typing import Dict, List, Union, Iterable from typing_extensions import Literal, Required, TypeAlias, TypedDict -from ..shared_params.url import URL from ..shared_params.user_message import UserMessage from ..shared_params.tool_response_message import ToolResponseMessage from ..shared_params.interleaved_content_item import InterleavedContentItem @@ -17,7 +16,9 @@ "DocumentContent", "DocumentContentImageContentItem", "DocumentContentImageContentItemImage", + "DocumentContentImageContentItemImageURL", "DocumentContentTextContentItem", + "DocumentContentURL", "ToolConfig", "Toolgroup", "ToolgroupUnionMember1", @@ -42,11 +43,15 @@ class TurnCreateParamsBase(TypedDict, total=False): Message: TypeAlias = Union[UserMessage, ToolResponseMessage] +class DocumentContentImageContentItemImageURL(TypedDict, total=False): + uri: Required[str] + + class DocumentContentImageContentItemImage(TypedDict, total=False): data: str """base64 encoded image data as string""" - url: URL + url: DocumentContentImageContentItemImageURL """A URL of the image or data URL in the format of data:image/{type};base64,{data}. Note that URL could have length limits. @@ -69,8 +74,16 @@ class DocumentContentTextContentItem(TypedDict, total=False): """Discriminator type of the content item. Always "text" """ +class DocumentContentURL(TypedDict, total=False): + uri: Required[str] + + DocumentContent: TypeAlias = Union[ - str, DocumentContentImageContentItem, DocumentContentTextContentItem, Iterable[InterleavedContentItem], URL + str, + DocumentContentImageContentItem, + DocumentContentTextContentItem, + Iterable[InterleavedContentItem], + DocumentContentURL, ] diff --git a/src/llama_stack_client/types/benchmark.py b/src/llama_stack_client/types/benchmark.py new file mode 100644 index 00000000..3af66f6a --- /dev/null +++ b/src/llama_stack_client/types/benchmark.py @@ -0,0 +1,24 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, List, Union +from typing_extensions import Literal + +from .._models import BaseModel + +__all__ = ["Benchmark"] + + +class Benchmark(BaseModel): + dataset_id: str + + identifier: str + + metadata: Dict[str, Union[bool, float, str, List[object], object, None]] + + provider_id: str + + provider_resource_id: str + + scoring_functions: List[str] + + type: Literal["benchmark"] diff --git a/src/llama_stack_client/types/benchmark_config_param.py b/src/llama_stack_client/types/benchmark_config_param.py new file mode 100644 index 00000000..48090c5f --- /dev/null +++ b/src/llama_stack_client/types/benchmark_config_param.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import Dict +from typing_extensions import Literal, Required, TypedDict + +from .eval_candidate_param import EvalCandidateParam +from .scoring_fn_params_param import ScoringFnParamsParam + +__all__ = ["BenchmarkConfigParam"] + + +class BenchmarkConfigParam(TypedDict, total=False): + eval_candidate: Required[EvalCandidateParam] + + scoring_params: Required[Dict[str, ScoringFnParamsParam]] + + type: Required[Literal["benchmark"]] + + num_examples: int diff --git a/src/llama_stack_client/types/benchmark_list_response.py b/src/llama_stack_client/types/benchmark_list_response.py new file mode 100644 index 00000000..b2e8ad2b --- /dev/null +++ b/src/llama_stack_client/types/benchmark_list_response.py @@ -0,0 +1,10 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List +from typing_extensions import TypeAlias + +from .benchmark import Benchmark + +__all__ = ["BenchmarkListResponse"] + +BenchmarkListResponse: TypeAlias = List[Benchmark] diff --git a/src/llama_stack_client/types/benchmark_register_params.py b/src/llama_stack_client/types/benchmark_register_params.py new file mode 100644 index 00000000..def970a1 --- /dev/null +++ b/src/llama_stack_client/types/benchmark_register_params.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, List, Union, Iterable +from typing_extensions import Required, TypedDict + +__all__ = ["BenchmarkRegisterParams"] + + +class BenchmarkRegisterParams(TypedDict, total=False): + benchmark_id: Required[str] + + dataset_id: Required[str] + + scoring_functions: Required[List[str]] + + metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]] + + provider_benchmark_id: str + + provider_id: str diff --git a/src/llama_stack_client/types/chat_completion_response_stream_chunk.py b/src/llama_stack_client/types/chat_completion_response_stream_chunk.py index 99916add..f032c233 100644 --- a/src/llama_stack_client/types/chat_completion_response_stream_chunk.py +++ b/src/llama_stack_client/types/chat_completion_response_stream_chunk.py @@ -1,13 +1,14 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
-from typing import List, Optional +from typing import Dict, List, Union, Optional +from datetime import datetime from typing_extensions import Literal from .._models import BaseModel from .token_log_probs import TokenLogProbs from .shared.content_delta import ContentDelta -__all__ = ["ChatCompletionResponseStreamChunk", "Event"] +__all__ = ["ChatCompletionResponseStreamChunk", "Event", "Metric"] class Event(BaseModel): @@ -27,6 +28,26 @@ class Event(BaseModel): """Optional reason why generation stopped, if complete""" +class Metric(BaseModel): + metric: str + + span_id: str + + timestamp: datetime + + trace_id: str + + type: Literal["metric"] + + unit: str + + value: float + + attributes: Optional[Dict[str, Union[str, float, bool, None]]] = None + + class ChatCompletionResponseStreamChunk(BaseModel): event: Event """The event containing the new content""" + + metrics: Optional[List[Metric]] = None diff --git a/src/llama_stack_client/types/dataset_list_response.py b/src/llama_stack_client/types/dataset_list_response.py index 0051669b..1dc2afa4 100644 --- a/src/llama_stack_client/types/dataset_list_response.py +++ b/src/llama_stack_client/types/dataset_list_response.py @@ -4,10 +4,13 @@ from typing_extensions import Literal, TypeAlias from .._models import BaseModel -from .shared.url import URL from .shared.param_type import ParamType -__all__ = ["DatasetListResponse", "DatasetListResponseItem"] +__all__ = ["DatasetListResponse", "DatasetListResponseItem", "DatasetListResponseItemURL"] + + +class DatasetListResponseItemURL(BaseModel): + uri: str class DatasetListResponseItem(BaseModel): @@ -23,7 +26,7 @@ class DatasetListResponseItem(BaseModel): type: Literal["dataset"] - url: URL + url: DatasetListResponseItemURL DatasetListResponse: TypeAlias = List[DatasetListResponseItem] diff --git a/src/llama_stack_client/types/dataset_register_params.py b/src/llama_stack_client/types/dataset_register_params.py index 853485a6..1c1cf234 100644 --- a/src/llama_stack_client/types/dataset_register_params.py +++ b/src/llama_stack_client/types/dataset_register_params.py @@ -5,10 +5,9 @@ from typing import Dict, Union, Iterable from typing_extensions import Required, TypedDict -from .shared_params.url import URL from .shared_params.param_type import ParamType -__all__ = ["DatasetRegisterParams"] +__all__ = ["DatasetRegisterParams", "URL"] class DatasetRegisterParams(TypedDict, total=False): @@ -23,3 +22,7 @@ class DatasetRegisterParams(TypedDict, total=False): provider_dataset_id: str provider_id: str + + +class URL(TypedDict, total=False): + uri: Required[str] diff --git a/src/llama_stack_client/types/dataset_retrieve_response.py b/src/llama_stack_client/types/dataset_retrieve_response.py index 31d7ab33..bd819a56 100644 --- a/src/llama_stack_client/types/dataset_retrieve_response.py +++ b/src/llama_stack_client/types/dataset_retrieve_response.py @@ -4,10 +4,13 @@ from typing_extensions import Literal from .._models import BaseModel -from .shared.url import URL from .shared.param_type import ParamType -__all__ = ["DatasetRetrieveResponse"] +__all__ = ["DatasetRetrieveResponse", "URL"] + + +class URL(BaseModel): + uri: str class DatasetRetrieveResponse(BaseModel): diff --git a/src/llama_stack_client/types/eval_evaluate_rows_alpha_params.py b/src/llama_stack_client/types/eval_evaluate_rows_alpha_params.py new file mode 100644 index 00000000..9758e814 --- /dev/null +++ b/src/llama_stack_client/types/eval_evaluate_rows_alpha_params.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. 
See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, List, Union, Iterable +from typing_extensions import Required, TypedDict + +from .benchmark_config_param import BenchmarkConfigParam + +__all__ = ["EvalEvaluateRowsAlphaParams"] + + +class EvalEvaluateRowsAlphaParams(TypedDict, total=False): + input_rows: Required[Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]]] + + scoring_functions: Required[List[str]] + + task_config: Required[BenchmarkConfigParam] diff --git a/src/llama_stack_client/types/eval_evaluate_rows_params.py b/src/llama_stack_client/types/eval_evaluate_rows_params.py index 065764b5..86cdde00 100644 --- a/src/llama_stack_client/types/eval_evaluate_rows_params.py +++ b/src/llama_stack_client/types/eval_evaluate_rows_params.py @@ -5,7 +5,7 @@ from typing import Dict, List, Union, Iterable from typing_extensions import Required, TypedDict -from .eval_task_config_param import EvalTaskConfigParam +from .benchmark_config_param import BenchmarkConfigParam __all__ = ["EvalEvaluateRowsParams"] @@ -15,4 +15,4 @@ class EvalEvaluateRowsParams(TypedDict, total=False): scoring_functions: Required[List[str]] - task_config: Required[EvalTaskConfigParam] + task_config: Required[BenchmarkConfigParam] diff --git a/src/llama_stack_client/types/eval_run_eval_alpha_params.py b/src/llama_stack_client/types/eval_run_eval_alpha_params.py new file mode 100644 index 00000000..3ca2521a --- /dev/null +++ b/src/llama_stack_client/types/eval_run_eval_alpha_params.py @@ -0,0 +1,13 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Required, TypedDict + +from .benchmark_config_param import BenchmarkConfigParam + +__all__ = ["EvalRunEvalAlphaParams"] + + +class EvalRunEvalAlphaParams(TypedDict, total=False): + task_config: Required[BenchmarkConfigParam] diff --git a/src/llama_stack_client/types/eval_run_eval_params.py b/src/llama_stack_client/types/eval_run_eval_params.py index 9ee91af8..a5715f29 100644 --- a/src/llama_stack_client/types/eval_run_eval_params.py +++ b/src/llama_stack_client/types/eval_run_eval_params.py @@ -4,10 +4,10 @@ from typing_extensions import Required, TypedDict -from .eval_task_config_param import EvalTaskConfigParam +from .benchmark_config_param import BenchmarkConfigParam __all__ = ["EvalRunEvalParams"] class EvalRunEvalParams(TypedDict, total=False): - task_config: Required[EvalTaskConfigParam] + task_config: Required[BenchmarkConfigParam] diff --git a/src/llama_stack_client/types/eval_task_list_response.py b/src/llama_stack_client/types/eval_task_list_response.py index 11646563..c1260673 100644 --- a/src/llama_stack_client/types/eval_task_list_response.py +++ b/src/llama_stack_client/types/eval_task_list_response.py @@ -3,8 +3,8 @@ from typing import List from typing_extensions import TypeAlias -from .eval_task import EvalTask +from .benchmark import Benchmark __all__ = ["EvalTaskListResponse"] -EvalTaskListResponse: TypeAlias = List[EvalTask] +EvalTaskListResponse: TypeAlias = List[Benchmark] diff --git a/src/llama_stack_client/types/eval_task_register_params.py b/src/llama_stack_client/types/eval_task_register_params.py index 417bc2cd..26934c67 100644 --- a/src/llama_stack_client/types/eval_task_register_params.py +++ b/src/llama_stack_client/types/eval_task_register_params.py @@ -17,6 +17,6 @@ class EvalTaskRegisterParams(TypedDict, total=False): metadata: Dict[str, Union[bool, float, str, 
Iterable[object], object, None]] - provider_eval_task_id: str + provider_benchmark_id: str provider_id: str diff --git a/src/llama_stack_client/types/event_param.py b/src/llama_stack_client/types/event_param.py index 7505d6f7..500e4a24 100644 --- a/src/llama_stack_client/types/event_param.py +++ b/src/llama_stack_client/types/event_param.py @@ -2,7 +2,7 @@ from __future__ import annotations -from typing import Dict, Union, Iterable +from typing import Dict, Union from datetime import datetime from typing_extensions import Literal, Required, Annotated, TypeAlias, TypedDict @@ -32,7 +32,7 @@ class UnstructuredLogEvent(TypedDict, total=False): type: Required[Literal["unstructured_log"]] - attributes: Dict[str, Union[bool, float, str, Iterable[object], object, None]] + attributes: Dict[str, Union[str, float, bool, None]] class MetricEvent(TypedDict, total=False): @@ -50,7 +50,7 @@ class MetricEvent(TypedDict, total=False): value: Required[float] - attributes: Dict[str, Union[bool, float, str, Iterable[object], object, None]] + attributes: Dict[str, Union[str, float, bool, None]] class StructuredLogEventPayloadSpanStartPayload(TypedDict, total=False): @@ -83,7 +83,7 @@ class StructuredLogEvent(TypedDict, total=False): type: Required[Literal["structured_log"]] - attributes: Dict[str, Union[bool, float, str, Iterable[object], object, None]] + attributes: Dict[str, Union[str, float, bool, None]] EventParam: TypeAlias = Union[UnstructuredLogEvent, MetricEvent, StructuredLogEvent] diff --git a/src/llama_stack_client/types/list_benchmarks_response.py b/src/llama_stack_client/types/list_benchmarks_response.py new file mode 100644 index 00000000..4185f3d1 --- /dev/null +++ b/src/llama_stack_client/types/list_benchmarks_response.py @@ -0,0 +1,11 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + + +from .._models import BaseModel +from .benchmark_list_response import BenchmarkListResponse + +__all__ = ["ListBenchmarksResponse"] + + +class ListBenchmarksResponse(BaseModel): + data: BenchmarkListResponse diff --git a/src/llama_stack_client/types/shared/__init__.py b/src/llama_stack_client/types/shared/__init__.py index 075a664d..0fe46810 100644 --- a/src/llama_stack_client/types/shared/__init__.py +++ b/src/llama_stack_client/types/shared/__init__.py @@ -1,6 +1,5 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from .url import URL as URL from .message import Message as Message from .document import Document as Document from .tool_call import ToolCall as ToolCall @@ -19,6 +18,7 @@ from .safety_violation import SafetyViolation as SafetyViolation from .completion_message import CompletionMessage as CompletionMessage from .interleaved_content import InterleavedContent as InterleavedContent +from .tool_call_or_string import ToolCallOrString as ToolCallOrString from .tool_param_definition import ToolParamDefinition as ToolParamDefinition from .tool_response_message import ToolResponseMessage as ToolResponseMessage from .query_generator_config import QueryGeneratorConfig as QueryGeneratorConfig diff --git a/src/llama_stack_client/types/shared/chat_completion_response.py b/src/llama_stack_client/types/shared/chat_completion_response.py index e8c5071e..2d0002a9 100644 --- a/src/llama_stack_client/types/shared/chat_completion_response.py +++ b/src/llama_stack_client/types/shared/chat_completion_response.py @@ -1,12 +1,32 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
-from typing import List, Optional +from typing import Dict, List, Union, Optional +from datetime import datetime +from typing_extensions import Literal from ..._models import BaseModel from ..token_log_probs import TokenLogProbs from .completion_message import CompletionMessage -__all__ = ["ChatCompletionResponse"] +__all__ = ["ChatCompletionResponse", "Metric"] + + +class Metric(BaseModel): + metric: str + + span_id: str + + timestamp: datetime + + trace_id: str + + type: Literal["metric"] + + unit: str + + value: float + + attributes: Optional[Dict[str, Union[str, float, bool, None]]] = None class ChatCompletionResponse(BaseModel): @@ -15,3 +35,5 @@ class ChatCompletionResponse(BaseModel): logprobs: Optional[List[TokenLogProbs]] = None """Optional log probabilities for generated tokens""" + + metrics: Optional[List[Metric]] = None diff --git a/src/llama_stack_client/types/shared/content_delta.py b/src/llama_stack_client/types/shared/content_delta.py index 18207c75..ae036ad8 100644 --- a/src/llama_stack_client/types/shared/content_delta.py +++ b/src/llama_stack_client/types/shared/content_delta.py @@ -5,9 +5,9 @@ from ..._utils import PropertyInfo from ..._models import BaseModel -from .tool_call import ToolCall +from .tool_call_or_string import ToolCallOrString -__all__ = ["ContentDelta", "TextDelta", "ImageDelta", "ToolCallDelta", "ToolCallDeltaToolCall"] +__all__ = ["ContentDelta", "TextDelta", "ImageDelta", "ToolCallDelta"] class TextDelta(BaseModel): @@ -22,13 +22,10 @@ class ImageDelta(BaseModel): type: Literal["image"] -ToolCallDeltaToolCall: TypeAlias = Union[str, ToolCall] - - class ToolCallDelta(BaseModel): parse_status: Literal["started", "in_progress", "failed", "succeeded"] - tool_call: ToolCallDeltaToolCall + tool_call: ToolCallOrString type: Literal["tool_call"] diff --git a/src/llama_stack_client/types/shared/document.py b/src/llama_stack_client/types/shared/document.py index 1282bd0a..b9bfa898 100644 --- a/src/llama_stack_client/types/shared/document.py +++ b/src/llama_stack_client/types/shared/document.py @@ -3,18 +3,29 @@ from typing import Dict, List, Union, Optional from typing_extensions import Literal, TypeAlias -from .url import URL from ..._models import BaseModel from .interleaved_content_item import InterleavedContentItem -__all__ = ["Document", "Content", "ContentImageContentItem", "ContentImageContentItemImage", "ContentTextContentItem"] +__all__ = [ + "Document", + "Content", + "ContentImageContentItem", + "ContentImageContentItemImage", + "ContentImageContentItemImageURL", + "ContentTextContentItem", + "ContentURL", +] + + +class ContentImageContentItemImageURL(BaseModel): + uri: str class ContentImageContentItemImage(BaseModel): data: Optional[str] = None """base64 encoded image data as string""" - url: Optional[URL] = None + url: Optional[ContentImageContentItemImageURL] = None """A URL of the image or data URL in the format of data:image/{type};base64,{data}. Note that URL could have length limits. @@ -37,7 +48,13 @@ class ContentTextContentItem(BaseModel): """Discriminator type of the content item. 
Always "text" """ -Content: TypeAlias = Union[str, ContentImageContentItem, ContentTextContentItem, List[InterleavedContentItem], URL] +class ContentURL(BaseModel): + uri: str + + +Content: TypeAlias = Union[ + str, ContentImageContentItem, ContentTextContentItem, List[InterleavedContentItem], ContentURL +] class Document(BaseModel): diff --git a/src/llama_stack_client/types/shared/interleaved_content.py b/src/llama_stack_client/types/shared/interleaved_content.py index 02a9b43e..dc496150 100644 --- a/src/llama_stack_client/types/shared/interleaved_content.py +++ b/src/llama_stack_client/types/shared/interleaved_content.py @@ -3,18 +3,27 @@ from typing import List, Union, Optional from typing_extensions import Literal, TypeAlias -from .url import URL from ..._models import BaseModel from .interleaved_content_item import InterleavedContentItem -__all__ = ["InterleavedContent", "ImageContentItem", "ImageContentItemImage", "TextContentItem"] +__all__ = [ + "InterleavedContent", + "ImageContentItem", + "ImageContentItemImage", + "ImageContentItemImageURL", + "TextContentItem", +] + + +class ImageContentItemImageURL(BaseModel): + uri: str class ImageContentItemImage(BaseModel): data: Optional[str] = None """base64 encoded image data as string""" - url: Optional[URL] = None + url: Optional[ImageContentItemImageURL] = None """A URL of the image or data URL in the format of data:image/{type};base64,{data}. Note that URL could have length limits. diff --git a/src/llama_stack_client/types/shared/interleaved_content_item.py b/src/llama_stack_client/types/shared/interleaved_content_item.py index c7030b1c..8a3238b8 100644 --- a/src/llama_stack_client/types/shared/interleaved_content_item.py +++ b/src/llama_stack_client/types/shared/interleaved_content_item.py @@ -3,18 +3,27 @@ from typing import Union, Optional from typing_extensions import Literal, Annotated, TypeAlias -from .url import URL from ..._utils import PropertyInfo from ..._models import BaseModel -__all__ = ["InterleavedContentItem", "ImageContentItem", "ImageContentItemImage", "TextContentItem"] +__all__ = [ + "InterleavedContentItem", + "ImageContentItem", + "ImageContentItemImage", + "ImageContentItemImageURL", + "TextContentItem", +] + + +class ImageContentItemImageURL(BaseModel): + uri: str class ImageContentItemImage(BaseModel): data: Optional[str] = None """base64 encoded image data as string""" - url: Optional[URL] = None + url: Optional[ImageContentItemImageURL] = None """A URL of the image or data URL in the format of data:image/{type};base64,{data}. Note that URL could have length limits. diff --git a/src/llama_stack_client/types/shared/tool_call_or_string.py b/src/llama_stack_client/types/shared/tool_call_or_string.py new file mode 100644 index 00000000..f52a0d98 --- /dev/null +++ b/src/llama_stack_client/types/shared/tool_call_or_string.py @@ -0,0 +1,10 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import TypeAlias + +from .tool_call import ToolCall + +__all__ = ["ToolCallOrString"] + +ToolCallOrString: TypeAlias = Union[str, ToolCall] diff --git a/src/llama_stack_client/types/shared_params/__init__.py b/src/llama_stack_client/types/shared_params/__init__.py index 8c2041a6..d647c238 100644 --- a/src/llama_stack_client/types/shared_params/__init__.py +++ b/src/llama_stack_client/types/shared_params/__init__.py @@ -1,6 +1,5 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
-from .url import URL as URL from .message import Message as Message from .document import Document as Document from .tool_call import ToolCall as ToolCall diff --git a/src/llama_stack_client/types/shared_params/document.py b/src/llama_stack_client/types/shared_params/document.py index fd464554..fd3c3df1 100644 --- a/src/llama_stack_client/types/shared_params/document.py +++ b/src/llama_stack_client/types/shared_params/document.py @@ -5,17 +5,28 @@ from typing import Dict, Union, Iterable from typing_extensions import Literal, Required, TypeAlias, TypedDict -from .url import URL from .interleaved_content_item import InterleavedContentItem -__all__ = ["Document", "Content", "ContentImageContentItem", "ContentImageContentItemImage", "ContentTextContentItem"] +__all__ = [ + "Document", + "Content", + "ContentImageContentItem", + "ContentImageContentItemImage", + "ContentImageContentItemImageURL", + "ContentTextContentItem", + "ContentURL", +] + + +class ContentImageContentItemImageURL(TypedDict, total=False): + uri: Required[str] class ContentImageContentItemImage(TypedDict, total=False): data: str """base64 encoded image data as string""" - url: URL + url: ContentImageContentItemImageURL """A URL of the image or data URL in the format of data:image/{type};base64,{data}. Note that URL could have length limits. @@ -38,7 +49,13 @@ class ContentTextContentItem(TypedDict, total=False): """Discriminator type of the content item. Always "text" """ -Content: TypeAlias = Union[str, ContentImageContentItem, ContentTextContentItem, Iterable[InterleavedContentItem], URL] +class ContentURL(TypedDict, total=False): + uri: Required[str] + + +Content: TypeAlias = Union[ + str, ContentImageContentItem, ContentTextContentItem, Iterable[InterleavedContentItem], ContentURL +] class Document(TypedDict, total=False): diff --git a/src/llama_stack_client/types/shared_params/interleaved_content.py b/src/llama_stack_client/types/shared_params/interleaved_content.py index 8d5605fb..5d045a20 100644 --- a/src/llama_stack_client/types/shared_params/interleaved_content.py +++ b/src/llama_stack_client/types/shared_params/interleaved_content.py @@ -5,17 +5,26 @@ from typing import Union, Iterable from typing_extensions import Literal, Required, TypeAlias, TypedDict -from .url import URL from .interleaved_content_item import InterleavedContentItem -__all__ = ["InterleavedContent", "ImageContentItem", "ImageContentItemImage", "TextContentItem"] +__all__ = [ + "InterleavedContent", + "ImageContentItem", + "ImageContentItemImage", + "ImageContentItemImageURL", + "TextContentItem", +] + + +class ImageContentItemImageURL(TypedDict, total=False): + uri: Required[str] class ImageContentItemImage(TypedDict, total=False): data: str """base64 encoded image data as string""" - url: URL + url: ImageContentItemImageURL """A URL of the image or data URL in the format of data:image/{type};base64,{data}. Note that URL could have length limits. 
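
Note on usage (not part of the generated diff): with the shared URL model removed, request and response types now carry URLs as inline {"uri": ...} objects. A minimal sketch of passing an image by URL with the updated shape; the server address, model id, and image location are placeholders, and the call assumes an inference provider is configured on the server:

    from llama_stack_client import LlamaStackClient

    client = LlamaStackClient(base_url="http://localhost:5001")  # placeholder server address

    response = client.inference.chat_completion(
        model_id="meta-llama/Llama-3.2-11B-Vision-Instruct",  # placeholder model id
        messages=[
            {
                "role": "user",
                "content": [
                    # The image URL is now a plain {"uri": ...} dict rather than the removed shared URL type.
                    {"type": "image", "image": {"url": {"uri": "https://example.com/cat.png"}}},
                    {"type": "text", "text": "Describe this image."},
                ],
            }
        ],
    )
    print(response.completion_message.content)

The new benchmarks resource and the benchmark-flavored eval config added in this change can be exercised the same way; a sketch of registering a benchmark and starting an eval run, where all identifiers, the judge model, and the candidate model are placeholders rather than values taken from this diff:

    # Register a benchmark over an existing dataset (ids and scoring function names are placeholders).
    client.benchmarks.register(
        benchmark_id="my-benchmark",
        dataset_id="my-dataset",
        scoring_functions=["my-scoring-fn"],
        metadata={"owner": "eval-team"},
    )

    # Each entry returned by list() is a Benchmark with type == "benchmark".
    for benchmark in client.benchmarks.list():
        print(benchmark.identifier, benchmark.dataset_id, benchmark.scoring_functions)

    # Kick off an eval; scoring_params is now a required part of the benchmark config.
    job = client.eval.run_eval_alpha(
        benchmark_id="my-benchmark",
        task_config={
            "type": "benchmark",
            "eval_candidate": {
                "type": "model",
                "model": "meta-llama/Llama-3.1-8B-Instruct",  # placeholder candidate model
                "sampling_params": {"strategy": {"type": "greedy"}},
            },
            "scoring_params": {
                "my-scoring-fn": {
                    "type": "llm_as_judge",
                    "judge_model": "meta-llama/Llama-3.1-8B-Instruct",  # placeholder judge model
                }
            },
        },
    )
    # Poll the job using the renamed benchmark_id path parameter (assumes Job exposes job_id).
    print(client.eval.jobs.status(job_id=job.job_id, benchmark_id="my-benchmark"))
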
diff --git a/src/llama_stack_client/types/shared_params/interleaved_content_item.py b/src/llama_stack_client/types/shared_params/interleaved_content_item.py index acb7e6f1..b5c0bcc1 100644 --- a/src/llama_stack_client/types/shared_params/interleaved_content_item.py +++ b/src/llama_stack_client/types/shared_params/interleaved_content_item.py @@ -5,16 +5,24 @@ from typing import Union from typing_extensions import Literal, Required, TypeAlias, TypedDict -from .url import URL +__all__ = [ + "InterleavedContentItem", + "ImageContentItem", + "ImageContentItemImage", + "ImageContentItemImageURL", + "TextContentItem", +] -__all__ = ["InterleavedContentItem", "ImageContentItem", "ImageContentItemImage", "TextContentItem"] + +class ImageContentItemImageURL(TypedDict, total=False): + uri: Required[str] class ImageContentItemImage(TypedDict, total=False): data: str """base64 encoded image data as string""" - url: URL + url: ImageContentItemImageURL """A URL of the image or data URL in the format of data:image/{type};base64,{data}. Note that URL could have length limits. diff --git a/src/llama_stack_client/types/tool_group.py b/src/llama_stack_client/types/tool_group.py index 82d2e057..480d1942 100644 --- a/src/llama_stack_client/types/tool_group.py +++ b/src/llama_stack_client/types/tool_group.py @@ -4,9 +4,12 @@ from typing_extensions import Literal from .._models import BaseModel -from .shared.url import URL -__all__ = ["ToolGroup"] +__all__ = ["ToolGroup", "McpEndpoint"] + + +class McpEndpoint(BaseModel): + uri: str class ToolGroup(BaseModel): @@ -20,4 +23,4 @@ class ToolGroup(BaseModel): args: Optional[Dict[str, Union[bool, float, str, List[object], object, None]]] = None - mcp_endpoint: Optional[URL] = None + mcp_endpoint: Optional[McpEndpoint] = None diff --git a/src/llama_stack_client/types/tool_runtime_list_tools_params.py b/src/llama_stack_client/types/tool_runtime_list_tools_params.py index 7db74244..99da7533 100644 --- a/src/llama_stack_client/types/tool_runtime_list_tools_params.py +++ b/src/llama_stack_client/types/tool_runtime_list_tools_params.py @@ -2,14 +2,16 @@ from __future__ import annotations -from typing_extensions import TypedDict +from typing_extensions import Required, TypedDict -from .shared_params.url import URL - -__all__ = ["ToolRuntimeListToolsParams"] +__all__ = ["ToolRuntimeListToolsParams", "McpEndpoint"] class ToolRuntimeListToolsParams(TypedDict, total=False): - mcp_endpoint: URL + mcp_endpoint: McpEndpoint tool_group_id: str + + +class McpEndpoint(TypedDict, total=False): + uri: Required[str] diff --git a/src/llama_stack_client/types/toolgroup_register_params.py b/src/llama_stack_client/types/toolgroup_register_params.py index 1184be85..8cb7af7f 100644 --- a/src/llama_stack_client/types/toolgroup_register_params.py +++ b/src/llama_stack_client/types/toolgroup_register_params.py @@ -5,9 +5,7 @@ from typing import Dict, Union, Iterable from typing_extensions import Required, TypedDict -from .shared_params.url import URL - -__all__ = ["ToolgroupRegisterParams"] +__all__ = ["ToolgroupRegisterParams", "McpEndpoint"] class ToolgroupRegisterParams(TypedDict, total=False): @@ -17,4 +15,8 @@ class ToolgroupRegisterParams(TypedDict, total=False): args: Dict[str, Union[bool, float, str, Iterable[object], object, None]] - mcp_endpoint: URL + mcp_endpoint: McpEndpoint + + +class McpEndpoint(TypedDict, total=False): + uri: Required[str] diff --git a/tests/api_resources/eval/test_jobs.py b/tests/api_resources/eval/test_jobs.py index beb290a0..f9b85a08 100644 --- 
a/tests/api_resources/eval/test_jobs.py +++ b/tests/api_resources/eval/test_jobs.py @@ -22,7 +22,7 @@ class TestJobs: def test_method_retrieve(self, client: LlamaStackClient) -> None: job = client.eval.jobs.retrieve( job_id="job_id", - task_id="task_id", + benchmark_id="benchmark_id", ) assert_matches_type(EvaluateResponse, job, path=["response"]) @@ -30,7 +30,7 @@ def test_method_retrieve(self, client: LlamaStackClient) -> None: def test_raw_response_retrieve(self, client: LlamaStackClient) -> None: response = client.eval.jobs.with_raw_response.retrieve( job_id="job_id", - task_id="task_id", + benchmark_id="benchmark_id", ) assert response.is_closed is True @@ -42,7 +42,7 @@ def test_raw_response_retrieve(self, client: LlamaStackClient) -> None: def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None: with client.eval.jobs.with_streaming_response.retrieve( job_id="job_id", - task_id="task_id", + benchmark_id="benchmark_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -54,23 +54,23 @@ def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None: @parametrize def test_path_params_retrieve(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `task_id` but received ''"): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): client.eval.jobs.with_raw_response.retrieve( job_id="job_id", - task_id="", + benchmark_id="", ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_id` but received ''"): client.eval.jobs.with_raw_response.retrieve( job_id="", - task_id="task_id", + benchmark_id="benchmark_id", ) @parametrize def test_method_cancel(self, client: LlamaStackClient) -> None: job = client.eval.jobs.cancel( job_id="job_id", - task_id="task_id", + benchmark_id="benchmark_id", ) assert job is None @@ -78,7 +78,7 @@ def test_method_cancel(self, client: LlamaStackClient) -> None: def test_raw_response_cancel(self, client: LlamaStackClient) -> None: response = client.eval.jobs.with_raw_response.cancel( job_id="job_id", - task_id="task_id", + benchmark_id="benchmark_id", ) assert response.is_closed is True @@ -90,7 +90,7 @@ def test_raw_response_cancel(self, client: LlamaStackClient) -> None: def test_streaming_response_cancel(self, client: LlamaStackClient) -> None: with client.eval.jobs.with_streaming_response.cancel( job_id="job_id", - task_id="task_id", + benchmark_id="benchmark_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -102,23 +102,23 @@ def test_streaming_response_cancel(self, client: LlamaStackClient) -> None: @parametrize def test_path_params_cancel(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `task_id` but received ''"): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): client.eval.jobs.with_raw_response.cancel( job_id="job_id", - task_id="", + benchmark_id="", ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_id` but received ''"): client.eval.jobs.with_raw_response.cancel( job_id="", - task_id="task_id", + benchmark_id="benchmark_id", ) @parametrize def test_method_status(self, client: LlamaStackClient) -> None: job = client.eval.jobs.status( job_id="job_id", - task_id="task_id", + benchmark_id="benchmark_id", ) 
assert_matches_type(Optional[JobStatusResponse], job, path=["response"]) @@ -126,7 +126,7 @@ def test_method_status(self, client: LlamaStackClient) -> None: def test_raw_response_status(self, client: LlamaStackClient) -> None: response = client.eval.jobs.with_raw_response.status( job_id="job_id", - task_id="task_id", + benchmark_id="benchmark_id", ) assert response.is_closed is True @@ -138,7 +138,7 @@ def test_raw_response_status(self, client: LlamaStackClient) -> None: def test_streaming_response_status(self, client: LlamaStackClient) -> None: with client.eval.jobs.with_streaming_response.status( job_id="job_id", - task_id="task_id", + benchmark_id="benchmark_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -150,16 +150,16 @@ def test_streaming_response_status(self, client: LlamaStackClient) -> None: @parametrize def test_path_params_status(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `task_id` but received ''"): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): client.eval.jobs.with_raw_response.status( job_id="job_id", - task_id="", + benchmark_id="", ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_id` but received ''"): client.eval.jobs.with_raw_response.status( job_id="", - task_id="task_id", + benchmark_id="benchmark_id", ) @@ -170,7 +170,7 @@ class TestAsyncJobs: async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None: job = await async_client.eval.jobs.retrieve( job_id="job_id", - task_id="task_id", + benchmark_id="benchmark_id", ) assert_matches_type(EvaluateResponse, job, path=["response"]) @@ -178,7 +178,7 @@ async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> Non async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: response = await async_client.eval.jobs.with_raw_response.retrieve( job_id="job_id", - task_id="task_id", + benchmark_id="benchmark_id", ) assert response.is_closed is True @@ -190,7 +190,7 @@ async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: async with async_client.eval.jobs.with_streaming_response.retrieve( job_id="job_id", - task_id="task_id", + benchmark_id="benchmark_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -202,23 +202,23 @@ async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackCl @parametrize async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `task_id` but received ''"): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): await async_client.eval.jobs.with_raw_response.retrieve( job_id="job_id", - task_id="", + benchmark_id="", ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_id` but received ''"): await async_client.eval.jobs.with_raw_response.retrieve( job_id="", - task_id="task_id", + benchmark_id="benchmark_id", ) @parametrize async def test_method_cancel(self, async_client: AsyncLlamaStackClient) -> None: job = await async_client.eval.jobs.cancel( job_id="job_id", - task_id="task_id", + benchmark_id="benchmark_id", ) assert job 
is None @@ -226,7 +226,7 @@ async def test_method_cancel(self, async_client: AsyncLlamaStackClient) -> None: async def test_raw_response_cancel(self, async_client: AsyncLlamaStackClient) -> None: response = await async_client.eval.jobs.with_raw_response.cancel( job_id="job_id", - task_id="task_id", + benchmark_id="benchmark_id", ) assert response.is_closed is True @@ -238,7 +238,7 @@ async def test_raw_response_cancel(self, async_client: AsyncLlamaStackClient) -> async def test_streaming_response_cancel(self, async_client: AsyncLlamaStackClient) -> None: async with async_client.eval.jobs.with_streaming_response.cancel( job_id="job_id", - task_id="task_id", + benchmark_id="benchmark_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -250,23 +250,23 @@ async def test_streaming_response_cancel(self, async_client: AsyncLlamaStackClie @parametrize async def test_path_params_cancel(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `task_id` but received ''"): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): await async_client.eval.jobs.with_raw_response.cancel( job_id="job_id", - task_id="", + benchmark_id="", ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_id` but received ''"): await async_client.eval.jobs.with_raw_response.cancel( job_id="", - task_id="task_id", + benchmark_id="benchmark_id", ) @parametrize async def test_method_status(self, async_client: AsyncLlamaStackClient) -> None: job = await async_client.eval.jobs.status( job_id="job_id", - task_id="task_id", + benchmark_id="benchmark_id", ) assert_matches_type(Optional[JobStatusResponse], job, path=["response"]) @@ -274,7 +274,7 @@ async def test_method_status(self, async_client: AsyncLlamaStackClient) -> None: async def test_raw_response_status(self, async_client: AsyncLlamaStackClient) -> None: response = await async_client.eval.jobs.with_raw_response.status( job_id="job_id", - task_id="task_id", + benchmark_id="benchmark_id", ) assert response.is_closed is True @@ -286,7 +286,7 @@ async def test_raw_response_status(self, async_client: AsyncLlamaStackClient) -> async def test_streaming_response_status(self, async_client: AsyncLlamaStackClient) -> None: async with async_client.eval.jobs.with_streaming_response.status( job_id="job_id", - task_id="task_id", + benchmark_id="benchmark_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -298,14 +298,14 @@ async def test_streaming_response_status(self, async_client: AsyncLlamaStackClie @parametrize async def test_path_params_status(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `task_id` but received ''"): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): await async_client.eval.jobs.with_raw_response.status( job_id="job_id", - task_id="", + benchmark_id="", ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_id` but received ''"): await async_client.eval.jobs.with_raw_response.status( job_id="", - task_id="task_id", + benchmark_id="benchmark_id", ) diff --git a/tests/api_resources/test_benchmarks.py b/tests/api_resources/test_benchmarks.py new file mode 100644 index 00000000..03aceead --- /dev/null +++ 
b/tests/api_resources/test_benchmarks.py @@ -0,0 +1,246 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os +from typing import Any, Optional, cast + +import pytest + +from tests.utils import assert_matches_type +from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient +from llama_stack_client.types import Benchmark, BenchmarkListResponse + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestBenchmarks: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_retrieve(self, client: LlamaStackClient) -> None: + benchmark = client.benchmarks.retrieve( + "benchmark_id", + ) + assert_matches_type(Optional[Benchmark], benchmark, path=["response"]) + + @parametrize + def test_raw_response_retrieve(self, client: LlamaStackClient) -> None: + response = client.benchmarks.with_raw_response.retrieve( + "benchmark_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + benchmark = response.parse() + assert_matches_type(Optional[Benchmark], benchmark, path=["response"]) + + @parametrize + def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None: + with client.benchmarks.with_streaming_response.retrieve( + "benchmark_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + benchmark = response.parse() + assert_matches_type(Optional[Benchmark], benchmark, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_retrieve(self, client: LlamaStackClient) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): + client.benchmarks.with_raw_response.retrieve( + "", + ) + + @parametrize + def test_method_list(self, client: LlamaStackClient) -> None: + benchmark = client.benchmarks.list() + assert_matches_type(BenchmarkListResponse, benchmark, path=["response"]) + + @parametrize + def test_raw_response_list(self, client: LlamaStackClient) -> None: + response = client.benchmarks.with_raw_response.list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + benchmark = response.parse() + assert_matches_type(BenchmarkListResponse, benchmark, path=["response"]) + + @parametrize + def test_streaming_response_list(self, client: LlamaStackClient) -> None: + with client.benchmarks.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + benchmark = response.parse() + assert_matches_type(BenchmarkListResponse, benchmark, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_register(self, client: LlamaStackClient) -> None: + benchmark = client.benchmarks.register( + benchmark_id="benchmark_id", + dataset_id="dataset_id", + scoring_functions=["string"], + ) + assert benchmark is None + + @parametrize + def test_method_register_with_all_params(self, client: LlamaStackClient) -> None: + benchmark = client.benchmarks.register( + benchmark_id="benchmark_id", + dataset_id="dataset_id", + scoring_functions=["string"], + metadata={"foo": True}, + provider_benchmark_id="provider_benchmark_id", + provider_id="provider_id", + ) + assert 
benchmark is None + + @parametrize + def test_raw_response_register(self, client: LlamaStackClient) -> None: + response = client.benchmarks.with_raw_response.register( + benchmark_id="benchmark_id", + dataset_id="dataset_id", + scoring_functions=["string"], + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + benchmark = response.parse() + assert benchmark is None + + @parametrize + def test_streaming_response_register(self, client: LlamaStackClient) -> None: + with client.benchmarks.with_streaming_response.register( + benchmark_id="benchmark_id", + dataset_id="dataset_id", + scoring_functions=["string"], + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + benchmark = response.parse() + assert benchmark is None + + assert cast(Any, response.is_closed) is True + + +class TestAsyncBenchmarks: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None: + benchmark = await async_client.benchmarks.retrieve( + "benchmark_id", + ) + assert_matches_type(Optional[Benchmark], benchmark, path=["response"]) + + @parametrize + async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: + response = await async_client.benchmarks.with_raw_response.retrieve( + "benchmark_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + benchmark = await response.parse() + assert_matches_type(Optional[Benchmark], benchmark, path=["response"]) + + @parametrize + async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: + async with async_client.benchmarks.with_streaming_response.retrieve( + "benchmark_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + benchmark = await response.parse() + assert_matches_type(Optional[Benchmark], benchmark, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): + await async_client.benchmarks.with_raw_response.retrieve( + "", + ) + + @parametrize + async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None: + benchmark = await async_client.benchmarks.list() + assert_matches_type(BenchmarkListResponse, benchmark, path=["response"]) + + @parametrize + async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None: + response = await async_client.benchmarks.with_raw_response.list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + benchmark = await response.parse() + assert_matches_type(BenchmarkListResponse, benchmark, path=["response"]) + + @parametrize + async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient) -> None: + async with async_client.benchmarks.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + benchmark = await response.parse() + assert_matches_type(BenchmarkListResponse, benchmark, path=["response"]) + + assert cast(Any, 
response.is_closed) is True + + @parametrize + async def test_method_register(self, async_client: AsyncLlamaStackClient) -> None: + benchmark = await async_client.benchmarks.register( + benchmark_id="benchmark_id", + dataset_id="dataset_id", + scoring_functions=["string"], + ) + assert benchmark is None + + @parametrize + async def test_method_register_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: + benchmark = await async_client.benchmarks.register( + benchmark_id="benchmark_id", + dataset_id="dataset_id", + scoring_functions=["string"], + metadata={"foo": True}, + provider_benchmark_id="provider_benchmark_id", + provider_id="provider_id", + ) + assert benchmark is None + + @parametrize + async def test_raw_response_register(self, async_client: AsyncLlamaStackClient) -> None: + response = await async_client.benchmarks.with_raw_response.register( + benchmark_id="benchmark_id", + dataset_id="dataset_id", + scoring_functions=["string"], + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + benchmark = await response.parse() + assert benchmark is None + + @parametrize + async def test_streaming_response_register(self, async_client: AsyncLlamaStackClient) -> None: + async with async_client.benchmarks.with_streaming_response.register( + benchmark_id="benchmark_id", + dataset_id="dataset_id", + scoring_functions=["string"], + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + benchmark = await response.parse() + assert benchmark is None + + assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/test_eval.py b/tests/api_resources/test_eval.py index 52556bf2..de5d0cac 100644 --- a/tests/api_resources/test_eval.py +++ b/tests/api_resources/test_eval.py @@ -32,6 +32,12 @@ def test_method_evaluate_rows(self, client: LlamaStackClient) -> None: "sampling_params": {"strategy": {"type": "greedy"}}, "type": "model", }, + "scoring_params": { + "foo": { + "judge_model": "judge_model", + "type": "llm_as_judge", + } + }, "type": "benchmark", }, ) @@ -57,6 +63,15 @@ def test_method_evaluate_rows_with_all_params(self, client: LlamaStackClient) -> "role": "system", }, }, + "scoring_params": { + "foo": { + "judge_model": "judge_model", + "type": "llm_as_judge", + "aggregation_functions": ["average"], + "judge_score_regexes": ["string"], + "prompt_template": "prompt_template", + } + }, "type": "benchmark", "num_examples": 0, }, @@ -75,6 +90,12 @@ def test_raw_response_evaluate_rows(self, client: LlamaStackClient) -> None: "sampling_params": {"strategy": {"type": "greedy"}}, "type": "model", }, + "scoring_params": { + "foo": { + "judge_model": "judge_model", + "type": "llm_as_judge", + } + }, "type": "benchmark", }, ) @@ -96,6 +117,12 @@ def test_streaming_response_evaluate_rows(self, client: LlamaStackClient) -> Non "sampling_params": {"strategy": {"type": "greedy"}}, "type": "model", }, + "scoring_params": { + "foo": { + "judge_model": "judge_model", + "type": "llm_as_judge", + } + }, "type": "benchmark", }, ) as response: @@ -120,6 +147,149 @@ def test_path_params_evaluate_rows(self, client: LlamaStackClient) -> None: "sampling_params": {"strategy": {"type": "greedy"}}, "type": "model", }, + "scoring_params": { + "foo": { + "judge_model": "judge_model", + "type": "llm_as_judge", + } + }, + "type": "benchmark", + }, + ) + + @parametrize + def test_method_evaluate_rows_alpha(self, client: LlamaStackClient) -> None: + eval = 
client.eval.evaluate_rows_alpha( + benchmark_id="benchmark_id", + input_rows=[{"foo": True}], + scoring_functions=["string"], + task_config={ + "eval_candidate": { + "model": "model", + "sampling_params": {"strategy": {"type": "greedy"}}, + "type": "model", + }, + "scoring_params": { + "foo": { + "judge_model": "judge_model", + "type": "llm_as_judge", + } + }, + "type": "benchmark", + }, + ) + assert_matches_type(EvaluateResponse, eval, path=["response"]) + + @parametrize + def test_method_evaluate_rows_alpha_with_all_params(self, client: LlamaStackClient) -> None: + eval = client.eval.evaluate_rows_alpha( + benchmark_id="benchmark_id", + input_rows=[{"foo": True}], + scoring_functions=["string"], + task_config={ + "eval_candidate": { + "model": "model", + "sampling_params": { + "strategy": {"type": "greedy"}, + "max_tokens": 0, + "repetition_penalty": 0, + }, + "type": "model", + "system_message": { + "content": "string", + "role": "system", + }, + }, + "scoring_params": { + "foo": { + "judge_model": "judge_model", + "type": "llm_as_judge", + "aggregation_functions": ["average"], + "judge_score_regexes": ["string"], + "prompt_template": "prompt_template", + } + }, + "type": "benchmark", + "num_examples": 0, + }, + ) + assert_matches_type(EvaluateResponse, eval, path=["response"]) + + @parametrize + def test_raw_response_evaluate_rows_alpha(self, client: LlamaStackClient) -> None: + response = client.eval.with_raw_response.evaluate_rows_alpha( + benchmark_id="benchmark_id", + input_rows=[{"foo": True}], + scoring_functions=["string"], + task_config={ + "eval_candidate": { + "model": "model", + "sampling_params": {"strategy": {"type": "greedy"}}, + "type": "model", + }, + "scoring_params": { + "foo": { + "judge_model": "judge_model", + "type": "llm_as_judge", + } + }, + "type": "benchmark", + }, + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + eval = response.parse() + assert_matches_type(EvaluateResponse, eval, path=["response"]) + + @parametrize + def test_streaming_response_evaluate_rows_alpha(self, client: LlamaStackClient) -> None: + with client.eval.with_streaming_response.evaluate_rows_alpha( + benchmark_id="benchmark_id", + input_rows=[{"foo": True}], + scoring_functions=["string"], + task_config={ + "eval_candidate": { + "model": "model", + "sampling_params": {"strategy": {"type": "greedy"}}, + "type": "model", + }, + "scoring_params": { + "foo": { + "judge_model": "judge_model", + "type": "llm_as_judge", + } + }, + "type": "benchmark", + }, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + eval = response.parse() + assert_matches_type(EvaluateResponse, eval, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_evaluate_rows_alpha(self, client: LlamaStackClient) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): + client.eval.with_raw_response.evaluate_rows_alpha( + benchmark_id="", + input_rows=[{"foo": True}], + scoring_functions=["string"], + task_config={ + "eval_candidate": { + "model": "model", + "sampling_params": {"strategy": {"type": "greedy"}}, + "type": "model", + }, + "scoring_params": { + "foo": { + "judge_model": "judge_model", + "type": "llm_as_judge", + } + }, "type": "benchmark", }, ) @@ -134,6 +304,12 @@ def test_method_run_eval(self, client: LlamaStackClient) -> None: "sampling_params": {"strategy": 
{"type": "greedy"}}, "type": "model", }, + "scoring_params": { + "foo": { + "judge_model": "judge_model", + "type": "llm_as_judge", + } + }, "type": "benchmark", }, ) @@ -157,6 +333,15 @@ def test_method_run_eval_with_all_params(self, client: LlamaStackClient) -> None "role": "system", }, }, + "scoring_params": { + "foo": { + "judge_model": "judge_model", + "type": "llm_as_judge", + "aggregation_functions": ["average"], + "judge_score_regexes": ["string"], + "prompt_template": "prompt_template", + } + }, "type": "benchmark", "num_examples": 0, }, @@ -173,6 +358,12 @@ def test_raw_response_run_eval(self, client: LlamaStackClient) -> None: "sampling_params": {"strategy": {"type": "greedy"}}, "type": "model", }, + "scoring_params": { + "foo": { + "judge_model": "judge_model", + "type": "llm_as_judge", + } + }, "type": "benchmark", }, ) @@ -192,6 +383,12 @@ def test_streaming_response_run_eval(self, client: LlamaStackClient) -> None: "sampling_params": {"strategy": {"type": "greedy"}}, "type": "model", }, + "scoring_params": { + "foo": { + "judge_model": "judge_model", + "type": "llm_as_judge", + } + }, "type": "benchmark", }, ) as response: @@ -214,6 +411,139 @@ def test_path_params_run_eval(self, client: LlamaStackClient) -> None: "sampling_params": {"strategy": {"type": "greedy"}}, "type": "model", }, + "scoring_params": { + "foo": { + "judge_model": "judge_model", + "type": "llm_as_judge", + } + }, + "type": "benchmark", + }, + ) + + @parametrize + def test_method_run_eval_alpha(self, client: LlamaStackClient) -> None: + eval = client.eval.run_eval_alpha( + benchmark_id="benchmark_id", + task_config={ + "eval_candidate": { + "model": "model", + "sampling_params": {"strategy": {"type": "greedy"}}, + "type": "model", + }, + "scoring_params": { + "foo": { + "judge_model": "judge_model", + "type": "llm_as_judge", + } + }, + "type": "benchmark", + }, + ) + assert_matches_type(Job, eval, path=["response"]) + + @parametrize + def test_method_run_eval_alpha_with_all_params(self, client: LlamaStackClient) -> None: + eval = client.eval.run_eval_alpha( + benchmark_id="benchmark_id", + task_config={ + "eval_candidate": { + "model": "model", + "sampling_params": { + "strategy": {"type": "greedy"}, + "max_tokens": 0, + "repetition_penalty": 0, + }, + "type": "model", + "system_message": { + "content": "string", + "role": "system", + }, + }, + "scoring_params": { + "foo": { + "judge_model": "judge_model", + "type": "llm_as_judge", + "aggregation_functions": ["average"], + "judge_score_regexes": ["string"], + "prompt_template": "prompt_template", + } + }, + "type": "benchmark", + "num_examples": 0, + }, + ) + assert_matches_type(Job, eval, path=["response"]) + + @parametrize + def test_raw_response_run_eval_alpha(self, client: LlamaStackClient) -> None: + response = client.eval.with_raw_response.run_eval_alpha( + benchmark_id="benchmark_id", + task_config={ + "eval_candidate": { + "model": "model", + "sampling_params": {"strategy": {"type": "greedy"}}, + "type": "model", + }, + "scoring_params": { + "foo": { + "judge_model": "judge_model", + "type": "llm_as_judge", + } + }, + "type": "benchmark", + }, + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + eval = response.parse() + assert_matches_type(Job, eval, path=["response"]) + + @parametrize + def test_streaming_response_run_eval_alpha(self, client: LlamaStackClient) -> None: + with client.eval.with_streaming_response.run_eval_alpha( + benchmark_id="benchmark_id", + task_config={ + 
"eval_candidate": { + "model": "model", + "sampling_params": {"strategy": {"type": "greedy"}}, + "type": "model", + }, + "scoring_params": { + "foo": { + "judge_model": "judge_model", + "type": "llm_as_judge", + } + }, + "type": "benchmark", + }, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + eval = response.parse() + assert_matches_type(Job, eval, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_run_eval_alpha(self, client: LlamaStackClient) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): + client.eval.with_raw_response.run_eval_alpha( + benchmark_id="", + task_config={ + "eval_candidate": { + "model": "model", + "sampling_params": {"strategy": {"type": "greedy"}}, + "type": "model", + }, + "scoring_params": { + "foo": { + "judge_model": "judge_model", + "type": "llm_as_judge", + } + }, "type": "benchmark", }, ) @@ -234,6 +564,12 @@ async def test_method_evaluate_rows(self, async_client: AsyncLlamaStackClient) - "sampling_params": {"strategy": {"type": "greedy"}}, "type": "model", }, + "scoring_params": { + "foo": { + "judge_model": "judge_model", + "type": "llm_as_judge", + } + }, "type": "benchmark", }, ) @@ -259,6 +595,15 @@ async def test_method_evaluate_rows_with_all_params(self, async_client: AsyncLla "role": "system", }, }, + "scoring_params": { + "foo": { + "judge_model": "judge_model", + "type": "llm_as_judge", + "aggregation_functions": ["average"], + "judge_score_regexes": ["string"], + "prompt_template": "prompt_template", + } + }, "type": "benchmark", "num_examples": 0, }, @@ -277,6 +622,12 @@ async def test_raw_response_evaluate_rows(self, async_client: AsyncLlamaStackCli "sampling_params": {"strategy": {"type": "greedy"}}, "type": "model", }, + "scoring_params": { + "foo": { + "judge_model": "judge_model", + "type": "llm_as_judge", + } + }, "type": "benchmark", }, ) @@ -298,6 +649,12 @@ async def test_streaming_response_evaluate_rows(self, async_client: AsyncLlamaSt "sampling_params": {"strategy": {"type": "greedy"}}, "type": "model", }, + "scoring_params": { + "foo": { + "judge_model": "judge_model", + "type": "llm_as_judge", + } + }, "type": "benchmark", }, ) as response: @@ -322,6 +679,149 @@ async def test_path_params_evaluate_rows(self, async_client: AsyncLlamaStackClie "sampling_params": {"strategy": {"type": "greedy"}}, "type": "model", }, + "scoring_params": { + "foo": { + "judge_model": "judge_model", + "type": "llm_as_judge", + } + }, + "type": "benchmark", + }, + ) + + @parametrize + async def test_method_evaluate_rows_alpha(self, async_client: AsyncLlamaStackClient) -> None: + eval = await async_client.eval.evaluate_rows_alpha( + benchmark_id="benchmark_id", + input_rows=[{"foo": True}], + scoring_functions=["string"], + task_config={ + "eval_candidate": { + "model": "model", + "sampling_params": {"strategy": {"type": "greedy"}}, + "type": "model", + }, + "scoring_params": { + "foo": { + "judge_model": "judge_model", + "type": "llm_as_judge", + } + }, + "type": "benchmark", + }, + ) + assert_matches_type(EvaluateResponse, eval, path=["response"]) + + @parametrize + async def test_method_evaluate_rows_alpha_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: + eval = await async_client.eval.evaluate_rows_alpha( + benchmark_id="benchmark_id", + input_rows=[{"foo": True}], + scoring_functions=["string"], + task_config={ + "eval_candidate": { + 
"model": "model", + "sampling_params": { + "strategy": {"type": "greedy"}, + "max_tokens": 0, + "repetition_penalty": 0, + }, + "type": "model", + "system_message": { + "content": "string", + "role": "system", + }, + }, + "scoring_params": { + "foo": { + "judge_model": "judge_model", + "type": "llm_as_judge", + "aggregation_functions": ["average"], + "judge_score_regexes": ["string"], + "prompt_template": "prompt_template", + } + }, + "type": "benchmark", + "num_examples": 0, + }, + ) + assert_matches_type(EvaluateResponse, eval, path=["response"]) + + @parametrize + async def test_raw_response_evaluate_rows_alpha(self, async_client: AsyncLlamaStackClient) -> None: + response = await async_client.eval.with_raw_response.evaluate_rows_alpha( + benchmark_id="benchmark_id", + input_rows=[{"foo": True}], + scoring_functions=["string"], + task_config={ + "eval_candidate": { + "model": "model", + "sampling_params": {"strategy": {"type": "greedy"}}, + "type": "model", + }, + "scoring_params": { + "foo": { + "judge_model": "judge_model", + "type": "llm_as_judge", + } + }, + "type": "benchmark", + }, + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + eval = await response.parse() + assert_matches_type(EvaluateResponse, eval, path=["response"]) + + @parametrize + async def test_streaming_response_evaluate_rows_alpha(self, async_client: AsyncLlamaStackClient) -> None: + async with async_client.eval.with_streaming_response.evaluate_rows_alpha( + benchmark_id="benchmark_id", + input_rows=[{"foo": True}], + scoring_functions=["string"], + task_config={ + "eval_candidate": { + "model": "model", + "sampling_params": {"strategy": {"type": "greedy"}}, + "type": "model", + }, + "scoring_params": { + "foo": { + "judge_model": "judge_model", + "type": "llm_as_judge", + } + }, + "type": "benchmark", + }, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + eval = await response.parse() + assert_matches_type(EvaluateResponse, eval, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_evaluate_rows_alpha(self, async_client: AsyncLlamaStackClient) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): + await async_client.eval.with_raw_response.evaluate_rows_alpha( + benchmark_id="", + input_rows=[{"foo": True}], + scoring_functions=["string"], + task_config={ + "eval_candidate": { + "model": "model", + "sampling_params": {"strategy": {"type": "greedy"}}, + "type": "model", + }, + "scoring_params": { + "foo": { + "judge_model": "judge_model", + "type": "llm_as_judge", + } + }, "type": "benchmark", }, ) @@ -336,6 +836,12 @@ async def test_method_run_eval(self, async_client: AsyncLlamaStackClient) -> Non "sampling_params": {"strategy": {"type": "greedy"}}, "type": "model", }, + "scoring_params": { + "foo": { + "judge_model": "judge_model", + "type": "llm_as_judge", + } + }, "type": "benchmark", }, ) @@ -359,6 +865,15 @@ async def test_method_run_eval_with_all_params(self, async_client: AsyncLlamaSta "role": "system", }, }, + "scoring_params": { + "foo": { + "judge_model": "judge_model", + "type": "llm_as_judge", + "aggregation_functions": ["average"], + "judge_score_regexes": ["string"], + "prompt_template": "prompt_template", + } + }, "type": "benchmark", "num_examples": 0, }, @@ -375,6 +890,12 @@ async def test_raw_response_run_eval(self, async_client: 
                 "sampling_params": {"strategy": {"type": "greedy"}},
                 "type": "model",
             },
+            "scoring_params": {
+                "foo": {
+                    "judge_model": "judge_model",
+                    "type": "llm_as_judge",
+                }
+            },
             "type": "benchmark",
         },
     )
@@ -394,6 +915,12 @@ async def test_streaming_response_run_eval(self, async_client: AsyncLlamaStackCl
                 "sampling_params": {"strategy": {"type": "greedy"}},
                 "type": "model",
             },
+            "scoring_params": {
+                "foo": {
+                    "judge_model": "judge_model",
+                    "type": "llm_as_judge",
+                }
+            },
             "type": "benchmark",
         },
     ) as response:
@@ -416,6 +943,139 @@ async def test_path_params_run_eval(self, async_client: AsyncLlamaStackClient) -
                         "sampling_params": {"strategy": {"type": "greedy"}},
                         "type": "model",
                     },
+                    "scoring_params": {
+                        "foo": {
+                            "judge_model": "judge_model",
+                            "type": "llm_as_judge",
+                        }
+                    },
+                    "type": "benchmark",
+                },
+            )
+
+    @parametrize
+    async def test_method_run_eval_alpha(self, async_client: AsyncLlamaStackClient) -> None:
+        eval = await async_client.eval.run_eval_alpha(
+            benchmark_id="benchmark_id",
+            task_config={
+                "eval_candidate": {
+                    "model": "model",
+                    "sampling_params": {"strategy": {"type": "greedy"}},
+                    "type": "model",
+                },
+                "scoring_params": {
+                    "foo": {
+                        "judge_model": "judge_model",
+                        "type": "llm_as_judge",
+                    }
+                },
+                "type": "benchmark",
+            },
+        )
+        assert_matches_type(Job, eval, path=["response"])
+
+    @parametrize
+    async def test_method_run_eval_alpha_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
+        eval = await async_client.eval.run_eval_alpha(
+            benchmark_id="benchmark_id",
+            task_config={
+                "eval_candidate": {
+                    "model": "model",
+                    "sampling_params": {
+                        "strategy": {"type": "greedy"},
+                        "max_tokens": 0,
+                        "repetition_penalty": 0,
+                    },
+                    "type": "model",
+                    "system_message": {
+                        "content": "string",
+                        "role": "system",
+                    },
+                },
+                "scoring_params": {
+                    "foo": {
+                        "judge_model": "judge_model",
+                        "type": "llm_as_judge",
+                        "aggregation_functions": ["average"],
+                        "judge_score_regexes": ["string"],
+                        "prompt_template": "prompt_template",
+                    }
+                },
+                "type": "benchmark",
+                "num_examples": 0,
+            },
+        )
+        assert_matches_type(Job, eval, path=["response"])
+
+    @parametrize
+    async def test_raw_response_run_eval_alpha(self, async_client: AsyncLlamaStackClient) -> None:
+        response = await async_client.eval.with_raw_response.run_eval_alpha(
+            benchmark_id="benchmark_id",
+            task_config={
+                "eval_candidate": {
+                    "model": "model",
+                    "sampling_params": {"strategy": {"type": "greedy"}},
+                    "type": "model",
+                },
+                "scoring_params": {
+                    "foo": {
+                        "judge_model": "judge_model",
+                        "type": "llm_as_judge",
+                    }
+                },
+                "type": "benchmark",
+            },
+        )
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        eval = await response.parse()
+        assert_matches_type(Job, eval, path=["response"])
+
+    @parametrize
+    async def test_streaming_response_run_eval_alpha(self, async_client: AsyncLlamaStackClient) -> None:
+        async with async_client.eval.with_streaming_response.run_eval_alpha(
+            benchmark_id="benchmark_id",
+            task_config={
+                "eval_candidate": {
+                    "model": "model",
+                    "sampling_params": {"strategy": {"type": "greedy"}},
+                    "type": "model",
+                },
+                "scoring_params": {
+                    "foo": {
+                        "judge_model": "judge_model",
+                        "type": "llm_as_judge",
+                    }
+                },
+                "type": "benchmark",
+            },
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            eval = await response.parse()
+            assert_matches_type(Job, eval, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
+    @parametrize
+    async def test_path_params_run_eval_alpha(self, async_client: AsyncLlamaStackClient) -> None:
+        with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
+            await async_client.eval.with_raw_response.run_eval_alpha(
+                benchmark_id="",
+                task_config={
+                    "eval_candidate": {
+                        "model": "model",
+                        "sampling_params": {"strategy": {"type": "greedy"}},
+                        "type": "model",
+                    },
+                    "scoring_params": {
+                        "foo": {
+                            "judge_model": "judge_model",
+                            "type": "llm_as_judge",
+                        }
+                    },
                     "type": "benchmark",
                 },
             )
diff --git a/tests/api_resources/test_eval_tasks.py b/tests/api_resources/test_eval_tasks.py
index 5b18621b..6ca2f2c4 100644
--- a/tests/api_resources/test_eval_tasks.py
+++ b/tests/api_resources/test_eval_tasks.py
@@ -9,7 +9,7 @@
 
 from tests.utils import assert_matches_type
 from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
-from llama_stack_client.types import EvalTask, EvalTaskListResponse
+from llama_stack_client.types import Benchmark, BenchmarkListResponse
 
 base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
@@ -22,7 +22,7 @@ def test_method_retrieve(self, client: LlamaStackClient) -> None:
         eval_task = client.eval_tasks.retrieve(
             "eval_task_id",
         )
-        assert_matches_type(Optional[EvalTask], eval_task, path=["response"])
+        assert_matches_type(Optional[Benchmark], eval_task, path=["response"])
 
     @parametrize
     def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
@@ -33,7 +33,7 @@ def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
         assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         eval_task = response.parse()
-        assert_matches_type(Optional[EvalTask], eval_task, path=["response"])
+        assert_matches_type(Optional[Benchmark], eval_task, path=["response"])
 
     @parametrize
     def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
@@ -44,7 +44,7 @@ def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
             assert response.http_request.headers.get("X-Stainless-Lang") == "python"
 
             eval_task = response.parse()
-            assert_matches_type(Optional[EvalTask], eval_task, path=["response"])
+            assert_matches_type(Optional[Benchmark], eval_task, path=["response"])
 
         assert cast(Any, response.is_closed) is True
@@ -58,7 +58,7 @@ def test_path_params_retrieve(self, client: LlamaStackClient) -> None:
     @parametrize
     def test_method_list(self, client: LlamaStackClient) -> None:
         eval_task = client.eval_tasks.list()
-        assert_matches_type(EvalTaskListResponse, eval_task, path=["response"])
+        assert_matches_type(BenchmarkListResponse, eval_task, path=["response"])
 
     @parametrize
     def test_raw_response_list(self, client: LlamaStackClient) -> None:
@@ -67,7 +67,7 @@ def test_raw_response_list(self, client: LlamaStackClient) -> None:
         assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         eval_task = response.parse()
-        assert_matches_type(EvalTaskListResponse, eval_task, path=["response"])
+        assert_matches_type(BenchmarkListResponse, eval_task, path=["response"])
 
     @parametrize
     def test_streaming_response_list(self, client: LlamaStackClient) -> None:
@@ -76,7 +76,7 @@ def test_streaming_response_list(self, client: LlamaStackClient) -> None:
             assert response.http_request.headers.get("X-Stainless-Lang") == "python"
 
             eval_task = response.parse()
-            assert_matches_type(EvalTaskListResponse, eval_task, path=["response"])
+            assert_matches_type(BenchmarkListResponse, eval_task, path=["response"])
 
         assert cast(Any, response.is_closed) is True
@@ -96,7 +96,7 @@ def test_method_register_with_all_params(self, client: LlamaStackClient) -> None
             eval_task_id="eval_task_id",
             scoring_functions=["string"],
             metadata={"foo": True},
-            provider_eval_task_id="provider_eval_task_id",
+            provider_benchmark_id="provider_benchmark_id",
             provider_id="provider_id",
         )
         assert eval_task is None
@@ -138,7 +138,7 @@ async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> Non
         eval_task = await async_client.eval_tasks.retrieve(
             "eval_task_id",
         )
-        assert_matches_type(Optional[EvalTask], eval_task, path=["response"])
+        assert_matches_type(Optional[Benchmark], eval_task, path=["response"])
 
     @parametrize
     async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
@@ -149,7 +149,7 @@ async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient)
         assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         eval_task = await response.parse()
-        assert_matches_type(Optional[EvalTask], eval_task, path=["response"])
+        assert_matches_type(Optional[Benchmark], eval_task, path=["response"])
 
     @parametrize
     async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
@@ -160,7 +160,7 @@ async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackCl
             assert response.http_request.headers.get("X-Stainless-Lang") == "python"
 
             eval_task = await response.parse()
-            assert_matches_type(Optional[EvalTask], eval_task, path=["response"])
+            assert_matches_type(Optional[Benchmark], eval_task, path=["response"])
 
         assert cast(Any, response.is_closed) is True
@@ -174,7 +174,7 @@ async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -
     @parametrize
     async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None:
         eval_task = await async_client.eval_tasks.list()
-        assert_matches_type(EvalTaskListResponse, eval_task, path=["response"])
+        assert_matches_type(BenchmarkListResponse, eval_task, path=["response"])
 
     @parametrize
     async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None:
@@ -183,7 +183,7 @@ async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> N
         assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         eval_task = await response.parse()
-        assert_matches_type(EvalTaskListResponse, eval_task, path=["response"])
+        assert_matches_type(BenchmarkListResponse, eval_task, path=["response"])
 
     @parametrize
     async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient) -> None:
@@ -192,7 +192,7 @@ async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient
             assert response.http_request.headers.get("X-Stainless-Lang") == "python"
 
             eval_task = await response.parse()
-            assert_matches_type(EvalTaskListResponse, eval_task, path=["response"])
+            assert_matches_type(BenchmarkListResponse, eval_task, path=["response"])
 
         assert cast(Any, response.is_closed) is True
@@ -212,7 +212,7 @@ async def test_method_register_with_all_params(self, async_client: AsyncLlamaSta
             eval_task_id="eval_task_id",
             scoring_functions=["string"],
             metadata={"foo": True},
-            provider_eval_task_id="provider_eval_task_id",
+            provider_benchmark_id="provider_benchmark_id",
             provider_id="provider_id",
         )
         assert eval_task is None
diff --git a/tests/api_resources/test_telemetry.py b/tests/api_resources/test_telemetry.py
index 99886c2d..4f3c81d4 100644
--- a/tests/api_resources/test_telemetry.py
+++ b/tests/api_resources/test_telemetry.py
@@ -182,7 +182,7 @@ def test_method_log_event_with_all_params(self, client: LlamaStackClient) -> Non
                 "timestamp": parse_datetime("2019-12-27T18:11:19.117Z"),
                 "trace_id": "trace_id",
                 "type": "unstructured_log",
-                "attributes": {"foo": True},
+                "attributes": {"foo": "string"},
             },
             ttl_seconds=0,
         )
@@ -577,7 +577,7 @@ async def test_method_log_event_with_all_params(self, async_client: AsyncLlamaSt
                 "timestamp": parse_datetime("2019-12-27T18:11:19.117Z"),
                 "trace_id": "trace_id",
                 "type": "unstructured_log",
-                "attributes": {"foo": True},
+                "attributes": {"foo": "string"},
             },
             ttl_seconds=0,
         )
diff --git a/tests/test_client.py b/tests/test_client.py
index 3ea5f0b7..f282f616 100644
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -23,6 +23,7 @@
 from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient, APIResponseValidationError
 from llama_stack_client._types import Omit
+from llama_stack_client._utils import maybe_transform
 from llama_stack_client._models import BaseModel, FinalRequestOptions
 from llama_stack_client._constants import RAW_RESPONSE_HEADER
 from llama_stack_client._exceptions import APIStatusError, APITimeoutError, APIResponseValidationError
@@ -32,6 +33,7 @@
     BaseClient,
     make_request_options,
 )
+from llama_stack_client.types.inference_chat_completion_params import InferenceChatCompletionParamsNonStreaming
 
 from .utils import update_env
@@ -686,14 +688,17 @@ def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter) -> No
                 "/v1/inference/chat-completion",
                 body=cast(
                     object,
-                    dict(
-                        messages=[
-                            {
-                                "content": "string",
-                                "role": "user",
-                            }
-                        ],
-                        model_id="model_id",
+                    maybe_transform(
+                        dict(
+                            messages=[
+                                {
+                                    "content": "string",
+                                    "role": "user",
+                                }
+                            ],
+                            model_id="model_id",
+                        ),
+                        InferenceChatCompletionParamsNonStreaming,
                     ),
                 ),
                 cast_to=httpx.Response,
@@ -712,14 +717,17 @@ def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter) -> Non
                 "/v1/inference/chat-completion",
                 body=cast(
                     object,
-                    dict(
-                        messages=[
-                            {
-                                "content": "string",
-                                "role": "user",
-                            }
-                        ],
-                        model_id="model_id",
+                    maybe_transform(
+                        dict(
+                            messages=[
+                                {
+                                    "content": "string",
+                                    "role": "user",
+                                }
+                            ],
+                            model_id="model_id",
+                        ),
+                        InferenceChatCompletionParamsNonStreaming,
                     ),
                 ),
                 cast_to=httpx.Response,
@@ -1474,14 +1482,17 @@ async def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter)
                 "/v1/inference/chat-completion",
                 body=cast(
                     object,
-                    dict(
-                        messages=[
-                            {
-                                "content": "string",
-                                "role": "user",
-                            }
-                        ],
-                        model_id="model_id",
+                    maybe_transform(
+                        dict(
+                            messages=[
+                                {
+                                    "content": "string",
+                                    "role": "user",
+                                }
+                            ],
+                            model_id="model_id",
+                        ),
+                        InferenceChatCompletionParamsNonStreaming,
                     ),
                 ),
                 cast_to=httpx.Response,
@@ -1500,14 +1511,17 @@ async def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter)
                 "/v1/inference/chat-completion",
                 body=cast(
                     object,
-                    dict(
-                        messages=[
-                            {
-                                "content": "string",
-                                "role": "user",
-                            }
-                        ],
-                        model_id="model_id",
+                    maybe_transform(
+                        dict(
+                            messages=[
+                                {
+                                    "content": "string",
+                                    "role": "user",
+                                }
+                            ],
+                            model_id="model_id",
+                        ),
+                        InferenceChatCompletionParamsNonStreaming,
                     ),
                 ),
                 cast_to=httpx.Response,
diff --git a/tests/test_transform.py b/tests/test_transform.py
index 364c685e..8ceafb36 100644
--- a/tests/test_transform.py
+++ b/tests/test_transform.py
@@ -2,7 +2,7 @@
 
 import io
 import pathlib
-from typing import Any, List, Union, TypeVar, Iterable, Optional, cast
+from typing import Any, Dict, List, Union, TypeVar, Iterable, Optional, cast
 from datetime import date, datetime
 from typing_extensions import Required, Annotated, TypedDict
@@ -388,6 +388,15 @@ def my_iter() -> Iterable[Baz8]:
     }
 
 
+@parametrize
+@pytest.mark.asyncio
+async def test_dictionary_items(use_async: bool) -> None:
+    class DictItems(TypedDict):
+        foo_baz: Annotated[str, PropertyInfo(alias="fooBaz")]
+
+    assert await transform({"foo": {"foo_baz": "bar"}}, Dict[str, DictItems], use_async) == {"foo": {"fooBaz": "bar"}}
+
+
 class TypedDictIterableUnionStr(TypedDict):
     foo: Annotated[Union[str, Iterable[Baz8]], PropertyInfo(alias="FOO")]