 
 from __future__ import annotations
 
-from typing import Dict, Iterable, Optional
+from typing import Dict, Iterable
 
 import httpx
 
@@ -20,7 +20,7 @@
     JobsResourceWithStreamingResponse,
     AsyncJobsResourceWithStreamingResponse,
 )
-from ...._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given
+from ...._types import Body, Query, Headers, NotGiven, SequenceNotStr, not_given
 from ...._utils import maybe_transform, async_maybe_transform
 from ...._compat import cached_property
 from ...._resource import SyncAPIResource, AsyncAPIResource
@@ -164,9 +164,7 @@ def run_eval(
         self,
         benchmark_id: str,
         *,
-        eval_candidate: eval_run_eval_params.EvalCandidate,
-        num_examples: Optional[int] | Omit = omit,
-        scoring_params: Dict[str, eval_run_eval_params.ScoringParams] | Omit = omit,
+        benchmark_config: BenchmarkConfigParam,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
@@ -178,13 +176,7 @@ def run_eval(
         Run an evaluation on a benchmark.
 
         Args:
-          eval_candidate: A model candidate for evaluation.
-
-          num_examples: Number of examples to evaluate (useful for testing), if not provided, all
-              examples in the dataset will be evaluated
-
-          scoring_params: Map between scoring function id and parameters for each scoring function you
-              want to run
+          benchmark_config: A benchmark configuration for evaluation.
 
           extra_headers: Send extra headers
 
@@ -198,14 +190,7 @@ def run_eval(
             raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
         return self._post(
             f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs",
-            body=maybe_transform(
-                {
-                    "eval_candidate": eval_candidate,
-                    "num_examples": num_examples,
-                    "scoring_params": scoring_params,
-                },
-                eval_run_eval_params.EvalRunEvalParams,
-            ),
+            body=maybe_transform({"benchmark_config": benchmark_config}, eval_run_eval_params.EvalRunEvalParams),
             options=make_request_options(
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
             ),
@@ -216,9 +201,7 @@ def run_eval_alpha(
         self,
         benchmark_id: str,
         *,
-        eval_candidate: eval_run_eval_alpha_params.EvalCandidate,
-        num_examples: Optional[int] | Omit = omit,
-        scoring_params: Dict[str, eval_run_eval_alpha_params.ScoringParams] | Omit = omit,
+        benchmark_config: BenchmarkConfigParam,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
@@ -230,13 +213,7 @@ def run_eval_alpha(
         Run an evaluation on a benchmark.
 
         Args:
-          eval_candidate: A model candidate for evaluation.
-
-          num_examples: Number of examples to evaluate (useful for testing), if not provided, all
-              examples in the dataset will be evaluated
-
-          scoring_params: Map between scoring function id and parameters for each scoring function you
-              want to run
+          benchmark_config: A benchmark configuration for evaluation.
 
           extra_headers: Send extra headers
 
@@ -251,12 +228,7 @@ def run_eval_alpha(
         return self._post(
             f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs",
             body=maybe_transform(
-                {
-                    "eval_candidate": eval_candidate,
-                    "num_examples": num_examples,
-                    "scoring_params": scoring_params,
-                },
-                eval_run_eval_alpha_params.EvalRunEvalAlphaParams,
+                {"benchmark_config": benchmark_config}, eval_run_eval_alpha_params.EvalRunEvalAlphaParams
             ),
             options=make_request_options(
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
@@ -385,9 +357,7 @@ async def run_eval(
         self,
         benchmark_id: str,
         *,
-        eval_candidate: eval_run_eval_params.EvalCandidate,
-        num_examples: Optional[int] | Omit = omit,
-        scoring_params: Dict[str, eval_run_eval_params.ScoringParams] | Omit = omit,
+        benchmark_config: BenchmarkConfigParam,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
@@ -399,13 +369,7 @@ async def run_eval(
         Run an evaluation on a benchmark.
 
         Args:
-          eval_candidate: A model candidate for evaluation.
-
-          num_examples: Number of examples to evaluate (useful for testing), if not provided, all
-              examples in the dataset will be evaluated
-
-          scoring_params: Map between scoring function id and parameters for each scoring function you
-              want to run
+          benchmark_config: A benchmark configuration for evaluation.
 
           extra_headers: Send extra headers
 
@@ -420,12 +384,7 @@ async def run_eval(
         return await self._post(
             f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs",
             body=await async_maybe_transform(
-                {
-                    "eval_candidate": eval_candidate,
-                    "num_examples": num_examples,
-                    "scoring_params": scoring_params,
-                },
-                eval_run_eval_params.EvalRunEvalParams,
+                {"benchmark_config": benchmark_config}, eval_run_eval_params.EvalRunEvalParams
             ),
             options=make_request_options(
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
@@ -437,9 +396,7 @@ async def run_eval_alpha(
         self,
         benchmark_id: str,
         *,
-        eval_candidate: eval_run_eval_alpha_params.EvalCandidate,
-        num_examples: Optional[int] | Omit = omit,
-        scoring_params: Dict[str, eval_run_eval_alpha_params.ScoringParams] | Omit = omit,
+        benchmark_config: BenchmarkConfigParam,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
@@ -451,13 +408,7 @@ async def run_eval_alpha(
         Run an evaluation on a benchmark.
 
         Args:
-          eval_candidate: A model candidate for evaluation.
-
-          num_examples: Number of examples to evaluate (useful for testing), if not provided, all
-              examples in the dataset will be evaluated
-
-          scoring_params: Map between scoring function id and parameters for each scoring function you
-              want to run
+          benchmark_config: A benchmark configuration for evaluation.
 
           extra_headers: Send extra headers
 
@@ -472,12 +423,7 @@ async def run_eval_alpha(
         return await self._post(
             f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs",
             body=await async_maybe_transform(
-                {
-                    "eval_candidate": eval_candidate,
-                    "num_examples": num_examples,
-                    "scoring_params": scoring_params,
-                },
-                eval_run_eval_alpha_params.EvalRunEvalAlphaParams,
+                {"benchmark_config": benchmark_config}, eval_run_eval_alpha_params.EvalRunEvalAlphaParams
             ),
             options=make_request_options(
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
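For reference, a minimal migration sketch for callers of run_eval follows. It is based only on the new signature shown in this diff: the separate eval_candidate, num_examples, and scoring_params keyword arguments are replaced by a single benchmark_config parameter. The client class name, the client.alpha.eval attribute path, the base URL, and the field names inside benchmark_config are assumptions for illustration and are not part of this change; the async variants change in exactly the same way.

# Hypothetical usage sketch; everything except the
# run_eval(benchmark_id, *, benchmark_config=...) signature is assumed.
from llama_stack_client import LlamaStackClient  # assumed package/client name

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed constructor args

# Old call shape (removed by this change):
#   client.alpha.eval.run_eval(
#       benchmark_id="my-benchmark",
#       eval_candidate={"type": "model", "model": "my-model", "sampling_params": {}},
#       num_examples=10,
#       scoring_params={"my-scoring-fn": {"type": "basic", "aggregation_functions": []}},
#   )

# New call shape: the same fields are grouped under a single benchmark_config dict
# (the keys inside it are assumed to mirror the old kwargs; see BenchmarkConfigParam).
job = client.alpha.eval.run_eval(
    benchmark_id="my-benchmark",
    benchmark_config={
        "eval_candidate": {"type": "model", "model": "my-model", "sampling_params": {}},
        "scoring_params": {"my-scoring-fn": {"type": "basic", "aggregation_functions": []}},
        "num_examples": 10,
    },
)
print(job)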