Skip to content

Commit 1deab1b

Browse files
authored
[ENH] OpenAI use tools instead of functions (#391)
* [ENH] more general `_requires_chatml`
* [ENH] update OpenAI to use tools instead of functions
1 parent 9c46d20 commit 1deab1b

File tree

5 files changed: 111 additions (+111) and 79 deletions (-79).

src/alpaca_eval/decoders/openai.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -237,10 +237,15 @@ def _openai_completion_helper(
237237
else:
238238
choices[i]["text"] = choice.message.content
239239

240+
# backward compatibility for function calls # TODO: remove once function calls are removed
240241
if choice.message.function_call:
241242
# currently we only use function calls to get a JSON object => return raw text of json
242243
choices[i]["text"] = choice.message.function_call.arguments
243244

245+
if choice.message.tool_calls is not None:
246+
# currently we only use function calls to get a JSON object => return raw text of json
247+
choices[i]["text"] = choice.message.tool_calls[0].function.arguments
248+
244249
else:
245250
completion_batch = client.completions.create(prompt=prompt_batch, **curr_kwargs)
246251
choices = completion_batch.choices
@@ -290,7 +295,8 @@ def _openai_completion_helper(
290295
def _requires_chatml(model: str) -> bool:
291296
"""Whether a model requires the ChatML format."""
292297
# TODO: this should ideally be an OpenAI function... Maybe it already exists?
293-
return ("turbo" in model or "gpt-4" in model) and "instruct" not in model
298+
not_chatml = ("instruct" in model) or ("gpt-3" in model and "turbo" not in model) or (model.startswith("text-"))
299+
return not not_chatml
294300

295301

296302
def _get_price_per_token(model, price_per_token=None):

src/alpaca_eval/evaluators_configs/alpaca_eval_cot_gpt4_turbo_fn/configs.yaml

Lines changed: 31 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -5,30 +5,37 @@ alpaca_eval_cot_gpt4_turbo_fn:
55
model_name: "gpt-4-1106-preview"
66
max_tokens: 300
77
temperature: 0
8-
function_call:
9-
name: "make_partial_leaderboard"
10-
functions:
11-
- name: "make_partial_leaderboard"
12-
description: "Make a leaderboard of models given a list of the models ordered by the preference of their outputs."
13-
parameters:
14-
type: "object"
15-
properties:
16-
concise_explanation:
17-
type: "string"
18-
description: "A concise explanation for the ranking of the current models."
19-
ordered_models:
20-
type: "array"
21-
description: "A list of models ordered by the preference of their outputs. The first model in the list has the best output."
22-
items:
23-
type: "object"
24-
properties:
25-
model:
26-
type: "string"
27-
description: "The name of the model"
28-
rank:
29-
type: "number"
30-
description: "Order of preference of the model, 1 has the best output"
31-
"required": [ "ordered_models" ]
8+
tool_choice:
9+
type: function
10+
function:
11+
name: "make_partial_leaderboard"
12+
tools:
13+
- type: function
14+
function:
15+
name: "make_partial_leaderboard"
16+
description: "Make a leaderboard of models given a list of the models ordered by the preference of their outputs."
17+
parameters:
18+
type: "object"
19+
properties:
20+
concise_explanation:
21+
type: "string"
22+
description: "A concise explanation for the ranking of the current models."
23+
ordered_models:
24+
type: "array"
25+
description: "A list of models ordered by the preference of their outputs. The first model in the list has the best output."
26+
items:
27+
type: "object"
28+
properties:
29+
model:
30+
type: "string"
31+
description: "The name of the model"
32+
rank:
33+
type: "number"
34+
description: "Order of preference of the model, 1 has the best output"
35+
additionalProperties: false
36+
required: [ "model", "rank" ]
37+
additionalProperties: false
38+
required: [ "ordered_models" ]
3239
fn_completion_parser: "pipeline_meta_parser"
3340
completion_parser_kwargs:
3441
parsers_to_kwargs:

src/alpaca_eval/evaluators_configs/alpaca_eval_gpt4_fn/configs.yaml

Lines changed: 28 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -5,27 +5,34 @@ alpaca_eval_gpt4_fn:
55
model_name: "gpt-4"
66
max_tokens: 100
77
temperature: 0
8-
function_call:
9-
name: "make_leaderboard"
10-
functions:
11-
- name: "make_leaderboard"
12-
description: "Make a leaderboard of models given a list of the models ordered by the preference of their outputs."
13-
parameters:
14-
type: "object"
15-
properties:
16-
ordered_models:
17-
type: "array"
18-
description: "A list of models ordered by the preference of their outputs"
19-
items:
20-
type: "object"
21-
properties:
22-
model:
23-
type: "string"
24-
description: "The name of the model"
25-
rank:
26-
type: "number"
27-
description: "Order of preference of the model, 1 has the best output"
28-
"required": [ "ordered_models" ]
8+
tool_choice:
9+
type: function
10+
function:
11+
name: "make_leaderboard"
12+
tools:
13+
- type: function
14+
function:
15+
name: "make_leaderboard"
16+
description: "Make a leaderboard of models given a list of the models ordered by the preference of their outputs."
17+
parameters:
18+
type: "object"
19+
properties:
20+
ordered_models:
21+
type: "array"
22+
description: "A list of models ordered by the preference of their outputs"
23+
items:
24+
type: "object"
25+
properties:
26+
model:
27+
type: "string"
28+
description: "The name of the model"
29+
rank:
30+
type: "number"
31+
description: "Order of preference of the model, 1 has the best output"
32+
additionalProperties: false
33+
required: [ "model", "rank" ]
34+
additionalProperties: false
35+
required: [ "ordered_models" ]
2936
fn_completion_parser: "pipeline_meta_parser"
3037
completion_parser_kwargs:
3138
parsers_to_kwargs:

src/alpaca_eval/evaluators_configs/alpaca_eval_gpt4_turbo_fn/configs.yaml

Lines changed: 28 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -5,27 +5,34 @@ alpaca_eval_gpt4_turbo_fn:
55
model_name: "gpt-4-1106-preview"
66
max_tokens: 100
77
temperature: 0
8-
function_call:
9-
name: "make_partial_leaderboard"
10-
functions:
11-
- name: "make_partial_leaderboard"
12-
description: "Make a leaderboard of models given a list of the models ordered by the preference of their outputs."
13-
parameters:
14-
type: "object"
15-
properties:
16-
ordered_models:
17-
type: "array"
18-
description: "A list of models ordered by the preference of their outputs. The first model in the list has the best output."
19-
items:
20-
type: "object"
21-
properties:
22-
model:
23-
type: "string"
24-
description: "The name of the model"
25-
rank:
26-
type: "number"
27-
description: "Order of preference of the model, 1 has the best output"
28-
"required": [ "ordered_models" ]
8+
tool_choice:
9+
type: function
10+
function:
11+
name: "make_partial_leaderboard"
12+
tools:
13+
- type: function
14+
function:
15+
name: "make_partial_leaderboard"
16+
description: "Make a leaderboard of models given a list of the models ordered by the preference of their outputs."
17+
parameters:
18+
type: "object"
19+
properties:
20+
ordered_models:
21+
type: "array"
22+
description: "A list of models ordered by the preference of their outputs. The first model in the list has the best output."
23+
items:
24+
type: "object"
25+
properties:
26+
model:
27+
type: "string"
28+
description: "The name of the model"
29+
rank:
30+
type: "number"
31+
description: "Order of preference of the model, 1 has the best output"
32+
additionalProperties: false
33+
required: [ "model", "rank" ]
34+
additionalProperties: false
35+
required: [ "ordered_models" ]
2936
fn_completion_parser: "pipeline_meta_parser"
3037
completion_parser_kwargs:
3138
parsers_to_kwargs:

src/alpaca_eval/evaluators_configs/chatgpt_fn/configs.yaml

Lines changed: 17 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -5,18 +5,23 @@ chatgpt_fn:
55
model_name: "gpt-3.5-turbo-16k-0613"
66
max_tokens: 50
77
temperature: 0
8-
function_call:
9-
name: "print_best_model"
10-
functions:
11-
- name: "print_best_model"
12-
description: "Print the best model given the preferred output."
13-
parameters:
14-
type: "object"
15-
properties:
16-
best_output:
17-
type: "string"
18-
description: "Name of the best output, should be 'Output (a)' or 'Output (b)'"
19-
"required": [ "best_output" ]
8+
tool_choice:
9+
type: function
10+
function:
11+
name: "print_best_model"
12+
tools:
13+
- type: function
14+
function:
15+
name: "print_best_model"
16+
description: "Print the best model given the preferred output."
17+
parameters:
18+
type: "object"
19+
properties:
20+
best_output:
21+
type: "string"
22+
description: "Name of the best output, should be 'Output (a)' or 'Output (b)'"
23+
additionalProperties: false
24+
required: [ "best_output" ]
2025
completion_parser_kwargs:
2126
outputs_to_match:
2227
1: '(?i)output \(a\)'

0 commit comments

Comments
 (0)