@@ -11,58 +11,55 @@
 import pandas as pd
 
 # First Party
-from instructlab.eval.ragas import ModelConfig, RagasEvaluator, RunConfig, Sample
+from instructlab.eval.ragas import ModelConfig, RagasEvaluator, RunConfig
 
 
 class TestRagasEvaluator(unittest.TestCase):
-    @patch("instructlab.eval.ragas.get_openai_client")
-    def test_generate_answers_from_model(self, mock_get_openai_client):
+    def test_generate_answers_from_model(self):
         # mock the OpenAI client to always return "london" for chat completions
+        user_input = "What is the capital of France?"
+        model_response = "London"
         mock_client = MagicMock()
         mock_response = MagicMock()
-        mock_response.choices[0].message.content = "London"
+        mock_response.choices = [MagicMock(message=MagicMock(content=model_response))]
         mock_client.chat.completions.create.return_value = mock_response
-        mock_get_openai_client.return_value = mock_client
 
         # get answers
-        questions = pd.DataFrame({"user_input": ["What is the capital of France?"]})
+        questions = pd.DataFrame({"user_input": [user_input]})
         student_model = ModelConfig(
-            base_url="https://your.model.endpoint.com",
-            model_name="jeeves-512B",
-            api_key="test-api-key",
+            model_name="super-jeeves-8x700B",
         )
         evaluator = RagasEvaluator()
-        result_df = evaluator._generate_answers_from_model(questions, student_model)
+        result_df = evaluator._generate_answers_from_model(
+            questions, student_model, mock_client
+        )
 
         # what we expect to see
         expected_df = questions.copy()
-        expected_df["response"] = ["London"]
+        expected_df["response"] = [model_response]
 
         # perform the assertions
         pd.testing.assert_frame_equal(result_df, expected_df)
-        mock_get_openai_client.assert_called_once_with(
-            model_api_base=student_model.base_url, api_key=student_model.api_key
-        )
         mock_client.chat.completions.create.assert_called_once_with(
-            messages=[student_model.system_prompt, "What is the capital of France?"],
+            messages=[student_model.system_prompt, user_input],
             model=student_model.model_name,
             seed=42,
            max_tokens=student_model.max_tokens,
            temperature=student_model.temperature,
        )
 
+    @patch("instructlab.eval.ragas.ChatOpenAI")
     @patch("instructlab.eval.ragas.read_json")
     @patch("instructlab.eval.ragas.evaluate")
-    @patch("instructlab.eval.ragas.ChatOpenAI")
     @patch.object(RagasEvaluator, "_generate_answers_from_model")
     @patch.object(RagasEvaluator, "_get_metrics")
     def test_run(
         self,
         mock_get_metrics: MagicMock,
         mock_generate_answers_from_model: MagicMock,
-        mock_ChatOpenAI: MagicMock,
         mock_evaluate: MagicMock,
         mock_read_json: MagicMock,
+        mock_ChatOpenAI: MagicMock,
     ):
         ########################################################################
         # SETUP EVERYTHING WE NEED FOR THE TESTS
@@ -74,16 +71,20 @@ def test_run(
         student_model_response = "Paris"
         user_question = "What is the capital of France?"
         golden_answer = "The capital of France is Paris."
+        metric = "mocked-metric"
+        metric_score = 4.0
         base_ds = [{"user_input": user_question, "reference": golden_answer}]
-        mocked_metric = "mocked-metric"
-        mocked_metric_score = 4.0
+        student_model = ModelConfig(
+            model_name="super-jeeves-8x700B",
+        )
+        run_config = RunConfig(max_retries=3, max_wait=60, seed=42, timeout=30)
 
         # The following section takes care of mocking function return calls.
         # Ragas is tricky because it has some complex data structures under the hood,
         # so what we have to do is configure the intermediate outputs that we expect
         # to receive from Ragas.
 
-        mock_get_metrics.return_value = [mocked_metric]
+        mock_get_metrics.return_value = [metric]
         interim_df = DataFrame(
             {
                 "user_input": [user_question],
@@ -93,7 +94,12 @@ def test_run(
         )
         mock_generate_answers_from_model.return_value = interim_df.copy()
         mocked_evaluation_ds = EvaluationDataset.from_pandas(interim_df)
-        mock_ChatOpenAI.return_value = MagicMock()
+        mock_client = MagicMock()
+        mock_response = MagicMock()
+        mock_response.choices = [
+            MagicMock(message=MagicMock(content=student_model_response))
+        ]
+        mock_client.chat.completions.create.return_value = mock_response
 
         # Ragas requires this value to instantiate an EvaluationResult object, so we must provide it.
         # It isn't functionally used for our purposes though.
@@ -109,29 +115,20 @@ def test_run(
             )
         }
         mock_evaluate.return_value = EvaluationResult(
-            scores=[{mocked_metric: mocked_metric_score}],
+            scores=[{metric: metric_score}],
             dataset=mocked_evaluation_ds,
             ragas_traces=_unimportant_ragas_traces,
         )
 
-        ########################################################################
-        # Run the tests
-        ########################################################################
-
-        # Configure all other inputs that Ragas does not depend on for proper mocking
-        student_model = ModelConfig(
-            base_url="https://api.openai.com",
-            model_name="pt-3.5-turbo",
-            api_key="test-api-key",
-        )
-        run_config = RunConfig(max_retries=3, max_wait=60, seed=42, timeout=30)
-        evaluator = RagasEvaluator()
-
         ########################################################################
         # Test case: directly passing a dataset
         ########################################################################
+        evaluator = RagasEvaluator()
         result = evaluator.run(
-            dataset=base_ds, student_model=student_model, run_config=run_config
+            dataset=base_ds,
+            student_model=student_model,
+            run_config=run_config,
+            openai_client=mock_client,
         )
 
         self.assertIsInstance(result, EvaluationResult)
@@ -142,11 +139,13 @@ def test_run(
         ########################################################################
         # Test case: passing a dataset in via Path to JSONL file
         ########################################################################
+        evaluator = RagasEvaluator()
         mock_read_json.return_value = DataFrame(base_ds)
         result = evaluator.run(
             dataset=Path("dummy_path.jsonl"),
             student_model=student_model,
             run_config=run_config,
+            openai_client=mock_client,
         )
 
         self.assertIsInstance(result, EvaluationResult)
@@ -156,6 +155,24 @@ def test_run(
         mock_generate_answers_from_model.assert_called()
         mock_evaluate.assert_called()
 
+        ########################################################################
+        # Test case: using the instance attributes
+        ########################################################################
+        evaluator = RagasEvaluator(
+            student_model=student_model,
+            openai_client=mock_client,
+            run_config=run_config,
+        )
+        mock_read_json.return_value = DataFrame(base_ds)
+        result = evaluator.run(dataset=Path("dummy_path.jsonl"))
+
+        self.assertIsInstance(result, EvaluationResult)
+        mock_read_json.assert_called_with(
+            Path("dummy_path.jsonl"), orient="records", lines=True
+        )
+        mock_generate_answers_from_model.assert_called()
+        mock_evaluate.assert_called()
+
 
 if __name__ == "__main__":
     unittest.main()
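
For context, a minimal usage sketch of the interface this diff exercises: the caller now constructs an OpenAI-compatible client and hands it to RagasEvaluator, either at construction time or per run() call, instead of having the evaluator build one from base_url/api_key on ModelConfig. Only the constructor and run() keyword arguments shown in the test above are taken from this change; the endpoint, API key, dataset path, and question/reference strings below are illustrative placeholders, not part of the library.

# Hedged sketch based only on what the test above exercises;
# endpoint, key, and file names are assumptions for illustration.
from pathlib import Path

from openai import OpenAI

from instructlab.eval.ragas import ModelConfig, RagasEvaluator, RunConfig

client = OpenAI(base_url="http://localhost:8000/v1", api_key="placeholder")  # assumed local server
student_model = ModelConfig(model_name="super-jeeves-8x700B")
run_config = RunConfig(max_retries=3, max_wait=60, seed=42, timeout=30)

# Option 1: configure everything on the evaluator instance, then run on a JSONL file.
evaluator = RagasEvaluator(
    student_model=student_model,
    openai_client=client,
    run_config=run_config,
)
result = evaluator.run(dataset=Path("questions.jsonl"))

# Option 2: keep the evaluator bare and pass everything per call,
# including an in-memory list of {"user_input", "reference"} records.
evaluator = RagasEvaluator()
result = evaluator.run(
    dataset=[
        {
            "user_input": "What is the capital of France?",
            "reference": "The capital of France is Paris.",
        }
    ],
    student_model=student_model,
    run_config=run_config,
    openai_client=client,
)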