
Commit a15ad81

Author: b.nativi

external tests for AnswerCorrectness, ContextPrecision, ContextRecall part 2

Parent: 1ee343e

File tree: 3 files changed (+460, -4 lines)


api/valor_api/schemas/types.py (2 additions, 2 deletions)

@@ -94,9 +94,9 @@ def _match_annotation_to_implied_task_type(
         and annotation.context_list is None
     ):
         implied_type = ["embedding"]
-    # text generation tasks only support text and optionally context_list
+    # text generation tasks only support text and context_list, although some metrics only use text or context_list
     elif (
-        annotation.text is not None
+        (annotation.text is not None or annotation.context_list is not None)
         and not annotation.labels
         and annotation.bounding_box is None
         and annotation.polygon is None
integration_tests/external/conftest.py (254 additions, 2 deletions)
@@ -3,6 +3,90 @@
 from valor import Annotation, Datum, GroundTruth, Prediction
 
 
+@pytest.fixture
+def answer_correctness_q0() -> Datum:
+    return Datum(
+        uid="uid0",
+        text="""Did John Adams get along with Alexander Hamilton?""",
+        metadata={
+            "category": "history",
+        },
+    )
+
+
+@pytest.fixture
+def answer_correctness_q1() -> Datum:
+    return Datum(
+        uid="uid1",
+        text="""Did Lincoln win the election of 1860?""",
+        metadata={
+            "category": "history",
+        },
+    )
+
+
+@pytest.fixture
+def answer_correctness_datums(
+    answer_correctness_q0: Datum,
+    answer_correctness_q1: Datum,
+) -> list[Datum]:
+    return [answer_correctness_q0, answer_correctness_q1]
+
+
+@pytest.fixture
+def answer_correctness_predictions() -> list[str]:
+    return [
+        """John Adams and Alexander Hamilton did not get along. John Adams and Alexander Hamilton were both federalists.""",
+        """Lincoln won the election of 1860.""",
+    ]
+
+
+@pytest.fixture
+def answer_correctness_groundtruths() -> list[str]:
+    return [
+        """John Adams and Alexander Hamilton did not get along. John Adams and Alexander Hamilton held opposing views on the role of the federal government.""",
+        """Lincoln won the election of 1860.""",
+    ]
+
+
+@pytest.fixture
+def answer_correctness_gt_questions(
+    answer_correctness_datums: list[Datum],
+    answer_correctness_groundtruths: list[str],
+) -> list[GroundTruth]:
+    assert len(answer_correctness_datums) == len(
+        answer_correctness_groundtruths
+    )
+    return [
+        GroundTruth(
+            datum=answer_correctness_datums[i],
+            annotations=[Annotation(text=answer_correctness_groundtruths[i])],
+        )
+        for i in range(len(answer_correctness_datums))
+    ]
+
+
+@pytest.fixture
+def answer_correctness_pred_answers(
+    answer_correctness_datums: list[Datum],
+    answer_correctness_predictions: list[str],
+) -> list[Prediction]:
+    assert len(answer_correctness_datums) == len(
+        answer_correctness_predictions
+    )
+    return [
+        Prediction(
+            datum=answer_correctness_datums[i],
+            annotations=[
+                Annotation(
+                    text=answer_correctness_predictions[i],
+                )
+            ],
+        )
+        for i in range(len(answer_correctness_datums))
+    ]
+
+
 @pytest.fixture
 def answer_relevance_q0() -> Datum:
     return Datum(
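
The AnswerCorrectness fixtures above pair each question Datum with one ground truth answer and one predicted answer, aligned by index. As a rough illustration of how a test might consume them, here is a hypothetical sketch (not the actual integration test from this commit; only the fixture names are taken from it):

def test_answer_correctness_fixture_shapes(
    answer_correctness_gt_questions,
    answer_correctness_pred_answers,
):
    # Ground truths and predictions must align one-to-one by datum.
    assert len(answer_correctness_gt_questions) == len(
        answer_correctness_pred_answers
    )
    for gt, pred in zip(
        answer_correctness_gt_questions, answer_correctness_pred_answers
    ):
        assert gt.datum.uid == pred.datum.uid
        # AnswerCorrectness compares predicted answer text against
        # ground truth answer text, so both sides carry text annotations.
        assert gt.annotations[0].text is not None
        assert pred.annotations[0].text is not None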
@@ -229,6 +313,174 @@ def coherence_pred_answers(
     ]
 
 
+@pytest.fixture
+def context_precision_q0() -> Datum:
+    return Datum(
+        uid="uid0",
+        text="""What are some foods that Lewis Hamilton likes?""",
+    )
+
+
+@pytest.fixture
+def context_precision_q1() -> Datum:
+    return Datum(
+        uid="uid1",
+        text="""Name the first and third United States presidents.""",
+    )
+
+
+@pytest.fixture
+def context_precision_datums(
+    context_precision_q0: Datum,
+    context_precision_q1: Datum,
+) -> list[Datum]:
+    return [context_precision_q0, context_precision_q1]
+
+
+@pytest.fixture
+def context_precision_groundtruths() -> list[str]:
+    return [
+        """Lewis Hamilton likes spicy wings.""",
+        """The first president of the United States was George Washington. The third president of the United States was Thomas Jefferson.""",
+    ]
+
+
+@pytest.fixture
+def context_precision_context_list() -> list[list[str]]:
+    return [
+        [
+            """Lewis Hamilton is an F1 driver.""",
+            """Lewis Hamilton likes spicy wings.""",
+            """The F1 driver with the most wins of all time is Lewis Hamilton.""",
+            """Taylor Swift likes chicken tenders.""",
+        ],
+        [
+            """The first president of the United States was George Washington.""",
+            """The second president of the United States was John Adams.""",
+            """The third president of the United States was Thomas Jefferson.""",
+            """The fourth president of the United States was James Madison.""",
+        ],
+    ]
+
+
+@pytest.fixture
+def context_precision_gt_questions(
+    context_precision_datums: list[Datum],
+    context_precision_groundtruths: list[str],
+) -> list[GroundTruth]:
+    assert len(context_precision_datums) == len(context_precision_groundtruths)
+    return [
+        GroundTruth(
+            datum=context_precision_datums[i],
+            annotations=[Annotation(text=context_precision_groundtruths[i])],
+        )
+        for i in range(len(context_precision_datums))
+    ]
+
+
+@pytest.fixture
+def context_precision_pred_answers(
+    context_precision_datums: list[Datum],
+    context_precision_context_list: list[list[str]],
+) -> list[Prediction]:
+    assert len(context_precision_datums) == len(context_precision_context_list)
+    return [
+        Prediction(
+            datum=context_precision_datums[i],
+            annotations=[
+                Annotation(
+                    context_list=context_precision_context_list[i],
+                )
+            ],
+        )
+        for i in range(len(context_precision_datums))
+    ]
+
+
+@pytest.fixture
+def context_recall_q0() -> Datum:
+    return Datum(
+        uid="uid0",
+    )
+
+
+@pytest.fixture
+def context_recall_q1() -> Datum:
+    return Datum(
+        uid="uid1",
+    )
+
+
+@pytest.fixture
+def context_recall_datums(
+    context_recall_q0: Datum,
+    context_recall_q1: Datum,
+) -> list[Datum]:
+    return [context_recall_q0, context_recall_q1]
+
+
+@pytest.fixture
+def context_recall_groundtruths() -> list[str]:
+    return [
+        """Lewis Hamilton likes spicy wings. Taylor Swift likes chicken tenders.""",
+        """The first U.S. president was George Washington. The second U.S. president was John Adams. The third U.S. president was Thomas Jefferson.""",
+    ]
+
+
+@pytest.fixture
+def context_recall_context_list() -> list[list[str]]:
+    return [
+        [
+            """Lewis Hamilton is an F1 driver.""",
+            """Lewis Hamilton likes spicy wings.""",
+        ],
+        [
+            """The first president of the United States was George Washington.""",
+            """The second president of the United States was John Adams.""",
+            """The third president of the United States was Thomas Jefferson.""",
+            """The fourth president of the United States was James Madison.""",
+        ],
+    ]
+
+
+@pytest.fixture
+def context_recall_gt_questions(
+    context_recall_datums: list[Datum],
+    context_recall_groundtruths: list[str],
+) -> list[GroundTruth]:
+    assert len(context_recall_datums) == len(context_recall_groundtruths)
+    return [
+        GroundTruth(
+            datum=context_recall_datums[i],
+            annotations=[
+                Annotation(
+                    text=context_recall_groundtruths[i],
+                )
+            ],
+        )
+        for i in range(len(context_recall_datums))
+    ]
+
+
+@pytest.fixture
+def context_recall_pred_answers(
+    context_recall_datums: list[Datum],
+    context_recall_context_list: list[list[str]],
+) -> list[Prediction]:
+    assert len(context_recall_datums) == len(context_recall_context_list)
+    return [
+        Prediction(
+            datum=context_recall_datums[i],
+            annotations=[
+                Annotation(
+                    context_list=context_recall_context_list[i],
+                )
+            ],
+        )
+        for i in range(len(context_recall_datums))
+    ]
+
+
 @pytest.fixture
 def context_relevance_q0() -> Datum:
     return Datum(
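
Unlike the AnswerCorrectness predictions, the ContextPrecision and ContextRecall predictions above carry only a context_list, with no answer text; the types.py change at the top of this commit is what lets such context-only annotations imply a text generation task. A hypothetical sketch of a consumer, again borrowing only the fixture names from this commit:

def test_context_recall_fixture_shapes(
    context_recall_gt_questions,
    context_recall_pred_answers,
):
    assert len(context_recall_gt_questions) == len(context_recall_pred_answers)
    for gt, pred in zip(
        context_recall_gt_questions, context_recall_pred_answers
    ):
        assert gt.datum.uid == pred.datum.uid
        # ContextRecall scores retrieved contexts against ground truth text,
        # so the prediction side carries contexts rather than answer text.
        assert gt.annotations[0].text is not None
        assert pred.annotations[0].context_list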
@@ -268,7 +520,7 @@ def context_relevance_context_list() -> list[list[str]]:
             """Lewis Hamilton is an F1 driver.""",
             """Lewis Hamilton likes spicy wings.""",
             """The F1 driver with the most wins of all time is Lewis Hamilton.""",
-            """Taylor Swift likes chicken tendors.""",
+            """Taylor Swift likes chicken tenders.""",
         ],
         [
             """The first president of the United States was George Washington.""",
@@ -354,7 +606,7 @@ def faithfulness_context_list() -> list[list[str]]:
             """Lewis Hamilton is an F1 driver.""",
             """Lewis Hamilton likes spicy wings.""",
             """The F1 driver with the most wins of all time is Lewis Hamilton.""",
-            """Taylor Swift likes chicken tendors.""",
+            """Taylor Swift likes chicken tenders.""",
         ],
         [
             """George Washington's favorite color was yellow.""",
