Skip to content

Commit 42b3789

Browse files
davidsbatista, anakin87, julian-risch
authored
fix: DocumentRecallEvaluator changing division and adding checks for emptiness of documents (#9380)
* changing division and adding checks for emptiness of documents
* adding release notes
* adding tests
* Update releasenotes/notes/updated-doc-recall-eval-uniqueness-59b09082cf8e7593.yaml
  Co-authored-by: Stefano Fiorucci <[email protected]>
* attending PR comments
* Update releasenotes/notes/updated-doc-recall-eval-uniqueness-59b09082cf8e7593.yaml
* Update releasenotes/notes/updated-doc-recall-eval-uniqueness-59b09082cf8e7593.yaml
  Co-authored-by: Julian Risch <[email protected]>
* Update haystack/components/evaluators/document_recall.py
  Co-authored-by: Julian Risch <[email protected]>
* Update haystack/components/evaluators/document_recall.py
  Co-authored-by: Julian Risch <[email protected]>
* Update haystack/components/evaluators/document_recall.py
  Co-authored-by: Julian Risch <[email protected]>
* Update haystack/components/evaluators/document_recall.py
  Co-authored-by: Julian Risch <[email protected]>
* adding tests
* linting
---------
Co-authored-by: Stefano Fiorucci <[email protected]>
Co-authored-by: Julian Risch <[email protected]>
1 parent aeea3b2 commit 42b3789

File tree

3 files changed

+55
-2
lines changed

3 files changed

+55
-2
lines changed

haystack/components/evaluators/document_recall.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,11 @@
55
from enum import Enum
66
from typing import Any, Dict, List, Union
77

8-
from haystack import component, default_to_dict
8+
from haystack import component, default_to_dict, logging
99
from haystack.dataclasses import Document
1010

11+
logger = logging.getLogger(__name__)
12+
1113

1214
class RecallMode(Enum):
1315
"""
@@ -97,7 +99,21 @@ def _recall_multi_hit(ground_truth_documents: List[Document], retrieved_document
9799
unique_retrievals = {p.content for p in retrieved_documents}
98100
retrieved_ground_truths = unique_truths.intersection(unique_retrievals)
99101

100-
return len(retrieved_ground_truths) / len(ground_truth_documents)
102+
if not unique_truths or unique_truths == {""}:
103+
logger.warning(
104+
"There are no ground truth documents or all of them have an empty string as content. "
105+
"Score will be set to 0."
106+
)
107+
return 0.0
108+
109+
if not unique_retrievals or unique_retrievals == {""}:
110+
logger.warning(
111+
"There are no retrieved documents or all of them have an empty string as content. "
112+
"Score will be set to 0."
113+
)
114+
return 0.0
115+
116+
return len(retrieved_ground_truths) / len(unique_truths)
101117

102118
@component.output_types(score=float, individual_scores=List[float])
103119
def run(
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
enhancements:
3+
- |
4+
The `DocumentRecallEvaluator` was updated. In `MULTI_HIT` mode, the recall score is now the number of retrieved ground truth documents divided by the number of unique ground truth documents, instead of by the total number of ground truth documents.
5+
We also added checks for emptiness. If there are no retrieved documents or all of them have an empty string as content, we return 0.0 and log a warning. Likewise, if there are no ground truth documents or all of them have an empty string as content, we return 0.0 and log a warning.

test/components/evaluators/test_document_recall.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,14 @@ def test_init_with_unknown_mode_string():
1313
DocumentRecallEvaluator(mode="unknown_mode")
1414

1515

16+
def test_init_with_string_mode():
17+
evaluator = DocumentRecallEvaluator(mode="single_hit")
18+
assert evaluator.mode == RecallMode.SINGLE_HIT
19+
20+
evaluator = DocumentRecallEvaluator(mode="multi_hit")
21+
assert evaluator.mode == RecallMode.MULTI_HIT
22+
23+
1624
class TestDocumentRecallEvaluatorSingleHit:
1725
@pytest.fixture
1826
def evaluator(self):
@@ -186,3 +194,27 @@ def test_from_dict(self):
186194
}
187195
new_evaluator = default_from_dict(DocumentRecallEvaluator, data)
188196
assert new_evaluator.mode == RecallMode.MULTI_HIT
197+
198+
def test_empty_ground_truth_documents(self, evaluator):
199+
ground_truth_documents = [[]]
200+
retrieved_documents = [[Document(content="test")]]
201+
score = evaluator.run(ground_truth_documents, retrieved_documents)
202+
assert score == {"individual_scores": [0.0], "score": 0.0}
203+
204+
def test_empty_retrieved_documents(self, evaluator):
205+
ground_truth_documents = [[Document(content="test")]]
206+
retrieved_documents = [[]]
207+
score = evaluator.run(ground_truth_documents, retrieved_documents)
208+
assert score == {"individual_scores": [0.0], "score": 0.0}
209+
210+
def test_empty_string_ground_truth_documents(self, evaluator):
211+
ground_truth_documents = [[Document(content="")]]
212+
retrieved_documents = [[Document(content="test")]]
213+
score = evaluator.run(ground_truth_documents, retrieved_documents)
214+
assert score == {"individual_scores": [0.0], "score": 0.0}
215+
216+
def test_empty_string_retrieved_documents(self, evaluator):
217+
ground_truth_documents = [[Document(content="test")]]
218+
retrieved_documents = [[Document(content="")]]
219+
score = evaluator.run(ground_truth_documents, retrieved_documents)
220+
assert score == {"individual_scores": [0.0], "score": 0.0}

0 commit comments

Comments
 (0)