Skip to content

Commit bc30105

Browse files
authored
test: reorganize docstore test suite to isolate dataframe tests (#8684)
* reorganize docstore test suite to isolate dataframe tests
* improve docstring
* include FilterDocumentsTestWithDataframe in InMemoryDocumentStore tests
1 parent 5539f6c commit bc30105

File tree

3 files changed

+152
-106
lines changed

3 files changed

+152
-106
lines changed

haystack/testing/document_store.py

Lines changed: 145 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -174,74 +174,86 @@ def test_delete_documents_non_existing_document(self, document_store: DocumentSt
174174
assert document_store.count_documents() == 1
175175

176176

177-
class FilterableDocsFixtureMixin:
177+
def create_filterable_docs(include_dataframe_docs: bool = False) -> List[Document]:
178178
"""
179-
Mixin class that adds a filterable_docs() fixture to a test class.
179+
Create a list of filterable documents to be used in the filterable_docs and filterable_docs_with_dataframe fixtures.
180180
"""
181181

182-
@pytest.fixture
183-
def filterable_docs(self) -> List[Document]:
184-
"""Fixture that returns a list of Documents that can be used to test filtering."""
185-
documents = []
186-
for i in range(3):
187-
documents.append(
188-
Document(
189-
content=f"A Foo Document {i}",
190-
meta={
191-
"name": f"name_{i}",
192-
"page": "100",
193-
"chapter": "intro",
194-
"number": 2,
195-
"date": "1969-07-21T20:17:40",
196-
},
197-
embedding=_random_embeddings(768),
198-
)
182+
documents = []
183+
for i in range(3):
184+
documents.append(
185+
Document(
186+
content=f"A Foo Document {i}",
187+
meta={
188+
"name": f"name_{i}",
189+
"page": "100",
190+
"chapter": "intro",
191+
"number": 2,
192+
"date": "1969-07-21T20:17:40",
193+
},
194+
embedding=_random_embeddings(768),
199195
)
200-
documents.append(
201-
Document(
202-
content=f"A Bar Document {i}",
203-
meta={
204-
"name": f"name_{i}",
205-
"page": "123",
206-
"chapter": "abstract",
207-
"number": -2,
208-
"date": "1972-12-11T19:54:58",
209-
},
210-
embedding=_random_embeddings(768),
211-
)
196+
)
197+
documents.append(
198+
Document(
199+
content=f"A Bar Document {i}",
200+
meta={
201+
"name": f"name_{i}",
202+
"page": "123",
203+
"chapter": "abstract",
204+
"number": -2,
205+
"date": "1972-12-11T19:54:58",
206+
},
207+
embedding=_random_embeddings(768),
212208
)
213-
documents.append(
214-
Document(
215-
content=f"A Foobar Document {i}",
216-
meta={
217-
"name": f"name_{i}",
218-
"page": "90",
219-
"chapter": "conclusion",
220-
"number": -10,
221-
"date": "1989-11-09T17:53:00",
222-
},
223-
embedding=_random_embeddings(768),
224-
)
209+
)
210+
documents.append(
211+
Document(
212+
content=f"A Foobar Document {i}",
213+
meta={
214+
"name": f"name_{i}",
215+
"page": "90",
216+
"chapter": "conclusion",
217+
"number": -10,
218+
"date": "1989-11-09T17:53:00",
219+
},
220+
embedding=_random_embeddings(768),
225221
)
226-
documents.append(
227-
Document(
228-
content=f"Document {i} without embedding",
229-
meta={"name": f"name_{i}", "no_embedding": True, "chapter": "conclusion"},
230-
)
222+
)
223+
documents.append(
224+
Document(
225+
content=f"Document {i} without embedding",
226+
meta={"name": f"name_{i}", "no_embedding": True, "chapter": "conclusion"},
231227
)
228+
)
229+
documents.append(
230+
Document(content=f"Doc {i} with zeros emb", meta={"name": "zeros_doc"}, embedding=TEST_EMBEDDING_1)
231+
)
232+
documents.append(
233+
Document(content=f"Doc {i} with ones emb", meta={"name": "ones_doc"}, embedding=TEST_EMBEDDING_2)
234+
)
235+
236+
if include_dataframe_docs:
237+
for i in range(3):
232238
documents.append(Document(dataframe=pd.DataFrame([i]), meta={"name": f"table_doc_{i}"}))
233-
documents.append(
234-
Document(content=f"Doc {i} with zeros emb", meta={"name": "zeros_doc"}, embedding=TEST_EMBEDDING_1)
235-
)
236-
documents.append(
237-
Document(content=f"Doc {i} with ones emb", meta={"name": "ones_doc"}, embedding=TEST_EMBEDDING_2)
238-
)
239-
return documents
239+
240+
return documents
241+
242+
243+
class FilterableDocsFixtureMixin:
244+
"""
245+
Mixin class that adds a filterable_docs() fixture to a test class.
246+
"""
247+
248+
@pytest.fixture
249+
def filterable_docs(self) -> List[Document]:
250+
"""Fixture that returns a list of Documents that can be used to test filtering."""
251+
return create_filterable_docs(include_dataframe_docs=False)
240252

241253

242254
class FilterDocumentsTest(AssertDocumentsEqualMixin, FilterableDocsFixtureMixin):
243255
"""
244-
Utility class to test a Document Store `filter_documents` method using different types of filters.
256+
Utility class to test a Document Store `filter_documents` method using different types of filters.
245257
246258
To use it create a custom test class and override the `document_store` fixture to return your Document Store.
247259
Example usage:
@@ -270,16 +282,6 @@ def test_comparison_equal(self, document_store, filterable_docs):
270282
result = document_store.filter_documents(filters={"field": "meta.number", "operator": "==", "value": 100})
271283
self.assert_documents_are_equal(result, [d for d in filterable_docs if d.meta.get("number") == 100])
272284

273-
def test_comparison_equal_with_dataframe(self, document_store, filterable_docs):
274-
"""Test filter_documents() with == comparator and dataframe"""
275-
document_store.write_documents(filterable_docs)
276-
result = document_store.filter_documents(
277-
filters={"field": "dataframe", "operator": "==", "value": pd.DataFrame([1])}
278-
)
279-
self.assert_documents_are_equal(
280-
result, [d for d in filterable_docs if d.dataframe is not None and d.dataframe.equals(pd.DataFrame([1]))]
281-
)
282-
283285
def test_comparison_equal_with_none(self, document_store, filterable_docs):
284286
"""Test filter_documents() with == comparator and None"""
285287
document_store.write_documents(filterable_docs)
@@ -293,16 +295,6 @@ def test_comparison_not_equal(self, document_store, filterable_docs):
293295
result = document_store.filter_documents({"field": "meta.number", "operator": "!=", "value": 100})
294296
self.assert_documents_are_equal(result, [d for d in filterable_docs if d.meta.get("number") != 100])
295297

296-
def test_comparison_not_equal_with_dataframe(self, document_store, filterable_docs):
297-
"""Test filter_documents() with != comparator and dataframe"""
298-
document_store.write_documents(filterable_docs)
299-
result = document_store.filter_documents(
300-
filters={"field": "dataframe", "operator": "!=", "value": pd.DataFrame([1])}
301-
)
302-
self.assert_documents_are_equal(
303-
result, [d for d in filterable_docs if d.dataframe is None or not d.dataframe.equals(pd.DataFrame([1]))]
304-
)
305-
306298
def test_comparison_not_equal_with_none(self, document_store, filterable_docs):
307299
"""Test filter_documents() with != comparator and None"""
308300
document_store.write_documents(filterable_docs)
@@ -340,12 +332,6 @@ def test_comparison_greater_than_with_string(self, document_store, filterable_do
340332
with pytest.raises(FilterError):
341333
document_store.filter_documents(filters={"field": "meta.number", "operator": ">", "value": "1"})
342334

343-
def test_comparison_greater_than_with_dataframe(self, document_store, filterable_docs):
344-
"""Test filter_documents() with > comparator and dataframe"""
345-
document_store.write_documents(filterable_docs)
346-
with pytest.raises(FilterError):
347-
document_store.filter_documents(filters={"field": "dataframe", "operator": ">", "value": pd.DataFrame([1])})
348-
349335
def test_comparison_greater_than_with_list(self, document_store, filterable_docs):
350336
"""Test filter_documents() with > comparator and list"""
351337
document_store.write_documents(filterable_docs)
@@ -389,14 +375,6 @@ def test_comparison_greater_than_equal_with_string(self, document_store, filtera
389375
with pytest.raises(FilterError):
390376
document_store.filter_documents(filters={"field": "meta.number", "operator": ">=", "value": "1"})
391377

392-
def test_comparison_greater_than_equal_with_dataframe(self, document_store, filterable_docs):
393-
"""Test filter_documents() with >= comparator and dataframe"""
394-
document_store.write_documents(filterable_docs)
395-
with pytest.raises(FilterError):
396-
document_store.filter_documents(
397-
filters={"field": "dataframe", "operator": ">=", "value": pd.DataFrame([1])}
398-
)
399-
400378
def test_comparison_greater_than_equal_with_list(self, document_store, filterable_docs):
401379
"""Test filter_documents() with >= comparator and list"""
402380
document_store.write_documents(filterable_docs)
@@ -440,12 +418,6 @@ def test_comparison_less_than_with_string(self, document_store, filterable_docs)
440418
with pytest.raises(FilterError):
441419
document_store.filter_documents(filters={"field": "meta.number", "operator": "<", "value": "1"})
442420

443-
def test_comparison_less_than_with_dataframe(self, document_store, filterable_docs):
444-
"""Test filter_documents() with < comparator and dataframe"""
445-
document_store.write_documents(filterable_docs)
446-
with pytest.raises(FilterError):
447-
document_store.filter_documents(filters={"field": "dataframe", "operator": "<", "value": pd.DataFrame([1])})
448-
449421
def test_comparison_less_than_with_list(self, document_store, filterable_docs):
450422
"""Test filter_documents() with < comparator and list"""
451423
document_store.write_documents(filterable_docs)
@@ -489,14 +461,6 @@ def test_comparison_less_than_equal_with_string(self, document_store, filterable
489461
with pytest.raises(FilterError):
490462
document_store.filter_documents(filters={"field": "meta.number", "operator": "<=", "value": "1"})
491463

492-
def test_comparison_less_than_equal_with_dataframe(self, document_store, filterable_docs):
493-
"""Test filter_documents() with <= comparator and dataframe"""
494-
document_store.write_documents(filterable_docs)
495-
with pytest.raises(FilterError):
496-
document_store.filter_documents(
497-
filters={"field": "dataframe", "operator": "<=", "value": pd.DataFrame([1])}
498-
)
499-
500464
def test_comparison_less_than_equal_with_list(self, document_store, filterable_docs):
501465
"""Test filter_documents() with <= comparator and list"""
502466
document_store.write_documents(filterable_docs)
@@ -638,6 +602,83 @@ def test_missing_condition_value_key(self, document_store, filterable_docs):
638602
)
639603

640604

605+
class FilterableDocsFixtureMixinWithDataframe:
606+
"""
607+
Mixin class that adds a filterable_docs_with_dataframe() fixture to a test class, including dataframe documents.
608+
"""
609+
610+
@pytest.fixture
611+
def filterable_docs_with_dataframe(self) -> List[Document]:
612+
"""Fixture that returns a list of Documents including dataframe documents."""
613+
documents = create_filterable_docs(include_dataframe_docs=True)
614+
615+
return documents
616+
617+
618+
class FilterDocumentsTestWithDataframe(AssertDocumentsEqualMixin, FilterableDocsFixtureMixinWithDataframe):
619+
"""
620+
Utility class to test a Document Store `filter_documents` method specifically for DataFrame documents.
621+
"""
622+
623+
def test_comparison_equal_with_dataframe(self, document_store, filterable_docs_with_dataframe):
624+
"""Test filter_documents() with == comparator and dataframe"""
625+
document_store.write_documents(filterable_docs_with_dataframe)
626+
result = document_store.filter_documents(
627+
filters={"field": "dataframe", "operator": "==", "value": pd.DataFrame([1])}
628+
)
629+
self.assert_documents_are_equal(
630+
result,
631+
[
632+
d
633+
for d in filterable_docs_with_dataframe
634+
if d.dataframe is not None and d.dataframe.equals(pd.DataFrame([1]))
635+
],
636+
)
637+
638+
def test_comparison_not_equal_with_dataframe(self, document_store, filterable_docs_with_dataframe):
639+
"""Test filter_documents() with != comparator and dataframe"""
640+
document_store.write_documents(filterable_docs_with_dataframe)
641+
result = document_store.filter_documents(
642+
filters={"field": "dataframe", "operator": "!=", "value": pd.DataFrame([1])}
643+
)
644+
self.assert_documents_are_equal(
645+
result,
646+
[
647+
d
648+
for d in filterable_docs_with_dataframe
649+
if d.dataframe is None or not d.dataframe.equals(pd.DataFrame([1]))
650+
],
651+
)
652+
653+
def test_comparison_greater_than_with_dataframe(self, document_store, filterable_docs_with_dataframe):
654+
"""Test filter_documents() with > comparator and dataframe"""
655+
document_store.write_documents(filterable_docs_with_dataframe)
656+
with pytest.raises(FilterError):
657+
document_store.filter_documents(filters={"field": "dataframe", "operator": ">", "value": pd.DataFrame([1])})
658+
659+
def test_comparison_greater_than_equal_with_dataframe(self, document_store, filterable_docs_with_dataframe):
660+
"""Test filter_documents() with >= comparator and dataframe"""
661+
document_store.write_documents(filterable_docs_with_dataframe)
662+
with pytest.raises(FilterError):
663+
document_store.filter_documents(
664+
filters={"field": "dataframe", "operator": ">=", "value": pd.DataFrame([1])}
665+
)
666+
667+
def test_comparison_less_than_with_dataframe(self, document_store, filterable_docs_with_dataframe):
668+
"""Test filter_documents() with < comparator and dataframe"""
669+
document_store.write_documents(filterable_docs_with_dataframe)
670+
with pytest.raises(FilterError):
671+
document_store.filter_documents(filters={"field": "dataframe", "operator": "<", "value": pd.DataFrame([1])})
672+
673+
def test_comparison_less_than_equal_with_dataframe(self, document_store, filterable_docs_with_dataframe):
674+
"""Test filter_documents() with <= comparator and dataframe"""
675+
document_store.write_documents(filterable_docs_with_dataframe)
676+
with pytest.raises(FilterError):
677+
document_store.filter_documents(
678+
filters={"field": "dataframe", "operator": "<=", "value": pd.DataFrame([1])}
679+
)
680+
681+
641682
class DocumentStoreBaseTests(CountDocumentsTest, WriteDocumentsTest, DeleteDocumentsTest, FilterDocumentsTest):
642683
@pytest.fixture
643684
def document_store(self) -> DocumentStore:
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
enhancements:
3+
- |
4+
Reorganized the document store test suite to isolate dataframe filter tests.
5+
This change prepares for potential future deprecation of the Document class's dataframe field.

test/document_stores/test_in_memory.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,10 @@
1111
from haystack import Document
1212
from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError
1313
from haystack.document_stores.in_memory import InMemoryDocumentStore
14-
from haystack.testing.document_store import DocumentStoreBaseTests
14+
from haystack.testing.document_store import DocumentStoreBaseTests, FilterDocumentsTestWithDataframe
1515

1616

17-
class TestMemoryDocumentStore(DocumentStoreBaseTests): # pylint: disable=R0904
17+
class TestMemoryDocumentStore(DocumentStoreBaseTests, FilterDocumentsTestWithDataframe): # pylint: disable=R0904
1818
"""
1919
Test InMemoryDocumentStore's specific features
2020
"""

0 commit comments

Comments
 (0)