@@ -141,6 +141,39 @@ def test_constructor_parameter_does_not_change(self):
141
141
retriever .run (retrieved_documents = [Document .from_dict (doc )], window_size = 1 )
142
142
assert retriever .window_size == 5
143
143
144
def test_context_documents_returned_are_ordered_by_split_idx_start(self):
    """
    Check that `context_documents` come back ordered by `split_idx_start`.

    Ten one-sentence documents sharing the same `source_id` are created, each
    with `split_id` equal to its position and `split_idx_start` equal to the
    cumulative character offset of its content. They are shuffled before being
    written to the store, so the order in which they were written cannot be
    what produces the expected ordering in the result.
    """
    import random

    docs = []
    accumulated_length = 0
    for sent in range(10):
        # Literal text outside the braces is kept verbatim, so there must be
        # no space before the period: every content is exactly 11 characters,
        # which is what the expected offsets (11, 22, ...) below rely on.
        content = f"Sentence {sent}."
        docs.append(
            Document(
                content=content,
                meta={
                    "id": f"doc_{sent}",
                    "split_idx_start": accumulated_length,
                    "source_id": "source1",
                    "split_id": sent,
                },
            )
        )
        accumulated_length += len(content)

    # Scramble the write order so it differs from the split order.
    random.shuffle(docs)

    doc_store = InMemoryDocumentStore()
    doc_store.write_documents(docs)
    retriever = SentenceWindowRetriever(document_store=doc_store, window_size=3)

    # run the retriever with a document whose content = "Sentence 4."
    retrieved = [doc for doc in docs if doc.content == "Sentence 4."]
    # Guard the fixture itself: an empty selection would make the assertions
    # below fail for a reason unrelated to ordering.
    assert len(retrieved) == 1
    result = retriever.run(retrieved_documents=retrieved)

    # assert that the context documents are in the correct order
    # (split_ids 1..7, i.e. three neighbours on each side of split_id 4)
    assert len(result["context_documents"]) == 7
    assert [doc.meta["split_idx_start"] for doc in result["context_documents"]] == [11, 22, 33, 44, 55, 66, 77]
176
+
144
177
@pytest .mark .integration
145
178
def test_run_with_pipeline (self ):
146
179
splitter = DocumentSplitter (split_length = 1 , split_overlap = 0 , split_by = "sentence" )
@@ -165,13 +198,13 @@ def test_run_with_pipeline(self):
165
198
"This is a text with some words. There is a second sentence. And there is also a third sentence. "
166
199
"It also contains a fourth sentence. And a fifth sentence."
167
200
]
168
- assert len (result ["sentence_window_retriever" ]["context_documents" ][ 0 ] ) == 5
201
+ assert len (result ["sentence_window_retriever" ]["context_documents" ]) == 5
169
202
170
203
result = pipe .run ({"bm25_retriever" : {"query" : "third" }, "sentence_window_retriever" : {"window_size" : 1 }})
171
204
assert result ["sentence_window_retriever" ]["context_windows" ] == [
172
205
" There is a second sentence. And there is also a third sentence. It also contains a fourth sentence."
173
206
]
174
- assert len (result ["sentence_window_retriever" ]["context_documents" ][ 0 ] ) == 3
207
+ assert len (result ["sentence_window_retriever" ]["context_documents" ]) == 3
175
208
176
209
@pytest .mark .integration
177
210
def test_serialization_deserialization_in_pipeline (self ):
0 commit comments