@@ -43,6 +43,7 @@ def __init__(
43
43
top_k : Optional [int ] = None ,
44
44
ranking_mode : Literal ["reciprocal_rank_fusion" , "linear_score" ] = "reciprocal_rank_fusion" ,
45
45
sort_order : Literal ["ascending" , "descending" ] = "descending" ,
46
+ missing_meta : Literal ["drop" , "top" , "bottom" ] = "bottom" ,
46
47
meta_value_type : Optional [Literal ["float" , "int" , "date" ]] = None ,
47
48
):
48
49
"""
@@ -65,6 +66,14 @@ def __init__(
65
66
:param sort_order:
66
67
Whether to sort the meta field by ascending or descending order.
67
68
Possible values are `descending` (default) and `ascending`.
69
+ :param missing_meta:
70
+ What to do with documents that are missing the sorting metadata field.
71
+ Possible values are:
72
+ - 'drop' will drop the documents entirely.
73
+ - 'top' will place the documents at the top of the metadata-sorted list
74
+ (regardless of 'ascending' or 'descending').
75
+ - 'bottom' will place the documents at the bottom of the metadata-sorted list
76
+ (regardless of 'ascending' or 'descending').
68
77
:param meta_value_type:
69
78
Parse the meta value into the data type specified before sorting.
70
79
This will only work if all meta values stored under `meta_field` in the provided documents are strings.
@@ -82,11 +91,13 @@ def __init__(
82
91
self .top_k = top_k
83
92
self .ranking_mode = ranking_mode
84
93
self .sort_order = sort_order
94
+ self .missing_meta = missing_meta
85
95
self ._validate_params (
86
96
weight = self .weight ,
87
97
top_k = self .top_k ,
88
98
ranking_mode = self .ranking_mode ,
89
99
sort_order = self .sort_order ,
100
+ missing_meta = self .missing_meta ,
90
101
meta_value_type = meta_value_type ,
91
102
)
92
103
self .meta_value_type = meta_value_type
@@ -97,6 +108,7 @@ def _validate_params(
97
108
top_k : Optional [int ],
98
109
ranking_mode : Literal ["reciprocal_rank_fusion" , "linear_score" ],
99
110
sort_order : Literal ["ascending" , "descending" ],
111
+ missing_meta : Literal ["drop" , "top" , "bottom" ],
100
112
meta_value_type : Optional [Literal ["float" , "int" , "date" ]],
101
113
):
102
114
if top_k is not None and top_k <= 0 :
@@ -125,6 +137,14 @@ def _validate_params(
125
137
"MetaFieldRanker." % sort_order
126
138
)
127
139
140
+ if missing_meta not in ["drop" , "top" , "bottom" ]:
141
+ raise ValueError (
142
+ "The value of parameter <missing_meta> must be 'drop', 'top', or 'bottom', "
143
+ "but is currently set to '%s'.\n "
144
+ "Change the <missing_meta> value to 'drop', 'top', or 'bottom' when initializing the "
145
+ "MetaFieldRanker." % missing_meta
146
+ )
147
+
128
148
if meta_value_type not in ["float" , "int" , "date" , None ]:
129
149
raise ValueError (
130
150
"The value of parameter <meta_value_type> must be 'float', 'int', 'date' or None but is "
@@ -141,6 +161,7 @@ def run(
141
161
weight : Optional [float ] = None ,
142
162
ranking_mode : Optional [Literal ["reciprocal_rank_fusion" , "linear_score" ]] = None ,
143
163
sort_order : Optional [Literal ["ascending" , "descending" ]] = None ,
164
+ missing_meta : Optional [Literal ["drop" , "top" , "bottom" ]] = None ,
144
165
meta_value_type : Optional [Literal ["float" , "int" , "date" ]] = None ,
145
166
):
146
167
"""
@@ -171,6 +192,15 @@ def run(
171
192
Whether to sort the meta field by ascending or descending order.
172
193
Possible values are `descending` (default) and `ascending`.
173
194
If not provided, the sort_order provided at initialization time is used.
195
+ :param missing_meta:
196
+ What to do with documents that are missing the sorting metadata field.
197
+ Possible values are:
198
+ - 'drop' will drop the documents entirely.
199
+ - 'top' will place the documents at the top of the metadata-sorted list
200
+ (regardless of 'ascending' or 'descending').
201
+ - 'bottom' will place the documents at the bottom of the metadata-sorted list
202
+ (regardless of 'ascending' or 'descending').
203
+ If not provided, the missing_meta provided at initialization time is used.
174
204
:param meta_value_type:
175
205
Parse the meta value into the data type specified before sorting.
176
206
This will only work if all meta values stored under `meta_field` in the provided documents are strings.
@@ -199,12 +229,14 @@ def run(
199
229
weight = weight if weight is not None else self .weight
200
230
ranking_mode = ranking_mode or self .ranking_mode
201
231
sort_order = sort_order or self .sort_order
232
+ missing_meta = missing_meta or self .missing_meta
202
233
meta_value_type = meta_value_type or self .meta_value_type
203
234
self ._validate_params (
204
235
weight = weight ,
205
236
top_k = top_k ,
206
237
ranking_mode = ranking_mode ,
207
238
sort_order = sort_order ,
239
+ missing_meta = missing_meta ,
208
240
meta_value_type = meta_value_type ,
209
241
)
210
242
@@ -227,13 +259,27 @@ def run(
227
259
return {"documents" : documents [:top_k ]}
228
260
229
261
if len (docs_missing_meta_field ) > 0 :
230
- logger .warning (
231
- "The parameter <meta_field> is currently set to '{meta_field}' but the Documents with IDs {document_ids} don't have this meta key.\n "
232
- "These Documents will be placed at the end of the sorting order." ,
233
- meta_field = self .meta_field ,
234
- document_ids = "," .join ([doc .id for doc in docs_missing_meta_field ]),
262
+ warning_start = (
263
+ f"The parameter <meta_field> is currently set to '{ self .meta_field } ' but the Documents "
264
+ f"with IDs { ',' .join ([doc .id for doc in docs_missing_meta_field ])} don't have this meta key.\n "
235
265
)
236
266
267
+ if missing_meta == "bottom" :
268
+ logger .warning (
269
+ "{warning_start}Because the parameter <missing_meta> is set to 'bottom', these Documents will be placed at the end of the sorting order." ,
270
+ warning_start = warning_start ,
271
+ )
272
+ elif missing_meta == "top" :
273
+ logger .warning (
274
+ "{warning_start}Because the parameter <missing_meta> is set to 'top', these Documents will be placed at the top of the sorting order." ,
275
+ warning_start = warning_start ,
276
+ )
277
+ else :
278
+ logger .warning (
279
+ "{warning_start}Because the parameter <missing_meta> is set to 'drop', these Documents will be removed from the list of retrieved Documents." ,
280
+ warning_start = warning_start ,
281
+ )
282
+
237
283
# If meta_value_type is provided try to parse the meta values
238
284
parsed_meta = self ._parse_meta (docs_with_meta_field = docs_with_meta_field , meta_value_type = meta_value_type )
239
285
tuple_parsed_meta_and_docs = list (zip (parsed_meta , docs_with_meta_field ))
@@ -252,10 +298,18 @@ def run(
252
298
)
253
299
return {"documents" : documents [:top_k ]}
254
300
255
- # Add the docs missing the meta_field back on the end
301
+ # Merge rankings and handle missing meta fields as specified in the missing_meta parameter
256
302
sorted_by_meta = [doc for meta , doc in tuple_sorted_by_meta ]
257
- sorted_documents = sorted_by_meta + docs_missing_meta_field
258
- sorted_documents = self ._merge_rankings (documents , sorted_documents , weight , ranking_mode )
303
+ if missing_meta == "bottom" :
304
+ sorted_documents = sorted_by_meta + docs_missing_meta_field
305
+ sorted_documents = self ._merge_rankings (documents , sorted_documents , weight , ranking_mode )
306
+ elif missing_meta == "top" :
307
+ sorted_documents = docs_missing_meta_field + sorted_by_meta
308
+ sorted_documents = self ._merge_rankings (documents , sorted_documents , weight , ranking_mode )
309
+ else :
310
+ sorted_documents = sorted_by_meta
311
+ sorted_documents = self ._merge_rankings (docs_with_meta_field , sorted_documents , weight , ranking_mode )
312
+
259
313
return {"documents" : sorted_documents [:top_k ]}
260
314
261
315
def _parse_meta (
0 commit comments