Skip to content
This repository was archived by the owner on Sep 24, 2019. It is now read-only.

Commit b1243d8

Browse files
author
Anthony Bargnesi
committed
aggregate on full-text search; avoids Mongo limits
A full-text search filter to /api/evidence with a sort on bel_statement used only the text index. This means that the bel_statement sort had to be done in memory, which reaches the 32 MB sort limit with only several tens of thousands of documents. The solution employed here is to use cursored aggregation, allowing disk use for sort stages. It was introduced as an alternative code path that is taken only if an FTS filter is included in the HTTP request. Although this minimizes the risk of regression, there is a fair bit to clean up in the mongo access layer. closes #96
1 parent 5d44fd0 commit b1243d8

File tree

1 file changed

+126
-0
lines changed

1 file changed

+126
-0
lines changed

lib/openbel/api/evidence/mongo.rb

+126
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,15 @@ def find_evidence_by_id(value)
5151
end
5252

5353
# Finds evidence, choosing the access path from the supplied filters.
#
# When the filters contain a full-text search filter (as decided by
# +includes_fts_search?+) the request is served by +evidence_aggregate+;
# otherwise it is served by +evidence_query+. All paging and facet
# arguments are forwarded unchanged to whichever path is taken.
#
# filters           - Array of filter hashes.
# offset            - Forwarded to the selected path.
# length            - Forwarded to the selected path.
# facet             - Forwarded to the selected path.
# facet_value_limit - Forwarded to the selected path.
#
# Returns whatever the selected path returns.
def find_evidence(filters = [], offset = 0, length = 0, facet = false, facet_value_limit = -1)
  unless includes_fts_search?(filters)
    return evidence_query(filters, offset, length, facet, facet_value_limit)
  end

  search_terms = get_fts_search(filters)
  evidence_aggregate(search_terms, filters, offset, length, facet, facet_value_limit)
end
61+
62+
def evidence_query(filters = [], offset = 0, length = 0, facet = false, facet_value_limit = -1)
5463
query_hash = to_query(filters)
5564
query_opts = query_options(
5665
query_hash,
@@ -72,6 +81,75 @@ def find_evidence(filters = [], offset = 0, length = 0, facet = false, facet_val
7281
results
7382
end
7483

84+
# Serves a full-text search request through an aggregation pipeline.
#
# The non-'fts' filters are turned into a match document via
# +build_filter_query+, and a +$text+ clause for +text_search+ is placed at
# the front of its +$and+ list. The pipeline matches, projects +_id+,
# +bel_statement+ and the text score, sorts by text score and then
# +bel_statement+, and applies +$skip+ / +$limit+ only when +offset+ /
# +length+ are greater than zero. The aggregation is run with
# +allowDiskUse+ enabled.
#
# text_search       - Search string used in the +$text+ clause.
# filters           - Array of filter hashes (including the fts filter).
# offset            - Number of results to skip; ignored unless > 0.
# length            - Maximum number of results; ignored unless > 0.
# facet             - When truthy, facets are looked up via @evidence_facets.
# facet_value_limit - Passed through to +find_facets+.
#
# Returns a Hash with :cursor (the result of finding the matched ids in
# @collection) and :facets (an Array, or nil when +facet+ is falsy).
#
# NOTE(review): the final lookup selects documents with +$in+ on the
# collected ids; nothing here re-applies the pipeline's sort to that
# lookup, so confirm whether callers depend on the aggregated ordering.
def evidence_aggregate(text_search, filters = [], offset = 0, length = 0, facet = false, facet_value_limit = -1)
  non_fts_filters = filters.reject { |candidate| candidate['category'] == 'fts' }

  match_stage = build_filter_query(non_fts_filters)
  text_clause = { :$text => { :$search => text_search } }
  match_stage[:$and].unshift(text_clause)

  text_score = { :$meta => 'textScore' }
  pipeline = [
    { :$match => match_stage },
    { :$project => { :_id => 1, :bel_statement => 1, :score => text_score } },
    # Key order matters: text score first, then bel_statement.
    { :$sort => { :score => { :$meta => 'textScore' }, :bel_statement => 1 } }
  ]
  pipeline << { :$skip => offset } if offset > 0
  pipeline << { :$limit => length } if length > 0

  aggregate_options = {
    :allowDiskUse => true,
    :cursor => { :batchSize => 0 }
  }
  matched_ids = @collection.aggregate(pipeline, aggregate_options).map { |doc| doc['_id'] }

  facet_results = nil
  if facet
    facet_query = to_query(filters)
    facet_results = @evidence_facets.find_facets(facet_query, filters, facet_value_limit).to_a
  end

  {
    :cursor => @collection.find({ :_id => { :$in => matched_ids } }),
    :facets => facet_results
  }
end
152+
75153
def find_dataset_evidence(dataset, filters = [], offset = 0, length = 0, facet = false, facet_value_limit = -1)
76154
query_hash = to_query(filters)
77155
query_hash[:$and] ||= []
@@ -197,6 +275,54 @@ def ensure_all_indexes
197275

198276
private
199277

278+
# Reports whether any filter is a full-text search filter, i.e. has
# category 'fts' and name 'search'.
#
# filters - Array of filter hashes keyed by 'category' and 'name'.
#
# Returns true when at least one such filter is present, false otherwise.
def includes_fts_search?(filters)
  filters.any? do |candidate|
    next false unless candidate['category'] == 'fts'

    candidate['name'] == 'search'
  end
end
283+
284+
# Extracts the search string from the first full-text search filter
# (category 'fts', name 'search').
#
# filters - Array of filter hashes keyed by 'category', 'name' and 'value'.
#
# Returns the filter's 'value', or an empty string when the filter has no
# 'value' key or when no full-text search filter is present at all.
def get_fts_search(filters)
  fts_filter = filters.find { |filter|
    filter['category'] == 'fts' && filter['name'] == 'search'
  }
  # Without this guard a missing filter would raise NoMethodError on nil;
  # fall back to the same default used for a missing 'value' key.
  return '' unless fts_filter

  fts_filter.fetch('value', '')
end
290+
291+
# Builds a query document that ANDs together one clause per filter.
#
# Filters in the 'experiment_context' and 'metadata' categories become an
# +$elemMatch+ on that field, matching the stringified name and value.
# Every other filter becomes an equality clause whose key is the string
# "<category>.<name>" and whose value is the stringified filter value.
#
# filters - Array of filter hashes keyed by 'category', 'name' and 'value'.
#
# Returns a Hash of the form { :$and => [clause, ...] }; the clause list is
# empty when +filters+ is empty.
def build_filter_query(filters = [])
  clauses = filters.map do |filter|
    category = filter['category']
    name = filter['name']
    value = filter['value']

    # Categories stored as arrays of name/value pairs need $elemMatch.
    element_field =
      case category
      when 'experiment_context' then :experiment_context
      when 'metadata' then :metadata
      end

    if element_field
      { element_field => { :$elemMatch => { :name => name.to_s, :value => value.to_s } } }
    else
      { "#{category}.#{name}" => value.to_s }
    end
  end

  { :$and => clauses }
end
325+
200326
def to_query(filters = [])
201327
if !filters || filters.empty?
202328
return {}

0 commit comments

Comments
 (0)