Skip to content

Commit 79d88ac

Browse files
authored
Merge pull request #33851 from vespa-engine/toregge/handle-element-gap-in-near-search-iterator-for-indexed-search
Handle element gap in near search iterator for indexed search.
2 parents 0aaf975 + e8b6466 commit 79d88ac

3 files changed

Lines changed: 37 additions & 11 deletions

File tree

searchlib/src/tests/nearsearch/nearsearch_test.cpp

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -110,6 +110,10 @@ class MyQuery {
110110
return _window;
111111
}
112112
const IElementGapInspector& get_element_gap_inspector() const noexcept { return _element_gap_inspector; }
113+
MyQuery& set_element_gap(std::optional<uint32_t> element_gap) {
114+
_element_gap_inspector = MockElementGapInspector(element_gap);
115+
return *this;
116+
}
113117
};
114118

115119
MyQuery::MyQuery(bool ordered, uint32_t window)
@@ -205,6 +209,12 @@ TEST_F(NearSearchTest, element_boundary)
205209
MyTerm bar({69, 70, 71}, {{1, 5, {1}}});
206210
testNearSearch(MyQuery(false, 20).addTerm(foo).addTerm(bar), 0, "near 1");
207211
testNearSearch(MyQuery(true, 20).addTerm(foo).addTerm(bar), 0, "onear 1");
212+
testNearSearch(MyQuery(false, 20).addTerm(foo).addTerm(bar).set_element_gap(0), 69, "near 1");
213+
testNearSearch(MyQuery(true, 20).addTerm(foo).addTerm(bar).set_element_gap(0), 69, "onear 1");
214+
testNearSearch(MyQuery(false, 20).addTerm(foo).addTerm(bar).set_element_gap(14), 69, "near 2");
215+
testNearSearch(MyQuery(true, 20).addTerm(foo).addTerm(bar).set_element_gap(14), 69, "onear 2");
216+
testNearSearch(MyQuery(false, 20).addTerm(foo).addTerm(bar).set_element_gap(15), 0, "near 3");
217+
testNearSearch(MyQuery(true, 20).addTerm(foo).addTerm(bar).set_element_gap(15), 0, "onear 3");
208218
}
209219

210220
TEST_F(NearSearchTest, repeated_terms)

searchlib/src/vespa/searchlib/queryeval/nearsearch.cpp

Lines changed: 25 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@ namespace search::queryeval {
1616
namespace {
1717

1818
using search::fef::TermFieldMatchDataArray;
19+
using search::fef::TermFieldMatchDataPosition;
1920
using search::fef::TermFieldMatchDataPositionKey;
2021

2122
template<typename T>
@@ -32,6 +33,16 @@ void setup_fields(uint32_t window, const IElementGapInspector& element_gap_inspe
3233
}
3334
}
3435

36+
/**
 * Computes the last position key that is still inside a window of `window`
 * positions starting at `pos`.
 *
 * Without an element gap the window end always stays in the element of `pos`.
 * With an element gap, the window end stays in the same element as long as
 * position + window is below element length + gap. Otherwise it is placed in
 * the immediately following element (element id + 1), at the offset left
 * after subtracting the element length and the gap. Only that one following
 * element is considered.
 *
 * @param pos         position the window starts at.
 * @param window      window size added to the start position.
 * @param element_gap optional gap between adjacent elements.
 * @return the (element id, position) key of the window end.
 */
TermFieldMatchDataPositionKey
calc_window_end_pos(const TermFieldMatchDataPosition& pos, uint32_t window, std::optional<uint32_t> element_gap)
{
    // Window end expressed in the coordinates of the element containing pos.
    const auto same_element_end = pos.getPosition() + window;
    if (element_gap.has_value()) {
        // Offset (in this element's coordinates) that maps to position 0 of
        // the next element: this element's length plus the gap.
        const auto next_element_origin = pos.getElementLen() + *element_gap;
        if (next_element_origin <= same_element_end) {
            return { pos.getElementId() + 1, same_element_end - pos.getElementLen() - *element_gap };
        }
    }
    return { pos.getElementId(), same_element_end };
}
45+
3546
} // namespace search::queryeval::<unnamed>
3647

3748
NearSearchBase::NearSearchBase(Children terms,
@@ -153,8 +164,15 @@ struct PosIter {
153164
struct Iterators
154165
{
155166
vespalib::PriorityQueue<PosIter> _queue;
156-
TermFieldMatchDataPositionKey _maxOcc;
167+
TermFieldMatchDataPositionKey _maxOcc;
168+
std::optional<uint32_t> _element_gap;
157169

170+
Iterators(std::optional<uint32_t> element_gap)
171+
: _queue(),
172+
_maxOcc(),
173+
_element_gap(element_gap)
174+
{
175+
}
158176
void update(TermFieldMatchDataPositionKey occ)
159177
{
160178
if (_queue.size() == 1 || _maxOcc < occ) { _maxOcc = occ; }
@@ -173,8 +191,7 @@ struct Iterators
173191
bool match(uint32_t window) {
174192
for (;;) {
175193
PosIter &front = _queue.front();
176-
TermFieldMatchDataPositionKey lastAllowed = *front.curPos;
177-
lastAllowed.setPosition(front.curPos->getPosition() + window);
194+
auto lastAllowed = calc_window_end_pos(*front.curPos, window, _element_gap);
178195

179196
if (!(lastAllowed < _maxOcc)) {
180197
return true;
@@ -184,8 +201,7 @@ struct Iterators
184201
if (front.curPos == front.endPos) {
185202
return false;
186203
}
187-
lastAllowed = *front.curPos;
188-
lastAllowed.setPosition(front.curPos->getPosition() + window);
204+
lastAllowed = calc_window_end_pos(*front.curPos, window, _element_gap);
189205
} while (lastAllowed < _maxOcc);
190206

191207
update(*front.curPos);
@@ -199,7 +215,7 @@ struct Iterators
199215
bool
200216
NearSearch::Matcher::match(uint32_t docId)
201217
{
202-
Iterators pos;
218+
Iterators pos(get_element_gap());
203219
for (uint32_t i = 0, len = inputs().size(); i < len; ++i) {
204220
const search::fef::TermFieldMatchData *term = inputs()[i];
205221
if (term->getDocId() != docId || term->begin() == term->end()) {
@@ -263,15 +279,14 @@ ONearSearch::Matcher::match(uint32_t docId)
263279
// Look for match for every occurrence of the first term.
264280
for ( ; pos[0] != inputs()[0]->end(); ++pos[0]) {
265281
TermFieldMatchDataPositionKey firstTermPos = *pos[0];
266-
lastAllowed = firstTermPos;
267-
lastAllowed.setPosition(firstTermPos.getPosition() + remain);
282+
lastAllowed = calc_window_end_pos(*pos[0], remain, get_element_gap());
268283
if (lastAllowed < curTermPos) {
269284
// if we already know that we must seek onwards:
270285
continue;
271286
}
272287
prevTermPos = firstTermPos;
273-
LOG(spam, "Looking for match in window [%d, %d].",
274-
firstTermPos.getPosition(), lastAllowed.getPosition());
288+
LOG(spam, "Looking for match in window [%d:%d, %d:%d].",
289+
firstTermPos.getElementId(), firstTermPos.getPosition(), lastAllowed.getElementId(), lastAllowed.getPosition());
275290
for (uint32_t i = 1; i < numTerms; ++i) {
276291
LOG(spam, "Forwarding iterator for term %d beyond %d.", i, prevTermPos.getPosition());
277292
while (pos[i] != inputs()[i]->end() && !(prevTermPos < *pos[i])) {

searchlib/src/vespa/searchlib/queryeval/nearsearch.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,8 @@ class NearSearchBase : public AndSearch
2929
std::optional<uint32_t> _element_gap;
3030
TermFieldMatchDataArray _inputs;
3131
protected:
32-
uint32_t window() const { return _window; }
32+
uint32_t window() const noexcept { return _window; }
33+
std::optional<uint32_t> get_element_gap() const noexcept { return _element_gap; }
3334
const TermFieldMatchDataArray &inputs() const { return _inputs; }
3435
public:
3536
MatcherBase(uint32_t win, std::optional<uint32_t> element_gap, uint32_t fieldId, const TermFieldMatchDataArray &in)

0 commit comments

Comments
 (0)