Skip to content

Commit

Permalink
MB-65284: Add synonym query tracking and update stats map (#2141)
Browse files Browse the repository at this point in the history
- Add a Scorch counter stat `TotSynonymSearches` to track the number of
synonym-enabled queries received by the index. This stat will be
incremented by 1 each time the FieldTermSynonymMap is set, indicating
that the query will use synonyms.

---------

Co-authored-by: Abhinav Dangeti <[email protected]>
  • Loading branch information
CascadingRadium and abhinavdangeti authored Feb 21, 2025
1 parent 0637e36 commit 0ca3253
Show file tree
Hide file tree
Showing 7 changed files with 59 additions and 11 deletions.
1 change: 1 addition & 0 deletions index/scorch/scorch.go
Original file line number Diff line number Diff line change
Expand Up @@ -635,6 +635,7 @@ func (s *Scorch) StatsMap() map[string]interface{} {
m["term_searchers_started"] = m["TotTermSearchersStarted"]
m["term_searchers_finished"] = m["TotTermSearchersFinished"]
m["knn_searches"] = m["TotKNNSearches"]
m["synonym_searches"] = m["TotSynonymSearches"]

m["num_bytes_read_at_query_time"] = m["TotBytesReadAtQueryTime"]
m["num_plain_text_bytes_indexed"] = m["TotIndexedPlainTextBytes"]
Expand Down
2 changes: 1 addition & 1 deletion index/scorch/segment_plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ import (
"fmt"

"github.com/RoaringBitmap/roaring/v2"
index "github.com/blevesearch/bleve_index_api"
"github.com/blevesearch/bleve/v2/geo"
index "github.com/blevesearch/bleve_index_api"
segment "github.com/blevesearch/scorch_segment_api/v2"

zapv11 "github.com/blevesearch/zapx/v11"
Expand Down
4 changes: 4 additions & 0 deletions index/scorch/snapshot_index.go
Original file line number Diff line number Diff line change
Expand Up @@ -1134,3 +1134,7 @@ func (is *IndexSnapshot) ThesaurusKeysRegexp(name string,
return is.AutomatonIterator(a, prefixBeg, prefixEnd)
})
}

// UpdateSynonymSearchCount atomically adds delta to the TotSynonymSearches
// counter held in the stats of this snapshot's parent.
func (is *IndexSnapshot) UpdateSynonymSearchCount(delta uint64) {
	counter := &is.parent.stats.TotSynonymSearches
	atomic.AddUint64(counter, delta)
}
3 changes: 2 additions & 1 deletion index/scorch/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@ type Stats struct {
TotTermSearchersStarted uint64
TotTermSearchersFinished uint64

TotKNNSearches uint64
TotKNNSearches uint64
TotSynonymSearches uint64

TotEventTriggerStarted uint64
TotEventTriggerCompleted uint64
Expand Down
3 changes: 3 additions & 0 deletions index_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -636,6 +636,9 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
setKnnHitsInCollector(knnHits, req, coll)

if fts != nil {
if is, ok := indexReader.(*scorch.IndexSnapshot); ok {
is.UpdateSynonymSearchCount(1)
}
ctx = context.WithValue(ctx, search.FieldTermSynonymMapKey, fts)
}

Expand Down
16 changes: 8 additions & 8 deletions mapping/mapping_vectors_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -310,18 +310,18 @@ func TestProcessVector(t *testing.T) {

func TestNormalizeVector(t *testing.T) {
vectors := [][]float32{
[]float32{1,2,3,4,5},
[]float32{1,0,0,0,0},
[]float32{0.182574183,0.365148365,0.547722578,0.730296731},
[]float32{1,1,1,1,1,1,1,1},
[]float32{1, 2, 3, 4, 5},
[]float32{1, 0, 0, 0, 0},
[]float32{0.182574183, 0.365148365, 0.547722578, 0.730296731},
[]float32{1, 1, 1, 1, 1, 1, 1, 1},
[]float32{0},
}

expectedNormalizedVectors := [][]float32{
[]float32{0.13483998,0.26967996,0.40451995,0.5393599,0.67419994},
[]float32{1,0,0,0,0},
[]float32{0.18257418,0.36514837,0.5477226,0.73029673},
[]float32{0.35355338,0.35355338,0.35355338,0.35355338,0.35355338,0.35355338,0.35355338,0.35355338},
[]float32{0.13483998, 0.26967996, 0.40451995, 0.5393599, 0.67419994},
[]float32{1, 0, 0, 0, 0},
[]float32{0.18257418, 0.36514837, 0.5477226, 0.73029673},
[]float32{0.35355338, 0.35355338, 0.35355338, 0.35355338, 0.35355338, 0.35355338, 0.35355338, 0.35355338},
[]float32{0},
}

Expand Down
41 changes: 40 additions & 1 deletion search_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1387,7 +1387,6 @@ func TestMatchQueryPartialMatch(t *testing.T) {
t.Errorf("Expected 1 result, but got: %v", res.Total)
}
hit := res.Hits[0]
fmt.Println(hit.Expl, hit.ID)
if hit.ID != "doc1" || hit.Expl.PartialMatch {
t.Errorf("Expected doc1 to be a full match")
}
Expand Down Expand Up @@ -4319,6 +4318,15 @@ func TestSynonymSearchQueries(t *testing.T) {
},
}

// getTotalSynonymSearchStat returns the "synonym_searches" entry from the
// StatsMap of the index returned by idx.Advanced(). It fails the test if
// Advanced() returns an error, or if the entry is missing or is not a uint64.
getTotalSynonymSearchStat := func(idx Index) int {
	advIdx, err := idx.Advanced()
	if err != nil {
		t.Fatal(err)
	}
	raw := advIdx.StatsMap()["synonym_searches"]
	// Comma-ok assertion: a missing or mistyped stat should fail the test with
	// a readable message rather than panic.
	stat, ok := raw.(uint64)
	if !ok {
		t.Fatalf("expected synonym_searches stat to be a uint64, got %T", raw)
	}
	return int(stat)
}

runTestQueries := func(idx Index) error {
for _, dtq := range testQueries {
q, err := query.ParseQuery([]byte(dtq.query))
Expand Down Expand Up @@ -4352,6 +4360,12 @@ func TestSynonymSearchQueries(t *testing.T) {
if err != nil {
t.Fatal(err)
}
// now verify that the stat for number of synonym enabled queries is correct
totalSynonymSearchStat := getTotalSynonymSearchStat(idx)
if totalSynonymSearchStat != len(testQueries) {
t.Fatalf("expected %d synonym searches, got %d", len(testQueries), totalSynonymSearchStat)
}

// test with index alias - with 1 batch per index
numIndexes := len(batches)
indexes := make([]Index, numIndexes)
Expand Down Expand Up @@ -4384,6 +4398,20 @@ func TestSynonymSearchQueries(t *testing.T) {
if err != nil {
t.Fatal(err)
}
// verify the synonym search stat for the alias
totalSynonymSearchStat = getTotalSynonymSearchStat(indexes[0])
if totalSynonymSearchStat != len(testQueries) {
t.Fatalf("expected %d synonym searches, got %d", len(testQueries), totalSynonymSearchStat)
}
for i := 1; i < numIndexes; i++ {
idxStat := getTotalSynonymSearchStat(indexes[i])
if idxStat != totalSynonymSearchStat {
t.Fatalf("expected %d synonym searches, got %d", totalSynonymSearchStat, idxStat)
}
}
if totalSynonymSearchStat != len(testQueries) {
t.Fatalf("expected %d synonym searches, got %d", len(testQueries), totalSynonymSearchStat)
}
// test with a multi-level alias now, with two indexes per alias
// and any extra index placed in the final alias
numAliases := numIndexes / 2
Expand All @@ -4405,6 +4433,17 @@ func TestSynonymSearchQueries(t *testing.T) {
if err != nil {
t.Fatal(err)
}
// verify the synonym searches stat for the alias
totalSynonymSearchStat = getTotalSynonymSearchStat(indexes[0])
if totalSynonymSearchStat != 2*len(testQueries) {
t.Fatalf("expected %d synonym searches, got %d", len(testQueries), totalSynonymSearchStat)
}
for i := 1; i < numIndexes; i++ {
idxStat := getTotalSynonymSearchStat(indexes[i])
if idxStat != totalSynonymSearchStat {
t.Fatalf("expected %d synonym searches, got %d", totalSynonymSearchStat, idxStat)
}
}
}

func TestGeoDistanceInSort(t *testing.T) {
Expand Down

0 comments on commit 0ca3253

Please sign in to comment.