From ccdcb066290c5f69f8ac90389477d4c44a88509d Mon Sep 17 00:00:00 2001 From: CascadingRadium Date: Thu, 20 Feb 2025 15:07:04 +0530 Subject: [PATCH 1/5] Add synonym query tracking and update stats map --- index/scorch/scorch.go | 1 + index/scorch/snapshot_index.go | 4 ++++ index/scorch/stats.go | 3 ++- index_impl.go | 3 +++ search_test.go | 31 +++++++++++++++++++++++++++++++ 5 files changed, 41 insertions(+), 1 deletion(-) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index f8d532e03..225efaf1d 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -635,6 +635,7 @@ func (s *Scorch) StatsMap() map[string]interface{} { m["term_searchers_started"] = m["TotTermSearchersStarted"] m["term_searchers_finished"] = m["TotTermSearchersFinished"] m["knn_searches"] = m["TotKNNSearches"] + m["synonym_queries"] = m["TotSynonymQueries"] m["num_bytes_read_at_query_time"] = m["TotBytesReadAtQueryTime"] m["num_plain_text_bytes_indexed"] = m["TotIndexedPlainTextBytes"] diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index ddca76ec3..3c71bc42f 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -1134,3 +1134,7 @@ func (is *IndexSnapshot) ThesaurusKeysRegexp(name string, return is.AutomatonIterator(a, prefixBeg, prefixEnd) }) } + +func (is *IndexSnapshot) UpdateSynonymQueryCount(delta uint64) { + atomic.AddUint64(&is.parent.stats.TotSynonymQueries, delta) +} diff --git a/index/scorch/stats.go b/index/scorch/stats.go index 269ae2f63..d941f33c6 100644 --- a/index/scorch/stats.go +++ b/index/scorch/stats.go @@ -51,7 +51,8 @@ type Stats struct { TotTermSearchersStarted uint64 TotTermSearchersFinished uint64 - TotKNNSearches uint64 + TotKNNSearches uint64 + TotSynonymQueries uint64 TotEventTriggerStarted uint64 TotEventTriggerCompleted uint64 diff --git a/index_impl.go b/index_impl.go index d59dfb9a1..05a05f783 100644 --- a/index_impl.go +++ b/index_impl.go @@ -636,6 +636,9 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr setKnnHitsInCollector(knnHits, req, coll) if fts != nil { + if is, ok := indexReader.(*scorch.IndexSnapshot); ok { + is.UpdateSynonymQueryCount(1) + } ctx = context.WithValue(ctx, search.FieldTermSynonymMapKey, fts) } diff --git a/search_test.go b/search_test.go index e00d5ad06..327928359 100644 --- a/search_test.go +++ b/search_test.go @@ -4319,6 +4319,15 @@ func TestSynonymSearchQueries(t *testing.T) { }, } + getTotalSynonymQueryStat := func(idx Index) int { + ir, err := idx.Advanced() + if err != nil { + t.Fatal(err) + } + stat := ir.StatsMap()["synonym_queries"].(int) + return stat + } + runTestQueries := func(idx Index) error { for _, dtq := range testQueries { q, err := query.ParseQuery([]byte(dtq.query)) @@ -4352,6 +4361,12 @@ func TestSynonymSearchQueries(t *testing.T) { if err != nil { t.Fatal(err) } + // now verify that the stat for number of synonym enabled queries is correct + totalSynonymQueriesStat := getTotalSynonymQueryStat(idx) + if totalSynonymQueriesStat != len(testQueries) { + t.Fatalf("expected %d synonym queries, got %d", len(testQueries), totalSynonymQueriesStat) + } + // test with index alias - with 1 batch per index numIndexes := len(batches) indexes := make([]Index, numIndexes) @@ -4384,6 +4399,14 @@ func TestSynonymSearchQueries(t *testing.T) { if err != nil { t.Fatal(err) } + // verify the synonym queries stat for the alias + totalSynonymQueriesStat = 0 + for i := 0; i < numIndexes; i++ { + totalSynonymQueriesStat += getTotalSynonymQueryStat(indexes[i]) + } + if totalSynonymQueriesStat != len(testQueries) { + t.Fatalf("expected %d synonym queries, got %d", len(testQueries), totalSynonymQueriesStat) + } // test with multi-level alias now with two index per alias // and having any extra index being in the final alias numAliases := numIndexes / 2 @@ -4405,6 +4428,14 @@ func TestSynonymSearchQueries(t *testing.T) { if err != nil { t.Fatal(err) } + // verify the synonym queries stat for the alias + totalSynonymQueriesStat = 0 + for i := 0; i < numIndexes; i++ { + totalSynonymQueriesStat += getTotalSynonymQueryStat(indexes[i]) + } + if totalSynonymQueriesStat != len(testQueries) { + t.Fatalf("expected %d synonym queries, got %d", len(testQueries), totalSynonymQueriesStat) + } } func TestGeoDistanceInSort(t *testing.T) { From 4eb9fc0c8a95fc98cdb5f03d420d1077ec604673 Mon Sep 17 00:00:00 2001 From: CascadingRadium Date: Thu, 20 Feb 2025 15:55:59 +0530 Subject: [PATCH 2/5] Fix test --- search_test.go | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/search_test.go b/search_test.go index 327928359..d4201a29b 100644 --- a/search_test.go +++ b/search_test.go @@ -1387,7 +1387,6 @@ func TestMatchQueryPartialMatch(t *testing.T) { t.Errorf("Expected 1 result, but got: %v", res.Total) } hit := res.Hits[0] - fmt.Println(hit.Expl, hit.ID) if hit.ID != "doc1" || hit.Expl.PartialMatch { t.Errorf("Expected doc1 to be a full match") } @@ -4324,8 +4323,8 @@ func TestSynonymSearchQueries(t *testing.T) { if err != nil { t.Fatal(err) } - stat := ir.StatsMap()["synonym_queries"].(int) - return stat + stat := ir.StatsMap()["synonym_queries"].(uint64) + return int(stat) } runTestQueries := func(idx Index) error { @@ -4400,9 +4399,15 @@ func TestSynonymSearchQueries(t *testing.T) { t.Fatal(err) } // verify the synonym queries stat for the alias - totalSynonymQueriesStat = 0 - for i := 0; i < numIndexes; i++ { - totalSynonymQueriesStat += getTotalSynonymQueryStat(indexes[i]) + totalSynonymQueriesStat = getTotalSynonymQueryStat(indexes[0]) + if totalSynonymQueriesStat != len(testQueries) { + t.Fatalf("expected %d synonym queries, got %d", len(testQueries), totalSynonymQueriesStat) + } + for i := 1; i < numIndexes; i++ { + idxStat := getTotalSynonymQueryStat(indexes[i]) + if idxStat != totalSynonymQueriesStat { + t.Fatalf("expected %d synonym queries, got %d", totalSynonymQueriesStat, idxStat) + } } if totalSynonymQueriesStat != len(testQueries) { t.Fatalf("expected %d synonym queries, got %d", len(testQueries), totalSynonymQueriesStat) @@ -4429,13 +4434,17 @@ func TestSynonymSearchQueries(t *testing.T) { t.Fatal(err) } // verify the synonym queries stat for the alias - totalSynonymQueriesStat = 0 - for i := 0; i < numIndexes; i++ { - totalSynonymQueriesStat += getTotalSynonymQueryStat(indexes[i]) - } - if totalSynonymQueriesStat != len(testQueries) { + totalSynonymQueriesStat = getTotalSynonymQueryStat(indexes[0]) + if totalSynonymQueriesStat != 2*len(testQueries) { t.Fatalf("expected %d synonym queries, got %d", len(testQueries), totalSynonymQueriesStat) } + totalSynonymQueriesStat = getTotalSynonymQueryStat(indexes[0]) + for i := 1; i < numIndexes; i++ { + idxStat := getTotalSynonymQueryStat(indexes[i]) + if idxStat != totalSynonymQueriesStat { + t.Fatalf("expected %d synonym queries, got %d", totalSynonymQueriesStat, idxStat) + } + } } func TestGeoDistanceInSort(t *testing.T) { From 8fe5e34bd13067df16b6394889018c9350d46f16 Mon Sep 17 00:00:00 2001 From: CascadingRadium Date: Fri, 21 Feb 2025 03:42:07 +0530 Subject: [PATCH 3/5] Rename synonym query metrics to synonym search for consistency --- index/scorch/scorch.go | 2 +- index/scorch/snapshot_index.go | 4 ++-- index/scorch/stats.go | 4 ++-- index_impl.go | 2 +- search_test.go | 44 +++++++++++++++++----------------- 5 files changed, 28 insertions(+), 28 deletions(-) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 225efaf1d..2b5408514 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -635,7 +635,7 @@ func (s *Scorch) StatsMap() map[string]interface{} { m["term_searchers_started"] = m["TotTermSearchersStarted"] m["term_searchers_finished"] = m["TotTermSearchersFinished"] m["knn_searches"] = m["TotKNNSearches"] - m["synonym_queries"] = m["TotSynonymQueries"] + m["synonym_searches"] = m["TotSynonymSearches"] m["num_bytes_read_at_query_time"] = m["TotBytesReadAtQueryTime"] m["num_plain_text_bytes_indexed"] = m["TotIndexedPlainTextBytes"] diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 3c71bc42f..dc96cac4b 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -1135,6 +1135,6 @@ func (is *IndexSnapshot) ThesaurusKeysRegexp(name string, }) } -func (is *IndexSnapshot) UpdateSynonymQueryCount(delta uint64) { - atomic.AddUint64(&is.parent.stats.TotSynonymQueries, delta) +func (is *IndexSnapshot) UpdateSynonymSearchCount(delta uint64) { + atomic.AddUint64(&is.parent.stats.TotSynonymSearches, delta) } diff --git a/index/scorch/stats.go b/index/scorch/stats.go index d941f33c6..2f2b8e6a3 100644 --- a/index/scorch/stats.go +++ b/index/scorch/stats.go @@ -51,8 +51,8 @@ type Stats struct { TotTermSearchersStarted uint64 TotTermSearchersFinished uint64 - TotKNNSearches uint64 - TotSynonymQueries uint64 + TotKNNSearches uint64 + TotSynonymSearches uint64 TotEventTriggerStarted uint64 TotEventTriggerCompleted uint64 diff --git a/index_impl.go b/index_impl.go index 05a05f783..c9e1400c3 100644 --- a/index_impl.go +++ b/index_impl.go @@ -637,7 +637,7 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr if fts != nil { if is, ok := indexReader.(*scorch.IndexSnapshot); ok { - is.UpdateSynonymQueryCount(1) + is.UpdateSynonymSearchCount(1) } ctx = context.WithValue(ctx, search.FieldTermSynonymMapKey, fts) } diff --git a/search_test.go b/search_test.go index d4201a29b..3d940121f 100644 --- a/search_test.go +++ b/search_test.go @@ -4318,12 +4318,12 @@ func TestSynonymSearchQueries(t *testing.T) { }, } - getTotalSynonymQueryStat := func(idx Index) int { + getTotalSynonymSearchStat := func(idx Index) int { ir, err := idx.Advanced() if err != nil { t.Fatal(err) } - stat := ir.StatsMap()["synonym_queries"].(uint64) + stat := ir.StatsMap()["synonym_searches"].(uint64) return int(stat) } @@ -4361,9 +4361,9 @@ func TestSynonymSearchQueries(t *testing.T) { t.Fatal(err) } // now verify that the stat for number of synonym enabled queries is correct - totalSynonymQueriesStat := getTotalSynonymQueryStat(idx) - if totalSynonymQueriesStat != len(testQueries) { - t.Fatalf("expected %d synonym queries, got %d", len(testQueries), totalSynonymQueriesStat) + totalSynonymSearchStat := getTotalSynonymSearchStat(idx) + if totalSynonymSearchStat != len(testQueries) { + t.Fatalf("expected %d synonym searches, got %d", len(testQueries), totalSynonymSearchStat) } // test with index alias - with 1 batch per index @@ -4398,19 +4398,19 @@ func TestSynonymSearchQueries(t *testing.T) { if err != nil { t.Fatal(err) } - // verify the synonym queries stat for the alias - totalSynonymQueriesStat = getTotalSynonymQueryStat(indexes[0]) - if totalSynonymQueriesStat != len(testQueries) { - t.Fatalf("expected %d synonym queries, got %d", len(testQueries), totalSynonymQueriesStat) + // verify the synonym search stat for the alias + totalSynonymSearchStat = getTotalSynonymSearchStat(indexes[0]) + if totalSynonymSearchStat != len(testQueries) { + t.Fatalf("expected %d synonym searches, got %d", len(testQueries), totalSynonymSearchStat) } for i := 1; i < numIndexes; i++ { - idxStat := getTotalSynonymQueryStat(indexes[i]) - if idxStat != totalSynonymQueriesStat { - t.Fatalf("expected %d synonym queries, got %d", totalSynonymQueriesStat, idxStat) + idxStat := getTotalSynonymSearchStat(indexes[i]) + if idxStat != totalSynonymSearchStat { + t.Fatalf("expected %d synonym searches, got %d", totalSynonymSearchStat, idxStat) } } - if totalSynonymQueriesStat != len(testQueries) { - t.Fatalf("expected %d synonym queries, got %d", len(testQueries), totalSynonymQueriesStat) + if totalSynonymSearchStat != len(testQueries) { + t.Fatalf("expected %d synonym searches, got %d", len(testQueries), totalSynonymSearchStat) } // test with multi-level alias now with two index per alias // and having any extra index being in the final alias @@ -4433,16 +4433,16 @@ func TestSynonymSearchQueries(t *testing.T) { if err != nil { t.Fatal(err) } - // verify the synonym queries stat for the alias - totalSynonymQueriesStat = getTotalSynonymQueryStat(indexes[0]) - if totalSynonymQueriesStat != 2*len(testQueries) { - t.Fatalf("expected %d synonym queries, got %d", len(testQueries), totalSynonymQueriesStat) + // verify the synonym searches stat for the alias + totalSynonymSearchStat = getTotalSynonymSearchStat(indexes[0]) + if totalSynonymSearchStat != 2*len(testQueries) { + t.Fatalf("expected %d synonym searches, got %d", len(testQueries), totalSynonymSearchStat) } - totalSynonymQueriesStat = getTotalSynonymQueryStat(indexes[0]) + totalSynonymSearchStat = getTotalSynonymSearchStat(indexes[0]) for i := 1; i < numIndexes; i++ { - idxStat := getTotalSynonymQueryStat(indexes[i]) - if idxStat != totalSynonymQueriesStat { - t.Fatalf("expected %d synonym queries, got %d", totalSynonymQueriesStat, idxStat) + idxStat := getTotalSynonymSearchStat(indexes[i]) + if idxStat != totalSynonymSearchStat { + t.Fatalf("expected %d synonym searches, got %d", totalSynonymSearchStat, idxStat) } } } From 95dbd9498d25c135982264b1cc710ff273d19ac9 Mon Sep 17 00:00:00 2001 From: CascadingRadium Date: Fri, 21 Feb 2025 03:44:52 +0530 Subject: [PATCH 4/5] minor test fix --- search_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/search_test.go b/search_test.go index 3d940121f..e21d19983 100644 --- a/search_test.go +++ b/search_test.go @@ -4438,7 +4438,6 @@ func TestSynonymSearchQueries(t *testing.T) { if totalSynonymSearchStat != 2*len(testQueries) { t.Fatalf("expected %d synonym searches, got %d", len(testQueries), totalSynonymSearchStat) } - totalSynonymSearchStat = getTotalSynonymSearchStat(indexes[0]) for i := 1; i < numIndexes; i++ { idxStat := getTotalSynonymSearchStat(indexes[i]) if idxStat != totalSynonymSearchStat { From ae887dfefcf0f359572dc034c70bed8f7c715c0e Mon Sep 17 00:00:00 2001 From: Abhinav Dangeti Date: Thu, 20 Feb 2025 17:27:56 -0700 Subject: [PATCH 5/5] go fmt ./... --- index/scorch/segment_plugin.go | 2 +- index/scorch/stats.go | 2 +- mapping/mapping_vectors_test.go | 16 ++++++++-------- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/index/scorch/segment_plugin.go b/index/scorch/segment_plugin.go index fe639d097..790a8008a 100644 --- a/index/scorch/segment_plugin.go +++ b/index/scorch/segment_plugin.go @@ -18,8 +18,8 @@ import ( "fmt" "github.com/RoaringBitmap/roaring/v2" - index "github.com/blevesearch/bleve_index_api" "github.com/blevesearch/bleve/v2/geo" + index "github.com/blevesearch/bleve_index_api" segment "github.com/blevesearch/scorch_segment_api/v2" zapv11 "github.com/blevesearch/zapx/v11" diff --git a/index/scorch/stats.go b/index/scorch/stats.go index 2f2b8e6a3..397b98948 100644 --- a/index/scorch/stats.go +++ b/index/scorch/stats.go @@ -51,7 +51,7 @@ type Stats struct { TotTermSearchersStarted uint64 TotTermSearchersFinished uint64 - TotKNNSearches uint64 + TotKNNSearches uint64 TotSynonymSearches uint64 TotEventTriggerStarted uint64 diff --git a/mapping/mapping_vectors_test.go b/mapping/mapping_vectors_test.go index 6d409760f..6b6f92e8c 100644 --- a/mapping/mapping_vectors_test.go +++ b/mapping/mapping_vectors_test.go @@ -310,18 +310,18 @@ func TestProcessVector(t *testing.T) { func TestNormalizeVector(t *testing.T) { vectors := [][]float32{ - []float32{1,2,3,4,5}, - []float32{1,0,0,0,0}, - []float32{0.182574183,0.365148365,0.547722578,0.730296731}, - []float32{1,1,1,1,1,1,1,1}, + []float32{1, 2, 3, 4, 5}, + []float32{1, 0, 0, 0, 0}, + []float32{0.182574183, 0.365148365, 0.547722578, 0.730296731}, + []float32{1, 1, 1, 1, 1, 1, 1, 1}, []float32{0}, } expectedNormalizedVectors := [][]float32{ - []float32{0.13483998,0.26967996,0.40451995,0.5393599,0.67419994}, - []float32{1,0,0,0,0}, - []float32{0.18257418,0.36514837,0.5477226,0.73029673}, - []float32{0.35355338,0.35355338,0.35355338,0.35355338,0.35355338,0.35355338,0.35355338,0.35355338}, + []float32{0.13483998, 0.26967996, 0.40451995, 0.5393599, 0.67419994}, + []float32{1, 0, 0, 0, 0}, + []float32{0.18257418, 0.36514837, 0.5477226, 0.73029673}, + []float32{0.35355338, 0.35355338, 0.35355338, 0.35355338, 0.35355338, 0.35355338, 0.35355338, 0.35355338}, []float32{0}, }