Skip to content

Commit aa8cc1f

Browse files
committed
fix #4479: add fuzzy keyword search to title
1 parent e82f3ca commit aa8cc1f

File tree

5 files changed

+24
-13
lines changed

5 files changed

+24
-13
lines changed

modules/indexer/internal/bleve/query.go

+8
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,14 @@ func MatchPhraseQuery(matchPhrase, field, analyzer string, fuzziness int) *query
2828
return q
2929
}
3030

31+
// FuzzyQuery generates a fuzzy term query that matches matchPhrase in the given field within the given fuzziness (edit distance)
32+
func FuzzyQuery(matchPhrase, field string, fuzziness int) *query.FuzzyQuery {
33+
q := bleve.NewFuzzyQuery(matchPhrase)
34+
q.FieldVal = field
35+
q.Fuzziness = fuzziness
36+
return q
37+
}
38+
3139
// BoolFieldQuery generates a bool field query for the given value and field
3240
func BoolFieldQuery(value bool, field string) *query.BoolFieldQuery {
3341
q := bleve.NewBoolFieldQuery(value)

modules/indexer/internal/bleve/util.go

+6-4
Original file line numberDiff line numberDiff line change
@@ -50,12 +50,14 @@ func openIndexer(path string, latestVersion int) (bleve.Index, int, error) {
5050

5151
func GuessFuzzinessByKeyword(s string) int {
5252
// according to https://github.com/blevesearch/bleve/issues/1563, the supported max fuzziness is 2
53-
// magic number 4 was chosen to determine the levenshtein distance per each character of a keyword
54-
// BUT, when using CJK (eg: `갃갃갃` `啊啊啊`), it mismatches a lot.
53+
// BUT, when using CJK (e.g. `갃갃갃`, `啊啊啊`), it mismatches a lot,
54+
// which we have to live with, because we need to support more than just ASCII.
55+
// If the keyword contains any code point >= 128, the fuzziness is raised to 2;
56+
// otherwise the default fuzziness of 1 is used.
5557
for _, r := range s {
5658
if r >= 128 {
57-
return 0
59+
return 2
5860
}
5961
}
60-
return min(2, len(s)/4)
62+
return 1
6163
}

modules/indexer/issues/bleve/bleve.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
162162
}
163163

164164
queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
165-
inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer, fuzziness),
165+
inner_bleve.FuzzyQuery(options.Keyword, "title", fuzziness),
166166
inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer, fuzziness),
167167
inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer, fuzziness),
168168
}...))

modules/indexer/issues/indexer_test.go

+8-8
Original file line numberDiff line numberDiff line change
@@ -209,13 +209,13 @@ func searchIssueIsPull(t *testing.T) {
209209
SearchOptions{
210210
IsPull: optional.Some(false),
211211
},
212-
[]int64{17, 16, 15, 14, 13, 6, 5, 18, 10, 7, 4, 1},
212+
[]int64{25, 24, 23, 17, 16, 15, 14, 13, 6, 5, 18, 10, 7, 4, 1},
213213
},
214214
{
215215
SearchOptions{
216216
IsPull: optional.Some(true),
217217
},
218-
[]int64{22, 21, 12, 11, 20, 19, 9, 8, 3, 2},
218+
[]int64{22, 21, 28, 27, 26, 12, 11, 20, 19, 9, 8, 3, 2},
219219
},
220220
}
221221
for _, test := range tests {
@@ -236,7 +236,7 @@ func searchIssueIsClosed(t *testing.T) {
236236
SearchOptions{
237237
IsClosed: optional.Some(false),
238238
},
239-
[]int64{22, 21, 17, 16, 15, 14, 13, 12, 11, 20, 6, 19, 18, 10, 7, 9, 8, 3, 2, 1},
239+
[]int64{25, 24, 23, 22, 21, 28, 27, 26, 17, 16, 15, 14, 13, 12, 11, 20, 6, 19, 18, 10, 7, 9, 8, 3, 2, 1},
240240
},
241241
{
242242
SearchOptions{
@@ -302,7 +302,7 @@ func searchIssueByLabelID(t *testing.T) {
302302
SearchOptions{
303303
ExcludedLabelIDs: []int64{1},
304304
},
305-
[]int64{22, 21, 17, 16, 15, 14, 13, 12, 11, 20, 6, 5, 19, 18, 10, 7, 4, 9, 8, 3},
305+
[]int64{25, 24, 23, 22, 21, 28, 27, 26, 17, 16, 15, 14, 13, 12, 11, 20, 6, 5, 19, 18, 10, 7, 4, 9, 8, 3},
306306
},
307307
}
308308
for _, test := range tests {
@@ -323,7 +323,7 @@ func searchIssueByTime(t *testing.T) {
323323
SearchOptions{
324324
UpdatedAfterUnix: optional.Some(int64(0)),
325325
},
326-
[]int64{22, 21, 17, 16, 15, 14, 13, 12, 11, 20, 6, 5, 19, 18, 10, 7, 4, 9, 8, 3, 2, 1},
326+
[]int64{25, 24, 23, 22, 21, 28, 27, 26, 17, 16, 15, 14, 13, 12, 11, 20, 6, 5, 19, 18, 10, 7, 4, 9, 8, 3, 2, 1},
327327
},
328328
}
329329
for _, test := range tests {
@@ -344,7 +344,7 @@ func searchIssueWithOrder(t *testing.T) {
344344
SearchOptions{
345345
SortBy: internal.SortByCreatedAsc,
346346
},
347-
[]int64{1, 2, 3, 8, 9, 4, 7, 10, 18, 19, 5, 6, 20, 11, 12, 13, 14, 15, 16, 17, 21, 22},
347+
[]int64{1, 2, 3, 8, 9, 4, 7, 10, 18, 19, 5, 6, 20, 11, 12, 13, 14, 15, 16, 17, 26, 27, 28, 21, 22, 23, 24, 25},
348348
},
349349
}
350350
for _, test := range tests {
@@ -401,8 +401,8 @@ func searchIssueWithPaginator(t *testing.T) {
401401
PageSize: 5,
402402
},
403403
},
404-
[]int64{22, 21, 17, 16, 15},
405-
22,
404+
[]int64{25, 24, 23, 22, 21},
405+
28,
406406
},
407407
}
408408
for _, test := range tests {

routers/web/repo/issue.go

+1
Original file line numberDiff line numberDiff line change
@@ -2677,6 +2677,7 @@ func SearchIssues(ctx *context.Context) {
26772677
MilestoneIDs: includedMilestones,
26782678
ProjectID: projectID,
26792679
SortBy: issue_indexer.SortByCreatedDesc,
2680+
IsFuzzyKeyword: true,
26802681
}
26812682

26822683
if since != 0 {

0 commit comments

Comments
 (0)