Skip to content

Commit 87bdec2

Browse files
committed
fix #4479: add fuzzy keyword search to title
1 parent e82f3ca commit 87bdec2

File tree

13 files changed

+202
-21
lines changed

13 files changed

+202
-21
lines changed

models/fixtures/access.yml

+6
Original file line numberDiff line numberDiff line change
@@ -171,3 +171,9 @@
171171
user_id: 40
172172
repo_id: 61
173173
mode: 4
174+
175+
-
176+
id: 30
177+
user_id: 40
178+
repo_id: 62
179+
mode: 4

models/fixtures/issue.yml

+105-3
Original file line numberDiff line numberDiff line change
@@ -346,7 +346,7 @@
346346
poster_id: 39
347347
original_author_id: 0
348348
name: repo60 pull1
349-
content: content for the 1st issue
349+
content: content for the 1st pr
350350
milestone_id: 0
351351
priority: 0
352352
is_closed: false
@@ -360,10 +360,10 @@
360360
id: 22
361361
repo_id: 61
362362
index: 1
363-
poster_id: 40
363+
poster_id: 39
364364
original_author_id: 0
365365
name: repo61 pull1
366-
content: content for the 1st issue
366+
content: content for the 1st pr
367367
milestone_id: 0
368368
priority: 0
369369
is_closed: false
@@ -372,3 +372,105 @@
372372
created_unix: 1707270422
373373
updated_unix: 1707270422
374374
is_locked: false
375+
376+
-
377+
id: 23
378+
repo_id: 62
379+
index: 1
380+
poster_id: 40
381+
original_author_id: 0
382+
name: Ökononmie der UI
383+
content: die ökonomie der UI muß dringend verbessert werden
384+
milestone_id: 0
385+
priority: 0
386+
is_closed: false
387+
is_pull: false
388+
num_comments: 0
389+
created_unix: 1707270422
390+
updated_unix: 1707270422
391+
is_locked: false
392+
393+
-
394+
id: 24
395+
repo_id: 62
396+
index: 2
397+
poster_id: 40
398+
original_author_id: 0
399+
name: 갃갃갃
400+
content: 啊啊啊
401+
milestone_id: 0
402+
priority: 0
403+
is_closed: false
404+
is_pull: false
405+
num_comments: 0
406+
created_unix: 1707270422
407+
updated_unix: 1707270422
408+
is_locked: false
409+
410+
-
411+
id: 25
412+
repo_id: 62
413+
index: 3
414+
poster_id: 40
415+
original_author_id: 0
416+
name: 啊啊啊
417+
content: 갃갃갃
418+
milestone_id: 0
419+
priority: 0
420+
is_closed: false
421+
is_pull: false
422+
num_comments: 0
423+
created_unix: 1707270422
424+
updated_unix: 1707270422
425+
is_locked: false
426+
427+
-
428+
id: 26
429+
repo_id: 62
430+
index: 4
431+
poster_id: 40
432+
original_author_id: 0
433+
name: repo62 pull1 - Ökononmie der UI
434+
content: die ökonomie der UI muß dringend verbessert werden
435+
milestone_id: 0
436+
priority: 0
437+
is_closed: false
438+
is_pull: true
439+
num_comments: 0
440+
created_unix: 1707270412
441+
updated_unix: 1707270412
442+
is_locked: false
443+
444+
-
445+
id: 27
446+
repo_id: 62
447+
index: 5
448+
poster_id: 40
449+
original_author_id: 0
450+
name: repo62 pull2 - 갃갃갃
451+
content: 啊啊啊
452+
milestone_id: 0
453+
priority: 0
454+
is_closed: false
455+
is_pull: true
456+
num_comments: 0
457+
created_unix: 1707270413
458+
updated_unix: 1707270413
459+
is_locked: false
460+
461+
-
462+
id: 28
463+
repo_id: 62
464+
index: 6
465+
poster_id: 40
466+
original_author_id: 0
467+
name: repo62 pull3 - 啊啊啊
468+
content: 갃갃갃
469+
milestone_id: 0
470+
priority: 0
471+
is_closed: false
472+
is_pull: true
473+
num_comments: 0
474+
created_unix: 1707270414
475+
updated_unix: 1707270414
476+
is_locked: false

models/fixtures/issue_index.yml

+4
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,7 @@
3333
-
3434
group_id: 51
3535
max_index: 1
36+
37+
-
38+
group_id: 62
39+
max_index: 6

models/fixtures/pull_request.yml

+27
Original file line numberDiff line numberDiff line change
@@ -117,3 +117,30 @@
117117
index: 1
118118
head_repo_id: 61
119119
base_repo_id: 61
120+
121+
-
122+
id: 11
123+
type: 0 # gitea pull request
124+
status: 2 # mergeable
125+
issue_id: 26
126+
index: 4
127+
head_repo_id: 62
128+
base_repo_id: 62
129+
130+
-
131+
id: 12
132+
type: 0 # gitea pull request
133+
status: 2 # mergeable
134+
issue_id: 27
135+
index: 5
136+
head_repo_id: 62
137+
base_repo_id: 62
138+
139+
-
140+
id: 13
141+
type: 0 # gitea pull request
142+
status: 2 # mergeable
143+
issue_id: 28
144+
index: 6
145+
head_repo_id: 62
146+
base_repo_id: 62

models/fixtures/repository.yml

+31
Original file line numberDiff line numberDiff line change
@@ -1768,3 +1768,34 @@
17681768
size: 0
17691769
is_fsck_enabled: true
17701770
close_issues_via_commit_in_any_branch: false
1771+
1772+
-
1773+
id: 62
1774+
owner_id: 41
1775+
owner_name: org41
1776+
lower_name: repo62
1777+
name: repo62
1778+
default_branch: main
1779+
num_watches: 0
1780+
num_stars: 0
1781+
num_forks: 0
1782+
num_issues: 3
1783+
num_closed_issues: 0
1784+
num_pulls: 3
1785+
num_closed_pulls: 0
1786+
num_milestones: 0
1787+
num_closed_milestones: 0
1788+
num_projects: 0
1789+
num_closed_projects: 0
1790+
is_private: false
1791+
is_empty: false
1792+
is_archived: false
1793+
is_mirror: false
1794+
status: 0
1795+
is_fork: false
1796+
fork_id: 0
1797+
is_template: false
1798+
template_id: 0
1799+
size: 0
1800+
is_fsck_enabled: true
1801+
close_issues_via_commit_in_any_branch: false

models/fixtures/user.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1510,7 +1510,7 @@
15101510
num_followers: 0
15111511
num_following: 0
15121512
num_stars: 0
1513-
num_repos: 1
1513+
num_repos: 2
15141514
num_teams: 2
15151515
num_members: 3
15161516
visibility: 0

models/issues/issue_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -379,7 +379,7 @@ func TestCountIssues(t *testing.T) {
379379
assert.NoError(t, unittest.PrepareTestDatabase())
380380
count, err := issues_model.CountIssues(db.DefaultContext, &issues_model.IssuesOptions{})
381381
assert.NoError(t, err)
382-
assert.EqualValues(t, 22, count)
382+
assert.EqualValues(t, 28, count)
383383
}
384384

385385
func TestIssueLoadAttributes(t *testing.T) {

models/repo/repo_list_test.go

+3-3
Original file line numberDiff line numberDiff line change
@@ -138,12 +138,12 @@ func getTestCases() []struct {
138138
{
139139
name: "AllPublic/PublicRepositoriesOfUserIncludingCollaborative",
140140
opts: &repo_model.SearchRepoOptions{ListOptions: db.ListOptions{Page: 1, PageSize: 10}, OwnerID: 15, AllPublic: true, Template: optional.Some(false)},
141-
count: 33,
141+
count: 34,
142142
},
143143
{
144144
name: "AllPublic/PublicAndPrivateRepositoriesOfUserIncludingCollaborative",
145145
opts: &repo_model.SearchRepoOptions{ListOptions: db.ListOptions{Page: 1, PageSize: 10}, OwnerID: 15, Private: true, AllPublic: true, AllLimited: true, Template: optional.Some(false)},
146-
count: 38,
146+
count: 39,
147147
},
148148
{
149149
name: "AllPublic/PublicAndPrivateRepositoriesOfUserIncludingCollaborativeByName",
@@ -158,7 +158,7 @@ func getTestCases() []struct {
158158
{
159159
name: "AllPublic/PublicRepositoriesOfOrganization",
160160
opts: &repo_model.SearchRepoOptions{ListOptions: db.ListOptions{Page: 1, PageSize: 10}, OwnerID: 17, AllPublic: true, Collaborate: optional.Some(false), Template: optional.Some(false)},
161-
count: 33,
161+
count: 34,
162162
},
163163
{
164164
name: "AllTemplates",

modules/indexer/internal/bleve/query.go

+8
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,14 @@ func MatchPhraseQuery(matchPhrase, field, analyzer string, fuzziness int) *query
2828
return q
2929
}
3030

31+
// FuzzyQuery generates a fuzzy term query for the given keyword, field, and fuzziness (maximum edit distance)
32+
func FuzzyQuery(matchPhrase, field string, fuzziness int) *query.FuzzyQuery {
33+
q := bleve.NewFuzzyQuery(matchPhrase)
34+
q.FieldVal = field
35+
q.Fuzziness = fuzziness
36+
return q
37+
}
38+
3139
// BoolFieldQuery generates a bool field query for the given value and field
3240
func BoolFieldQuery(value bool, field string) *query.BoolFieldQuery {
3341
q := bleve.NewBoolFieldQuery(value)

modules/indexer/internal/bleve/util.go

+6-4
Original file line numberDiff line numberDiff line change
@@ -50,12 +50,14 @@ func openIndexer(path string, latestVersion int) (bleve.Index, int, error) {
5050

5151
func GuessFuzzinessByKeyword(s string) int {
5252
// according to https://github.com/blevesearch/bleve/issues/1563, the supported max fuzziness is 2
53-
// magic number 4 was chosen to determine the levenshtein distance per each character of a keyword
54-
// BUT, when using CJK (eg: `갃갃갃` `啊啊啊`), it mismatches a lot.
53+
// BUT, when the keyword contains CJK (e.g. `갃갃갃` `啊啊啊`), fuzzy matching mismatches a lot,
54+
// which we have to live with, since we need to support more than just ASCII.
55+
// If the keyword contains any code point >= 128, the fuzziness is raised to 2;
56+
// otherwise the default fuzziness is 1.
5557
for _, r := range s {
5658
if r >= 128 {
57-
return 0
59+
return 2
5860
}
5961
}
60-
return min(2, len(s)/4)
62+
return 1
6163
}

modules/indexer/issues/bleve/bleve.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
162162
}
163163

164164
queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
165-
inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer, fuzziness),
165+
inner_bleve.FuzzyQuery(options.Keyword, "title", fuzziness),
166166
inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer, fuzziness),
167167
inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer, fuzziness),
168168
}...))

modules/indexer/issues/indexer_test.go

+8-8
Original file line numberDiff line numberDiff line change
@@ -209,13 +209,13 @@ func searchIssueIsPull(t *testing.T) {
209209
SearchOptions{
210210
IsPull: optional.Some(false),
211211
},
212-
[]int64{17, 16, 15, 14, 13, 6, 5, 18, 10, 7, 4, 1},
212+
[]int64{25, 24, 23, 17, 16, 15, 14, 13, 6, 5, 18, 10, 7, 4, 1},
213213
},
214214
{
215215
SearchOptions{
216216
IsPull: optional.Some(true),
217217
},
218-
[]int64{22, 21, 12, 11, 20, 19, 9, 8, 3, 2},
218+
[]int64{22, 21, 28, 27, 26, 12, 11, 20, 19, 9, 8, 3, 2},
219219
},
220220
}
221221
for _, test := range tests {
@@ -236,7 +236,7 @@ func searchIssueIsClosed(t *testing.T) {
236236
SearchOptions{
237237
IsClosed: optional.Some(false),
238238
},
239-
[]int64{22, 21, 17, 16, 15, 14, 13, 12, 11, 20, 6, 19, 18, 10, 7, 9, 8, 3, 2, 1},
239+
[]int64{25, 24, 23, 22, 21, 28, 27, 26, 17, 16, 15, 14, 13, 12, 11, 20, 6, 19, 18, 10, 7, 9, 8, 3, 2, 1},
240240
},
241241
{
242242
SearchOptions{
@@ -302,7 +302,7 @@ func searchIssueByLabelID(t *testing.T) {
302302
SearchOptions{
303303
ExcludedLabelIDs: []int64{1},
304304
},
305-
[]int64{22, 21, 17, 16, 15, 14, 13, 12, 11, 20, 6, 5, 19, 18, 10, 7, 4, 9, 8, 3},
305+
[]int64{25, 24, 23, 22, 21, 28, 27, 26, 17, 16, 15, 14, 13, 12, 11, 20, 6, 5, 19, 18, 10, 7, 4, 9, 8, 3},
306306
},
307307
}
308308
for _, test := range tests {
@@ -323,7 +323,7 @@ func searchIssueByTime(t *testing.T) {
323323
SearchOptions{
324324
UpdatedAfterUnix: optional.Some(int64(0)),
325325
},
326-
[]int64{22, 21, 17, 16, 15, 14, 13, 12, 11, 20, 6, 5, 19, 18, 10, 7, 4, 9, 8, 3, 2, 1},
326+
[]int64{25, 24, 23, 22, 21, 28, 27, 26, 17, 16, 15, 14, 13, 12, 11, 20, 6, 5, 19, 18, 10, 7, 4, 9, 8, 3, 2, 1},
327327
},
328328
}
329329
for _, test := range tests {
@@ -344,7 +344,7 @@ func searchIssueWithOrder(t *testing.T) {
344344
SearchOptions{
345345
SortBy: internal.SortByCreatedAsc,
346346
},
347-
[]int64{1, 2, 3, 8, 9, 4, 7, 10, 18, 19, 5, 6, 20, 11, 12, 13, 14, 15, 16, 17, 21, 22},
347+
[]int64{1, 2, 3, 8, 9, 4, 7, 10, 18, 19, 5, 6, 20, 11, 12, 13, 14, 15, 16, 17, 26, 27, 28, 21, 22, 23, 24, 25},
348348
},
349349
}
350350
for _, test := range tests {
@@ -401,8 +401,8 @@ func searchIssueWithPaginator(t *testing.T) {
401401
PageSize: 5,
402402
},
403403
},
404-
[]int64{22, 21, 17, 16, 15},
405-
22,
404+
[]int64{25, 24, 23, 22, 21},
405+
28,
406406
},
407407
}
408408
for _, test := range tests {

routers/web/repo/issue.go

+1
Original file line numberDiff line numberDiff line change
@@ -2677,6 +2677,7 @@ func SearchIssues(ctx *context.Context) {
26772677
MilestoneIDs: includedMilestones,
26782678
ProjectID: projectID,
26792679
SortBy: issue_indexer.SortByCreatedDesc,
2680+
IsFuzzyKeyword: true,
26802681
}
26812682

26822683
if since != 0 {

0 commit comments

Comments
 (0)