Skip to content

Commit 783ee0e

Browse files
committed
feat(search): support code search by zoekt
Signed-off-by: ZheNing Hu <[email protected]>
1 parent c27d87a commit 783ee0e

File tree

8 files changed

+560
-1
lines changed

8 files changed

+560
-1
lines changed

go.mod

+6
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ require (
105105
github.com/sassoftware/go-rpmutils v0.4.0
106106
github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3
107107
github.com/shurcooL/vfsgen v0.0.0-20230704071429-0000e147ea92
108+
github.com/sourcegraph/zoekt v0.0.0-20240410142517-ab1b8f09199e
108109
github.com/stretchr/testify v1.10.0
109110
github.com/syndtr/goleveldb v1.0.0
110111
github.com/tstranex/u2f v1.0.0
@@ -175,6 +176,7 @@ require (
175176
github.com/blevesearch/zapx/v14 v14.3.10 // indirect
176177
github.com/blevesearch/zapx/v15 v15.3.13 // indirect
177178
github.com/blevesearch/zapx/v16 v16.1.5 // indirect
179+
github.com/bmatcuk/doublestar v1.3.4 // indirect
178180
github.com/bmatcuk/doublestar/v4 v4.8.1 // indirect
179181
github.com/boombuler/barcode v1.0.2 // indirect
180182
github.com/bradfitz/gomemcache v0.0.0-20230905024940-24af94b03874 // indirect
@@ -231,13 +233,15 @@ require (
231233
github.com/gorilla/handlers v1.5.2 // indirect
232234
github.com/gorilla/mux v1.8.1 // indirect
233235
github.com/gorilla/securecookie v1.1.2 // indirect
236+
github.com/grafana/regexp v0.0.0-20240607082908-2cb410fa05da // indirect
234237
github.com/hashicorp/errwrap v1.1.0 // indirect
235238
github.com/hashicorp/go-cleanhttp v0.5.2 // indirect
236239
github.com/hashicorp/go-multierror v1.1.1 // indirect
237240
github.com/hashicorp/go-retryablehttp v0.7.7 // indirect
238241
github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect
239242
github.com/jessevdk/go-flags v1.6.1 // indirect
240243
github.com/josharian/intern v1.0.0 // indirect
244+
github.com/keegancsmith/rpc v1.3.0 // indirect
241245
github.com/kevinburke/ssh_config v1.2.0 // indirect
242246
github.com/klauspost/pgzip v1.2.6 // indirect
243247
github.com/kr/pretty v0.3.1 // indirect
@@ -264,6 +268,7 @@ require (
264268
github.com/oklog/ulid v1.3.1 // indirect
265269
github.com/olekukonko/tablewriter v0.0.5 // indirect
266270
github.com/onsi/ginkgo v1.16.5 // indirect
271+
github.com/opentracing/opentracing-go v1.2.0 // indirect
267272
github.com/pelletier/go-toml/v2 v2.2.3 // indirect
268273
github.com/pierrec/lz4/v4 v4.1.22 // indirect
269274
github.com/pjbgf/sha1cd v0.3.2 // indirect
@@ -282,6 +287,7 @@ require (
282287
github.com/sirupsen/logrus v1.9.3 // indirect
283288
github.com/skeema/knownhosts v1.3.1 // indirect
284289
github.com/sourcegraph/conc v0.3.0 // indirect
290+
github.com/sourcegraph/go-ctags v0.0.0-20240424152308-4faeee4849da // indirect
285291
github.com/spf13/afero v1.14.0 // indirect
286292
github.com/spf13/cast v1.7.1 // indirect
287293
github.com/spf13/pflag v1.0.6 // indirect

go.sum

+62
Large diffs are not rendered by default.

modules/indexer/code/git.go

+4
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@ func getRepoChanges(ctx context.Context, repo *repo_model.Repository, revision s
3737
needGenesis = len(stdout) == 0
3838
}
3939

40+
if setting.Indexer.RepoType == "zoekt" {
41+
// TODO: check whether the zoekt index file metadata is out of sync with the DB index status; if it is, fall back to genesis changes
42+
}
43+
4044
if needGenesis {
4145
return genesisChanges(ctx, repo, revision)
4246
}

modules/indexer/code/indexer.go

+20-1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
package code
55

66
import (
7+
"code.gitea.io/gitea/modules/indexer/code/zoekt"
78
"context"
89
"os"
910
"runtime/pprof"
@@ -116,7 +117,7 @@ func Init() {
116117

117118
// Create the Queue
118119
switch setting.Indexer.RepoType {
119-
case "bleve", "elasticsearch":
120+
case "bleve", "elasticsearch", "zoekt":
120121
handler := func(items ...*internal.IndexerData) (unhandled []*internal.IndexerData) {
121122
indexer := *globalIndexer.Load()
122123
for _, indexerData := range items {
@@ -183,6 +184,24 @@ func Init() {
183184
close(waitChannel)
184185
log.Fatal("PID: %d Unable to initialize the elasticsearch Repository Indexer connstr: %s Error: %v", os.Getpid(), setting.Indexer.RepoConnStr, err)
185186
}
187+
case "zoekt":
188+
log.Info("PID: %d Initializing Repository Indexer at: %s", os.Getpid(), setting.Indexer.RepoPath)
189+
defer func() {
190+
if err := recover(); err != nil {
191+
log.Error("PANIC whilst initializing repository indexer: %v\nStacktrace: %s", err, log.Stack(2))
192+
log.Error("The indexer files are likely corrupted and may need to be deleted")
193+
log.Error("You can completely remove the \"%s\" directory to make Gitea recreate the indexes", setting.Indexer.RepoPath)
194+
}
195+
}()
196+
197+
rIndexer = zoekt.NewIndexer(setting.Indexer.RepoPath)
198+
existed, err = rIndexer.Init(ctx)
199+
if err != nil {
200+
cancel()
201+
(*globalIndexer.Load()).Close()
202+
close(waitChannel)
203+
log.Fatal("PID: %d Unable to initialize the zoekt Repository Indexer at path: %s Error: %v", os.Getpid(), setting.Indexer.RepoPath, err)
204+
}
186205

187206
default:
188207
log.Fatal("PID: %d Unknown Indexer type: %s", os.Getpid(), setting.Indexer.RepoType)

modules/indexer/code/zoekt/utils.go

+46
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
package zoekt
2+
3+
import "unicode/utf8"
4+
5+
// specialBytes is a 128-bit set with one bit per ASCII byte, recording which
// bytes QuoteMeta has to escape. Byte b is stored as bit b/16 of element b%16.
var specialBytes [16]byte

// special reports whether b is one of the ASCII metacharacters that QuoteMeta
// escapes. Bytes at or above utf8.RuneSelf are never special.
func special(b byte) bool {
	if b >= utf8.RuneSelf {
		return false
	}
	slot, bit := b%16, b/16
	return specialBytes[slot]&(1<<bit) != 0
}

// init fills specialBytes with the metacharacter set.
func init() {
	const metaChars = `-:\.+*?()|[]{}^$`
	for i := 0; i < len(metaChars); i++ {
		c := metaChars[i]
		specialBytes[c%16] |= 1 << (c / 16)
	}
}
18+
19+
func QuoteMeta(s string) string {
20+
// A byte loop is correct because all metacharacters are ASCII.
21+
var i int
22+
for i = 0; i < len(s); i++ {
23+
if special(s[i]) {
24+
break
25+
}
26+
}
27+
// No meta characters found, so return original string.
28+
if i >= len(s) {
29+
return s
30+
}
31+
32+
b := make([]byte, 3*len(s)-2*i)
33+
copy(b, s[:i])
34+
j := i
35+
for ; i < len(s); i++ {
36+
if special(s[i]) {
37+
b[j] = '\\'
38+
j++
39+
b[j] = '\\'
40+
j++
41+
}
42+
b[j] = s[i]
43+
j++
44+
}
45+
return string(b[:j])
46+
}

0 commit comments

Comments
 (0)