Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: feat(search): support code search by zoekt #33850

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions assets/go-licenses.json

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ require (
github.com/sassoftware/go-rpmutils v0.4.0
github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3
github.com/shurcooL/vfsgen v0.0.0-20230704071429-0000e147ea92
github.com/sourcegraph/zoekt v0.0.0-20240410142517-ab1b8f09199e
github.com/stretchr/testify v1.10.0
github.com/syndtr/goleveldb v1.0.0
github.com/tstranex/u2f v1.0.0
Expand Down Expand Up @@ -175,6 +176,7 @@ require (
github.com/blevesearch/zapx/v14 v14.3.10 // indirect
github.com/blevesearch/zapx/v15 v15.3.13 // indirect
github.com/blevesearch/zapx/v16 v16.1.5 // indirect
github.com/bmatcuk/doublestar v1.3.4 // indirect
github.com/bmatcuk/doublestar/v4 v4.8.1 // indirect
github.com/boombuler/barcode v1.0.2 // indirect
github.com/bradfitz/gomemcache v0.0.0-20230905024940-24af94b03874 // indirect
Expand Down Expand Up @@ -231,13 +233,15 @@ require (
github.com/gorilla/handlers v1.5.2 // indirect
github.com/gorilla/mux v1.8.1 // indirect
github.com/gorilla/securecookie v1.1.2 // indirect
github.com/grafana/regexp v0.0.0-20240607082908-2cb410fa05da // indirect
github.com/hashicorp/errwrap v1.1.0 // indirect
github.com/hashicorp/go-cleanhttp v0.5.2 // indirect
github.com/hashicorp/go-multierror v1.1.1 // indirect
github.com/hashicorp/go-retryablehttp v0.7.7 // indirect
github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect
github.com/jessevdk/go-flags v1.6.1 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/keegancsmith/rpc v1.3.0 // indirect
github.com/kevinburke/ssh_config v1.2.0 // indirect
github.com/klauspost/pgzip v1.2.6 // indirect
github.com/kr/pretty v0.3.1 // indirect
Expand All @@ -264,6 +268,7 @@ require (
github.com/oklog/ulid v1.3.1 // indirect
github.com/olekukonko/tablewriter v0.0.5 // indirect
github.com/onsi/ginkgo v1.16.5 // indirect
github.com/opentracing/opentracing-go v1.2.0 // indirect
github.com/pelletier/go-toml/v2 v2.2.3 // indirect
github.com/pierrec/lz4/v4 v4.1.22 // indirect
github.com/pjbgf/sha1cd v0.3.2 // indirect
Expand All @@ -282,6 +287,7 @@ require (
github.com/sirupsen/logrus v1.9.3 // indirect
github.com/skeema/knownhosts v1.3.1 // indirect
github.com/sourcegraph/conc v0.3.0 // indirect
github.com/sourcegraph/go-ctags v0.0.0-20240424152308-4faeee4849da // indirect
github.com/spf13/afero v1.14.0 // indirect
github.com/spf13/cast v1.7.1 // indirect
github.com/spf13/pflag v1.0.6 // indirect
Expand Down
62 changes: 62 additions & 0 deletions go.sum

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions modules/indexer/code/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ func getRepoChanges(ctx context.Context, repo *repo_model.Repository, revision s
needGenesis = len(stdout) == 0
}

// TODO: check if zoekt index file meta status is not sync with db index status, if not, get genesis changes
//if setting.Indexer.RepoType == "zoekt" {
//}

if needGenesis {
return genesisChanges(ctx, repo, revision)
}
Expand Down
21 changes: 20 additions & 1 deletion modules/indexer/code/indexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"code.gitea.io/gitea/modules/indexer/code/bleve"
"code.gitea.io/gitea/modules/indexer/code/elasticsearch"
"code.gitea.io/gitea/modules/indexer/code/internal"
"code.gitea.io/gitea/modules/indexer/code/zoekt"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/process"
"code.gitea.io/gitea/modules/queue"
Expand Down Expand Up @@ -116,7 +117,7 @@ func Init() {

// Create the Queue
switch setting.Indexer.RepoType {
case "bleve", "elasticsearch":
case "bleve", "elasticsearch", "zoekt":
handler := func(items ...*internal.IndexerData) (unhandled []*internal.IndexerData) {
indexer := *globalIndexer.Load()
for _, indexerData := range items {
Expand Down Expand Up @@ -183,6 +184,24 @@ func Init() {
close(waitChannel)
log.Fatal("PID: %d Unable to initialize the elasticsearch Repository Indexer connstr: %s Error: %v", os.Getpid(), setting.Indexer.RepoConnStr, err)
}
case "zoekt":
log.Info("PID: %d Initializing Repository Indexer at: %s", os.Getpid(), setting.Indexer.RepoPath)
defer func() {
if err := recover(); err != nil {
log.Error("PANIC whilst initializing repository indexer: %v\nStacktrace: %s", err, log.Stack(2))
log.Error("The indexer files are likely corrupted and may need to be deleted")
log.Error("You can completely remove the \"%s\" directory to make Gitea recreate the indexes", setting.Indexer.RepoPath)
}
}()

rIndexer = zoekt.NewIndexer(setting.Indexer.RepoPath)
existed, err = rIndexer.Init(ctx)
if err != nil {
cancel()
(*globalIndexer.Load()).Close()
close(waitChannel)
log.Fatal("PID: %d Unable to initialize the zoekt Repository Indexer at path: %s Error: %v", os.Getpid(), setting.Indexer.RepoPath, err)
}

default:
log.Fatal("PID: %d Unknown Indexer type: %s", os.Getpid(), setting.Indexer.RepoType)
Expand Down
49 changes: 49 additions & 0 deletions modules/indexer/code/zoekt/utils.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// Copyright 2025 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package zoekt

import "unicode/utf8"

// Bitmap used by func special to check whether a character needs to be escaped.
var specialBytes [16]byte

// special reports whether byte b needs to be escaped by QuoteMeta.
func special(b byte) bool {
return b < utf8.RuneSelf && specialBytes[b%16]&(1<<(b/16)) != 0
}

func init() {
for _, b := range []byte(`-:\.+*?()|[]{}^$`) {
specialBytes[b%16] |= 1 << (b / 16)
}
}

func QuoteMeta(s string) string {
// A byte loop is correct because all metacharacters are ASCII.
var i int
for i = 0; i < len(s); i++ {
if special(s[i]) {
break
}
}
// No meta characters found, so return original string.
if i >= len(s) {
return s
}

b := make([]byte, 3*len(s)-2*i)
copy(b, s[:i])
j := i
for ; i < len(s); i++ {
if special(s[i]) {
b[j] = '\\'
j++
b[j] = '\\'
j++
}
b[j] = s[i]
j++
}
return string(b[:j])
}
Loading
Loading