Skip to content

Improve issue & code search #33860

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Mar 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 22 additions & 6 deletions modules/git/grep.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,19 @@ type GrepResult struct {
LineCodes []string
}

// GrepModeType selects how GrepSearch turns the search string into
// `git grep` pattern arguments.
type GrepModeType string

const (
// GrepModeExact passes the whole search string as one pattern together with --fixed-strings.
GrepModeExact GrepModeType = "exact"
// GrepModeWords splits the search string on whitespace and passes every word as its own
// pattern (with --fixed-strings and --ignore-case), joined by --and.
// GrepSearch also falls back to this mode for any value that is neither exact nor regexp.
GrepModeWords GrepModeType = "words"
// GrepModeRegexp passes the whole search string as one pattern together with --perl-regexp.
GrepModeRegexp GrepModeType = "regexp"
)

type GrepOptions struct {
RefName string
MaxResultLimit int
ContextLineNumber int
IsFuzzy bool
GrepMode GrepModeType
MaxLineLength int // the maximum length of a line to parse, exceeding chars will be truncated
PathspecList []string
}
Expand All @@ -52,15 +60,23 @@ func GrepSearch(ctx context.Context, repo *Repository, search string, opts GrepO
2^@repo: go-gitea/gitea
*/
var results []*GrepResult
cmd := NewCommand("grep", "--null", "--break", "--heading", "--fixed-strings", "--line-number", "--ignore-case", "--full-name")
cmd := NewCommand("grep", "--null", "--break", "--heading", "--line-number", "--full-name")
cmd.AddOptionValues("--context", fmt.Sprint(opts.ContextLineNumber))
if opts.IsFuzzy {
if opts.GrepMode == GrepModeExact {
cmd.AddArguments("--fixed-strings")
cmd.AddOptionValues("-e", strings.TrimLeft(search, "-"))
} else if opts.GrepMode == GrepModeRegexp {
cmd.AddArguments("--perl-regexp")
cmd.AddOptionValues("-e", strings.TrimLeft(search, "-"))
} else /* words */ {
words := strings.Fields(search)
for _, word := range words {
cmd.AddArguments("--fixed-strings", "--ignore-case")
for i, word := range words {
cmd.AddOptionValues("-e", strings.TrimLeft(word, "-"))
if i < len(words)-1 {
cmd.AddOptionValues("--and")
}
}
} else {
cmd.AddOptionValues("-e", strings.TrimLeft(search, "-"))
}
cmd.AddDynamicArguments(util.IfZero(opts.RefName, "HEAD"))
cmd.AddDashesAndList(opts.PathspecList...)
Expand Down
20 changes: 12 additions & 8 deletions modules/indexer/code/bleve/bleve.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/gitrepo"
"code.gitea.io/gitea/modules/indexer"
path_filter "code.gitea.io/gitea/modules/indexer/code/bleve/token/path"
"code.gitea.io/gitea/modules/indexer/code/internal"
indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
Expand Down Expand Up @@ -136,6 +137,10 @@ type Indexer struct {
indexer_internal.Indexer // do not composite inner_bleve.Indexer directly to avoid exposing too much
}

// SupportedSearchModes reports the search modes this bleve code indexer offers.
// It returns whatever indexer.SearchModesExactWords() provides
// (presumably the "exact" and "words" modes — confirm in modules/indexer).
func (b *Indexer) SupportedSearchModes() []indexer.SearchMode {
return indexer.SearchModesExactWords()
}

// NewIndexer creates a new bleve local indexer
func NewIndexer(indexDir string) *Indexer {
inner := inner_bleve.NewIndexer(indexDir, repoIndexerLatestVersion, generateBleveIndexMapping)
Expand Down Expand Up @@ -267,19 +272,18 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
pathQuery.FieldVal = "Filename"
pathQuery.SetBoost(10)

keywordAsPhrase, isPhrase := internal.ParseKeywordAsPhrase(opts.Keyword)
if isPhrase {
q := bleve.NewMatchPhraseQuery(keywordAsPhrase)
if opts.SearchMode == indexer.SearchModeExact {
q := bleve.NewMatchPhraseQuery(opts.Keyword)
q.FieldVal = "Content"
if opts.IsKeywordFuzzy {
q.Fuzziness = inner_bleve.GuessFuzzinessByKeyword(keywordAsPhrase)
}
contentQuery = q
} else {
} else /* words */ {
q := bleve.NewMatchQuery(opts.Keyword)
q.FieldVal = "Content"
if opts.IsKeywordFuzzy {
if opts.SearchMode == indexer.SearchModeFuzzy {
// this logic doesn't seem right, it is only used to pass the test-case `Keyword: "dESCRIPTION"`, which doesn't seem to be a real-life use-case.
q.Fuzziness = inner_bleve.GuessFuzzinessByKeyword(opts.Keyword)
} else {
q.Operator = query.MatchQueryOperatorAnd
}
contentQuery = q
}
Expand Down
19 changes: 9 additions & 10 deletions modules/indexer/code/elasticsearch/elasticsearch.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/gitrepo"
"code.gitea.io/gitea/modules/indexer"
"code.gitea.io/gitea/modules/indexer/code/internal"
indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
inner_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch"
Expand All @@ -24,7 +25,6 @@ import (
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/modules/typesniffer"
"code.gitea.io/gitea/modules/util"

"github.com/go-enry/go-enry/v2"
"github.com/olivere/elastic/v7"
Expand All @@ -46,6 +46,10 @@ type Indexer struct {
indexer_internal.Indexer // do not composite inner_elasticsearch.Indexer directly to avoid exposing too much
}

// SupportedSearchModes reports the search modes this elasticsearch code indexer offers.
// It returns whatever indexer.SearchModesExactWords() provides
// (presumably the "exact" and "words" modes — confirm in modules/indexer).
func (b *Indexer) SupportedSearchModes() []indexer.SearchMode {
return indexer.SearchModesExactWords()
}

// NewIndexer creates a new elasticsearch indexer
func NewIndexer(url, indexerName string) *Indexer {
inner := inner_elasticsearch.NewIndexer(url, indexerName, esRepoIndexerLatestVersion, defaultMapping)
Expand Down Expand Up @@ -361,15 +365,10 @@ func extractAggs(searchResult *elastic.SearchResult) []*internal.SearchResultLan
// Search searches for codes and language stats by given conditions.
func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
var contentQuery elastic.Query
keywordAsPhrase, isPhrase := internal.ParseKeywordAsPhrase(opts.Keyword)
if isPhrase {
contentQuery = elastic.NewMatchPhraseQuery("content", keywordAsPhrase)
} else {
// TODO: this is the old logic, but not really using "fuzziness"
// * IsKeywordFuzzy=true: "best_fields"
// * IsKeywordFuzzy=false: "phrase_prefix"
contentQuery = elastic.NewMultiMatchQuery("content", opts.Keyword).
Type(util.Iif(opts.IsKeywordFuzzy, esMultiMatchTypeBestFields, esMultiMatchTypePhrasePrefix))
if opts.SearchMode == indexer.SearchModeExact {
contentQuery = elastic.NewMatchPhraseQuery("content", opts.Keyword)
} else /* words */ {
contentQuery = elastic.NewMultiMatchQuery("content", opts.Keyword).Type(esMultiMatchTypeBestFields).Operator("and")
}
kwQuery := elastic.NewBoolQuery().Should(
contentQuery,
Expand Down
12 changes: 9 additions & 3 deletions modules/indexer/code/gitgrep/gitgrep.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"strings"

"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/indexer"
code_indexer "code.gitea.io/gitea/modules/indexer/code"
"code.gitea.io/gitea/modules/setting"
)
Expand All @@ -23,11 +24,16 @@ func indexSettingToGitGrepPathspecList() (list []string) {
return list
}

func PerformSearch(ctx context.Context, page int, repoID int64, gitRepo *git.Repository, ref git.RefName, keyword string, isFuzzy bool) (searchResults []*code_indexer.Result, total int, err error) {
// TODO: it should also respect ParseKeywordAsPhrase and clarify the "fuzzy" behavior
func PerformSearch(ctx context.Context, page int, repoID int64, gitRepo *git.Repository, ref git.RefName, keyword string, searchMode indexer.SearchModeType) (searchResults []*code_indexer.Result, total int, err error) {
grepMode := git.GrepModeWords
if searchMode == indexer.SearchModeExact {
grepMode = git.GrepModeExact
} else if searchMode == indexer.SearchModeRegexp {
grepMode = git.GrepModeRegexp
}
res, err := git.GrepSearch(ctx, gitRepo, keyword, git.GrepOptions{
ContextLineNumber: 1,
IsFuzzy: isFuzzy,
GrepMode: grepMode,
RefName: ref.String(),
PathspecList: indexSettingToGitGrepPathspecList(),
})
Expand Down
9 changes: 9 additions & 0 deletions modules/indexer/code/indexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"code.gitea.io/gitea/models/db"
repo_model "code.gitea.io/gitea/models/repo"
"code.gitea.io/gitea/modules/graceful"
"code.gitea.io/gitea/modules/indexer"
"code.gitea.io/gitea/modules/indexer/code/bleve"
"code.gitea.io/gitea/modules/indexer/code/elasticsearch"
"code.gitea.io/gitea/modules/indexer/code/internal"
Expand Down Expand Up @@ -302,3 +303,11 @@ func populateRepoIndexer(ctx context.Context) {
}
log.Info("Done (re)populating the repo indexer with existing repositories")
}

// SupportedSearchModes returns the search modes reported by the indexer
// currently held in globalIndexer. It returns nil when globalIndexer.Load()
// yields nil, i.e. no indexer is available to ask.
func SupportedSearchModes() []indexer.SearchMode {
	if current := globalIndexer.Load(); current != nil {
		// Load hands back a pointer to the stored indexer value; dereference before calling.
		return (*current).SupportedSearchModes()
	}
	return nil
}
35 changes: 19 additions & 16 deletions modules/indexer/code/indexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (

"code.gitea.io/gitea/models/db"
"code.gitea.io/gitea/models/unittest"
indexer_module "code.gitea.io/gitea/modules/indexer"
"code.gitea.io/gitea/modules/indexer/code/bleve"
"code.gitea.io/gitea/modules/indexer/code/elasticsearch"
"code.gitea.io/gitea/modules/indexer/code/internal"
Expand Down Expand Up @@ -39,10 +40,11 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
assert.NoError(t, setupRepositoryIndexes(t.Context(), indexer))

keywords := []struct {
RepoIDs []int64
Keyword string
Langs int
Results []codeSearchResult
RepoIDs []int64
Keyword string
Langs int
SearchMode indexer_module.SearchModeType
Results []codeSearchResult
}{
// Search for an exact match on the contents of a file
// This scenario yields a single result (the file README.md on the repo '1')
Expand Down Expand Up @@ -183,17 +185,18 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
},
// Search for matches on the contents of files regardless of case.
{
RepoIDs: nil,
Keyword: "dESCRIPTION",
Langs: 1,
RepoIDs: nil,
Keyword: "dESCRIPTION",
Langs: 1,
SearchMode: indexer_module.SearchModeFuzzy,
Results: []codeSearchResult{
{
Filename: "README.md",
Content: "# repo1\n\nDescription for repo1",
},
},
},
// Search for an exact match on the filename within the repo '62' (case insenstive).
// Search for an exact match on the filename within the repo '62' (case-insensitive).
// This scenario yields a single result (the file avocado.md on the repo '62')
{
RepoIDs: []int64{62},
Expand All @@ -206,7 +209,7 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
},
},
},
// Search for matches on the contents of files when the criteria is a expression.
// Search for matches on the contents of files when the criteria are an expression.
{
RepoIDs: []int64{62},
Keyword: "console.log",
Expand All @@ -218,7 +221,7 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
},
},
},
// Search for matches on the contents of files when the criteria is part of a expression.
// Search for matches on the contents of files when the criteria are parts of an expression.
{
RepoIDs: []int64{62},
Keyword: "log",
Expand All @@ -235,16 +238,16 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
for _, kw := range keywords {
t.Run(kw.Keyword, func(t *testing.T) {
total, res, langs, err := indexer.Search(t.Context(), &internal.SearchOptions{
RepoIDs: kw.RepoIDs,
Keyword: kw.Keyword,
RepoIDs: kw.RepoIDs,
Keyword: kw.Keyword,
SearchMode: kw.SearchMode,
Paginator: &db.ListOptions{
Page: 1,
PageSize: 10,
},
IsKeywordFuzzy: true,
})
assert.NoError(t, err)
assert.Len(t, langs, kw.Langs)
require.NoError(t, err)
require.Len(t, langs, kw.Langs)

hits := make([]codeSearchResult, 0, len(res))

Expand Down Expand Up @@ -289,7 +292,7 @@ func TestBleveIndexAndSearch(t *testing.T) {
_, err := idx.Init(t.Context())
require.NoError(t, err)

testIndexer("beleve", t, idx)
testIndexer("bleve", t, idx)
}

func TestESIndexAndSearch(t *testing.T) {
Expand Down
8 changes: 7 additions & 1 deletion modules/indexer/code/internal/indexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (

"code.gitea.io/gitea/models/db"
repo_model "code.gitea.io/gitea/models/repo"
"code.gitea.io/gitea/modules/indexer"
"code.gitea.io/gitea/modules/indexer/internal"
)

Expand All @@ -18,14 +19,15 @@ type Indexer interface {
Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *RepoChanges) error
Delete(ctx context.Context, repoID int64) error
Search(ctx context.Context, opts *SearchOptions) (int64, []*SearchResult, []*SearchResultLanguages, error)
SupportedSearchModes() []indexer.SearchMode
}

type SearchOptions struct {
RepoIDs []int64
Keyword string
Language string

IsKeywordFuzzy bool
SearchMode indexer.SearchModeType

db.Paginator
}
Expand All @@ -41,6 +43,10 @@ type dummyIndexer struct {
internal.Indexer
}

// SupportedSearchModes returns nil: the dummy indexer advertises no search modes.
func (d *dummyIndexer) SupportedSearchModes() []indexer.SearchMode {
return nil
}

// Index performs no indexing; it ignores all arguments and always returns
// an "indexer is not ready" error.
func (d *dummyIndexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *RepoChanges) error {
return fmt.Errorf("indexer is not ready")
}
Expand Down
12 changes: 1 addition & 11 deletions modules/indexer/code/internal/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,7 @@ import (
"code.gitea.io/gitea/modules/log"
)

const (
filenameMatchNumberOfLines = 7 // Copied from github search
)
const filenameMatchNumberOfLines = 7 // Copied from GitHub search

// FilenameIndexerID builds the indexer ID string for a file in a repository:
// internal.Base36(repoID), an underscore, then the filename unchanged.
func FilenameIndexerID(repoID int64, filename string) string {
return internal.Base36(repoID) + "_" + filename
}
}
return 0, len(content)
}

// ParseKeywordAsPhrase reports whether keyword is wrapped in double quotes.
// When it is (and is at least two bytes long), the text between the outer
// quotes is returned together with true; otherwise it returns "" and false.
// Only the outermost quotes are stripped; the inner content is not decoded.
func ParseKeywordAsPhrase(keyword string) (string, bool) {
	const quote = `"`

	// A lone `"` is both prefix and suffix, so a minimum length of two is required.
	if len(keyword) < 2 || !strings.HasPrefix(keyword, quote) || !strings.HasSuffix(keyword, quote) {
		return "", false
	}

	phrase := keyword[1 : len(keyword)-1]
	return phrase, true
}
30 changes: 0 additions & 30 deletions modules/indexer/code/internal/util_test.go

This file was deleted.

1 change: 0 additions & 1 deletion modules/indexer/code/search.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,6 @@ func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Res
}

// PerformSearch perform a search on a repository
// if isFuzzy is true set the Damerau-Levenshtein distance from 0 to 2
func PerformSearch(ctx context.Context, opts *SearchOptions) (int, []*Result, []*SearchResultLanguages, error) {
if opts == nil || len(opts.Keyword) == 0 {
return 0, nil, nil, nil
Expand Down
Loading