From aa8cc1ff5af82628ea927eaed34b34c479c8122f Mon Sep 17 00:00:00 2001
From: silkentrance
Date: Mon, 1 Jul 2024 19:50:00 +0200
Subject: [PATCH] fix #4479: add fuzzy keyword search to title

---
Review notes (free text between the "---" separator and the diff):

NOTE(review): the title clause switches from MatchPhraseQuery (which is
given issueIndexerAnalyzer) to FuzzyQuery, which sets no analyzer.
Presumably the keyword is then matched as a single unanalyzed term;
confirm that multi-word or mixed-case keywords still match titles.

NOTE(review): GuessFuzzinessByKeyword no longer depends on the keyword
length. It used to return 0 for keywords containing a rune >= 128 and
min(2, len(s)/4) otherwise; it now returns 2 and 1 respectively.

NOTE(review): IsFuzzyKeyword is set in SearchIssues but is not declared
anywhere in this patch; presumably the field already exists on the
search options struct. Verify against the target tree.

NOTE(review): the updated test expectations reference issue IDs 23-28,
but no fixtures are added here; presumably they come from another
change. Verify before relying on the tests.

NOTE(review): indentation below is written as tabs by nesting depth and
struct-literal fields use a single space after the colon; confirm the
context lines against the target tree before applying.

 modules/indexer/internal/bleve/query.go |  8 ++++++++
 modules/indexer/internal/bleve/util.go  | 10 ++++++----
 modules/indexer/issues/bleve/bleve.go   |  2 +-
 modules/indexer/issues/indexer_test.go  | 16 ++++++++--------
 routers/web/repo/issue.go               |  1 +
 5 files changed, 24 insertions(+), 13 deletions(-)

diff --git a/modules/indexer/internal/bleve/query.go b/modules/indexer/internal/bleve/query.go
index 21422b281c..c5399fe47f 100644
--- a/modules/indexer/internal/bleve/query.go
+++ b/modules/indexer/internal/bleve/query.go
@@ -28,6 +28,14 @@ func MatchPhraseQuery(matchPhrase, field, analyzer string, fuzziness int) *query
 	return q
 }
 
+// FuzzyQuery returns a bleve fuzzy query for matchPhrase, restricted to field, with the given fuzziness
+func FuzzyQuery(matchPhrase, field string, fuzziness int) *query.FuzzyQuery {
+	q := bleve.NewFuzzyQuery(matchPhrase)
+	q.FieldVal = field
+	q.Fuzziness = fuzziness
+	return q
+}
+
 // BoolFieldQuery generates a bool field query for the given value and field
 func BoolFieldQuery(value bool, field string) *query.BoolFieldQuery {
 	q := bleve.NewBoolFieldQuery(value)
diff --git a/modules/indexer/internal/bleve/util.go b/modules/indexer/internal/bleve/util.go
index a2265f86e6..ac6bae2018 100644
--- a/modules/indexer/internal/bleve/util.go
+++ b/modules/indexer/internal/bleve/util.go
@@ -50,12 +50,14 @@ func openIndexer(path string, latestVersion int) (bleve.Index, int, error) {
 
 func GuessFuzzinessByKeyword(s string) int {
 	// according to https://github.com/blevesearch/bleve/issues/1563, the supported max fuzziness is 2
-	// magic number 4 was chosen to determine the levenshtein distance per each character of a keyword
-	// BUT, when using CJK (eg: `갃갃갃` `啊啊啊`), it mismatches a lot.
+	// BUT, when using CJK (eg: `갃갃갃` `啊啊啊`), fuzzy matching mismatches a lot,
+	// which we have to live with, as we need to support more than just ASCII:
+	// if the keyword contains any rune >= 128 (non-ASCII), the fuzziness is 2,
+	// otherwise the default fuzziness of 1 is returned
 	for _, r := range s {
 		if r >= 128 {
-			return 0
+			return 2
 		}
 	}
-	return min(2, len(s)/4)
+	return 1
 }
diff --git a/modules/indexer/issues/bleve/bleve.go b/modules/indexer/issues/bleve/bleve.go
index 7ef370e89c..045fd5f0a9 100644
--- a/modules/indexer/issues/bleve/bleve.go
+++ b/modules/indexer/issues/bleve/bleve.go
@@ -162,7 +162,7 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
 		}
 
 		queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
-			inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer, fuzziness),
+			inner_bleve.FuzzyQuery(options.Keyword, "title", fuzziness),
 			inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer, fuzziness),
 			inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer, fuzziness),
 		}...))
diff --git a/modules/indexer/issues/indexer_test.go b/modules/indexer/issues/indexer_test.go
index e426229f78..323d9c2b63 100644
--- a/modules/indexer/issues/indexer_test.go
+++ b/modules/indexer/issues/indexer_test.go
@@ -209,13 +209,13 @@ func searchIssueIsPull(t *testing.T) {
 			SearchOptions{
 				IsPull: optional.Some(false),
 			},
-			[]int64{17, 16, 15, 14, 13, 6, 5, 18, 10, 7, 4, 1},
+			[]int64{25, 24, 23, 17, 16, 15, 14, 13, 6, 5, 18, 10, 7, 4, 1},
 		},
 		{
 			SearchOptions{
 				IsPull: optional.Some(true),
 			},
-			[]int64{22, 21, 12, 11, 20, 19, 9, 8, 3, 2},
+			[]int64{22, 21, 28, 27, 26, 12, 11, 20, 19, 9, 8, 3, 2},
 		},
 	}
 	for _, test := range tests {
@@ -236,7 +236,7 @@ func searchIssueIsClosed(t *testing.T) {
 			SearchOptions{
 				IsClosed: optional.Some(false),
 			},
-			[]int64{22, 21, 17, 16, 15, 14, 13, 12, 11, 20, 6, 19, 18, 10, 7, 9, 8, 3, 2, 1},
+			[]int64{25, 24, 23, 22, 21, 28, 27, 26, 17, 16, 15, 14, 13, 12, 11, 20, 6, 19, 18, 10, 7, 9, 8, 3, 2, 1},
 		},
 		{
 			SearchOptions{
@@ -302,7 +302,7 @@ func searchIssueByLabelID(t *testing.T) {
 			SearchOptions{
 				ExcludedLabelIDs: []int64{1},
 			},
-			[]int64{22, 21, 17, 16, 15, 14, 13, 12, 11, 20, 6, 5, 19, 18, 10, 7, 4, 9, 8, 3},
+			[]int64{25, 24, 23, 22, 21, 28, 27, 26, 17, 16, 15, 14, 13, 12, 11, 20, 6, 5, 19, 18, 10, 7, 4, 9, 8, 3},
 		},
 	}
 	for _, test := range tests {
@@ -323,7 +323,7 @@ func searchIssueByTime(t *testing.T) {
 			SearchOptions{
 				UpdatedAfterUnix: optional.Some(int64(0)),
 			},
-			[]int64{22, 21, 17, 16, 15, 14, 13, 12, 11, 20, 6, 5, 19, 18, 10, 7, 4, 9, 8, 3, 2, 1},
+			[]int64{25, 24, 23, 22, 21, 28, 27, 26, 17, 16, 15, 14, 13, 12, 11, 20, 6, 5, 19, 18, 10, 7, 4, 9, 8, 3, 2, 1},
 		},
 	}
 	for _, test := range tests {
@@ -344,7 +344,7 @@ func searchIssueWithOrder(t *testing.T) {
 			SearchOptions{
 				SortBy: internal.SortByCreatedAsc,
 			},
-			[]int64{1, 2, 3, 8, 9, 4, 7, 10, 18, 19, 5, 6, 20, 11, 12, 13, 14, 15, 16, 17, 21, 22},
+			[]int64{1, 2, 3, 8, 9, 4, 7, 10, 18, 19, 5, 6, 20, 11, 12, 13, 14, 15, 16, 17, 26, 27, 28, 21, 22, 23, 24, 25},
 		},
 	}
 	for _, test := range tests {
@@ -401,8 +401,8 @@ func searchIssueWithPaginator(t *testing.T) {
 					PageSize: 5,
 				},
 			},
-			[]int64{22, 21, 17, 16, 15},
-			22,
+			[]int64{25, 24, 23, 22, 21},
+			28,
 		},
 	}
 	for _, test := range tests {
diff --git a/routers/web/repo/issue.go b/routers/web/repo/issue.go
index 7c4e3e36f3..537474cbd4 100644
--- a/routers/web/repo/issue.go
+++ b/routers/web/repo/issue.go
@@ -2677,6 +2677,7 @@ func SearchIssues(ctx *context.Context) {
 		MilestoneIDs: includedMilestones,
 		ProjectID: projectID,
 		SortBy: issue_indexer.SortByCreatedDesc,
+		IsFuzzyKeyword: true,
 	}
 
 	if since != 0 {