From 3307aba005788d6cef7f13c07aef91f9155925cb Mon Sep 17 00:00:00 2001 From: umuttalha Date: Mon, 6 Jan 2025 01:23:42 +0300 Subject: [PATCH 1/3] added turkish stemmer test --- analysis/lang/tr/stemmer_tr_test.go | 115 ++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 analysis/lang/tr/stemmer_tr_test.go diff --git a/analysis/lang/tr/stemmer_tr_test.go b/analysis/lang/tr/stemmer_tr_test.go new file mode 100644 index 000000000..64efb0944 --- /dev/null +++ b/analysis/lang/tr/stemmer_tr_test.go @@ -0,0 +1,115 @@ +// Copyright (c) 2020 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tr + +import ( + "reflect" + "testing" + + "github.com/blevesearch/bleve/v2/analysis" + "github.com/blevesearch/bleve/v2/registry" +) + +func TestSnowballTurkishStemmer(t *testing.T) { + tests := []struct { + input analysis.TokenStream + output analysis.TokenStream + }{ + { + input: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("kimsesizler"), + }, + }, + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("kimsesiz"), + }, + }, + }, + { + input: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("kitaplar"), + }, + }, + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("kitap"), + }, + }, + }, + { + input: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("bardak"), + }, + }, + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("bardak"), + }, + }, + }, + { + input: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("bardaklar"), + }, + }, + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("bardak"), + }, + }, + }, + { + input: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("kediye"), + }, + }, + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("kedi"), + }, + }, + }, + { + input: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("yazdım"), + }, + }, + output: analysis.TokenStream{ + &analysis.Token{ + Term: []byte("yaz"), + }, + }, + }, + } + + cache := registry.NewCache() + filter, err := cache.TokenFilterNamed(SnowballStemmerName) + if err != nil { + t.Fatal(err) + } + for _, test := range tests { + actual := filter.Filter(test.input) + if !reflect.DeepEqual(actual, test.output) { + t.Errorf("expected %s, got %s", test.output[0].Term, actual[0].Term) + } + } +} From 65881654a94ce5f1d86316aa89b9934f6a0ed9c9 Mon Sep 17 00:00:00 2001 From: umuttalha Date: Mon, 6 Jan 2025 12:27:37 +0300 Subject: [PATCH 2/3] turkish stemmer test edited --- analysis/lang/tr/stemmer_tr_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/analysis/lang/tr/stemmer_tr_test.go b/analysis/lang/tr/stemmer_tr_test.go index 64efb0944..68a1b9cf8 100644 --- a/analysis/lang/tr/stemmer_tr_test.go +++ b/analysis/lang/tr/stemmer_tr_test.go @@ -54,12 +54,12 @@ func TestSnowballTurkishStemmer(t *testing.T) { { input: analysis.TokenStream{ &analysis.Token{ - Term: []byte("bardak"), + Term: []byte("arabanın"), }, }, output: analysis.TokenStream{ &analysis.Token{ - Term: []byte("bardak"), + Term: []byte("araba"), }, }, }, From aac8c6b98d96c9a71e9fbac9c7f450ed1f784d99 Mon Sep 17 00:00:00 2001 From: umuttalha Date: Mon, 6 Jan 2025 21:07:17 +0300 Subject: [PATCH 3/3] Copyright date fixed --- analysis/lang/tr/stemmer_tr_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/analysis/lang/tr/stemmer_tr_test.go b/analysis/lang/tr/stemmer_tr_test.go index 68a1b9cf8..8ad5a564e 100644 --- a/analysis/lang/tr/stemmer_tr_test.go +++ b/analysis/lang/tr/stemmer_tr_test.go @@ -1,4 +1,4 @@ -// Copyright (c) 2020 Couchbase, Inc. +// Copyright (c) 2025 Couchbase, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License.