|
| 1 | +/* |
| 2 | + * Licensed to the Apache Software Foundation (ASF) under one or more |
| 3 | + * contributor license agreements. See the NOTICE file distributed with |
| 4 | + * this work for additional information regarding copyright ownership. |
| 5 | + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| 6 | + * (the "License"); you may not use this file except in compliance with |
| 7 | + * the License. You may obtain a copy of the License at |
| 8 | + * |
| 9 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | + * |
| 11 | + * Unless required by applicable law or agreed to in writing, software |
| 12 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | + * See the License for the specific language governing permissions and |
| 15 | + * limitations under the License. |
| 16 | + */ |
| 17 | +package opennlp.tools.eval; |
| 18 | + |
| 19 | +import java.io.FileNotFoundException; |
| 20 | +import java.io.IOException; |
| 21 | +import java.nio.file.Files; |
| 22 | +import java.nio.file.Path; |
| 23 | +import java.util.List; |
| 24 | +import java.util.Locale; |
| 25 | + |
| 26 | +import org.junit.jupiter.params.ParameterizedTest; |
| 27 | +import org.junit.jupiter.params.provider.EnumSource; |
| 28 | + |
| 29 | +import opennlp.tools.stemmer.snowball.SnowballStemmer; |
| 30 | + |
| 31 | +import static org.junit.jupiter.api.Assertions.assertEquals; |
| 32 | + |
| 33 | +/** |
| 34 | + * Eval tests for the {@link SnowballStemmer} class. |
| 35 | + * <p> |
| 36 | + * Uses the data set provided by <a href="https://github.com/snowballstem/snowball-data"></a> |
| 37 | + * to test all languages available in OpenNLP. |
| 38 | + * <p> |
| 39 | + */ |
| 40 | +public class SnowballTokenizerEval extends AbstractEvalTest { |
| 41 | + |
| 42 | + @ParameterizedTest |
| 43 | + @EnumSource(SnowballStemmer.ALGORITHM.class) |
| 44 | + public void test(SnowballStemmer.ALGORITHM lang) throws IOException { |
| 45 | + |
| 46 | + final List<String> vocabulary = getData(lang, "voc.txt"); |
| 47 | + final List<String> expectedOutputs = getData(lang, "output.txt"); |
| 48 | + |
| 49 | + assertEquals(vocabulary.size(), expectedOutputs.size(), "Expected equally sized lists."); |
| 50 | + final SnowballStemmer stemmer = new SnowballStemmer(lang); |
| 51 | + |
| 52 | + for (int i = 0; i < vocabulary.size(); i++) { |
| 53 | + |
| 54 | + final String word = vocabulary.get(i); |
| 55 | + final String stem = expectedOutputs.get(i); |
| 56 | + |
| 57 | + assertEquals(stem, stemmer.stem(word)); |
| 58 | + } |
| 59 | + } |
| 60 | + |
| 61 | + private List<String> getData(SnowballStemmer.ALGORITHM lang, String name) throws IOException { |
| 62 | + final Path expectedOutput = getSnowballDataLanguagePath( |
| 63 | + getSnowballDataPath(), lang).resolve(name); |
| 64 | + return Files.readAllLines(expectedOutput); |
| 65 | + } |
| 66 | + |
| 67 | + private Path getSnowballDataPath() throws FileNotFoundException { |
| 68 | + return getOpennlpDataDir().toPath().resolve("snowball-data"); |
| 69 | + } |
| 70 | + |
| 71 | + private Path getSnowballDataLanguagePath(Path root, SnowballStemmer.ALGORITHM lang) { |
| 72 | + return root.resolve(lang.toString().toLowerCase(Locale.ROOT)); |
| 73 | + } |
| 74 | + |
| 75 | +} |
0 commit comments