Skip to content

Commit ed2682c

Browse files
authored
OPENNLP-1447: Reenable Cmdline Tool execution tests (#720)
- removes @Disabled from multiple cmdline execution tests - adjusts TokenizerTrainerTool to better handle existing yet "empty" abbreviation dictionary (abb-dict) files
1 parent 3fd914f commit ed2682c

File tree

4 files changed

+131
-99
lines changed

4 files changed

+131
-99
lines changed

opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java

+10-2
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import java.io.File;
2222
import java.io.FileInputStream;
2323
import java.io.IOException;
24+
import java.io.InputStream;
2425

2526
import opennlp.tools.cmdline.AbstractTrainerTool;
2627
import opennlp.tools.cmdline.CmdLineUtil;
@@ -33,6 +34,7 @@
3334
import opennlp.tools.tokenize.TokenSample;
3435
import opennlp.tools.tokenize.TokenizerFactory;
3536
import opennlp.tools.tokenize.TokenizerModel;
37+
import opennlp.tools.util.InvalidFormatException;
3638
import opennlp.tools.util.TrainingParameters;
3739
import opennlp.tools.util.model.ModelUtil;
3840

@@ -53,9 +55,15 @@ public String getShortDescription() {
5355

5456
static Dictionary loadDict(File f) throws IOException {
5557
Dictionary dict = null;
56-
if (f != null) {
58+
if (f != null && f.exists()) {
5759
CmdLineUtil.checkInputFile("abb dict", f);
58-
dict = new Dictionary(new BufferedInputStream(new FileInputStream(f)));
60+
try (InputStream in = new BufferedInputStream(new FileInputStream(f))) {
61+
if (in.available() == 0) {
62+
throw new InvalidFormatException("Encountered an empty dictionary file?!");
63+
} else {
64+
dict = new Dictionary(in);
65+
}
66+
}
5967
}
6068
return dict;
6169
}

opennlp-tools/src/test/java/opennlp/tools/cmdline/TokenNameFinderToolTest.java

+55-50
Original file line numberDiff line numberDiff line change
@@ -19,19 +19,22 @@
1919

2020
import java.io.BufferedOutputStream;
2121
import java.io.ByteArrayInputStream;
22-
import java.io.ByteArrayOutputStream;
2322
import java.io.File;
2423
import java.io.FileOutputStream;
2524
import java.io.IOException;
2625
import java.io.InputStream;
2726
import java.io.OutputStream;
28-
import java.io.PrintStream;
2927
import java.nio.charset.StandardCharsets;
3028
import java.nio.file.Files;
3129

32-
import org.junit.jupiter.api.Assertions;
33-
import org.junit.jupiter.api.Disabled;
30+
import ch.qos.logback.classic.Level;
31+
import ch.qos.logback.classic.Logger;
32+
import ch.qos.logback.classic.LoggerContext;
33+
import nl.altindag.log.LogCaptor;
34+
import org.junit.jupiter.api.AfterAll;
35+
import org.junit.jupiter.api.BeforeAll;
3436
import org.junit.jupiter.api.Test;
37+
import org.slf4j.LoggerFactory;
3538

3639
import opennlp.tools.cmdline.namefind.TokenNameFinderTool;
3740
import opennlp.tools.namefind.NameFinderME;
@@ -44,75 +47,80 @@
4447
import opennlp.tools.util.PlainTextByLineStream;
4548
import opennlp.tools.util.TrainingParameters;
4649

47-
public class TokenNameFinderToolTest {
48-
49-
@Test
50-
//TODO OPENNLP-1447
51-
@Disabled(value = "OPENNLP-1447: These kind of tests won't work anymore. " +
52-
"We need to find a way to redirect log output (i.e. implement " +
53-
"a custom log adapter and plug it in, if we want to do such tests.")
54-
void run() throws IOException {
50+
import static org.junit.jupiter.api.Assertions.assertEquals;
51+
import static org.junit.jupiter.api.Assertions.assertThrows;
52+
import static org.junit.jupiter.api.Assertions.assertTrue;
5553

56-
File model1 = trainModel();
54+
public class TokenNameFinderToolTest {
5755

58-
String[] args = new String[] {model1.getAbsolutePath()};
56+
/*
57+
* Programmatically raises the log level to INFO so that the messages logged by
58+
* the tool under test can be captured and verified
59+
*/
60+
@BeforeAll
61+
public static void prepare() {
62+
LoggerContext context = (LoggerContext) LoggerFactory.getILoggerFactory();
63+
Logger logger = context.getLogger("opennlp.tools.cmdline.namefind");
64+
logger.setLevel(Level.INFO);
65+
}
5966

60-
final String in = "It is Stefanie Schmidt.\n\nNothing in this sentence.";
61-
InputStream stream = new ByteArrayInputStream(in.getBytes(StandardCharsets.UTF_8));
67+
/*
68+
* Programmatically restores the default log level (= OFF) after the tests
69+
*/
70+
@AfterAll
71+
public static void cleanup() {
72+
LoggerContext context = (LoggerContext) LoggerFactory.getILoggerFactory();
73+
Logger logger = context.getLogger("opennlp.tools.cmdline.namefind");
74+
logger.setLevel(Level.OFF);
75+
}
6276

63-
System.setIn(stream);
77+
@Test
78+
void run() throws IOException {
79+
try (LogCaptor logCaptor = LogCaptor.forClass(TokenNameFinderTool.class)) {
80+
File model1 = trainModel();
81+
String[] args = new String[] {model1.getAbsolutePath()};
6482

65-
ByteArrayOutputStream baos = new ByteArrayOutputStream();
66-
PrintStream ps = new PrintStream(baos);
67-
System.setOut(ps);
83+
final String in = "It is Stefanie Schmidt.\n";
84+
InputStream stream = new ByteArrayInputStream(in.getBytes(StandardCharsets.UTF_8));
6885

69-
TokenNameFinderTool tool = new TokenNameFinderTool();
70-
tool.run(args);
86+
System.setIn(stream);
7187

72-
final String content = baos.toString(StandardCharsets.UTF_8);
73-
Assertions.assertTrue(content.contains("It is <START:person> Stefanie Schmidt. <END>"));
88+
TokenNameFinderTool tool = new TokenNameFinderTool();
89+
tool.run(args);
7490

75-
Assertions.assertTrue(model1.delete());
91+
assertEquals(1, logCaptor.getInfoLogs().size());
92+
final String content = logCaptor.getInfoLogs().get(0);
93+
logCaptor.clearLogs();
94+
assertEquals("It is <START:person> Stefanie Schmidt. <END>", content);
95+
assertTrue(model1.delete());
96+
}
7697
}
7798

7899
@Test
79100
void invalidModel() {
80-
81-
Assertions.assertThrows(TerminateToolException.class, () -> {
82-
101+
assertThrows(TerminateToolException.class, () -> {
83102
String[] args = new String[] {"invalidmodel.bin"};
84-
85103
TokenNameFinderTool tool = new TokenNameFinderTool();
86104
tool.run(args);
87105

88106
});
89-
90-
91107
}
92108

93109
@Test
94-
//TODO OPENNLP-1447
95-
@Disabled(value = "OPENNLP-1447: These kind of tests won't work anymore. " +
96-
"We need to find a way to redirect log output (i.e. implement " +
97-
"a custom log adapter and plug it in, if we want to do such tests.")
98110
void usage() {
111+
try (LogCaptor logCaptor = LogCaptor.forClass(TokenNameFinderTool.class)) {
112+
String[] args = new String[] {};
99113

100-
String[] args = new String[] {};
101-
102-
ByteArrayOutputStream baos = new ByteArrayOutputStream();
103-
PrintStream ps = new PrintStream(baos);
104-
System.setOut(ps);
105-
106-
TokenNameFinderTool tool = new TokenNameFinderTool();
107-
tool.run(args);
108-
109-
final String content = baos.toString(StandardCharsets.UTF_8);
110-
Assertions.assertEquals(tool.getHelp(), content.trim());
114+
TokenNameFinderTool tool = new TokenNameFinderTool();
115+
tool.run(args);
111116

117+
assertEquals(1, logCaptor.getInfoLogs().size());
118+
final String content = logCaptor.getInfoLogs().get(0);
119+
assertEquals(tool.getHelp(), content.trim());
120+
}
112121
}
113122

114123
private File trainModel() throws IOException {
115-
116124
ObjectStream<String> lineStream =
117125
new PlainTextByLineStream(new MockInputStreamFactory(
118126
new File("opennlp/tools/namefind/AnnotatedSentencesWithTypes.txt")),
@@ -123,7 +131,6 @@ private File trainModel() throws IOException {
123131
params.put(TrainingParameters.CUTOFF_PARAM, 1);
124132

125133
TokenNameFinderModel model;
126-
127134
TokenNameFinderFactory nameFinderFactory = new TokenNameFinderFactory();
128135

129136
try (ObjectStream<NameSample> sampleStream = new NameSampleDataStream(lineStream)) {
@@ -132,12 +139,10 @@ private File trainModel() throws IOException {
132139
}
133140

134141
File modelFile = Files.createTempFile("model", ".bin").toFile();
135-
136142
try (OutputStream modelOut =
137143
new BufferedOutputStream(new FileOutputStream(modelFile))) {
138144
model.serialize(modelOut);
139145
}
140-
141146
return modelFile;
142147
}
143148

opennlp-tools/src/test/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerToolTest.java

+61-46
Original file line numberDiff line numberDiff line change
@@ -18,33 +18,40 @@
1818
package opennlp.tools.cmdline.tokenizer;
1919

2020
import java.io.ByteArrayInputStream;
21-
import java.io.ByteArrayOutputStream;
2221
import java.io.File;
2322
import java.io.IOException;
2423
import java.io.InputStream;
25-
import java.io.PrintStream;
2624
import java.nio.charset.StandardCharsets;
2725
import java.nio.file.Files;
2826
import java.nio.file.Path;
2927
import java.nio.file.StandardOpenOption;
3028

31-
import org.junit.jupiter.api.Assertions;
32-
import org.junit.jupiter.api.Disabled;
29+
import ch.qos.logback.classic.Level;
30+
import ch.qos.logback.classic.Logger;
31+
import ch.qos.logback.classic.LoggerContext;
32+
import nl.altindag.log.LogCaptor;
33+
import org.junit.jupiter.api.AfterAll;
34+
import org.junit.jupiter.api.BeforeAll;
3335
import org.junit.jupiter.api.Test;
36+
import org.slf4j.LoggerFactory;
3437

3538
import opennlp.tools.AbstractTempDirTest;
39+
import opennlp.tools.cmdline.CmdLineUtil;
3640
import opennlp.tools.cmdline.StreamFactoryRegistry;
3741
import opennlp.tools.cmdline.TerminateToolException;
3842
import opennlp.tools.dictionary.Dictionary;
3943
import opennlp.tools.util.InvalidFormatException;
4044

45+
import static org.junit.jupiter.api.Assertions.assertEquals;
46+
import static org.junit.jupiter.api.Assertions.assertNotNull;
47+
import static org.junit.jupiter.api.Assertions.assertThrows;
48+
import static org.junit.jupiter.api.Assertions.assertTrue;
49+
4150
/**
4251
* Tests for the {@link TokenizerTrainerTool} class.
4352
*/
4453
public class TokenizerTrainerToolTest extends AbstractTempDirTest {
4554

46-
private TokenizerTrainerTool tokenizerTrainerTool;
47-
4855
private final String sampleSuccessData =
4956
"Pierre Vinken<SPLIT>, 61 years old<SPLIT>, will join the board as a nonexecutive " +
5057
"director Nov. 29<SPLIT>.\n" +
@@ -54,55 +61,70 @@ public class TokenizerTrainerToolTest extends AbstractTempDirTest {
5461

5562
private final String sampleFailureData = "It is Fail Test Case.\n\nNothing in this sentence.";
5663

64+
/*
65+
* Programmatically raises the log level to INFO so that the messages logged by
66+
* CmdLineUtil can be captured and verified
67+
*/
68+
@BeforeAll
69+
public static void prepare() {
70+
LoggerContext context = (LoggerContext) LoggerFactory.getILoggerFactory();
71+
Logger logger = context.getLogger("opennlp.tools.cmdline.CmdLineUtil");
72+
logger.setLevel(Level.INFO);
73+
}
74+
75+
/*
76+
* Programmatically restores the default log level (= OFF) after the tests
77+
*/
78+
@AfterAll
79+
public static void cleanup() {
80+
LoggerContext context = (LoggerContext) LoggerFactory.getILoggerFactory();
81+
Logger logger = context.getLogger("opennlp.tools.cmdline.CmdLineUtil");
82+
logger.setLevel(Level.OFF);
83+
}
84+
5785
@Test
5886
public void testGetShortDescription() {
59-
tokenizerTrainerTool = new TokenizerTrainerTool();
60-
Assertions.assertEquals("Trainer for the learnable tokenizer",
87+
TokenizerTrainerTool tokenizerTrainerTool = new TokenizerTrainerTool();
88+
assertEquals("Trainer for the learnable tokenizer",
6189
tokenizerTrainerTool.getShortDescription());
6290
}
6391

6492
@Test
6593
public void testLoadDictHappyCase() throws IOException {
6694
File dictFile = new File("lang/ga/abb_GA.xml");
6795
Dictionary dict = TokenizerTrainerTool.loadDict(dictFile);
68-
Assertions.assertNotNull(dict);
96+
assertNotNull(dict);
6997
}
7098

7199
@Test
72100
public void testLoadDictFailCase() {
73-
Assertions.assertThrows(InvalidFormatException.class , () ->
101+
assertThrows(InvalidFormatException.class , () ->
74102
TokenizerTrainerTool.loadDict(prepareDataFile("")));
75103
}
76104

77-
//TODO OPENNLP-1447
78-
@Disabled(value = "OPENNLP-1447: These kind of tests won't work anymore. " +
79-
"We need to find a way to redirect log output (i.e. implement " +
80-
"a custom log adapter and plug it in, if we want to do such tests.")
105+
@Test
81106
public void testTestRunHappyCase() throws IOException {
82-
File model = tempDir.resolve("model-en.bin").toFile();
83-
84-
String[] args =
85-
new String[] { "-model" , model.getAbsolutePath() , "-alphaNumOpt" , "false" , "-lang" , "en" ,
86-
"-data" , String.valueOf(prepareDataFile(sampleSuccessData)) , "-encoding" , "UTF-8" };
87-
88-
InputStream stream = new ByteArrayInputStream(sampleSuccessData.getBytes(StandardCharsets.UTF_8));
89-
System.setIn(stream);
90-
ByteArrayOutputStream baos = new ByteArrayOutputStream();
91-
PrintStream ps = new PrintStream(baos);
92-
System.setOut(ps);
93-
94-
tokenizerTrainerTool = new TokenizerTrainerTool();
95-
tokenizerTrainerTool.run(StreamFactoryRegistry.DEFAULT_FORMAT , args);
96-
97-
final String content = baos.toString(StandardCharsets.UTF_8);
98-
Assertions.assertTrue(content.contains("Number of Event Tokens: 171"));
99-
Assertions.assertTrue(model.delete());
107+
try (LogCaptor logCaptor = LogCaptor.forClass(CmdLineUtil.class)) {
108+
File model = tempDir.resolve("model-en.bin").toFile();
109+
110+
String[] args =
111+
new String[] { "-model" , model.getAbsolutePath() , "-alphaNumOpt" , "false" , "-lang" , "en" ,
112+
"-data" , String.valueOf(prepareDataFile(sampleSuccessData)) , "-encoding" , "UTF-8" };
113+
114+
InputStream stream = new ByteArrayInputStream(sampleSuccessData.getBytes(StandardCharsets.UTF_8));
115+
System.setIn(stream);
116+
117+
TokenizerTrainerTool trainerTool = new TokenizerTrainerTool();
118+
trainerTool.run(StreamFactoryRegistry.DEFAULT_FORMAT , args);
119+
120+
assertEquals(3, logCaptor.getInfoLogs().size());
121+
final String content = logCaptor.getInfoLogs().get(2);
122+
assertTrue(content.startsWith("Wrote tokenizer model to path:"));
123+
assertTrue(model.delete());
124+
}
100125
}
101126

102-
//TODO OPENNLP-1447
103-
@Disabled(value = "OPENNLP-1447: These kind of tests won't work anymore. " +
104-
"We need to find a way to redirect log output (i.e. implement " +
105-
"a custom log adapter and plug it in, if we want to do such tests.")
127+
@Test
106128
public void testTestRunExceptionCase() throws IOException {
107129
File model = tempDir.resolve("model-en.bin").toFile();
108130
model.deleteOnExit();
@@ -111,17 +133,10 @@ public void testTestRunExceptionCase() throws IOException {
111133
new String[] { "-model" , model.getAbsolutePath() , "-alphaNumOpt" , "false" , "-lang" , "en" ,
112134
"-data" , String.valueOf(prepareDataFile(sampleFailureData)) , "-encoding" , "UTF-8" };
113135

114-
InputStream stream = new ByteArrayInputStream(sampleFailureData.getBytes(StandardCharsets.UTF_8));
115-
System.setIn(stream);
116-
ByteArrayOutputStream baos = new ByteArrayOutputStream();
117-
PrintStream ps = new PrintStream(baos);
118-
System.setOut(ps);
119-
120-
Assertions.assertThrows(TerminateToolException.class , () -> {
121-
tokenizerTrainerTool = new TokenizerTrainerTool();
122-
tokenizerTrainerTool.run(StreamFactoryRegistry.DEFAULT_FORMAT , args);
136+
assertThrows(TerminateToolException.class , () -> {
137+
TokenizerTrainerTool trainerTool = new TokenizerTrainerTool();
138+
trainerTool.run(StreamFactoryRegistry.DEFAULT_FORMAT , args);
123139
});
124-
125140
}
126141

127142
// This is guaranteed to be deleted after the test finishes.

opennlp-tools/src/test/resources/logback-test.xml

+5-1
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,16 @@
2323

2424
<appender name="consoleAppender" class="ch.qos.logback.core.ConsoleAppender">
2525
<encoder>
26-
<pattern>%date{HH:mm:ss.SSS} [%thread] %-5level %class{36}.%method:%line - %msg%n</pattern>
26+
<pattern>%date{HH:mm:ss.SSS} [%thread] %-4level %class{36}.%method:%line - %msg%n</pattern>
2727
</encoder>
2828
</appender>
2929

3030
<logger name="opennlp" level="off"/>
3131

32+
<logger name="opennlp.tools.cmdline.namefind" level="off"/>
33+
34+
<logger name="opennlp.tools.cmdline.CmdLineUtil" level="off"/>
35+
3236
<root level="off">
3337
<appender-ref ref="consoleAppender" />
3438
</root>

0 commit comments

Comments
 (0)