18
18
package opennlp .tools .cmdline .tokenizer ;
19
19
20
20
import java .io .ByteArrayInputStream ;
21
- import java .io .ByteArrayOutputStream ;
22
21
import java .io .File ;
23
22
import java .io .IOException ;
24
23
import java .io .InputStream ;
25
- import java .io .PrintStream ;
26
24
import java .nio .charset .StandardCharsets ;
27
25
import java .nio .file .Files ;
28
26
import java .nio .file .Path ;
29
27
import java .nio .file .StandardOpenOption ;
30
28
31
- import org .junit .jupiter .api .Assertions ;
32
- import org .junit .jupiter .api .Disabled ;
29
+ import ch .qos .logback .classic .Level ;
30
+ import ch .qos .logback .classic .Logger ;
31
+ import ch .qos .logback .classic .LoggerContext ;
32
+ import nl .altindag .log .LogCaptor ;
33
+ import org .junit .jupiter .api .AfterAll ;
34
+ import org .junit .jupiter .api .BeforeAll ;
33
35
import org .junit .jupiter .api .Test ;
36
+ import org .slf4j .LoggerFactory ;
34
37
35
38
import opennlp .tools .AbstractTempDirTest ;
39
+ import opennlp .tools .cmdline .CmdLineUtil ;
36
40
import opennlp .tools .cmdline .StreamFactoryRegistry ;
37
41
import opennlp .tools .cmdline .TerminateToolException ;
38
42
import opennlp .tools .dictionary .Dictionary ;
39
43
import opennlp .tools .util .InvalidFormatException ;
40
44
45
+ import static org .junit .jupiter .api .Assertions .assertEquals ;
46
+ import static org .junit .jupiter .api .Assertions .assertNotNull ;
47
+ import static org .junit .jupiter .api .Assertions .assertThrows ;
48
+ import static org .junit .jupiter .api .Assertions .assertTrue ;
49
+
41
50
/**
42
51
* Tests for the {@link TokenizerTrainerTool} class.
43
52
*/
44
53
public class TokenizerTrainerToolTest extends AbstractTempDirTest {
45
54
46
- private TokenizerTrainerTool tokenizerTrainerTool ;
47
-
48
55
private final String sampleSuccessData =
49
56
"Pierre Vinken<SPLIT>, 61 years old<SPLIT>, will join the board as a nonexecutive " +
50
57
"director Nov. 29<SPLIT>.\n " +
@@ -54,55 +61,70 @@ public class TokenizerTrainerToolTest extends AbstractTempDirTest {
54
61
55
62
private final String sampleFailureData = "It is Fail Test Case.\n \n Nothing in this sentence." ;
56
63
64
+ /*
65
+ * Programmatically set the CmdLineUtil logger to INFO so that the log messages
66
+ * the tests assert on are actually emitted and can be captured
67
+ */
68
+ @ BeforeAll
69
+ public static void prepare () {
70
+ LoggerContext context = (LoggerContext ) LoggerFactory .getILoggerFactory ();
71
+ Logger logger = context .getLogger ("opennlp.tools.cmdline.CmdLineUtil" );
72
+ logger .setLevel (Level .INFO );
73
+ }
74
+
75
+ /*
76
+ * Programmatically restore the default log level (= OFF) after the tests
77
+ */
78
+ @ AfterAll
79
+ public static void cleanup () {
80
+ LoggerContext context = (LoggerContext ) LoggerFactory .getILoggerFactory ();
81
+ Logger logger = context .getLogger ("opennlp.tools.cmdline.CmdLineUtil" );
82
+ logger .setLevel (Level .OFF );
83
+ }
84
+
57
85
@ Test
58
86
public void testGetShortDescription () {
59
- tokenizerTrainerTool = new TokenizerTrainerTool ();
60
- Assertions . assertEquals ("Trainer for the learnable tokenizer" ,
87
+ TokenizerTrainerTool tokenizerTrainerTool = new TokenizerTrainerTool ();
88
+ assertEquals ("Trainer for the learnable tokenizer" ,
61
89
tokenizerTrainerTool .getShortDescription ());
62
90
}
63
91
64
92
@ Test
65
93
public void testLoadDictHappyCase () throws IOException {
66
94
File dictFile = new File ("lang/ga/abb_GA.xml" );
67
95
Dictionary dict = TokenizerTrainerTool .loadDict (dictFile );
68
- Assertions . assertNotNull (dict );
96
+ assertNotNull (dict );
69
97
}
70
98
71
99
@ Test
72
100
public void testLoadDictFailCase () {
73
- Assertions . assertThrows (InvalidFormatException .class , () ->
101
+ assertThrows (InvalidFormatException .class , () ->
74
102
TokenizerTrainerTool .loadDict (prepareDataFile ("" )));
75
103
}
76
104
77
- //TODO OPENNLP-1447
78
- @ Disabled (value = "OPENNLP-1447: These kind of tests won't work anymore. " +
79
- "We need to find a way to redirect log output (i.e. implement " +
80
- "a custom log adapter and plug it in, if we want to do such tests." )
105
+ @ Test
81
106
public void testTestRunHappyCase () throws IOException {
82
- File model = tempDir . resolve ( "model-en.bin" ). toFile ();
83
-
84
- String [] args =
85
- new String [] { "-model" , model . getAbsolutePath () , "-alphaNumOpt" , "false" , "-lang" , "en" ,
86
- "-data " , String . valueOf ( prepareDataFile ( sampleSuccessData )) , "-encoding " , "UTF-8" };
87
-
88
- InputStream stream = new ByteArrayInputStream ( sampleSuccessData . getBytes ( StandardCharsets . UTF_8 ));
89
- System . setIn ( stream );
90
- ByteArrayOutputStream baos = new ByteArrayOutputStream ( );
91
- PrintStream ps = new PrintStream ( baos );
92
- System . setOut ( ps );
93
-
94
- tokenizerTrainerTool = new TokenizerTrainerTool ();
95
- tokenizerTrainerTool . run ( StreamFactoryRegistry . DEFAULT_FORMAT , args );
96
-
97
- final String content = baos . toString ( StandardCharsets . UTF_8 );
98
- Assertions . assertTrue (content . contains ( "Number of Event Tokens: 171" ));
99
- Assertions . assertTrue ( model . delete ());
107
+ try ( LogCaptor logCaptor = LogCaptor . forClass ( CmdLineUtil . class )) {
108
+ File model = tempDir . resolve ( "model-en.bin" ). toFile ();
109
+
110
+ String [] args =
111
+ new String [] { "-model " , model . getAbsolutePath () , "-alphaNumOpt " , "false" , "-lang" , "en" ,
112
+ "-data" , String . valueOf ( prepareDataFile ( sampleSuccessData )) , "-encoding" , "UTF-8" };
113
+
114
+ InputStream stream = new ByteArrayInputStream ( sampleSuccessData . getBytes ( StandardCharsets . UTF_8 ) );
115
+ System . setIn ( stream );
116
+
117
+ TokenizerTrainerTool trainerTool = new TokenizerTrainerTool ( );
118
+ trainerTool . run ( StreamFactoryRegistry . DEFAULT_FORMAT , args );
119
+
120
+ assertEquals ( 3 , logCaptor . getInfoLogs (). size () );
121
+ final String content = logCaptor . getInfoLogs (). get ( 2 );
122
+ assertTrue ( content . startsWith ( "Wrote tokenizer model to path:" ) );
123
+ assertTrue (model . delete ( ));
124
+ }
100
125
}
101
126
102
- //TODO OPENNLP-1447
103
- @ Disabled (value = "OPENNLP-1447: These kind of tests won't work anymore. " +
104
- "We need to find a way to redirect log output (i.e. implement " +
105
- "a custom log adapter and plug it in, if we want to do such tests." )
127
+ @ Test
106
128
public void testTestRunExceptionCase () throws IOException {
107
129
File model = tempDir .resolve ("model-en.bin" ).toFile ();
108
130
model .deleteOnExit ();
@@ -111,17 +133,10 @@ public void testTestRunExceptionCase() throws IOException {
111
133
new String [] { "-model" , model .getAbsolutePath () , "-alphaNumOpt" , "false" , "-lang" , "en" ,
112
134
"-data" , String .valueOf (prepareDataFile (sampleFailureData )) , "-encoding" , "UTF-8" };
113
135
114
- InputStream stream = new ByteArrayInputStream (sampleFailureData .getBytes (StandardCharsets .UTF_8 ));
115
- System .setIn (stream );
116
- ByteArrayOutputStream baos = new ByteArrayOutputStream ();
117
- PrintStream ps = new PrintStream (baos );
118
- System .setOut (ps );
119
-
120
- Assertions .assertThrows (TerminateToolException .class , () -> {
121
- tokenizerTrainerTool = new TokenizerTrainerTool ();
122
- tokenizerTrainerTool .run (StreamFactoryRegistry .DEFAULT_FORMAT , args );
136
+ assertThrows (TerminateToolException .class , () -> {
137
+ TokenizerTrainerTool trainerTool = new TokenizerTrainerTool ();
138
+ trainerTool .run (StreamFactoryRegistry .DEFAULT_FORMAT , args );
123
139
});
124
-
125
140
}
126
141
127
142
// This is guaranteed to be deleted after the test finishes.
0 commit comments