Skip to content

Commit 49678c3

Browse files
authored
OPENNLP-1677: Extend JavaDoc of POSTaggerME (#717)
1 parent 2f2f631 commit 49678c3

File tree

1 file changed

+52
-22
lines changed

1 file changed

+52
-22
lines changed

opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java

Lines changed: 52 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -50,23 +50,30 @@
5050
import opennlp.tools.util.featuregen.StringPattern;
5151

5252
/**
53-
* A {@link POSTagger part-of-speech tagger} that uses maximum entropy.
53+
* A {@link POSTagger part-of-speech tagger} implementation that uses maximum entropy.
5454
* <p>
55-
* Tries to predict whether words are nouns, verbs, or any of 70 other POS tags
55+
* Tries to predict whether words are nouns, verbs, or any other {@link POSTagFormat POS tags}
5656
* depending on their surrounding context.
57+
*
58+
* @see POSModel
59+
* @see POSTagFormat
60+
* @see POSTagger
5761
*/
5862
public class POSTaggerME implements POSTagger {
5963

6064
private static final Logger logger = LoggerFactory.getLogger(POSTaggerME.class);
6165

66+
/**
67+
* The default beam size value is 3.
68+
*/
6269
public static final int DEFAULT_BEAM_SIZE = 3;
6370

6471
private final POSModel modelPackage;
6572

6673
/**
6774
* The {@link POSContextGenerator feature context generator}.
6875
*/
69-
protected final POSContextGenerator contextGen;
76+
protected final POSContextGenerator cg;
7077

7178
/**
7279
* {@link TagDictionary} used for restricting words to a fixed set of tags.
@@ -140,7 +147,7 @@ public POSTaggerME(POSModel model, POSTagFormat format) {
140147

141148
modelPackage = model;
142149

143-
contextGen = factory.getPOSContextGenerator(beamSize);
150+
cg = factory.getPOSContextGenerator(beamSize);
144151
tagDictionary = factory.getTagDictionary();
145152
size = beamSize;
146153

@@ -165,14 +172,20 @@ public String[] getAllPosTags() {
165172
return model.getOutcomes();
166173
}
167174

175+
/**
176+
* {@inheritDoc}
177+
*/
168178
@Override
169179
public String[] tag(String[] sentence) {
170180
return this.tag(sentence, null);
171181
}
172182

183+
/**
184+
* {@inheritDoc}
185+
*/
173186
@Override
174187
public String[] tag(String[] sentence, Object[] additionalContext) {
175-
bestSequence = model.bestSequence(sentence, additionalContext, contextGen, sequenceValidator);
188+
bestSequence = model.bestSequence(sentence, additionalContext, cg, sequenceValidator);
176189
final List<String> t = bestSequence.getOutcomes();
177190
return convertTags(t);
178191
}
@@ -186,7 +199,7 @@ public String[] tag(String[] sentence, Object[] additionalContext) {
186199
*/
187200
public String[][] tag(int numTaggings, String[] sentence) {
188201
Sequence[] bestSequences = model.bestSequences(numTaggings, sentence, null,
189-
contextGen, sequenceValidator);
202+
cg, sequenceValidator);
190203
String[][] tags = new String[bestSequences.length][];
191204
for (int si = 0; si < tags.length; si++) {
192205
List<String> t = bestSequences[si].getOutcomes();
@@ -204,18 +217,25 @@ private String[] convertTags(List<String> t) {
204217
}
205218
}
206219

220+
/**
221+
* {@inheritDoc}
222+
*/
207223
@Override
208224
public Sequence[] topKSequences(String[] sentence) {
209225
return this.topKSequences(sentence, null);
210226
}
211227

228+
/**
229+
* {@inheritDoc}
230+
*/
212231
@Override
213232
public Sequence[] topKSequences(String[] sentence, Object[] additionalContext) {
214-
return model.bestSequences(size, sentence, additionalContext, contextGen, sequenceValidator);
233+
return model.bestSequences(size, sentence, additionalContext, cg, sequenceValidator);
215234
}
216235

217236
/**
218-
* Populates the specified array with the probabilities for each tag of the last tagged sentence.
237+
* Populates the specified {@code probs} array with the probabilities
238+
* for each tag of the last tagged sentence.
219239
*
220240
* @param probs An array to put the probabilities into.
221241
*/
@@ -239,7 +259,7 @@ public String[] getOrderedTags(List<String> words, List<String> tags, int index,
239259
MaxentModel posModel = modelPackage.getArtifact(POSModel.POS_MODEL_ENTRY_NAME);
240260
if (posModel != null) {
241261

242-
double[] probs = posModel.eval(contextGen.getContext(index, words.toArray(new String[0]),
262+
double[] probs = posModel.eval(cg.getContext(index, words.toArray(new String[0]),
243263
tags.toArray(new String[0]), null));
244264

245265
String[] orderedTags = new String[probs.length];
@@ -263,34 +283,44 @@ public String[] getOrderedTags(List<String> words, List<String> tags, int index,
263283
}
264284
}
265285

266-
public static POSModel train(String languageCode,
267-
ObjectStream<POSSample> samples, TrainingParameters trainParams,
268-
POSTaggerFactory posFactory) throws IOException {
269-
270-
int beamSize = trainParams.getIntParameter(BeamSearch.BEAM_SIZE_PARAMETER, POSTaggerME.DEFAULT_BEAM_SIZE);
271-
272-
POSContextGenerator contextGenerator = posFactory.getPOSContextGenerator();
286+
/**
287+
* Starts a training of a {@link POSModel} with the given parameters.
288+
*
289+
* @param languageCode The ISO code of the language the model is trained for. Must not be {@code null}.
290+
* @param samples The {@link ObjectStream} of {@link POSSample} used as input for training.
291+
* @param mlParams The {@link TrainingParameters} for the context of the training process.
292+
* @param posFactory The {@link POSTaggerFactory} for creating related objects as defined
293+
* via {@code mlParams}.
294+
*
295+
* @return A valid, trained {@link POSModel} instance.
296+
* @throws IOException Thrown if an I/O error occurs.
297+
*/
298+
public static POSModel train(String languageCode, ObjectStream<POSSample> samples,
299+
TrainingParameters mlParams, POSTaggerFactory posFactory)
300+
throws IOException {
273301

274-
Map<String, String> manifestInfoEntries = new HashMap<>();
302+
final int beamSize = mlParams.getIntParameter(
303+
BeamSearch.BEAM_SIZE_PARAMETER, POSTaggerME.DEFAULT_BEAM_SIZE);
275304

276-
TrainerType trainerType = TrainerFactory.getTrainerType(trainParams);
305+
final POSContextGenerator contextGenerator = posFactory.getPOSContextGenerator();
306+
final TrainerType trainerType = TrainerFactory.getTrainerType(mlParams);
307+
final Map<String, String> manifestInfoEntries = new HashMap<>();
277308

278309
MaxentModel posModel = null;
279310
SequenceClassificationModel seqPosModel = null;
280311
if (TrainerType.EVENT_MODEL_TRAINER.equals(trainerType)) {
281312
ObjectStream<Event> es = new POSSampleEventStream(samples, contextGenerator);
282313

283-
EventTrainer trainer = TrainerFactory.getEventTrainer(trainParams,
284-
manifestInfoEntries);
314+
EventTrainer trainer = TrainerFactory.getEventTrainer(mlParams, manifestInfoEntries);
285315
posModel = trainer.train(es);
286316
} else if (TrainerType.EVENT_MODEL_SEQUENCE_TRAINER.equals(trainerType)) {
287317
POSSampleSequenceStream ss = new POSSampleSequenceStream(samples, contextGenerator);
288318
EventModelSequenceTrainer<POSSample> trainer =
289-
TrainerFactory.getEventModelSequenceTrainer(trainParams, manifestInfoEntries);
319+
TrainerFactory.getEventModelSequenceTrainer(mlParams, manifestInfoEntries);
290320
posModel = trainer.train(ss);
291321
} else if (TrainerType.SEQUENCE_TRAINER.equals(trainerType)) {
292322
SequenceTrainer trainer = TrainerFactory.getSequenceModelTrainer(
293-
trainParams, manifestInfoEntries);
323+
mlParams, manifestInfoEntries);
294324

295325
// TODO: This will probably cause issues, since the feature generator uses the outcomes array
296326

0 commit comments

Comments
 (0)