You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jz...@apache.org on 2022/12/11 14:14:46 UTC
[opennlp] branch master updated: OPENNLP-1409 Enhance JavaDoc in opennlp.tools.lemmatizer package (#455)
This is an automated email from the ASF dual-hosted git repository.
jzemerick pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp.git
The following commit(s) were added to refs/heads/master by this push:
new 453aee5e OPENNLP-1409 Enhance JavaDoc in opennlp.tools.lemmatizer package (#455)
453aee5e is described below
commit 453aee5e31e5e367fd07daf363ca1488543fb4a1
Author: Martin Wiesner <ma...@users.noreply.github.com>
AuthorDate: Sun Dec 11 15:14:41 2022 +0100
OPENNLP-1409 Enhance JavaDoc in opennlp.tools.lemmatizer package (#455)
- adds missing JavaDoc
- improves existing documentation for clarity
- removes superfluous text
- adds 'final' modifier where useful and applicable
- adds 'Override' annotation where useful and applicable
- fixes several typos
---
.../lemmatizer/MorfologikLemmatizer.java | 29 ++--
.../DefaultLemmatizerContextGenerator.java | 19 +--
.../DefaultLemmatizerSequenceValidator.java | 4 +
.../tools/lemmatizer/DictionaryLemmatizer.java | 138 +++++++++++++------
.../java/opennlp/tools/lemmatizer/LemmaSample.java | 41 +++---
.../tools/lemmatizer/LemmaSampleEventStream.java | 12 +-
.../lemmatizer/LemmaSampleSequenceStream.java | 3 +
.../tools/lemmatizer/LemmaSampleStream.java | 13 +-
.../java/opennlp/tools/lemmatizer/Lemmatizer.java | 17 ++-
.../lemmatizer/LemmatizerContextGenerator.java | 12 +-
.../tools/lemmatizer/LemmatizerEvaluator.java | 31 ++---
.../tools/lemmatizer/LemmatizerFactory.java | 10 +-
.../opennlp/tools/lemmatizer/LemmatizerME.java | 146 +++++++++++++++------
.../opennlp/tools/lemmatizer/LemmatizerModel.java | 81 ++++++++++--
.../opennlp/tools/lemmatizer/package-info.java | 2 +-
.../opennlp/tools/lemmatizer/DummyLemmatizer.java | 7 +-
16 files changed, 390 insertions(+), 175 deletions(-)
diff --git a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizer.java b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizer.java
index f9597a9c..a9ce6b26 100644
--- a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizer.java
+++ b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizer.java
@@ -36,12 +36,26 @@ public class MorfologikLemmatizer implements Lemmatizer {
private final Dictionary dictionary;
- public MorfologikLemmatizer(Path dictionaryPath) throws IllegalArgumentException,
- IOException {
+ /**
+ * Initializes a {@link MorfologikLemmatizer} and related {@link Dictionary}
+ * from the input tab separated dictionary.
+ *
+ * @param dictionaryPath The dictionary referenced via a valid, readable {@link Path}.
+ *
+ * @throws IOException Thrown if IO errors occurred while reading in from
+ * {@code dictionaryPath}.
+ */
+ public MorfologikLemmatizer(Path dictionaryPath) throws IOException {
this(Dictionary.read(dictionaryPath));
}
- public MorfologikLemmatizer(Dictionary dictionary) throws IllegalArgumentException {
+ /**
+ * Initializes a {@link MorfologikLemmatizer} that is backed by the given
+ * {@link Dictionary} instance.
+ *
+ * @param dictionary The {@link Dictionary} to be used.
+ */
+ public MorfologikLemmatizer(Dictionary dictionary) {
this.dictionary = dictionary;
}
@@ -77,14 +91,7 @@ public class MorfologikLemmatizer implements Lemmatizer {
return lemmas;
}
-
- /**
- * Generates a lemma tags for the word and postag returning the result in list of possible lemmas.
- *
- * @param toks an array of the tokens
- * @param tags an array of the pos tags
- * @return an list of possible lemmas for each token in the sequence.
- */
+ @Override
public List<List<String>> lemmatize(List<String> toks, List<String> tags) {
List<List<String>> lemmas = new ArrayList<>();
for (int i = 0; i < toks.size(); i++) {
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DefaultLemmatizerContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DefaultLemmatizerContextGenerator.java
index 7da7107b..0e720f7d 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DefaultLemmatizerContextGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DefaultLemmatizerContextGenerator.java
@@ -23,18 +23,19 @@ import java.util.regex.Pattern;
/**
* Simple feature generator for learning statistical lemmatizers.
- * Features based on Grzegorz Chrupała. 2008. Towards a Machine-Learning
- * Architecture for Lexical Functional Grammar Parsing. PhD dissertation,
- * Dublin City University
- * @version 2016-02-15
+ * <p>
+ * Features based on Grzegorz Chrupała. 2008.
+ * <a href="http://grzegorz.chrupala.me/papers/phd-single.pdf">
+ * Towards a Machine-Learning Architecture for Lexical Functional Grammar Parsing.
+ * </a> PhD dissertation, Dublin City University
*/
public class DefaultLemmatizerContextGenerator implements LemmatizerContextGenerator {
private static final int PREFIX_LENGTH = 5;
private static final int SUFFIX_LENGTH = 7;
- private static Pattern hasCap = Pattern.compile("[A-Z]");
- private static Pattern hasNum = Pattern.compile("[0-9]");
+ private static final Pattern PATTERN_HAS_CAP = Pattern.compile("[A-Z]");
+ private static final Pattern PATTERN_HAS_NUM = Pattern.compile("[0-9]");
public DefaultLemmatizerContextGenerator() {
}
@@ -55,11 +56,13 @@ public class DefaultLemmatizerContextGenerator implements LemmatizerContextGener
return suffs;
}
+ @Override
public String[] getContext(int index, String[] sequence, String[] priorDecisions,
Object[] additionalContext) {
return getContext(index, sequence, (String[]) additionalContext[0], priorDecisions);
}
+ @Override
public String[] getContext(int index, String[] toks, String[] tags, String[] preds) {
// Word
String w0;
@@ -102,11 +105,11 @@ public class DefaultLemmatizerContextGenerator implements LemmatizerContextGener
features.add("h");
}
- if (hasCap.matcher(lex).find()) {
+ if (PATTERN_HAS_CAP.matcher(lex).find()) {
features.add("c");
}
- if (hasNum.matcher(lex).find()) {
+ if (PATTERN_HAS_NUM.matcher(lex).find()) {
features.add("d");
}
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DefaultLemmatizerSequenceValidator.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DefaultLemmatizerSequenceValidator.java
index 8a697ebd..9f843126 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DefaultLemmatizerSequenceValidator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DefaultLemmatizerSequenceValidator.java
@@ -19,9 +19,13 @@ package opennlp.tools.lemmatizer;
import opennlp.tools.util.SequenceValidator;
+/**
+ * The default lemmatizer {@link SequenceValidator} implementation.
+ */
public class DefaultLemmatizerSequenceValidator implements SequenceValidator<String> {
//TODO implement this
+ @Override
public boolean validSequence(int i, String[] sequence, String[] s, String outcome) {
return true;
}
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java
index e4936085..1416643d 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java
@@ -25,6 +25,7 @@ import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
@@ -33,47 +34,106 @@ import java.util.List;
import java.util.Map;
/**
- * Lemmatize by simple dictionary lookup into a hashmap built from a file
- * containing, for each line, word\tabpostag\tablemma.
- * @version 2014-07-08
+ * A {@link Lemmatizer} implementation that works by simple dictionary lookup into
+ * a {@link Map} built from a file containing, for each line:
+ * <p>
+ * {@code word\tabpostag\tablemma}.
*/
public class DictionaryLemmatizer implements Lemmatizer {
- /**
+ /*
* The hashmap containing the dictionary.
*/
private final Map<List<String>, List<String>> dictMap = new HashMap<>();
/**
- * Construct a hashmap from the input tab separated dictionary.
+ * Initializes a {@link DictionaryLemmatizer} and related {@link HashMap}
+ * from the input tab separated dictionary.
+ * <p>
+ * The input file should have, for each line, {@code word\tabpostag\tablemma}.
+ * Alternatively, if multiple lemmas are possible for each word-postag pair,
+ * then the format should be {@code word\tabpostag\tablemma01#lemma02#lemma03}.
*
- * The input file should have, for each line, word\tabpostag\tablemma.
- * Alternatively, if multiple lemmas are possible for each word,postag pair,
- * then the format should be word\tab\postag\tablemma01#lemma02#lemma03
+ * @param dictionaryStream The dictionary referenced by an open {@link InputStream}.
+ * @param charset The {@link Charset character encoding} of the dictionary.
*
- * @param dictionary the input dictionary via inputstream
- * @param charset the encoding of the inputstream
+ * @throws IOException Thrown if IO errors occurred while reading in from
+ * {@code dictionaryStream}.
*/
- public DictionaryLemmatizer(final InputStream dictionary, Charset charset) throws IOException {
- init(dictionary, charset);
+ public DictionaryLemmatizer(final InputStream dictionaryStream, Charset charset)
+ throws IOException {
+ init(dictionaryStream, charset);
}
- public DictionaryLemmatizer(final InputStream dictionary) throws IOException {
- this(dictionary, StandardCharsets.UTF_8);
+ /**
+ * Initializes a {@link DictionaryLemmatizer} and related {@link HashMap}
+ * from the input tab separated dictionary.
+ * <p>
+ * The input file should have, for each line, {@code word\tabpostag\tablemma}.
+ * Alternatively, if multiple lemmas are possible for each word-postag pair,
+ * then the format should be {@code word\tabpostag\tablemma01#lemma02#lemma03}.
+ *
+ * @param dictionaryStream The dictionary referenced by an open {@link InputStream}.
+ *
+ * @throws IOException Thrown if IO errors occurred while reading in from
+ * {@code dictionaryStream}.
+ */
+ public DictionaryLemmatizer(final InputStream dictionaryStream) throws IOException {
+ this(dictionaryStream, StandardCharsets.UTF_8);
}
+ /**
+ * Initializes a {@link DictionaryLemmatizer} and related {@link HashMap}
+ * from the input tab separated dictionary.
+ * <p>
+ * The input file should have, for each line, {@code word\tabpostag\tablemma}.
+ * Alternatively, if multiple lemmas are possible for each word-postag pair,
+ * then the format should be {@code word\tabpostag\tablemma01#lemma02#lemma03}.
+ *
+ * @param dictionaryFile The dictionary referenced by a valid, readable {@link File}.
+ *
+ * @throws IOException Thrown if IO errors occurred while reading in from
+ * {@code dictionaryFile}.
+ */
public DictionaryLemmatizer(File dictionaryFile) throws IOException {
this(dictionaryFile, StandardCharsets.UTF_8);
}
+ /**
+ * Initializes a {@link DictionaryLemmatizer} and related {@link HashMap}
+ * from the input tab separated dictionary.
+ * <p>
+ * The input file should have, for each line, {@code word\tabpostag\tablemma}.
+ * Alternatively, if multiple lemmas are possible for each word-postag pair,
+ * then the format should be {@code word\tabpostag\tablemma01#lemma02#lemma03}.
+ *
+ * @param dictionaryFile The dictionary referenced by a valid, readable {@link File}.
+ * @param charset The {@link Charset character encoding} of the dictionary.
+ *
+ * @throws IOException Thrown if IO errors occurred while reading in from
+ * {@code dictionaryFile}.
+ */
public DictionaryLemmatizer(File dictionaryFile, Charset charset) throws IOException {
try (InputStream in = new FileInputStream(dictionaryFile)) {
init(in, charset);
}
}
- public DictionaryLemmatizer(Path dictionaryFile) throws IOException {
- this(dictionaryFile.toFile());
+ /**
+ * Initializes a {@link DictionaryLemmatizer} and related {@link HashMap}
+ * from the input tab separated dictionary.
+ * <p>
+ * The input file should have, for each line, {@code word\tabpostag\tablemma}.
+ * Alternatively, if multiple lemmas are possible for each word-postag pair,
+ * then the format should be {@code word\tabpostag\tablemma01#lemma02#lemma03}.
+ *
+ * @param dictionaryPath The dictionary referenced via a valid, readable {@link Path}.
+ *
+ * @throws IOException Thrown if IO errors occurred while reading in from
+ * {@code dictionaryPath}.
+ */
+ public DictionaryLemmatizer(Path dictionaryPath) throws IOException {
+ init(Files.newInputStream(dictionaryPath), StandardCharsets.UTF_8);
}
private void init(InputStream dictionary, Charset charset) throws IOException {
@@ -87,29 +147,24 @@ public class DictionaryLemmatizer implements Lemmatizer {
}
}
/**
- * Get the Map containing the dictionary.
- *
- * @return dictMap the Map
+ * @return Retrieves the {@link Map} containing the dictionary.
*/
public Map<List<String>, List<String>> getDictMap() {
return this.dictMap;
}
/**
- * Get the dictionary keys (word and postag).
- *
- * @param word
- * the surface form word
- * @param postag
- * the assigned postag
- * @return returns the dictionary keys
+ * @param word The surface form word.
+ * @param postag The assigned postag.
+ *
+ * @return Retrieves the dictionary keys (word and postag).
*/
private List<String> getDictKeys(final String word, final String postag) {
- final List<String> keys = new ArrayList<>(Arrays.asList(word.toLowerCase(), postag));
- return keys;
+ return new ArrayList<>(Arrays.asList(word.toLowerCase(), postag));
}
+ @Override
public String[] lemmatize(final String[] tokens, final String[] postags) {
List<String> lemmas = new ArrayList<>();
for (int i = 0; i < tokens.length; i++) {
@@ -118,6 +173,7 @@ public class DictionaryLemmatizer implements Lemmatizer {
return lemmas.toArray(new String[lemmas.size()]);
}
+ @Override
public List<List<String>> lemmatize(final List<String> tokens, final List<String> posTags) {
List<List<String>> allLemmas = new ArrayList<>();
for (int i = 0; i < tokens.size(); i++) {
@@ -127,13 +183,14 @@ public class DictionaryLemmatizer implements Lemmatizer {
}
/**
- * Lookup lemma in a dictionary. Outputs "O" if not found.
+ * Looks up the lemma in the dictionary. Outputs {@code "O"} if no lemma could be found
+ * for the specified {@code word}.
*
- * @param word
- * the token
- * @param postag
- * the postag
- * @return the lemma
+ * @param word The token to look up the lemma for.
+ * @param postag The postag.
+ *
+ * @return The corresponding lemma, or {@code "O"} if no lemma for {@code word}
+ * could be found.
*/
private String lemmatize(final String word, final String postag) {
String lemma;
@@ -149,14 +206,13 @@ public class DictionaryLemmatizer implements Lemmatizer {
}
/**
- * Lookup every lemma for a word,pos tag in a dictionary. Outputs "O" if not
- * found.
+ * Looks up every lemma for a word, postag pair in the dictionary. Outputs {@code "O"} if no
+ * lemmas could be found for the specified {@code word}.
+ *
+ * @param word The token to look up the lemma for.
+ * @param postag The postag.
*
- * @param word
- * the token
- * @param postag
- * the postag
- * @return every lemma
+ * @return A list of relevant lemmas.
*/
private List<String> getAllLemmas(final String word, final String postag) {
List<String> lemmasList = new ArrayList<>();
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSample.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSample.java
index fea5f3b5..455f145c 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSample.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSample.java
@@ -26,7 +26,7 @@ import java.util.Objects;
import opennlp.tools.commons.Sample;
/**
- * Represents an lemmatized sentence.
+ * Represents a lemmatized sentence.
*/
public class LemmaSample implements Sample {
@@ -39,25 +39,22 @@ public class LemmaSample implements Sample {
private final List<String> lemmas;
/**
- * Represents one lemma sample.
- * @param tokens the token
- * @param tags the postags
- * @param lemmas the lemmas
+ * Initializes a {@link LemmaSample} instance with the given parameters.
+ *
+ * @param tokens The tokens.
+ * @param tags The postags.
+ * @param lemmas The lemmas for {@code tokens}.
*/
public LemmaSample(String[] tokens, String[] tags, String[] lemmas) {
-
- validateArguments(tokens.length, tags.length, lemmas.length);
-
- this.tokens = Collections.unmodifiableList(new ArrayList<>(Arrays.asList(tokens)));
- this.tags = Collections.unmodifiableList(new ArrayList<>(Arrays.asList(tags)));
- this.lemmas = Collections.unmodifiableList(new ArrayList<>(Arrays.asList(lemmas)));
+ this(Arrays.asList(tokens), Arrays.asList(tags), Arrays.asList(lemmas));
}
/**
- * Lemma Sample constructor.
- * @param tokens the tokens
- * @param tags the postags
- * @param lemmas the lemmas
+ * Initializes a {@link LemmaSample} instance with the given parameters.
+ *
+ * @param tokens The tokens.
+ * @param tags The postags.
+ * @param lemmas The lemmas for {@code tokens}.
*/
public LemmaSample(List<String> tokens, List<String> tags, List<String> lemmas) {
@@ -68,14 +65,23 @@ public class LemmaSample implements Sample {
this.lemmas = Collections.unmodifiableList(new ArrayList<>(lemmas));
}
+ /**
+ * @return Retrieves the tokens of a {@link LemmaSample}.
+ */
public String[] getTokens() {
return tokens.toArray(new String[tokens.size()]);
}
+ /**
+ * @return Retrieves the postags of a {@link LemmaSample}.
+ */
public String[] getTags() {
return tags.toArray(new String[tags.size()]);
}
+ /**
+ * @return Retrieves the lemmas of a {@link LemmaSample}.
+ */
public String[] getLemmas() {
return lemmas.toArray(new String[lemmas.size()]);
}
@@ -85,9 +91,8 @@ public class LemmaSample implements Sample {
if (tokensSize != tagsSize || tagsSize != lemmasSize) {
throw new IllegalArgumentException(
"All arrays must have the same length: " +
- "sentenceSize: " + tokensSize +
- ", tagsSize: " + tagsSize +
- ", predsSize: " + lemmasSize + "!");
+ "sentenceSize: " + tokensSize + ", tagsSize: " + tagsSize +
+ ", predsSize: " + lemmasSize + "!");
}
}
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleEventStream.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleEventStream.java
index a8d71e87..d16e5aa9 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleEventStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleEventStream.java
@@ -27,22 +27,26 @@ import opennlp.tools.util.AbstractEventStream;
import opennlp.tools.util.ObjectStream;
/**
- * Class for creating an event stream out of data files for training a probabilistic lemmatizer.
+ * Class for creating an event stream out of data files for training a probabilistic {@link Lemmatizer}.
*/
public class LemmaSampleEventStream extends AbstractEventStream<LemmaSample> {
- private LemmatizerContextGenerator contextGenerator;
+ private final LemmatizerContextGenerator contextGenerator;
/**
- * Creates a new event stream based on the specified data stream using the specified context generator.
+ * Creates a new event stream based on the specified data stream using a
+ * {@link LemmatizerContextGenerator}.
+ *
* @param d The data stream for this event stream.
- * @param cg The context generator which should be used in the creation of events for this event stream.
+ * @param cg The {@link LemmatizerContextGenerator} which should be used in the
+ * creation of events for this event stream {@code d}.
*/
public LemmaSampleEventStream(ObjectStream<LemmaSample> d, LemmatizerContextGenerator cg) {
super(d);
this.contextGenerator = cg;
}
+ @Override
protected Iterator<Event> createEvents(LemmaSample sample) {
if (sample != null) {
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleSequenceStream.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleSequenceStream.java
index 48c5e3e5..a086a9e4 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleSequenceStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleSequenceStream.java
@@ -25,6 +25,9 @@ import opennlp.tools.ml.model.Sequence;
import opennlp.tools.ml.model.SequenceStream;
import opennlp.tools.util.ObjectStream;
+/**
+ * A {@link SequenceStream} implementation encapsulating {@link LemmaSample samples}.
+ */
public class LemmaSampleSequenceStream implements SequenceStream<LemmaSample> {
private final ObjectStream<LemmaSample> samples;
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleStream.java
index 9c661a52..b09b05c4 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleStream.java
@@ -26,16 +26,23 @@ import opennlp.tools.util.ObjectStream;
/**
- * Reads data for training and testing the lemmatizer. The format consists of:
- * word\tpostag\tlemma.
- * @version 2016-02-16
+ * Reads data for training and testing the {@link Lemmatizer}.
+ * <p>
+ * The format consists of:
+ * {@code word\tpostag\tlemma}.
*/
public class LemmaSampleStream extends FilterObjectStream<String, LemmaSample> {
+ /**
+ * Initializes a {@link LemmaSampleStream instance}.
+ *
+ * @param samples A plain text {@link ObjectStream line stream}.
+ */
public LemmaSampleStream(ObjectStream<String> samples) {
super(samples);
}
+ @Override
public LemmaSample read() throws IOException {
List<String> toks = new ArrayList<>();
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/Lemmatizer.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/Lemmatizer.java
index 933eec10..4b6e9910 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/Lemmatizer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/Lemmatizer.java
@@ -20,27 +20,26 @@ package opennlp.tools.lemmatizer;
import java.util.List;
/**
- * The interface for lemmatizers.
+ * The common interface for lemmatizers.
*/
public interface Lemmatizer {
/**
- * Generates lemmas for the word and postag returning the result in an array.
+ * Generates lemmas for the word and postag.
*
- * @param toks an array of the tokens
+ * @param toks An array of the tokens
* @param tags an array of the pos tags
*
- * @return an array of possible lemmas for each token in the sequence.
+ * @return An array of possible lemmas for each token in the {@code toks} sequence.
*/
String[] lemmatize(String[] toks, String[] tags);
/**
- * Generates a lemma tags for the word and postag returning the result in a list
- * of every possible lemma for each token and postag.
+ * Generates lemma tags for the word and postag.
*
- * @param toks an array of the tokens
- * @param tags an array of the pos tags
- * @return a list of every possible lemma for each token in the sequence.
+ * @param toks A list of the tokens.
+ * @param tags A list of the pos tags.
+ * @return A list of every possible lemma for each token in the {@code toks} sequence.
*/
List<List<String>> lemmatize(List<String> toks, List<String> tags);
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerContextGenerator.java
index 1b6fc0b1..8654f35f 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerContextGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerContextGenerator.java
@@ -20,18 +20,20 @@ package opennlp.tools.lemmatizer;
import opennlp.tools.util.BeamSearchContextGenerator;
/**
- * Interface for the context generator used for probabilistic lemmatizer.
+ * Interface for the context generator used for probabilistic {@link Lemmatizer}.
*/
public interface LemmatizerContextGenerator extends BeamSearchContextGenerator<String> {
/**
* Returns the contexts for lemmatizing of the specified index.
- * @param i The index of the token in the specified toks array for which the context should be constructed.
- * @param toks The tokens of the sentence. The <code>toString</code> methods of
+ *
+ * @param i The index of the token in the specified {@code toks} array for which
+ * the context should be constructed.
+ * @param toks The tokens of the sentence. The {@code toString()} methods of
* these objects should return the token text.
- * @param tags The POS tags for the the specified tokens.
+ * @param tags The POS tags for the specified {@code toks}.
* @param lemmas The previous decisions made in the tagging of this sequence.
- * Only indices less than i will be examined.
+ * Only indices less than {@code i} will be examined.
* @return An array of predictive contexts on which a model basis its decisions.
*/
String[] getContext(int i, String[] toks, String[] tags, String[] lemmas);
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerEvaluator.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerEvaluator.java
index 4a64a378..0a8acf48 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerEvaluator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerEvaluator.java
@@ -23,19 +23,19 @@ import opennlp.tools.util.eval.Mean;
/**
* The {@link LemmatizerEvaluator} measures the performance of
* the given {@link Lemmatizer} with the provided reference
- * {@link LemmaSample}s.
+ * {@link LemmaSample samples}.
*/
public class LemmatizerEvaluator extends Evaluator<LemmaSample> {
- private Lemmatizer lemmatizer;
+ private final Lemmatizer lemmatizer;
- private Mean wordAccuracy = new Mean();
+ private final Mean wordAccuracy = new Mean();
/**
- * Initializes the current instance.
+ * Initializes a {@link LemmatizerEvaluator} instance with the given {@link Lemmatizer}.
*
- * @param aLemmatizer a lemmatizer
- * @param listeners an array of evaluation listeners
+ * @param aLemmatizer The {@link Lemmatizer} to evaluate.
+ * @param listeners The {@link LemmatizerEvaluationMonitor evaluation listeners}.
*/
public LemmatizerEvaluator(Lemmatizer aLemmatizer, LemmatizerEvaluationMonitor ... listeners) {
super(listeners);
@@ -44,14 +44,14 @@ public class LemmatizerEvaluator extends Evaluator<LemmaSample> {
/**
* Evaluates the given reference {@link LemmaSample} object.
- *
+ * <p>
* This is done by tagging the sentence from the reference
* {@link LemmaSample} with the {@link Lemmatizer}. The
* tags are then used to update the word accuracy score.
*
* @param reference the reference {@link LemmaSample}.
*
- * @return the predicted {@link LemmaSample}.
+ * @return The predicted {@link LemmaSample}.
*/
@Override
protected LemmaSample processSample(LemmaSample reference) {
@@ -71,29 +71,24 @@ public class LemmatizerEvaluator extends Evaluator<LemmaSample> {
}
/**
- * Retrieves the word accuracy.
- *
- * This is defined as:
- * word accuracy = correctly detected tags / total words
+ * Accuracy is defined as:
+ * {@code word accuracy = correctly detected tags / total words}
*
- * @return the word accuracy
+ * @return Retrieves the word accuracy.
*/
public double getWordAccuracy() {
return wordAccuracy.mean();
}
/**
- * Retrieves the total number of words considered
- * in the evaluation.
- *
- * @return the word count
+ * @return Retrieves the total number of words considered in the evaluation.
*/
public long getWordCount() {
return wordAccuracy.count();
}
/**
- * Represents this objects as human readable {@link String}.
+ * Returns this object's human-readable {@link String} representation.
*/
@Override
public String toString() {
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerFactory.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerFactory.java
index 1d804a85..3a924a63 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerFactory.java
@@ -55,9 +55,7 @@ public class LemmatizerFactory extends BaseToolFactory {
return ExtensionLoader.instantiateExtension(LemmatizerFactory.class, subclassName);
} catch (Exception e) {
String msg = "Could not instantiate the " + subclassName
- + ". The initialization throw an exception.";
- System.err.println(msg);
- e.printStackTrace();
+ + ". The initialization threw an exception.";
throw new InvalidFormatException(msg, e);
}
}
@@ -67,10 +65,16 @@ public class LemmatizerFactory extends BaseToolFactory {
// no additional artifacts
}
+ /**
+ * @return Retrieves a new {@link SequenceValidator} instance.
+ */
public SequenceValidator<String> getSequenceValidator() {
return new DefaultLemmatizerSequenceValidator();
}
+ /**
+ * @return Retrieves a new {@link LemmatizerContextGenerator} instance.
+ */
public LemmatizerContextGenerator getContextGenerator() {
return new DefaultLemmatizerContextGenerator();
}
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerME.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerME.java
index 5b59b874..4a19c516 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerME.java
@@ -40,11 +40,16 @@ import opennlp.tools.util.StringUtil;
import opennlp.tools.util.TrainingParameters;
/**
- * A probabilistic {@link Lemmatizer}. Tries to predict the induced permutation class
- * for each word depending on its surrounding context. Based on
- * Grzegorz Chrupała. 2008. Towards a Machine-Learning Architecture
- * for Lexical Functional Grammar Parsing. PhD dissertation, Dublin City University.
- * http://grzegorz.chrupala.me/papers/phd-single.pdf
+ * A probabilistic {@link Lemmatizer} implementation.
+ * <p>
+ * Tries to predict the induced permutation class for each word depending on
+ * its surrounding context.
+ * <p>
+ * Based on Grzegorz Chrupała. 2008.
+ * <a href="http://grzegorz.chrupala.me/papers/phd-single.pdf">
+ * Towards a Machine-Learning Architecture for Lexical Functional Grammar Parsing.
+ * </a> PhD dissertation, Dublin City University
+ *
*/
public class LemmatizerME implements Lemmatizer {
@@ -59,10 +64,11 @@ public class LemmatizerME implements Lemmatizer {
private final SequenceValidator<String> sequenceValidator;
/**
- * Initializes the current instance with the provided model
- * and the default beam size of 3.
+ * Initializes a {@link LemmatizerME} with the provided
+ * {@link LemmatizerModel model} and a default
+ * {@code beam size} of {@code 3}.
*
- * @param model the model
+ * @param model The {@link LemmatizerModel} to be used.
*/
public LemmatizerME(LemmatizerModel model) {
@@ -87,12 +93,14 @@ public class LemmatizerME implements Lemmatizer {
}
}
+ @Override
public String[] lemmatize(String[] toks, String[] tags) {
String[] ses = predictSES(toks, tags);
return decodeLemmas(toks, ses);
}
- @Override public List<List<String>> lemmatize(List<String> toks,
+ @Override
+ public List<List<String>> lemmatize(List<String> toks,
List<String> tags) {
String[] tokens = toks.toArray(new String[toks.size()]);
String[] posTags = tags.toArray(new String[tags.size()]);
@@ -106,9 +114,11 @@ public class LemmatizerME implements Lemmatizer {
/**
* Predict Short Edit Script (automatically induced lemma class).
- * @param toks the array of tokens
- * @param tags the array of pos tags
- * @return an array containing the lemma classes
+ *
+ * @param toks An array of tokens.
+ * @param tags An array of postags.
+ *
+ * @return An array of possible lemma classes for each token in {@code toks}.
*/
public String[] predictSES(String[] toks, String[] tags) {
bestSequence = model.bestSequence(toks, new Object[] {tags}, contextGenerator, sequenceValidator);
@@ -118,10 +128,12 @@ public class LemmatizerME implements Lemmatizer {
/**
* Predict all possible lemmas (using a default upper bound).
- * @param numLemmas the default number of lemmas
- * @param toks the tokens
- * @param tags the postags
- * @return a double array containing all posible lemmas for each token and postag pair
+ *
+ * @param numLemmas The default number of lemmas.
+ * @param toks An array of tokens.
+ * @param tags An array of postags.
+ *
+ * @return A 2-dimensional array containing all possible lemmas for each token and postag pair.
*/
public String[][] predictLemmas(int numLemmas, String[] toks, String[] tags) {
Sequence[] bestSequences = model.bestSequences(numLemmas, toks, new Object[] {tags},
@@ -137,9 +149,11 @@ public class LemmatizerME implements Lemmatizer {
/**
* Decodes the lemma from the word and the induced lemma class.
- * @param toks the array of tokens
- * @param preds the predicted lemma classes
- * @return the array of decoded lemmas
+ *
+ * @param toks An array of tokens.
+ * @param preds An array of predicted lemma classes.
+ *
+ * @return The array of decoded lemmas.
*/
public static String[] decodeLemmas(String[] toks, String[] preds) {
List<String> lemmas = new ArrayList<>();
@@ -153,6 +167,14 @@ public class LemmatizerME implements Lemmatizer {
return lemmas.toArray(new String[lemmas.size()]);
}
+ /**
+ * Encodes the lemma class for each token, given the token and its lemma.
+ *
+ * @param toks An array of tokens.
+ * @param lemmas An array of lemmas.
+ *
+ * @return The array of lemma classes.
+ */
public static String[] encodeLemmas(String[] toks, String[] lemmas) {
List<String> sesList = new ArrayList<>();
for (int i = 0; i < toks.length; i++) {
@@ -165,21 +187,36 @@ public class LemmatizerME implements Lemmatizer {
return sesList.toArray(new String[sesList.size()]);
}
+ /**
+ * @param sentence An array of tokens.
+ * @param tags An array of postags.
+ *
+ * @return Retrieves the top-k {@link Sequence sequences}.
+ */
public Sequence[] topKSequences(String[] sentence, String[] tags) {
return model.bestSequences(DEFAULT_BEAM_SIZE, sentence,
new Object[] { tags }, contextGenerator, sequenceValidator);
}
+ /**
+ * @param sentence An array of tokens.
+ * @param tags An array of postags.
+ * @param minSequenceScore The minimum score to be achieved.
+ *
+ * @return Retrieves the top-k {@link Sequence sequences}.
+ */
public Sequence[] topKSequences(String[] sentence, String[] tags, double minSequenceScore) {
return model.bestSequences(DEFAULT_BEAM_SIZE, sentence, new Object[] { tags }, minSequenceScore,
contextGenerator, sequenceValidator);
}
/**
- * Populates the specified array with the probabilities of the last decoded sequence. The
- * sequence was determined based on the previous call to <code>lemmatize</code>. The
- * specified array should be at least as large as the number of tokens in the
- * previous call to <code>lemmatize</code>.
+ * Populates the specified array with the probabilities of the last decoded sequence.
+ * The sequence was determined based on the previous call to
+ * {@link #lemmatize(String[], String[])}.
+ * <p>
+ * The specified array should be at least as large as the number of tokens in the
+ * previous call to {@link #lemmatize(String[], String[])}.
*
* @param probs An array used to hold the probabilities of the last decoded sequence.
*/
@@ -188,49 +225,57 @@ public class LemmatizerME implements Lemmatizer {
}
/**
- * Returns an array with the probabilities of the last decoded sequence. The
- * sequence was determined based on the previous call to <code>chunk</code>.
- * @return An array with the same number of probabilities as tokens were sent to <code>chunk</code>
- * when it was last called.
+ * Returns an array with the probabilities of the last decoded sequence.
+ * The sequence was determined based on the previous call to
+ * {@link #lemmatize(String[], String[])}.
+ *
+ * @return An array with as many probabilities as there were tokens sent to
+ * {@link #lemmatize(String[], String[])} when it was last called.
*/
public double[] probs() {
return bestSequence.getProbs();
}
- public static LemmatizerModel train(String languageCode,
- ObjectStream<LemmaSample> samples, TrainingParameters trainParams,
- LemmatizerFactory posFactory) throws IOException {
-
- int beamSize = trainParams.getIntParameter(BeamSearch.BEAM_SIZE_PARAMETER,
- LemmatizerME.DEFAULT_BEAM_SIZE);
-
- LemmatizerContextGenerator contextGenerator = posFactory.getContextGenerator();
+ /**
+ * Starts a training of a {@link LemmatizerModel} with the given parameters.
+ *
+ * @param languageCode The ISO-conformant language code.
+ * @param samples The {@link ObjectStream} of {@link LemmaSample} used as input for training.
+ * @param params The {@link TrainingParameters} for the context of the training.
+ * @param factory The {@link LemmatizerFactory} for creating related objects defined
+ * via {@code params}.
+ *
+ * @return A valid, trained {@link LemmatizerModel} instance.
+ * @throws IOException Thrown if IO errors occurred.
+ */
+ public static LemmatizerModel train(String languageCode, ObjectStream<LemmaSample> samples,
+ TrainingParameters params, LemmatizerFactory factory)
+ throws IOException {
+ LemmatizerContextGenerator contextGenerator = factory.getContextGenerator();
Map<String, String> manifestInfoEntries = new HashMap<>();
-
- TrainerType trainerType = TrainerFactory.getTrainerType(trainParams);
+ TrainerType trainerType = TrainerFactory.getTrainerType(params);
MaxentModel lemmatizerModel = null;
SequenceClassificationModel<String> seqLemmatizerModel = null;
if (TrainerType.EVENT_MODEL_TRAINER.equals(trainerType)) {
ObjectStream<Event> es = new LemmaSampleEventStream(samples, contextGenerator);
- EventTrainer trainer = TrainerFactory.getEventTrainer(trainParams,
+ EventTrainer trainer = TrainerFactory.getEventTrainer(params,
manifestInfoEntries);
lemmatizerModel = trainer.train(es);
}
else if (TrainerType.EVENT_MODEL_SEQUENCE_TRAINER.equals(trainerType)) {
LemmaSampleSequenceStream ss = new LemmaSampleSequenceStream(samples, contextGenerator);
EventModelSequenceTrainer trainer =
- TrainerFactory.getEventModelSequenceTrainer(trainParams, manifestInfoEntries);
+ TrainerFactory.getEventModelSequenceTrainer(params, manifestInfoEntries);
lemmatizerModel = trainer.train(ss);
}
else if (TrainerType.SEQUENCE_TRAINER.equals(trainerType)) {
SequenceTrainer<LemmaSample> trainer = TrainerFactory.getSequenceModelTrainer(
- trainParams, manifestInfoEntries);
+ params, manifestInfoEntries);
// TODO: This will probably cause issue, since the feature generator uses the outcomes array
-
LemmaSampleSequenceStream ss = new LemmaSampleSequenceStream(samples, contextGenerator);
seqLemmatizerModel = trainer.train(ss);
}
@@ -238,19 +283,34 @@ public class LemmatizerME implements Lemmatizer {
throw new IllegalArgumentException("Trainer type is not supported: " + trainerType);
}
+ int beamSize = params.getIntParameter(BeamSearch.BEAM_SIZE_PARAMETER,
+ LemmatizerME.DEFAULT_BEAM_SIZE);
if (lemmatizerModel != null) {
- return new LemmatizerModel(languageCode, lemmatizerModel, beamSize, manifestInfoEntries, posFactory);
+ return new LemmatizerModel(languageCode, lemmatizerModel, beamSize, manifestInfoEntries, factory);
}
else {
- return new LemmatizerModel(languageCode, seqLemmatizerModel, manifestInfoEntries, posFactory);
+ return new LemmatizerModel(languageCode, seqLemmatizerModel, manifestInfoEntries, factory);
}
}
+ /**
+ * @param sentence An array of tokens.
+ * @param tags An array of postags.
+ *
+ * @return Retrieves the top-k {@link Sequence lemma classes}.
+ */
public Sequence[] topKLemmaClasses(String[] sentence, String[] tags) {
return model.bestSequences(DEFAULT_BEAM_SIZE, sentence,
new Object[] { tags }, contextGenerator, sequenceValidator);
}
+ /**
+ * @param sentence An array of tokens.
+ * @param tags An array of postags.
+ * @param minSequenceScore The minimum score to be achieved.
+ *
+ * @return Retrieves the top-k {@link Sequence lemma classes}.
+ */
public Sequence[] topKLemmaClasses(String[] sentence, String[] tags, double minSequenceScore) {
return model.bestSequences(DEFAULT_BEAM_SIZE, sentence, new Object[] { tags }, minSequenceScore,
contextGenerator, sequenceValidator);
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerModel.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerModel.java
index a4f7c556..3fa2aac3 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerModel.java
@@ -21,6 +21,7 @@ import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
+import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Map;
import java.util.Properties;
@@ -34,8 +35,7 @@ import opennlp.tools.util.InvalidFormatException;
import opennlp.tools.util.model.BaseModel;
/**
- * The {@link LemmatizerModel} is the model used
- * by a learnable {@link Lemmatizer}.
+ * The {@link LemmatizerModel} is the model used by a learnable {@link Lemmatizer}.
*
* @see LemmatizerME
*/
@@ -45,6 +45,14 @@ public class LemmatizerModel extends BaseModel {
private static final String COMPONENT_NAME = "StatisticalLemmatizer";
private static final String LEMMATIZER_MODEL_ENTRY_NAME = "lemmatizer.model";
+ /**
+ * Initializes a {@link LemmatizerModel} instance via given parameters.
+ *
+ * @param languageCode An ISO-conformant language code.
+ * @param lemmatizerModel A valid {@link SequenceClassificationModel}.
+ * @param manifestInfoEntries Additional information kept in the manifest.
+ * @param factory The {@link LemmatizerFactory} for creating related objects.
+ */
public LemmatizerModel(String languageCode, SequenceClassificationModel<String> lemmatizerModel,
Map<String, String> manifestInfoEntries, LemmatizerFactory factory) {
super(COMPONENT_NAME, languageCode, manifestInfoEntries, factory);
@@ -52,11 +60,28 @@ public class LemmatizerModel extends BaseModel {
checkArtifactMap();
}
+ /**
+ * Initializes a {@link LemmatizerModel} instance via given parameters.
+ *
+ * @param languageCode An ISO-conformant language code.
+ * @param lemmatizerModel A valid {@link MaxentModel}.
+ * @param manifestInfoEntries Additional information kept in the manifest.
+ * @param factory The {@link LemmatizerFactory} for creating related objects.
+ */
public LemmatizerModel(String languageCode, MaxentModel lemmatizerModel,
Map<String, String> manifestInfoEntries, LemmatizerFactory factory) {
this(languageCode, lemmatizerModel, LemmatizerME.DEFAULT_BEAM_SIZE, manifestInfoEntries, factory);
}
+ /**
+ * Initializes a {@link LemmatizerModel} instance via given parameters.
+ *
+ * @param languageCode An ISO-conformant language code.
+ * @param lemmatizerModel A valid {@link MaxentModel}.
+ * @param beamSize The size of the beam that should be used when decoding sequences.
+ * @param manifestInfoEntries Additional information kept in the manifest.
+ * @param factory The {@link LemmatizerFactory} for creating related objects.
+ */
public LemmatizerModel(String languageCode, MaxentModel lemmatizerModel, int beamSize,
Map<String, String> manifestInfoEntries, LemmatizerFactory factory) {
super(COMPONENT_NAME, languageCode, manifestInfoEntries, factory);
@@ -67,23 +92,58 @@ public class LemmatizerModel extends BaseModel {
checkArtifactMap();
}
+ /**
+ * Initializes a {@link LemmatizerModel} instance via given parameters.
+ *
+ * @param languageCode An ISO-conformant language code.
+ * @param lemmatizerModel A valid {@link MaxentModel}.
+ * @param factory The {@link LemmatizerFactory} for creating related objects.
+ */
public LemmatizerModel(String languageCode, MaxentModel lemmatizerModel, LemmatizerFactory factory) {
this(languageCode, lemmatizerModel, null, factory);
}
- public LemmatizerModel(InputStream in) throws IOException, InvalidFormatException {
+ /**
+ * Initializes a {@link LemmatizerModel} instance via a valid {@link InputStream}.
+ *
+ * @param in The {@link InputStream} used for loading the model.
+ *
+ * @throws IOException Thrown if IO errors occurred during initialization.
+ */
+ public LemmatizerModel(InputStream in) throws IOException {
super(COMPONENT_NAME, in);
}
- public LemmatizerModel(File modelFile) throws IOException, InvalidFormatException {
+ /**
+ * Initializes a {@link LemmatizerModel} instance via a valid {@link File}.
+ *
+ * @param modelFile The {@link File} used for loading the model.
+ *
+ * @throws IOException Thrown if IO errors occurred during initialization.
+ */
+ public LemmatizerModel(File modelFile) throws IOException {
super(COMPONENT_NAME, modelFile);
}
- public LemmatizerModel(Path modelPath) throws IOException, InvalidFormatException {
- this(modelPath.toFile());
+ /**
+ * Initializes a {@link LemmatizerModel} instance via a valid {@link Path}.
+ *
+ * @param modelPath The {@link Path} used for loading the model.
+ *
+ * @throws IOException Thrown if IO errors occurred during initialization.
+ */
+ public LemmatizerModel(Path modelPath) throws IOException {
+ super(COMPONENT_NAME, Files.newInputStream(modelPath));
}
- public LemmatizerModel(URL modelURL) throws IOException, InvalidFormatException {
+ /**
+ * Initializes a {@link LemmatizerModel} instance via a valid {@link URL}.
+ *
+ * @param modelURL The {@link URL} used for loading the model.
+ *
+ * @throws IOException Thrown if IO errors occurred during initialization.
+ */
+ public LemmatizerModel(URL modelURL) throws IOException {
super(COMPONENT_NAME, modelURL);
}
@@ -96,6 +156,9 @@ public class LemmatizerModel extends BaseModel {
}
}
+ /**
+ * @return Retrieves a {@link SequenceClassificationModel} instance.
+ */
public SequenceClassificationModel<String> getLemmatizerSequenceModel() {
Properties manifest = (Properties) artifactMap.get(MANIFEST_ENTRY);
@@ -123,7 +186,9 @@ public class LemmatizerModel extends BaseModel {
return LemmatizerFactory.class;
}
-
+ /**
+ * @return Retrieves the active {@link LemmatizerFactory}.
+ */
public LemmatizerFactory getFactory() {
return (LemmatizerFactory) this.toolFactory;
}
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/package-info.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/package-info.java
index 74d21487..f8f0cd8e 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/package-info.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/package-info.java
@@ -16,6 +16,6 @@
*/
/**
- * Package related with the lemmatizer tool
+ * Package related to the lemmatizer functionality.
*/
package opennlp.tools.lemmatizer;
diff --git a/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/DummyLemmatizer.java b/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/DummyLemmatizer.java
index dcfc883f..02a84dfd 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/DummyLemmatizer.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/DummyLemmatizer.java
@@ -22,17 +22,18 @@ import java.util.Arrays;
import java.util.List;
/**
- * This dummy lemmatizer implementation simulates a LemmatizerME. The file has
- * samples of sentences, with target and predicted values.
+ * This dummy lemmatizer implementation simulates a {@link LemmatizerME}.
+ * The file has samples of sentences, with target and predicted values.
*/
public class DummyLemmatizer implements Lemmatizer {
- private DummyLemmaSampleStream mSampleStream;
+ private final DummyLemmaSampleStream mSampleStream;
public DummyLemmatizer(DummyLemmaSampleStream aSampleStream) {
mSampleStream = aSampleStream;
}
+ @Override
public String[] lemmatize(String[] toks, String[] tags) {
try {
LemmaSample predsSample = mSampleStream.read();