You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jz...@apache.org on 2022/12/11 14:14:46 UTC

[opennlp] branch master updated: OPENNLP-1409 Enhance JavaDoc in opennlp.tools.lemmatizer package (#455)

This is an automated email from the ASF dual-hosted git repository.

jzemerick pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp.git


The following commit(s) were added to refs/heads/master by this push:
     new 453aee5e OPENNLP-1409 Enhance JavaDoc in opennlp.tools.lemmatizer package (#455)
453aee5e is described below

commit 453aee5e31e5e367fd07daf363ca1488543fb4a1
Author: Martin Wiesner <ma...@users.noreply.github.com>
AuthorDate: Sun Dec 11 15:14:41 2022 +0100

    OPENNLP-1409 Enhance JavaDoc in opennlp.tools.lemmatizer package (#455)
    
    - adds missing JavaDoc
    - improves existing documentation for clarity
    - removes superfluous text
    - adds 'final' modifier where useful and applicable
    - adds 'Override' annotation where useful and applicable
    - fixes several typos
---
 .../lemmatizer/MorfologikLemmatizer.java           |  29 ++--
 .../DefaultLemmatizerContextGenerator.java         |  19 +--
 .../DefaultLemmatizerSequenceValidator.java        |   4 +
 .../tools/lemmatizer/DictionaryLemmatizer.java     | 138 +++++++++++++------
 .../java/opennlp/tools/lemmatizer/LemmaSample.java |  41 +++---
 .../tools/lemmatizer/LemmaSampleEventStream.java   |  12 +-
 .../lemmatizer/LemmaSampleSequenceStream.java      |   3 +
 .../tools/lemmatizer/LemmaSampleStream.java        |  13 +-
 .../java/opennlp/tools/lemmatizer/Lemmatizer.java  |  17 ++-
 .../lemmatizer/LemmatizerContextGenerator.java     |  12 +-
 .../tools/lemmatizer/LemmatizerEvaluator.java      |  31 ++---
 .../tools/lemmatizer/LemmatizerFactory.java        |  10 +-
 .../opennlp/tools/lemmatizer/LemmatizerME.java     | 146 +++++++++++++++------
 .../opennlp/tools/lemmatizer/LemmatizerModel.java  |  81 ++++++++++--
 .../opennlp/tools/lemmatizer/package-info.java     |   2 +-
 .../opennlp/tools/lemmatizer/DummyLemmatizer.java  |   7 +-
 16 files changed, 390 insertions(+), 175 deletions(-)

diff --git a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizer.java b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizer.java
index f9597a9c..a9ce6b26 100644
--- a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizer.java
+++ b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/lemmatizer/MorfologikLemmatizer.java
@@ -36,12 +36,26 @@ public class MorfologikLemmatizer implements Lemmatizer {
 
   private final Dictionary dictionary;
 
-  public MorfologikLemmatizer(Path dictionaryPath) throws IllegalArgumentException,
-      IOException {
+  /**
+   * Initializes a {@link MorfologikLemmatizer} and related {@link Dictionary}
+   * from the input tab separated dictionary.
+   *
+   * @param dictionaryPath The dictionary referenced via a valid, readable {@link Path}.
+   *
+   * @throws IOException Thrown if IO errors occurred while reading in from
+   *                     {@code dictionaryPath}.
+   */
+  public MorfologikLemmatizer(Path dictionaryPath) throws IOException {
     this(Dictionary.read(dictionaryPath));
   }
 
-  public MorfologikLemmatizer(Dictionary dictionary) throws IllegalArgumentException {
+  /**
+   * Initializes a {@link MorfologikLemmatizer} that uses the given
+   * {@link Dictionary}.
+   *
+   * @param dictionary The {@link Dictionary} to be used.
+   */
+  public MorfologikLemmatizer(Dictionary dictionary) {
     this.dictionary = dictionary;
   }
 
@@ -77,14 +91,7 @@ public class MorfologikLemmatizer implements Lemmatizer {
     return lemmas;
   }
 
-
-  /**
-   * Generates a lemma tags for the word and postag returning the result in list of possible lemmas.
-   *
-   * @param toks an array of the tokens
-   * @param tags an array of the pos tags
-   * @return an list of possible lemmas for each token in the sequence.
-   */
+  @Override
   public List<List<String>> lemmatize(List<String> toks, List<String> tags) {
     List<List<String>> lemmas = new ArrayList<>();
     for (int i = 0; i < toks.size(); i++) {
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DefaultLemmatizerContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DefaultLemmatizerContextGenerator.java
index 7da7107b..0e720f7d 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DefaultLemmatizerContextGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DefaultLemmatizerContextGenerator.java
@@ -23,18 +23,19 @@ import java.util.regex.Pattern;
 
 /**
  * Simple feature generator for learning statistical lemmatizers.
- * Features based on Grzegorz Chrupała. 2008. Towards a Machine-Learning
- * Architecture for Lexical Functional Grammar Parsing. PhD dissertation,
- * Dublin City University
- * @version 2016-02-15
+ * <p>
+ * Features based on Grzegorz Chrupała. 2008.
+ * <a href="http://grzegorz.chrupala.me/papers/phd-single.pdf">
+ * Towards a Machine-Learning Architecture for Lexical Functional Grammar Parsing.
+ * </a> PhD dissertation, Dublin City University
  */
 public class DefaultLemmatizerContextGenerator implements LemmatizerContextGenerator {
 
   private static final int PREFIX_LENGTH = 5;
   private static final int SUFFIX_LENGTH = 7;
 
-  private static Pattern hasCap = Pattern.compile("[A-Z]");
-  private static Pattern hasNum = Pattern.compile("[0-9]");
+  private static final Pattern PATTERN_HAS_CAP = Pattern.compile("[A-Z]");
+  private static final Pattern PATTERN_HAS_NUM = Pattern.compile("[0-9]");
 
   public DefaultLemmatizerContextGenerator() {
   }
@@ -55,11 +56,13 @@ public class DefaultLemmatizerContextGenerator implements LemmatizerContextGener
     return suffs;
   }
 
+  @Override
   public String[] getContext(int index, String[] sequence, String[] priorDecisions,
       Object[] additionalContext) {
     return getContext(index, sequence, (String[]) additionalContext[0], priorDecisions);
   }
 
+  @Override
   public String[] getContext(int index, String[] toks, String[] tags, String[] preds) {
     // Word
     String w0;
@@ -102,11 +105,11 @@ public class DefaultLemmatizerContextGenerator implements LemmatizerContextGener
       features.add("h");
     }
 
-    if (hasCap.matcher(lex).find()) {
+    if (PATTERN_HAS_CAP.matcher(lex).find()) {
       features.add("c");
     }
 
-    if (hasNum.matcher(lex).find()) {
+    if (PATTERN_HAS_NUM.matcher(lex).find()) {
       features.add("d");
     }
 
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DefaultLemmatizerSequenceValidator.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DefaultLemmatizerSequenceValidator.java
index 8a697ebd..9f843126 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DefaultLemmatizerSequenceValidator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DefaultLemmatizerSequenceValidator.java
@@ -19,9 +19,13 @@ package opennlp.tools.lemmatizer;
 
 import opennlp.tools.util.SequenceValidator;
 
+/**
+ * The default lemmatizer {@link SequenceValidator} implementation.
+ */
 public class DefaultLemmatizerSequenceValidator implements SequenceValidator<String> {
 
   //TODO implement this
+  @Override
   public boolean validSequence(int i, String[] sequence, String[] s, String outcome) {
     return true;
   }
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java
index e4936085..1416643d 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/DictionaryLemmatizer.java
@@ -25,6 +25,7 @@ import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.nio.charset.Charset;
 import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -33,47 +34,106 @@ import java.util.List;
 import java.util.Map;
 
 /**
- * Lemmatize by simple dictionary lookup into a hashmap built from a file
- * containing, for each line, word\tabpostag\tablemma.
- * @version 2014-07-08
+ * A {@link Lemmatizer} implementation that works by simple dictionary lookup into
+ * a {@link Map} built from a file containing, for each line:
+ * <p>
+ * {@code word\tabpostag\tablemma}.
  */
 public class DictionaryLemmatizer implements Lemmatizer {
 
-  /**
+  /*
    * The hashmap containing the dictionary.
    */
   private final Map<List<String>, List<String>> dictMap = new HashMap<>();
 
   /**
-   * Construct a hashmap from the input tab separated dictionary.
+   * Initializes a {@link DictionaryLemmatizer} and related {@link HashMap}
+   * from the input tab separated dictionary.
+   * <p>
+   * The input file should have, for each line, {@code word\tabpostag\tablemma}.
+   * Alternatively, if multiple lemmas are possible for each word-postag pair,
+   * then the format should be {@code word\tabpostag\tablemma01#lemma02#lemma03}.
    *
-   * The input file should have, for each line, word\tabpostag\tablemma.
-   * Alternatively, if multiple lemmas are possible for each word,postag pair,
-   * then the format should be word\tab\postag\tablemma01#lemma02#lemma03
+   * @param dictionaryStream The dictionary referenced by an open {@link InputStream}.
+   * @param charset The {@link Charset character encoding} of the dictionary.
    *
-   * @param dictionary the input dictionary via inputstream
-   * @param charset the encoding of the inputstream
+   * @throws IOException Thrown if IO errors occurred while reading in from
+   *                     {@code dictionaryStream}.
    */
-  public DictionaryLemmatizer(final InputStream dictionary, Charset charset) throws IOException {
-    init(dictionary, charset);
+  public DictionaryLemmatizer(final InputStream dictionaryStream, Charset charset)
+          throws IOException {
+    init(dictionaryStream, charset);
   }
 
-  public DictionaryLemmatizer(final InputStream dictionary) throws IOException {
-    this(dictionary, StandardCharsets.UTF_8);
+  /**
+   * Initializes a {@link DictionaryLemmatizer} and related {@link HashMap}
+   * from the input tab separated dictionary.
+   * <p>
+   * The input file should have, for each line, {@code word\tabpostag\tablemma}.
+   * Alternatively, if multiple lemmas are possible for each word-postag pair,
+   * then the format should be {@code word\tabpostag\tablemma01#lemma02#lemma03}.
+   *
+   * @param dictionaryStream The dictionary referenced by an open {@link InputStream}.
+   *
+   * @throws IOException Thrown if IO errors occurred while reading in from
+   *                     {@code dictionaryStream}.
+   */
+  public DictionaryLemmatizer(final InputStream dictionaryStream) throws IOException {
+    this(dictionaryStream, StandardCharsets.UTF_8);
   }
 
+  /**
+   * Initializes a {@link DictionaryLemmatizer} and related {@link HashMap}
+   * from the input tab separated dictionary.
+   * <p>
+   * The input file should have, for each line, {@code word\tabpostag\tablemma}.
+   * Alternatively, if multiple lemmas are possible for each word-postag pair,
+   * then the format should be {@code word\tabpostag\tablemma01#lemma02#lemma03}.
+   *
+   * @param dictionaryFile The dictionary referenced by a valid, readable {@link File}.
+   *
+   * @throws IOException Thrown if IO errors occurred while reading in from
+   *                     {@code dictionaryFile}.
+   */
   public DictionaryLemmatizer(File dictionaryFile) throws IOException {
     this(dictionaryFile, StandardCharsets.UTF_8);
   }
 
+  /**
+   * Initializes a {@link DictionaryLemmatizer} and related {@link HashMap}
+   * from the input tab separated dictionary.
+   * <p>
+   * The input file should have, for each line, {@code word\tabpostag\tablemma}.
+   * Alternatively, if multiple lemmas are possible for each word-postag pair,
+   * then the format should be {@code word\tabpostag\tablemma01#lemma02#lemma03}.
+   *
+   * @param dictionaryFile The dictionary referenced by a valid, readable {@link File}.
+   * @param charset The {@link Charset character encoding} of the dictionary.
+   *
+   * @throws IOException Thrown if IO errors occurred while reading in from
+   *                     {@code dictionaryFile}.
+   */
   public DictionaryLemmatizer(File dictionaryFile, Charset charset) throws IOException {
     try (InputStream in = new FileInputStream(dictionaryFile)) {
       init(in, charset);
     }
   }
 
-  public DictionaryLemmatizer(Path dictionaryFile) throws IOException {
-    this(dictionaryFile.toFile());
+  /**
+   * Initializes a {@link DictionaryLemmatizer} and related {@link HashMap}
+   * from the input tab separated dictionary.
+   * <p>
+   * The input file should have, for each line, {@code word\tabpostag\tablemma}.
+   * Alternatively, if multiple lemmas are possible for each word-postag pair,
+   * then the format should be {@code word\tabpostag\tablemma01#lemma02#lemma03}.
+   *
+   * @param dictionaryPath The dictionary referenced via a valid, readable {@link Path}.
+   *
+   * @throws IOException Thrown if IO errors occurred while reading in from
+   *                     {@code dictionaryPath}.
+   */
+  public DictionaryLemmatizer(Path dictionaryPath) throws IOException {
+    init(Files.newInputStream(dictionaryPath), StandardCharsets.UTF_8);
   }
 
   private void init(InputStream dictionary, Charset charset) throws IOException {
@@ -87,29 +147,24 @@ public class DictionaryLemmatizer implements Lemmatizer {
     }
   }
   /**
-   * Get the Map containing the dictionary.
-   *
-   * @return dictMap the Map
+   * @return Retrieves the {@link Map} containing the dictionary.
    */
   public Map<List<String>, List<String>> getDictMap() {
     return this.dictMap;
   }
 
   /**
-   * Get the dictionary keys (word and postag).
-   *
-   * @param word
-   *          the surface form word
-   * @param postag
-   *          the assigned postag
-   * @return returns the dictionary keys
+   * @param word The surface form word.
+   * @param postag The assigned postag.
+   *               
+   * @return Retrieves the dictionary keys (word and postag).
    */
   private List<String> getDictKeys(final String word, final String postag) {
-    final List<String> keys = new ArrayList<>(Arrays.asList(word.toLowerCase(), postag));
-    return keys;
+    return new ArrayList<>(Arrays.asList(word.toLowerCase(), postag));
   }
 
 
+  @Override
   public String[] lemmatize(final String[] tokens, final String[] postags) {
     List<String> lemmas = new ArrayList<>();
     for (int i = 0; i < tokens.length; i++) {
@@ -118,6 +173,7 @@ public class DictionaryLemmatizer implements Lemmatizer {
     return lemmas.toArray(new String[lemmas.size()]);
   }
 
+  @Override
   public List<List<String>> lemmatize(final List<String> tokens, final List<String> posTags) {
     List<List<String>> allLemmas = new ArrayList<>();
     for (int i = 0; i < tokens.size(); i++) {
@@ -127,13 +183,14 @@ public class DictionaryLemmatizer implements Lemmatizer {
   }
 
   /**
-   * Lookup lemma in a dictionary. Outputs "O" if not found.
+   * Lookup lemma in a dictionary. Outputs {@code "O"} if no lemma could be found
+   * for the specified {@code word}.
    *
-   * @param word
-   *          the token
-   * @param postag
-   *          the postag
-   * @return the lemma
+   * @param word The token to look up the lemma for.
+   * @param postag The postag.
+   *
+   * @return The corresponding lemma, or {@code "O"} if no lemma for {@code word}
+   *         could be found.
    */
   private String lemmatize(final String word, final String postag) {
     String lemma;
@@ -149,14 +206,13 @@ public class DictionaryLemmatizer implements Lemmatizer {
   }
 
   /**
-   * Lookup every lemma for a word,pos tag in a dictionary. Outputs "O" if not
-   * found.
+   * Lookup every lemma for a word,pos tag in a dictionary. Outputs {@code "O"} if no
+   * lemmas could be found for the specified {@code word}.
+   *
+   * @param word The token to look up the lemma for.
+   * @param postag The postag.
    *
-   * @param word
-   *          the token
-   * @param postag
-   *          the postag
-   * @return every lemma
+   * @return A list of relevant lemmas.
    */
   private List<String> getAllLemmas(final String word, final String postag) {
     List<String> lemmasList = new ArrayList<>();
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSample.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSample.java
index fea5f3b5..455f145c 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSample.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSample.java
@@ -26,7 +26,7 @@ import java.util.Objects;
 import opennlp.tools.commons.Sample;
 
 /**
- * Represents an lemmatized sentence.
+ * Represents a lemmatized sentence.
  */
 public class LemmaSample implements Sample {
 
@@ -39,25 +39,22 @@ public class LemmaSample implements Sample {
   private final List<String> lemmas;
 
   /**
-   * Represents one lemma sample.
-   * @param tokens the token
-   * @param tags the postags
-   * @param lemmas the lemmas
+   * Initializes a {@link LemmaSample} instance with the given parameters.
+   *
+   * @param tokens The tokens.
+   * @param tags The postags.
+   * @param lemmas The lemmas for {@code tokens}.
    */
   public LemmaSample(String[] tokens, String[] tags, String[] lemmas) {
-
-    validateArguments(tokens.length, tags.length, lemmas.length);
-
-    this.tokens = Collections.unmodifiableList(new ArrayList<>(Arrays.asList(tokens)));
-    this.tags = Collections.unmodifiableList(new ArrayList<>(Arrays.asList(tags)));
-    this.lemmas = Collections.unmodifiableList(new ArrayList<>(Arrays.asList(lemmas)));
+    this(Arrays.asList(tokens), Arrays.asList(tags), Arrays.asList(lemmas));
   }
 
   /**
-   * Lemma Sample constructor.
-   * @param tokens the tokens
-   * @param tags the postags
-   * @param lemmas the lemmas
+   * Initializes a {@link LemmaSample} instance with the given parameters.
+   *
+   * @param tokens The tokens.
+   * @param tags The postags.
+   * @param lemmas The lemmas for {@code tokens}.
    */
   public LemmaSample(List<String> tokens, List<String> tags, List<String> lemmas) {
 
@@ -68,14 +65,23 @@ public class LemmaSample implements Sample {
     this.lemmas = Collections.unmodifiableList(new ArrayList<>(lemmas));
   }
 
+  /**
+   * @return Retrieves the tokens of a {@link LemmaSample}.
+   */
   public String[] getTokens() {
     return tokens.toArray(new String[tokens.size()]);
   }
 
+  /**
+   * @return Retrieves the postags of a {@link LemmaSample}.
+   */
   public String[] getTags() {
     return tags.toArray(new String[tags.size()]);
   }
 
+  /**
+   * @return Retrieves the lemmas of a {@link LemmaSample}.
+   */
   public String[] getLemmas() {
     return lemmas.toArray(new String[lemmas.size()]);
   }
@@ -85,9 +91,8 @@ public class LemmaSample implements Sample {
     if (tokensSize != tagsSize || tagsSize != lemmasSize) {
       throw new IllegalArgumentException(
           "All arrays must have the same length: " +
-              "sentenceSize: " + tokensSize +
-              ", tagsSize: " + tagsSize +
-              ", predsSize: " + lemmasSize + "!");
+              "sentenceSize: " + tokensSize + ", tagsSize: " + tagsSize +
+                  ", predsSize: " + lemmasSize + "!");
     }
   }
 
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleEventStream.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleEventStream.java
index a8d71e87..d16e5aa9 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleEventStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleEventStream.java
@@ -27,22 +27,26 @@ import opennlp.tools.util.AbstractEventStream;
 import opennlp.tools.util.ObjectStream;
 
 /**
- * Class for creating an event stream out of data files for training a probabilistic lemmatizer.
+ * Class for creating an event stream out of data files for training a probabilistic {@link Lemmatizer}.
  */
 public class LemmaSampleEventStream extends AbstractEventStream<LemmaSample> {
 
-  private LemmatizerContextGenerator contextGenerator;
+  private final LemmatizerContextGenerator contextGenerator;
 
   /**
-   * Creates a new event stream based on the specified data stream using the specified context generator.
+   * Creates a new event stream based on the specified data stream using a
+   * {@link LemmatizerContextGenerator}.
+   *
    * @param d The data stream for this event stream.
-   * @param cg The context generator which should be used in the creation of events for this event stream.
+   * @param cg The {@link LemmatizerContextGenerator} which should be used in the
+   *           creation of events for this event stream {@code d}.
    */
   public LemmaSampleEventStream(ObjectStream<LemmaSample> d, LemmatizerContextGenerator cg) {
     super(d);
     this.contextGenerator = cg;
   }
 
+  @Override
   protected Iterator<Event> createEvents(LemmaSample sample) {
 
     if (sample != null) {
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleSequenceStream.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleSequenceStream.java
index 48c5e3e5..a086a9e4 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleSequenceStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleSequenceStream.java
@@ -25,6 +25,9 @@ import opennlp.tools.ml.model.Sequence;
 import opennlp.tools.ml.model.SequenceStream;
 import opennlp.tools.util.ObjectStream;
 
+/**
+ * A {@link SequenceStream} implementation encapsulating {@link LemmaSample samples}.
+ */
 public class LemmaSampleSequenceStream implements SequenceStream<LemmaSample> {
 
   private final ObjectStream<LemmaSample> samples;
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleStream.java
index 9c661a52..b09b05c4 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmaSampleStream.java
@@ -26,16 +26,23 @@ import opennlp.tools.util.ObjectStream;
 
 
 /**
- * Reads data for training and testing the lemmatizer. The format consists of:
- * word\tpostag\tlemma.
- * @version 2016-02-16
+ * Reads data for training and testing the {@link Lemmatizer}.
+ * <p>
+ * The format consists of:
+ * {@code word\tpostag\tlemma}.
  */
 public class LemmaSampleStream extends FilterObjectStream<String, LemmaSample> {
 
+  /**
+   * Initializes a {@link LemmaSampleStream} instance.
+   *
+   * @param samples A plain text {@link ObjectStream line stream}.
+   */
   public LemmaSampleStream(ObjectStream<String> samples) {
     super(samples);
   }
 
+  @Override
   public LemmaSample read() throws IOException {
 
     List<String> toks = new ArrayList<>();
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/Lemmatizer.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/Lemmatizer.java
index 933eec10..4b6e9910 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/Lemmatizer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/Lemmatizer.java
@@ -20,27 +20,26 @@ package opennlp.tools.lemmatizer;
 import java.util.List;
 
 /**
- * The interface for lemmatizers.
+ * The common interface for lemmatizers.
  */
 public interface Lemmatizer {
 
   /**
-   * Generates lemmas for the word and postag returning the result in an array.
+   * Generates lemmas for the word and postag.
    *
-   * @param toks an array of the tokens
+   * @param toks An array of the tokens
    * @param tags an array of the pos tags
    *
-   * @return an array of possible lemmas for each token in the sequence.
+   * @return An array of possible lemmas for each token in the {@code toks} sequence.
    */
   String[] lemmatize(String[] toks, String[] tags);
 
   /**
-   * Generates a lemma tags for the word and postag returning the result in a list
-   * of every possible lemma for each token and postag.
+   * Generates lemma tags for the word and postag.
    *
-   * @param toks an array of the tokens
-   * @param tags an array of the pos tags
-   * @return a list of every possible lemma for each token in the sequence.
+   * @param toks A list of the tokens
+   * @param tags A list of the pos tags
+   * @return A list of every possible lemma for each token in the {@code toks} sequence.
    */
   List<List<String>> lemmatize(List<String> toks, List<String> tags);
 
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerContextGenerator.java
index 1b6fc0b1..8654f35f 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerContextGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerContextGenerator.java
@@ -20,18 +20,20 @@ package opennlp.tools.lemmatizer;
 import opennlp.tools.util.BeamSearchContextGenerator;
 
 /**
- * Interface for the context generator used for probabilistic lemmatizer.
+ * Interface for the context generator used for a probabilistic {@link Lemmatizer}.
  */
 public interface LemmatizerContextGenerator extends BeamSearchContextGenerator<String> {
 
   /**
    * Returns the contexts for lemmatizing of the specified index.
-   * @param i The index of the token in the specified toks array for which the context should be constructed.
-   * @param toks The tokens of the sentence.  The <code>toString</code> methods of
+   * 
+   * @param i The index of the token in the specified {@code toks} array for which
+   *          the context should be constructed.
+   * @param toks The tokens of the sentence. The {@code toString()} methods of
    *             these objects should return the token text.
-   * @param tags The POS tags for the the specified tokens.
+   * @param tags The POS tags for the specified {@code toks}.
    * @param lemmas The previous decisions made in the tagging of this sequence.
-   *               Only indices less than i will be examined.
+   *               Only indices less than {@code i} will be examined.
    * @return An array of predictive contexts on which a model basis its decisions.
    */
   String[] getContext(int i, String[] toks, String[] tags, String[] lemmas);
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerEvaluator.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerEvaluator.java
index 4a64a378..0a8acf48 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerEvaluator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerEvaluator.java
@@ -23,19 +23,19 @@ import opennlp.tools.util.eval.Mean;
 /**
  * The {@link LemmatizerEvaluator} measures the performance of
  * the given {@link Lemmatizer} with the provided reference
- * {@link LemmaSample}s.
+ * {@link LemmaSample samples}.
  */
 public class LemmatizerEvaluator extends Evaluator<LemmaSample> {
 
-  private Lemmatizer lemmatizer;
+  private final Lemmatizer lemmatizer;
 
-  private Mean wordAccuracy = new Mean();
+  private final Mean wordAccuracy = new Mean();
 
   /**
-   * Initializes the current instance.
+   * Initializes a {@link LemmatizerEvaluator} instance with the given {@link Lemmatizer}.
    *
-   * @param aLemmatizer a lemmatizer
-   * @param listeners an array of evaluation listeners
+   * @param aLemmatizer The {@link Lemmatizer} to evaluate.
+   * @param listeners The {@link LemmatizerEvaluationMonitor evaluation listeners}.
    */
   public LemmatizerEvaluator(Lemmatizer aLemmatizer, LemmatizerEvaluationMonitor ... listeners) {
     super(listeners);
@@ -44,14 +44,14 @@ public class LemmatizerEvaluator extends Evaluator<LemmaSample> {
 
   /**
    * Evaluates the given reference {@link LemmaSample} object.
-   *
+   * <p>
    * This is done by tagging the sentence from the reference
    * {@link LemmaSample} with the {@link Lemmatizer}. The
    * tags are then used to update the word accuracy score.
    *
    * @param reference the reference {@link LemmaSample}.
    *
-   * @return the predicted {@link LemmaSample}.
+   * @return The predicted {@link LemmaSample}.
    */
   @Override
   protected LemmaSample processSample(LemmaSample reference) {
@@ -71,29 +71,24 @@ public class LemmatizerEvaluator extends Evaluator<LemmaSample> {
   }
 
   /**
-   * Retrieves the word accuracy.
-   *
-   * This is defined as:
-   * word accuracy = correctly detected tags / total words
+   * Accuracy is defined as:
+   * {@code word accuracy = correctly detected tags / total words}
    *
-   * @return the word accuracy
+   * @return Retrieves the word accuracy.
    */
   public double getWordAccuracy() {
     return wordAccuracy.mean();
   }
 
   /**
-   * Retrieves the total number of words considered
-   * in the evaluation.
-   *
-   * @return the word count
+   * @return Retrieves the total number of words considered in the evaluation.
    */
   public long getWordCount() {
     return wordAccuracy.count();
   }
 
   /**
-   * Represents this objects as human readable {@link String}.
+   * Returns this object's human-readable {@link String} representation.
    */
   @Override
   public String toString() {
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerFactory.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerFactory.java
index 1d804a85..3a924a63 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerFactory.java
@@ -55,9 +55,7 @@ public class LemmatizerFactory extends BaseToolFactory {
       return ExtensionLoader.instantiateExtension(LemmatizerFactory.class, subclassName);
     } catch (Exception e) {
       String msg = "Could not instantiate the " + subclassName
-          + ". The initialization throw an exception.";
-      System.err.println(msg);
-      e.printStackTrace();
+          + ". The initialization threw an exception.";
       throw new InvalidFormatException(msg, e);
     }
   }
@@ -67,10 +65,16 @@ public class LemmatizerFactory extends BaseToolFactory {
     // no additional artifacts
   }
 
+  /**
+   * @return Retrieves a new {@link SequenceValidator} instance.
+   */
   public SequenceValidator<String> getSequenceValidator() {
     return new DefaultLemmatizerSequenceValidator();
   }
 
+  /**
+   * @return Retrieves a new {@link LemmatizerContextGenerator} instance.
+   */
   public LemmatizerContextGenerator getContextGenerator() {
     return new DefaultLemmatizerContextGenerator();
   }
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerME.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerME.java
index 5b59b874..4a19c516 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerME.java
@@ -40,11 +40,16 @@ import opennlp.tools.util.StringUtil;
 import opennlp.tools.util.TrainingParameters;
 
 /**
- * A probabilistic {@link Lemmatizer}. Tries to predict the induced permutation class
- * for each word depending on its surrounding context. Based on
- * Grzegorz Chrupała. 2008. Towards a Machine-Learning Architecture
- * for Lexical Functional Grammar Parsing. PhD dissertation, Dublin City University.
- * http://grzegorz.chrupala.me/papers/phd-single.pdf
+ * A probabilistic {@link Lemmatizer} implementation.
+ * <p>
+ * Tries to predict the induced permutation class for each word depending on
+ * its surrounding context.
+ * <p>
+ * Based on Grzegorz Chrupała. 2008.
+ * <a href="http://grzegorz.chrupala.me/papers/phd-single.pdf">
+ * Towards a Machine-Learning Architecture for Lexical Functional Grammar Parsing.
+ * </a> PhD dissertation, Dublin City University.
+ *
  */
 public class LemmatizerME implements Lemmatizer {
 
@@ -59,10 +64,11 @@ public class LemmatizerME implements Lemmatizer {
   private final SequenceValidator<String> sequenceValidator;
 
   /**
-   * Initializes the current instance with the provided model
-   * and the default beam size of 3.
+   * Initializes a {@link LemmatizerME} with the provided
+   * {@link LemmatizerModel model} and a default
+   * {@code beam size} of {@code 3}.
    *
-   * @param model the model
+   * @param model The {@link LemmatizerModel} to be used.
    */
   public LemmatizerME(LemmatizerModel model) {
 
@@ -87,12 +93,14 @@ public class LemmatizerME implements Lemmatizer {
     }
   }
 
+  @Override
   public String[] lemmatize(String[] toks, String[] tags) {
     String[] ses = predictSES(toks, tags);
     return decodeLemmas(toks, ses);
   }
 
-  @Override public List<List<String>> lemmatize(List<String> toks,
+  @Override
+  public List<List<String>> lemmatize(List<String> toks,
       List<String> tags) {
     String[] tokens = toks.toArray(new String[toks.size()]);
     String[] posTags = tags.toArray(new String[tags.size()]);
@@ -106,9 +114,11 @@ public class LemmatizerME implements Lemmatizer {
 
   /**
    * Predict Short Edit Script (automatically induced lemma class).
-   * @param toks the array of tokens
-   * @param tags the array of pos tags
-   * @return an array containing the lemma classes
+   *
+   * @param toks An array of tokens.
+   * @param tags An array of postags.
+   *             
+   * @return An array of possible lemma classes for each token in {@code toks}.
    */
   public String[] predictSES(String[] toks, String[] tags) {
     bestSequence = model.bestSequence(toks, new Object[] {tags}, contextGenerator, sequenceValidator);
@@ -118,10 +128,12 @@ public class LemmatizerME implements Lemmatizer {
 
   /**
    * Predict all possible lemmas (using a default upper bound).
-   * @param numLemmas the default number of lemmas
-   * @param toks the tokens
-   * @param tags the postags
-   * @return a double array containing all posible lemmas for each token and postag pair
+   * 
+   * @param numLemmas The upper bound for the number of lemmas to predict.
+   * @param toks An array of tokens.
+   * @param tags An array of postags.
+   *             
+   * @return A 2-dimensional array containing all possible lemmas for each token and postag pair.
    */
   public String[][] predictLemmas(int numLemmas, String[] toks, String[] tags) {
     Sequence[] bestSequences = model.bestSequences(numLemmas, toks, new Object[] {tags},
@@ -137,9 +149,11 @@ public class LemmatizerME implements Lemmatizer {
 
   /**
    * Decodes the lemma from the word and the induced lemma class.
-   * @param toks the array of tokens
-   * @param preds the predicted lemma classes
-   * @return the array of decoded lemmas
+   *
+   * @param toks An array of tokens.
+   * @param preds An array of predicted lemma classes.
+   *              
+   * @return The array of decoded lemmas.
    */
   public static String[] decodeLemmas(String[] toks, String[] preds) {
     List<String> lemmas = new ArrayList<>();
@@ -153,6 +167,14 @@ public class LemmatizerME implements Lemmatizer {
     return lemmas.toArray(new String[lemmas.size()]);
   }
 
+  /**
+   * Encodes each token and its lemma as a lemma class.
+   *
+   * @param toks An array of tokens.
+   * @param lemmas An array of lemmas.
+   *               
+   * @return The array of lemma classes.
+   */
   public static String[] encodeLemmas(String[] toks, String[] lemmas) {
     List<String> sesList = new ArrayList<>();
     for (int i = 0; i < toks.length; i++) {
@@ -165,21 +187,36 @@ public class LemmatizerME implements Lemmatizer {
     return sesList.toArray(new String[sesList.size()]);
   }
 
+  /**
+   * @param sentence An array of tokens.
+   * @param tags An array of postags.
+   *             
+   * @return Retrieves the top-k {@link Sequence sequences}.
+   */
   public Sequence[] topKSequences(String[] sentence, String[] tags) {
     return model.bestSequences(DEFAULT_BEAM_SIZE, sentence,
         new Object[] { tags }, contextGenerator, sequenceValidator);
   }
 
+  /**
+   * @param sentence An array of tokens.
+   * @param tags An array of postags.
+   * @param minSequenceScore The minimum score to be achieved.
+   *
+   * @return Retrieves the top-k {@link Sequence sequences}.
+   */
   public Sequence[] topKSequences(String[] sentence, String[] tags, double minSequenceScore) {
     return model.bestSequences(DEFAULT_BEAM_SIZE, sentence, new Object[] { tags }, minSequenceScore,
         contextGenerator, sequenceValidator);
   }
 
   /**
-   * Populates the specified array with the probabilities of the last decoded sequence.  The
-   * sequence was determined based on the previous call to <code>lemmatize</code>.  The
-   * specified array should be at least as large as the number of tokens in the
-   * previous call to <code>lemmatize</code>.
+   * Populates the specified array with the probabilities of the last decoded sequence.
+   * The sequence was determined based on the previous call to
+   * {@link #lemmatize(String[], String[])}.
+   * <p>
+   * The specified array should be at least as large as the number of tokens in the
+   * previous call to {@link #lemmatize(String[], String[])}.
    *
    * @param probs An array used to hold the probabilities of the last decoded sequence.
    */
@@ -188,49 +225,57 @@ public class LemmatizerME implements Lemmatizer {
   }
 
   /**
-   * Returns an array with the probabilities of the last decoded sequence.  The
-   * sequence was determined based on the previous call to <code>chunk</code>.
-   * @return An array with the same number of probabilities as tokens were sent to <code>chunk</code>
-   *     when it was last called.
+   * Returns an array with the probabilities of the last decoded sequence.
+   * The sequence was determined based on the previous call to
+   * {@link #lemmatize(String[], String[])}.
+   *
+   * @return An array with as many probabilities as there were tokens sent to
+   *         {@link #lemmatize(String[], String[])} when it was last called.
    */
   public double[] probs() {
     return bestSequence.getProbs();
   }
 
-  public static LemmatizerModel train(String languageCode,
-      ObjectStream<LemmaSample> samples, TrainingParameters trainParams,
-      LemmatizerFactory posFactory) throws IOException {
-
-    int beamSize = trainParams.getIntParameter(BeamSearch.BEAM_SIZE_PARAMETER,
-            LemmatizerME.DEFAULT_BEAM_SIZE);
-
-    LemmatizerContextGenerator contextGenerator = posFactory.getContextGenerator();
+  /**
+   * Starts a training of a {@link LemmatizerModel} with the given parameters.
+   *
+   * @param languageCode The ISO conform language code.
+   * @param samples The {@link ObjectStream} of {@link LemmaSample} used as input for training.
+   * @param params The {@link TrainingParameters} for the context of the training.
+   * @param factory The {@link LemmatizerFactory} for creating related objects defined
+   *                via {@code params}.
+   *
+   * @return A valid, trained {@link LemmatizerModel} instance.
+   * @throws IOException Thrown if IO errors occurred.
+   */
+  public static LemmatizerModel train(String languageCode, ObjectStream<LemmaSample> samples,
+                                      TrainingParameters params, LemmatizerFactory factory)
+          throws IOException {
 
+    LemmatizerContextGenerator contextGenerator = factory.getContextGenerator();
     Map<String, String> manifestInfoEntries = new HashMap<>();
-
-    TrainerType trainerType = TrainerFactory.getTrainerType(trainParams);
+    TrainerType trainerType = TrainerFactory.getTrainerType(params);
 
     MaxentModel lemmatizerModel = null;
     SequenceClassificationModel<String> seqLemmatizerModel = null;
     if (TrainerType.EVENT_MODEL_TRAINER.equals(trainerType)) {
       ObjectStream<Event> es = new LemmaSampleEventStream(samples, contextGenerator);
 
-      EventTrainer trainer = TrainerFactory.getEventTrainer(trainParams,
+      EventTrainer trainer = TrainerFactory.getEventTrainer(params,
           manifestInfoEntries);
       lemmatizerModel = trainer.train(es);
     }
     else if (TrainerType.EVENT_MODEL_SEQUENCE_TRAINER.equals(trainerType)) {
       LemmaSampleSequenceStream ss = new LemmaSampleSequenceStream(samples, contextGenerator);
       EventModelSequenceTrainer trainer =
-          TrainerFactory.getEventModelSequenceTrainer(trainParams, manifestInfoEntries);
+          TrainerFactory.getEventModelSequenceTrainer(params, manifestInfoEntries);
       lemmatizerModel = trainer.train(ss);
     }
     else if (TrainerType.SEQUENCE_TRAINER.equals(trainerType)) {
       SequenceTrainer<LemmaSample> trainer = TrainerFactory.getSequenceModelTrainer(
-          trainParams, manifestInfoEntries);
+              params, manifestInfoEntries);
 
       // TODO: This will probably cause issue, since the feature generator uses the outcomes array
-
       LemmaSampleSequenceStream ss = new LemmaSampleSequenceStream(samples, contextGenerator);
       seqLemmatizerModel = trainer.train(ss);
     }
@@ -238,19 +283,34 @@ public class LemmatizerME implements Lemmatizer {
       throw new IllegalArgumentException("Trainer type is not supported: " + trainerType);
     }
 
+    int beamSize = params.getIntParameter(BeamSearch.BEAM_SIZE_PARAMETER,
+            LemmatizerME.DEFAULT_BEAM_SIZE);
     if (lemmatizerModel != null) {
-      return new LemmatizerModel(languageCode, lemmatizerModel, beamSize, manifestInfoEntries, posFactory);
+      return new LemmatizerModel(languageCode, lemmatizerModel, beamSize, manifestInfoEntries, factory);
     }
     else {
-      return new LemmatizerModel(languageCode, seqLemmatizerModel, manifestInfoEntries, posFactory);
+      return new LemmatizerModel(languageCode, seqLemmatizerModel, manifestInfoEntries, factory);
     }
   }
 
+  /**
+   * @param sentence An array of tokens.
+   * @param tags An array of postags.
+   *
+   * @return Retrieves the top-k {@link Sequence lemma classes}.
+   */
   public Sequence[] topKLemmaClasses(String[] sentence, String[] tags) {
     return model.bestSequences(DEFAULT_BEAM_SIZE, sentence,
         new Object[] { tags }, contextGenerator, sequenceValidator);
   }
 
+  /**
+   * @param sentence An array of tokens.
+   * @param tags An array of postags.
+   * @param minSequenceScore The minimum score to be achieved.
+   *
+   * @return Retrieves the top-k {@link Sequence lemma classes}.
+   */
   public Sequence[] topKLemmaClasses(String[] sentence, String[] tags, double minSequenceScore) {
     return model.bestSequences(DEFAULT_BEAM_SIZE, sentence, new Object[] { tags }, minSequenceScore,
         contextGenerator, sequenceValidator);
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerModel.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerModel.java
index a4f7c556..3fa2aac3 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerModel.java
@@ -21,6 +21,7 @@ import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
 import java.net.URL;
+import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.Map;
 import java.util.Properties;
@@ -34,8 +35,7 @@ import opennlp.tools.util.InvalidFormatException;
 import opennlp.tools.util.model.BaseModel;
 
 /**
- * The {@link LemmatizerModel} is the model used
- * by a learnable {@link Lemmatizer}.
+ * The {@link LemmatizerModel} is the model used by a learnable {@link Lemmatizer}.
  *
  * @see LemmatizerME
  */
@@ -45,6 +45,14 @@ public class LemmatizerModel extends BaseModel {
   private static final String COMPONENT_NAME = "StatisticalLemmatizer";
   private static final String LEMMATIZER_MODEL_ENTRY_NAME = "lemmatizer.model";
 
+  /**
+   * Initializes a {@link LemmatizerModel} instance via given parameters.
+   *
+   * @param languageCode An ISO conform language code.
+   * @param lemmatizerModel A valid {@link SequenceClassificationModel}.
+   * @param manifestInfoEntries Additional information kept in the manifest.
+   * @param factory The {@link LemmatizerFactory} for creating related objects.
+   */
   public LemmatizerModel(String languageCode, SequenceClassificationModel<String> lemmatizerModel,
       Map<String, String> manifestInfoEntries, LemmatizerFactory factory) {
     super(COMPONENT_NAME, languageCode, manifestInfoEntries, factory);
@@ -52,11 +60,28 @@ public class LemmatizerModel extends BaseModel {
     checkArtifactMap();
   }
 
+  /**
+   * Initializes a {@link LemmatizerModel} instance via given parameters.
+   *
+   * @param languageCode An ISO conform language code.
+   * @param lemmatizerModel A valid {@link MaxentModel}.
+   * @param manifestInfoEntries Additional information kept in the manifest.
+   * @param factory The {@link LemmatizerFactory} for creating related objects.
+   */
   public LemmatizerModel(String languageCode, MaxentModel lemmatizerModel,
       Map<String, String> manifestInfoEntries, LemmatizerFactory factory) {
     this(languageCode, lemmatizerModel, LemmatizerME.DEFAULT_BEAM_SIZE, manifestInfoEntries, factory);
   }
 
+  /**
+   * Initializes a {@link LemmatizerModel} instance via given parameters.
+   *
+   * @param languageCode An ISO conform language code.
+   * @param lemmatizerModel A valid {@link MaxentModel}.
+   * @param beamSize The size of the beam that should be used when decoding sequences.
+   * @param manifestInfoEntries Additional information kept in the manifest.
+   * @param factory The {@link LemmatizerFactory} for creating related objects.
+   */
   public LemmatizerModel(String languageCode, MaxentModel lemmatizerModel, int beamSize,
       Map<String, String> manifestInfoEntries, LemmatizerFactory factory) {
     super(COMPONENT_NAME, languageCode, manifestInfoEntries, factory);
@@ -67,23 +92,58 @@ public class LemmatizerModel extends BaseModel {
     checkArtifactMap();
   }
 
+  /**
+   * Initializes a {@link LemmatizerModel} instance via given parameters.
+   *
+   * @param languageCode An ISO conform language code.
+   * @param lemmatizerModel A valid {@link MaxentModel}.
+   * @param factory The {@link LemmatizerFactory} for creating related objects.
+   */
   public LemmatizerModel(String languageCode, MaxentModel lemmatizerModel, LemmatizerFactory factory) {
     this(languageCode, lemmatizerModel, null, factory);
   }
 
-  public LemmatizerModel(InputStream in) throws IOException, InvalidFormatException {
+  /**
+   * Initializes a {@link LemmatizerModel} instance via a valid {@link InputStream}.
+   *
+   * @param in The {@link InputStream} used for loading the model.
+   *
+   * @throws IOException Thrown if IO errors occurred during initialization.
+   */
+  public LemmatizerModel(InputStream in) throws IOException {
     super(COMPONENT_NAME, in);
   }
 
-  public LemmatizerModel(File modelFile) throws IOException, InvalidFormatException {
+  /**
+   * Initializes a {@link LemmatizerModel} instance via a valid {@link File}.
+   *
+   * @param modelFile The {@link File} used for loading the model.
+   *
+   * @throws IOException Thrown if IO errors occurred during initialization.
+   */
+  public LemmatizerModel(File modelFile) throws IOException {
     super(COMPONENT_NAME, modelFile);
   }
 
-  public LemmatizerModel(Path modelPath) throws IOException, InvalidFormatException {
-    this(modelPath.toFile());
+  /**
+   * Initializes a {@link LemmatizerModel} instance via a valid {@link Path}.
+   *
+   * @param modelPath The {@link Path} used for loading the model.
+   *
+   * @throws IOException Thrown if IO errors occurred during initialization.
+   */
+  public LemmatizerModel(Path modelPath) throws IOException {
+    super(COMPONENT_NAME, Files.newInputStream(modelPath));
   }
 
-  public LemmatizerModel(URL modelURL) throws IOException, InvalidFormatException {
+  /**
+   * Initializes a {@link LemmatizerModel} instance via a valid {@link URL}.
+   *
+   * @param modelURL The {@link URL} used for loading the model.
+   *
+   * @throws IOException Thrown if IO errors occurred during initialization.
+   */
+  public LemmatizerModel(URL modelURL) throws IOException {
     super(COMPONENT_NAME, modelURL);
   }
 
@@ -96,6 +156,9 @@ public class LemmatizerModel extends BaseModel {
     }
   }
 
+  /**
+   * @return Retrieves a {@link SequenceClassificationModel} instance.
+   */
   public SequenceClassificationModel<String> getLemmatizerSequenceModel() {
 
     Properties manifest = (Properties) artifactMap.get(MANIFEST_ENTRY);
@@ -123,7 +186,9 @@ public class LemmatizerModel extends BaseModel {
     return LemmatizerFactory.class;
   }
 
-
+  /**
+   * @return Retrieves the active {@link LemmatizerFactory}.
+   */
   public LemmatizerFactory getFactory() {
     return (LemmatizerFactory) this.toolFactory;
   }
diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/package-info.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/package-info.java
index 74d21487..f8f0cd8e 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/package-info.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/package-info.java
@@ -16,6 +16,6 @@
  */
 
 /**
- * Package related with the lemmatizer tool
+ * Package related to the lemmatizer functionality.
  */
 package opennlp.tools.lemmatizer;
diff --git a/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/DummyLemmatizer.java b/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/DummyLemmatizer.java
index dcfc883f..02a84dfd 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/DummyLemmatizer.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/DummyLemmatizer.java
@@ -22,17 +22,18 @@ import java.util.Arrays;
 import java.util.List;
 
 /**
- * This dummy lemmatizer implementation simulates a LemmatizerME. The file has
- * samples of sentences, with target and predicted values.
+ * This dummy lemmatizer implementation simulates a {@link LemmatizerME}.
+ * The file has samples of sentences, with target and predicted values.
  */
 public class DummyLemmatizer implements Lemmatizer {
 
-  private DummyLemmaSampleStream mSampleStream;
+  private final DummyLemmaSampleStream mSampleStream;
 
   public DummyLemmatizer(DummyLemmaSampleStream aSampleStream) {
     mSampleStream = aSampleStream;
   }
 
+  @Override
   public String[] lemmatize(String[] toks, String[] tags) {
     try {
       LemmaSample predsSample = mSampleStream.read();