You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by ki...@apache.org on 2022/12/04 13:29:36 UTC

[opennlp] branch master updated: OPENNLP-1403 Enhance JavaDoc in opennlp.tools.langdetect and opennlp.tools.languagemodel packages

This is an automated email from the ASF dual-hosted git repository.

kinow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp.git


The following commit(s) were added to refs/heads/master by this push:
     new babe7310 OPENNLP-1403 Enhance JavaDoc in opennlp.tools.langdetect and opennlp.tools.languagemodel packages
babe7310 is described below

commit babe7310d327f1d61864f05f73fecf91e498ba10
Author: Martin Wiesner <ma...@hs-heilbronn.de>
AuthorDate: Sun Dec 4 12:53:56 2022 +0100

    OPENNLP-1403 Enhance JavaDoc in opennlp.tools.langdetect and opennlp.tools.languagemodel packages
    
    - adds missing JavaDoc
    - improves existing documentation for clarity
    - removes superfluous text
    - adds package-info.java for opennlp.tools.langdetect package
    - adds 'final' modifier where useful and applicable
    - adds 'Override' annotation where useful and applicable
    - removes deprecated `calculateProbability(StringList tokens)` method in LanguageModel and privatized related impl in NGramLanguageModel
    - fixes some typos
---
 .../opennlp/tools/chunker/ChunkSampleStream.java   |  4 +-
 .../tools/chunker/ChunkerCrossValidator.java       |  3 +
 .../main/java/opennlp/tools/chunker/ChunkerME.java |  2 +-
 .../DefaultLanguageDetectorContextGenerator.java   | 16 ++---
 .../java/opennlp/tools/langdetect/Language.java    |  7 ++
 .../opennlp/tools/langdetect/LanguageDetector.java | 17 ++++-
 .../tools/langdetect/LanguageDetectorConfig.java   | 29 +++-----
 .../LanguageDetectorContextGenerator.java          | 10 ++-
 .../langdetect/LanguageDetectorCrossValidator.java | 35 +++++----
 .../LanguageDetectorEvaluationMonitor.java         |  5 +-
 .../langdetect/LanguageDetectorEvaluator.java      | 25 +++----
 .../langdetect/LanguageDetectorEventStream.java    | 15 ++--
 .../tools/langdetect/LanguageDetectorFactory.java  | 20 +++++-
 .../tools/langdetect/LanguageDetectorME.java       | 51 ++++++-------
 .../tools/langdetect/LanguageDetectorModel.java    | 38 +++++++++-
 .../langdetect/LanguageDetectorSampleStream.java   | 10 ++-
 .../opennlp/tools/langdetect/LanguageSample.java   |  2 +-
 .../langdetect/ProbingLanguageDetectionResult.java |  9 ++-
 ...torEvaluationMonitor.java => package-info.java} | 11 +--
 .../opennlp/tools/languagemodel/LanguageModel.java | 17 ++---
 .../tools/languagemodel/NGramLanguageModel.java    | 83 +++++++++++++++-------
 21 files changed, 251 insertions(+), 158 deletions(-)

diff --git a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSampleStream.java
index 3d70fa13..85e62bbc 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSampleStream.java
@@ -34,9 +34,9 @@ import opennlp.tools.util.ObjectStream;
 public class ChunkSampleStream extends FilterObjectStream<String, ChunkSample> {
 
   /**
-   * Initializes the current instance.
+   * Initializes a {@link ChunkSampleStream instance}.
    *
-   * @param samples a plain text line stream
+   * @param samples A plain text {@link ObjectStream line stream}.
    */
   public ChunkSampleStream(ObjectStream<String> samples) {
     super(samples);
diff --git a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerCrossValidator.java b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerCrossValidator.java
index 41b54cd5..41b7c0d6 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerCrossValidator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerCrossValidator.java
@@ -24,6 +24,9 @@ import opennlp.tools.util.TrainingParameters;
 import opennlp.tools.util.eval.CrossValidationPartitioner;
 import opennlp.tools.util.eval.FMeasure;
 
+/**
+ * Cross validator for {@link Chunker}.
+ */
 public class ChunkerCrossValidator {
 
   private final String languageCode;
diff --git a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java
index ebaf6f62..e1246fa7 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java
@@ -154,7 +154,7 @@ public class ChunkerME implements Chunker {
   }
 
   /**
-   * Start a training of a {@link ChunkerModel} with the given parameters.
+   * Starts a training of a {@link ChunkerModel} with the given parameters.
    *
    * @param lang The ISO conform language code.
    * @param in The {@link ObjectStream} of {@link ChunkSample} used as input for training.
diff --git a/opennlp-tools/src/main/java/opennlp/tools/langdetect/DefaultLanguageDetectorContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/langdetect/DefaultLanguageDetectorContextGenerator.java
index 8d25201b..26fffa8b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/langdetect/DefaultLanguageDetectorContextGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/langdetect/DefaultLanguageDetectorContextGenerator.java
@@ -34,11 +34,12 @@ public class DefaultLanguageDetectorContextGenerator implements LanguageDetector
   protected final CharSequenceNormalizer normalizer;
 
   /**
-   * Creates a customizable @{@link DefaultLanguageDetectorContextGenerator} that computes ngrams from text
-   * @param minLength min ngrams chars
-   * @param maxLength max ngrams chars
-   * @param normalizers zero or more normalizers to
-   *                    be applied in to the text before extracting ngrams
+   * Creates a customizable {@link DefaultLanguageDetectorContextGenerator} that computes ngrams from text.
+   *
+   * @param minLength The min number of ngrams characters. Must be greater than {@code 0}.
+   * @param maxLength The max number of ngrams characters. Must be greater than {@code 0}
+   *                  and must be greater than {@code minLength}.
+   * @param normalizers Zero or more normalizers to be applied to the text before extracting ngrams.
    */
   public DefaultLanguageDetectorContextGenerator(int minLength, int maxLength,
                                                  CharSequenceNormalizer... normalizers) {
@@ -48,11 +49,6 @@ public class DefaultLanguageDetectorContextGenerator implements LanguageDetector
     this.normalizer = new AggregateCharSequenceNormalizer(normalizers);
   }
 
-  /**
-   * Generates the context for a document using character ngrams.
-   * @param document document to extract context from
-   * @return the generated context
-   */
   @Override
   public String[] getContext(CharSequence document) {
     Collection<String> context = new ArrayList<>();
diff --git a/opennlp-tools/src/main/java/opennlp/tools/langdetect/Language.java b/opennlp-tools/src/main/java/opennlp/tools/langdetect/Language.java
index 05f40dcd..02b2d0b7 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/langdetect/Language.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/langdetect/Language.java
@@ -29,10 +29,17 @@ public class Language implements Serializable {
   private final String lang;
   private final double confidence;
 
+  /**
+   * @param lang The language identifier.
+   */
   public Language(String lang) {
     this(lang, 0);
   }
 
+  /**
+   * @param lang The language identifier.
+   * @param confidence The confidence computed during language detection.
+   */
   public Language(String lang, double confidence) {
     Objects.requireNonNull(lang, "lang must not be null");
     this.lang = lang;
diff --git a/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetector.java b/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetector.java
index 7e4579e2..8de9805f 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetector.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetector.java
@@ -20,14 +20,29 @@ package opennlp.tools.langdetect;
 import java.io.Serializable;
 
 /**
- * The interface for LanguageDetector which provide the @{@link Language} according to the context.
+ * The interface for {@link LanguageDetector} which predicts the {@link Language} for a context.
  */
 public interface LanguageDetector extends Serializable {
 
+  /**
+   * Predicts the {@link Language languages} for the full {@code content} length.
+   *
+   * @param content The textual content to detect potential {@link Language languages} from.
+   * @return the predicted languages
+   */
   Language[] predictLanguages(CharSequence content);
 
+  /**
+   * Predicts the {@link Language} for the full {@code content} length.
+   *
+   * @param content The textual content to detect potential {@link Language languages} from.
+   * @return the language with the highest confidence
+   */
   Language predictLanguage(CharSequence content);
 
+  /**
+   * @return Retrieves an array of language (codes) that are supported by a {@link LanguageDetector}.
+   */
   String[] getSupportedLanguages();
 
 }
diff --git a/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorConfig.java b/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorConfig.java
index 303bda37..eb9290ec 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorConfig.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorConfig.java
@@ -34,11 +34,8 @@ public class LanguageDetectorConfig {
   private int minConsecImprovements = DEFAULT_MIN_CONSEC_IMPROVEMENTS;
   private double minDiff = DEFAULT_MIN_DIFF;
 
-
   /**
-   * Maximum length in codepoints of text to process.
-   *
-   * @return
+   * @return The maximum length in codepoints of text to process.
    */
   public int getMaxLength() {
     return maxLength;
@@ -49,14 +46,12 @@ public class LanguageDetectorConfig {
   }
 
   /**
-   * Size in codepoints of chunk to process at each
-   * step for the probing detection.
-   * <p>
    * After processing a chunk of this size, the probing
    * detection will compute probabilities and determine
    * if there is enough confidence to stop.
    *
-   * @return
+   * @return The size in codepoints of chunk to process at each step for
+   *         the probing detection.
    */
   public int getChunkSize() {
     return chunkSize;
@@ -67,14 +62,12 @@ public class LanguageDetectorConfig {
   }
 
   /**
-   * Minimum number of consecutive increased probabilities
-   * for the top language required in probing detection
-   * to stop early.
-   * <p>
-   * If this value equals 0, probing detection will
+   * If this value equals {@code 0}, probing detection will
    * rely solely on {@link #getMinDiff()}
    *
-   * @return minimum consecutive improvements
+   * @return The minimum number of consecutive increased probabilities
+   *         for the top language required in probing detection
+   *         to stop early.
    */
   public int getMinConsecImprovements() {
     return minConsecImprovements;
@@ -85,13 +78,11 @@ public class LanguageDetectorConfig {
   }
 
   /**
-   * Minimum difference in confidence between the top predicted
-   * language and the next most likely language.
-     * <p>
-   * If this value equals 0, probing detection will
+   * If this value equals {@code 0}, probing detection will
    * rely solely on {@link #getMinConsecImprovements()}
    *
-   * @return
+   * @return The minimum difference in confidence between the top predicted
+   *         language and the next most likely language.
    */
   public double getMinDiff() {
     return minDiff;
diff --git a/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorContextGenerator.java
index 39de4d7a..af0d13e6 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorContextGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorContextGenerator.java
@@ -20,8 +20,16 @@ package opennlp.tools.langdetect;
 import java.io.Serializable;
 
 /**
- * A context generator interface for language detector.
+ * A context generator interface for {@link LanguageDetector}.
  */
 public interface LanguageDetectorContextGenerator extends Serializable {
+
+  /**
+   * Retrieves the contexts for a {@code document} using character ngrams.
+   *
+   * @param document The textual input used to extract context from.
+   *
+   * @return An array of contexts on which a model bases its decisions.
+   */
   String[] getContext(CharSequence document);
 }
diff --git a/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorCrossValidator.java b/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorCrossValidator.java
index ce1823af..9aa85153 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorCrossValidator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorCrossValidator.java
@@ -19,29 +19,32 @@ package opennlp.tools.langdetect;
 
 import java.io.IOException;
 
-import opennlp.tools.doccat.FeatureGenerator;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.TrainingParameters;
 import opennlp.tools.util.eval.CrossValidationPartitioner;
 import opennlp.tools.util.eval.Mean;
 
 /**
- * Cross validator for language detector
+ * Cross validator for {@link LanguageDetector}.
  */
 public class LanguageDetectorCrossValidator {
 
   private final TrainingParameters params;
 
-  private Mean documentAccuracy = new Mean();
+  private final Mean documentAccuracy = new Mean();
 
-  private LanguageDetectorEvaluationMonitor[] listeners;
+  private final LanguageDetectorEvaluationMonitor[] listeners;
 
-  private LanguageDetectorFactory factory;
+  private final LanguageDetectorFactory factory;
 
 
   /**
-   * Creates a {@link LanguageDetectorCrossValidator} with the given
-   * {@link FeatureGenerator}s.
+   * Initializes a {@link LanguageDetectorCrossValidator} with the
+   * given {@link TrainingParameters parameters}.
+   *
+   * @param mlParams The {@link TrainingParameters} for the context of cross validation.
+   * @param factory The {@link LanguageDetectorFactory} for creating related objects.
+   * @param listeners the {@link LanguageDetectorEvaluationMonitor evaluation listeners}.
    */
   public LanguageDetectorCrossValidator(TrainingParameters mlParams,
                                         LanguageDetectorFactory factory,
@@ -54,12 +57,10 @@ public class LanguageDetectorCrossValidator {
   /**
    * Starts the evaluation.
    *
-   * @param samples
-   *          the data to train and test
-   * @param nFolds
-   *          number of folds
+   * @param samples The {@link ObjectStream} of {@link LanguageSample samples} to train and test with.
+   * @param nFolds Number of folds. It must be greater than zero.
    *
-   * @throws IOException
+   * @throws IOException Thrown if IO errors occurred.
    */
   public void evaluate(ObjectStream<LanguageSample> samples, int nFolds)
       throws IOException {
@@ -87,19 +88,15 @@ public class LanguageDetectorCrossValidator {
   }
 
   /**
-   * Retrieves the accuracy for all iterations.
-   *
-   * @return the word accuracy
+   * @return Retrieves the document accuracy for all iterations.
    */
   public double getDocumentAccuracy() {
     return documentAccuracy.mean();
   }
 
   /**
-   * Retrieves the number of words which where validated over all iterations.
-   * The result is the amount of folds multiplied by the total number of words.
-   *
-   * @return the word count
+   * @return Retrieves the number of documents which were validated over all iterations.
+   *         The result is the amount of folds multiplied by the total number of documents.
    */
   public long getDocumentCount() {
     return documentAccuracy.count();
diff --git a/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorEvaluationMonitor.java b/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorEvaluationMonitor.java
index 30f33137..f3ca3f88 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorEvaluationMonitor.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorEvaluationMonitor.java
@@ -20,9 +20,8 @@ package opennlp.tools.langdetect;
 import opennlp.tools.util.eval.EvaluationMonitor;
 
 /**
- * {@link EvaluationMonitor} for Language Detector.
+ * A marker interface for evaluating {@link LanguageDetector language detectors}.
  */
-public interface LanguageDetectorEvaluationMonitor extends
-    EvaluationMonitor<LanguageSample> {
+public interface LanguageDetectorEvaluationMonitor extends EvaluationMonitor<LanguageSample> {
 
 }
diff --git a/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorEvaluator.java b/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorEvaluator.java
index bbf73c32..211ec36e 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorEvaluator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorEvaluator.java
@@ -31,14 +31,15 @@ import opennlp.tools.util.eval.Mean;
  */
 public class LanguageDetectorEvaluator extends Evaluator<LanguageSample> {
 
-  private LanguageDetector languageDetector;
+  private final LanguageDetector languageDetector;
 
-  private Mean accuracy = new Mean();
+  private final Mean accuracy = new Mean();
 
   /**
-   * Initializes the current instance.
+   * Initializes an instance to evaluate a {@link LanguageDetector}.
    *
-   * @param langDetect the language detector instance
+   * @param langDetect the {@link LanguageDetector} to evaluate.
+   * @param listeners the {@link LanguageDetectorEvaluationMonitor evaluation listeners}.
    */
   public LanguageDetectorEvaluator(LanguageDetector langDetect,
                                    LanguageDetectorEvaluationMonitor ... listeners) {
@@ -48,21 +49,18 @@ public class LanguageDetectorEvaluator extends Evaluator<LanguageSample> {
 
   /**
    * Evaluates the given reference {@link LanguageSample} object.
-   *
-   * This is done by categorizing the document from the provided
+   * This is achieved by categorizing the document of the provided
    * {@link LanguageSample}. The detected language is then used
    * to calculate and update the score.
    *
    * @param sample the reference {@link LanguageSample}.
+   * @return The processed {@link LanguageSample}.
    */
   public LanguageSample processSample(LanguageSample sample) {
 
     CharSequence document = sample.getContext();
-
     Language predicted = languageDetector.predictLanguage(document);
 
-
-
     if (sample.getLanguage().getLang().equals(predicted.getLang())) {
       accuracy.add(1);
     }
@@ -74,11 +72,8 @@ public class LanguageDetectorEvaluator extends Evaluator<LanguageSample> {
   }
 
   /**
-   * Retrieves the accuracy of provided {@link DocumentCategorizer}.
-   *
-   * accuracy = correctly categorized documents / total documents
-   *
-   * @return the accuracy
+   * @return Retrieves the accuracy of the provided {@link LanguageDetector}.
+   *         Here: {@code accuracy = correctly categorized documents / total documents}.
    */
   public double getAccuracy() {
     return accuracy.mean();
@@ -89,7 +84,7 @@ public class LanguageDetectorEvaluator extends Evaluator<LanguageSample> {
   }
 
   /**
-   * Represents this objects as human readable {@link String}.
+   * Represents this object as human-readable {@link String}.
    */
   @Override
   public String toString() {
diff --git a/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorEventStream.java b/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorEventStream.java
index 19e6d466..8c76ff4b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorEventStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorEventStream.java
@@ -24,28 +24,29 @@ import opennlp.tools.util.AbstractEventStream;
 import opennlp.tools.util.ObjectStream;
 
 /**
- * Iterator-like class for modeling language detector events.
+ * Iterator-like class for modeling an event stream of {@link LanguageSample samples}.
  */
 public class LanguageDetectorEventStream extends AbstractEventStream<LanguageSample> {
 
-  private LanguageDetectorContextGenerator mContextGenerator;
+  private final LanguageDetectorContextGenerator mContextGenerator;
 
   /**
-   * Initializes the current instance via samples and feature generators.
+   * Initializes an instance via samples and a context generator.
    *
-   * @param data {@link ObjectStream} of {@link LanguageSample}s
+   * @param data An {@link ObjectStream} of {@link LanguageSample samples} as input data.
+   * @param cg A {@link LanguageDetectorContextGenerator} used for the event stream {@code data}.
    */
   public LanguageDetectorEventStream(ObjectStream<LanguageSample> data,
-                                     LanguageDetectorContextGenerator contextGenerator) {
+                                     LanguageDetectorContextGenerator cg) {
     super(data);
 
-    mContextGenerator = contextGenerator;
+    mContextGenerator = cg;
   }
 
   @Override
   protected Iterator<Event> createEvents(final LanguageSample sample) {
 
-    return new Iterator<Event>() {
+    return new Iterator<>() {
 
       private boolean isVirgin = true;
 
diff --git a/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorFactory.java b/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorFactory.java
index b9898877..a397f4ac 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorFactory.java
@@ -28,8 +28,10 @@ import opennlp.tools.util.normalizer.UrlCharSequenceNormalizer;
 
 
 /**
- * Default factory used by Language Detector. Extend this class to change the Language Detector
- * behaviour, such as the {@link LanguageDetectorContextGenerator}.
+ * Default factory used by {@link LanguageDetector}.
+ * <p>
+ * Extend this class to change the Language Detector behaviour,
+ * such as the {@link LanguageDetectorContextGenerator}.
  * The default {@link DefaultLanguageDetectorContextGenerator} will use char n-grams of
  * size 1 to 3 and the following normalizers:
  * <ul>
@@ -39,10 +41,12 @@ import opennlp.tools.util.normalizer.UrlCharSequenceNormalizer;
  * <li> {@link NumberCharSequenceNormalizer}
  * <li> {@link ShrinkCharSequenceNormalizer}
  * </ul>
- *
  */
 public class LanguageDetectorFactory extends BaseToolFactory {
 
+  /**
+   * @return Retrieves a {@link LanguageDetectorContextGenerator}.
+   */
   public LanguageDetectorContextGenerator getContextGenerator() {
     return new DefaultLanguageDetectorContextGenerator(1, 3,
         EmojiCharSequenceNormalizer.getInstance(),
@@ -52,6 +56,16 @@ public class LanguageDetectorFactory extends BaseToolFactory {
         ShrinkCharSequenceNormalizer.getInstance());
   }
 
+  /**
+   * Instantiates a {@link LanguageDetectorFactory} via a given {@code subclassName}.
+   *
+   * @param subclassName The class name used for instantiation. If {@code null}, an
+   *                     instance of {@link LanguageDetectorFactory} will be returned
+   *                     per default. Otherwise, the {@link ExtensionLoader} mechanism
+   *                     is applied to load the requested {@code subclassName}.
+   *
+   * @return A valid {@link LanguageDetectorFactory} instance.
+   */
   public static LanguageDetectorFactory create(String subclassName)
       throws InvalidFormatException {
     if (subclassName == null) {
diff --git a/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorME.java b/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorME.java
index a10b0e22..5679252a 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorME.java
@@ -33,7 +33,7 @@ import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.TrainingParameters;
 
 /**
- * Implements learnable Language Detector
+ * Implements a learnable {@link LanguageDetector}.
  *
  * <p>
  * This will process the entire string when called with
@@ -63,37 +63,25 @@ import opennlp.tools.util.TrainingParameters;
 public class LanguageDetectorME implements LanguageDetector {
 
   protected LanguageDetectorModel model;
-  private LanguageDetectorContextGenerator mContextGenerator;
+  private final LanguageDetectorContextGenerator mContextGenerator;
 
   /**
-   * Initializes the current instance with a language detector model. Default feature
-   * generation is used.
+   * Initializes an instance with a specific {@link LanguageDetectorModel}.
+   * Default feature generation is used.
    *
-   * @param model the language detector model
+   * @param model the {@link LanguageDetectorModel} to be used.
    */
   public LanguageDetectorME(LanguageDetectorModel model) {
     this.model = model;
     this.mContextGenerator = model.getFactory().getContextGenerator();
   }
 
-  /**
-   * This will process the full content length.
-   *
-   * @param content
-   * @return the predicted languages
-   */
   @Override
   public Language[] predictLanguages(CharSequence content) {
     return predict(arrayToCounts(
             mContextGenerator.getContext(content)));
   }
-
-  /**
-   * This will process the full content length.
-   *
-   * @param content
-   * @return the language with the highest confidence
-   */
+  
   @Override
   public Language predictLanguage(CharSequence content) {
     return predictLanguages(content)[0];
@@ -115,7 +103,7 @@ public class LanguageDetectorME implements LanguageDetector {
    * are met.
    *
    * @param content content to be processed
-   * @return result
+   * @return A computed {@link ProbingLanguageDetectionResult}.
    */
   public ProbingLanguageDetectionResult probingPredictLanguages(CharSequence content) {
     return probingPredictLanguages(content,
@@ -127,15 +115,16 @@ public class LanguageDetectorME implements LanguageDetector {
    * specified in {@link LanguageDetectorConfig#DEFAULT_LANGUAGE_DETECTOR_CONFIG}
    * are met.
    *
-   * @param content content to process
-   * @param config config to customize detection
-   * @return
+   * @param content The textual content to process.
+   * @param config The {@link LanguageDetectorConfig} to customize detection.
+   *
+   * @return A computed {@link ProbingLanguageDetectionResult}.
    */
   public ProbingLanguageDetectionResult probingPredictLanguages(CharSequence content,
                                                                 LanguageDetectorConfig config) {
     //list of the languages that received the highest
     //confidence over the last n chunk detections
-    List<Language[]> predictions = new LinkedList();
+    List<Language[]> predictions = new LinkedList<>();
     int start = 0;//where to start the next chunk in codepoints
     Language[] currPredictions = null;
     //cache ngram counts across chunks
@@ -202,13 +191,14 @@ public class LanguageDetectorME implements LanguageDetector {
   }
 
   /**
-   * Override this for different behavior to determine if there is enough
+   * Override this method for different behavior to determine if there is enough
    * confidence in the predictions to stop.
    *
    * @param predictionsQueue queue of earlier predictions
    * @param newPredictions most recent predictions
    * @param ngramCounts -- not currently used, but might be useful
-   * @return whether or not enough text has been processed to make a determination
+   * @return {@code true} if enough text has been processed to make a determination,
+   *         else {@code false}.
    */
   boolean seenEnough(List<Language[]> predictionsQueue, Language[] newPredictions,
                      Map<String, MutableInt> ngramCounts, LanguageDetectorConfig config) {
@@ -265,6 +255,17 @@ public class LanguageDetectorME implements LanguageDetector {
                     codepoints.length);
   }
 
+  /**
+   * Starts a training of a {@link LanguageDetectorModel} with the given parameters.
+   *
+   * @param samples The {@link ObjectStream} of {@link LanguageSample} used as input for training.
+   * @param mlParams The {@link TrainingParameters} for the context of the training.
+   * @param factory The {@link LanguageDetectorFactory} for creating related objects defined
+   *                via {@code mlParams}.
+   *
+   * @return A valid, trained {@link LanguageDetectorModel} instance.
+   * @throws IOException Thrown if IO errors occurred.
+   */
   public static LanguageDetectorModel train(ObjectStream<LanguageSample> samples,
                                             TrainingParameters mlParams,
                                             LanguageDetectorFactory factory)
diff --git a/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorModel.java b/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorModel.java
index c0d9703e..a37b64f0 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorModel.java
@@ -30,13 +30,22 @@ import opennlp.tools.util.InvalidFormatException;
 import opennlp.tools.util.model.BaseModel;
 
 /**
- * A model for language detection
+ * The {@link LanguageDetectorModel} is the model used by a learnable {@link LanguageDetector}.
+ *
+ * @see LanguageDetectorME
  */
 public class LanguageDetectorModel extends BaseModel {
 
   private static final String COMPONENT_NAME = "LanguageDetectorME";
   private static final String LANGDETECT_MODEL_ENTRY_NAME = "langdetect.model";
 
+  /**
+   * Initializes a {@link LanguageDetectorModel} instance via given parameters.
+   *
+   * @param langdetectModel A valid {@link MaxentModel}.
+   * @param manifestInfoEntries Additional information kept in the manifest.
+   * @param factory The {@link LanguageDetectorFactory} for creating related objects.
+   */
   public LanguageDetectorModel(MaxentModel langdetectModel,
                                Map<String, String> manifestInfoEntries,
                                LanguageDetectorFactory factory) {
@@ -46,14 +55,35 @@ public class LanguageDetectorModel extends BaseModel {
     checkArtifactMap();
   }
 
+  /**
+   * Initializes a {@link LanguageDetectorModel} instance via a valid {@link InputStream}.
+   *
+   * @param in The {@link InputStream} used for loading the model.
+   *
+   * @throws IOException Thrown if IO errors occurred during initialization.
+   */
   public LanguageDetectorModel(InputStream in) throws IOException {
     super(COMPONENT_NAME, in);
   }
 
+  /**
+   * Initializes a {@link LanguageDetectorModel} instance via a valid {@link File}.
+   *
+   * @param modelFile The {@link File} used for loading the model.
+   *
+   * @throws IOException Thrown if IO errors occurred during initialization.
+   */
   public LanguageDetectorModel(File modelFile) throws IOException {
     super(COMPONENT_NAME, modelFile);
   }
 
+  /**
+   * Initializes a {@link LanguageDetectorModel} instance via a valid {@link URL}.
+   *
+   * @param modelURL The {@link URL} used for loading the model.
+   *
+   * @throws IOException Thrown if IO errors occurred during initialization.
+   */
   public LanguageDetectorModel(URL modelURL) throws IOException {
     super(COMPONENT_NAME, modelURL);
   }
@@ -67,6 +97,9 @@ public class LanguageDetectorModel extends BaseModel {
     }
   }
 
+  /**
+   * @return The active {@link LanguageDetectorFactory}.
+   */
   public LanguageDetectorFactory getFactory() {
     return (LanguageDetectorFactory) this.toolFactory;
   }
@@ -76,6 +109,9 @@ public class LanguageDetectorModel extends BaseModel {
     return LanguageDetectorFactory.class;
   }
 
+  /**
+   * @return The underlying {@link MaxentModel}.
+   */
   public MaxentModel getMaxentModel() {
     return (MaxentModel) artifactMap.get(LANGDETECT_MODEL_ENTRY_NAME);
   }
diff --git a/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorSampleStream.java
index 2a407f7f..bb06547e 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorSampleStream.java
@@ -23,8 +23,8 @@ import opennlp.tools.util.FilterObjectStream;
 import opennlp.tools.util.ObjectStream;
 
 /**
- * This class reads in string encoded training samples, parses them and
- * outputs {@link LanguageSample} objects.
+ * This class reads in string encoded {@link ObjectStream training samples}, parses them
+ * and outputs {@link LanguageSample} objects.
  * <p>
  * Format:<br>
  * Each line contains one sample document.<br>
@@ -34,10 +34,16 @@ import opennlp.tools.util.ObjectStream;
 public class LanguageDetectorSampleStream
     extends FilterObjectStream<String, LanguageSample> {
 
+  /**
+   * Initializes a {@link LanguageDetectorSampleStream} instance.
+   *
+   * @param samples A plain text {@link ObjectStream line stream}.
+   */
   public LanguageDetectorSampleStream(ObjectStream<String> samples) {
     super(samples);
   }
 
+  @Override
   public LanguageSample read() throws IOException {
     String sampleString;
     while ((sampleString = samples.read()) != null) {
diff --git a/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageSample.java b/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageSample.java
index 041d5966..ae7ead62 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageSample.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageSample.java
@@ -21,7 +21,7 @@ import java.io.Serializable;
 import java.util.Objects;
 
 /**
- * Class which holds a classified document and its @{@link Language}.
+ * Holds a classified document and its {@link Language}.
  */
 public class LanguageSample implements Serializable {
 
diff --git a/opennlp-tools/src/main/java/opennlp/tools/langdetect/ProbingLanguageDetectionResult.java b/opennlp-tools/src/main/java/opennlp/tools/langdetect/ProbingLanguageDetectionResult.java
index b3e8c1ff..bd56b777 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/langdetect/ProbingLanguageDetectionResult.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/langdetect/ProbingLanguageDetectionResult.java
@@ -17,6 +17,9 @@
 
 package opennlp.tools.langdetect;
 
+/**
+ * A data container encapsulating language detection results.
+ */
 public class ProbingLanguageDetectionResult {
   private final Language[] languages;
   private final int length;
@@ -26,13 +29,15 @@ public class ProbingLanguageDetectionResult {
     this.length = length;
   }
 
+  /**
+   * @return The {@link Language languages} detected.
+   */
   public Language[] getLanguages() {
     return languages;
   }
 
   /**
-   *
-   * @return length in codepoints of text processed
+   * @return The length in codepoints of text processed.
    */
   public int getLength() {
     return length;
diff --git a/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorEvaluationMonitor.java b/opennlp-tools/src/main/java/opennlp/tools/langdetect/package-info.java
similarity index 80%
copy from opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorEvaluationMonitor.java
copy to opennlp-tools/src/main/java/opennlp/tools/langdetect/package-info.java
index 30f33137..35c7f754 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorEvaluationMonitor.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/langdetect/package-info.java
@@ -15,14 +15,7 @@
  * limitations under the License.
  */
 
-package opennlp.tools.langdetect;
-
-import opennlp.tools.util.eval.EvaluationMonitor;
-
 /**
- * {@link EvaluationMonitor} for Language Detector.
+ * Package related to predicting languages from samples of text.
  */
-public interface LanguageDetectorEvaluationMonitor extends
-    EvaluationMonitor<LanguageSample> {
-
-}
+package opennlp.tools.langdetect;
diff --git a/opennlp-tools/src/main/java/opennlp/tools/languagemodel/LanguageModel.java b/opennlp-tools/src/main/java/opennlp/tools/languagemodel/LanguageModel.java
index 8366925f..2e8a1909 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/languagemodel/LanguageModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/languagemodel/LanguageModel.java
@@ -28,25 +28,16 @@ public interface LanguageModel {
   /**
    * Calculate the probability of a series of tokens (e.g. a sentence), given a vocabulary.
    *
-   * @param tokens the text tokens to calculate the probability for
-   * @return the probability of the given text tokens in the vocabulary
-   * @deprecated use {@link #calculateProbability(String...)}
-   */
-  @Deprecated
-  double calculateProbability(StringList tokens);
-
-  /**
-   * Calculate the probability of a series of tokens (e.g. a sentence), given a vocabulary.
+   * @param tokens the text tokens to calculate the probability for.
    *
-   * @param tokens the text tokens to calculate the probability for
    * @return the probability of the given text tokens in the vocabulary
    */
   double calculateProbability(String... tokens);
 
   /**
-   * Predict the most probable output sequence of tokens, given an input sequence of tokens.
+   * Predict the most probable output sequence of tokens, given an input sequence of {@code tokens}.
    *
-   * @param tokens a sequence of tokens
+   * @param tokens a sequence of tokens.
    * @return the most probable subsequent token sequence
    * @deprecated use {@link #predictNextTokens(String...)}
    */
@@ -54,7 +45,7 @@ public interface LanguageModel {
   StringList predictNextTokens(StringList tokens);
 
   /**
-   * Predict the most probable output sequence of tokens, given an input sequence of tokens.
+   * Predict the most probable output sequence of tokens, given an input sequence of {@code tokens}.
    *
    * @param tokens a sequence of tokens
    * @return the most probable subsequent token sequence
diff --git a/opennlp-tools/src/main/java/opennlp/tools/languagemodel/NGramLanguageModel.java b/opennlp-tools/src/main/java/opennlp/tools/languagemodel/NGramLanguageModel.java
index 4dda6875..2b3888ae 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/languagemodel/NGramLanguageModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/languagemodel/NGramLanguageModel.java
@@ -25,8 +25,8 @@ import opennlp.tools.ngram.NGramUtils;
 import opennlp.tools.util.StringList;
 
 /**
- * A {@link opennlp.tools.languagemodel.LanguageModel} based on a {@link opennlp.tools.ngram.NGramModel}
- * using Stupid Backoff to get the probabilities of the ngrams.
+ * A {@link LanguageModel} based on a {@link NGramModel} using Stupid Backoff to get
+ * the probabilities of the ngrams.
  */
 public class NGramLanguageModel extends NGramModel implements LanguageModel {
 
@@ -34,45 +34,65 @@ public class NGramLanguageModel extends NGramModel implements LanguageModel {
 
   private final int n;
 
+  /**
+   * Initializes an {@link NGramLanguageModel} with {@link #DEFAULT_N}.
+   */
   public NGramLanguageModel() {
     this(DEFAULT_N);
   }
 
+  /**
+   * Initializes an {@link NGramLanguageModel} with the given {@code n} for the ngram size.
+   *
+   * @param n The size of the ngrams to be used. Must be greater than {@code 0}.
+   *          
+   * @throws IllegalArgumentException Thrown if one of the arguments was invalid.
+   */
   public NGramLanguageModel(int n) {
+    if (n <= 0) {
+      throw new IllegalArgumentException("Parameter 'n' must be greater than 0.");
+    }
     this.n = n;
   }
 
+  /**
+   * Initializes an {@link NGramLanguageModel} instance via a valid {@link InputStream}.
+   *
+   * @param in The {@link InputStream} used for loading the model.
+   *
+   * @throws IOException Thrown if IO errors occurred during initialization.
+   * @throws IllegalArgumentException Thrown if one of the arguments was invalid.
+   */
   public NGramLanguageModel(InputStream in) throws IOException {
     this(in, DEFAULT_N);
   }
 
-  public NGramLanguageModel(InputStream in, int n)
-      throws IOException {
+  /**
+   * Initializes an {@link NGramLanguageModel} instance via a valid {@link InputStream}.
+   *
+   * @param in The {@link InputStream} used for loading the model.
+   * @param n The size of the ngrams to be used. Must be greater than {@code 0}.
+   *
+   * @throws IOException Thrown if IO errors occurred during initialization.
+   * @throws IllegalArgumentException Thrown if one of the arguments was invalid.
+   */
+  public NGramLanguageModel(InputStream in, int n) throws IOException {
     super(in);
+    if (n <= 0) {
+      throw new IllegalArgumentException("Parameter 'n' must be greater than 0.");
+    }
     this.n = n;
   }
 
+  /**
+   * Adds further tokens.
+   *
+   * @param tokens Text elements to add to the {@link NGramLanguageModel}.
+   */
   public void add(String... tokens) {
     add(new StringList(tokens), 1, n);
   }
 
-  @Override
-  public double calculateProbability(StringList tokens) {
-    double probability = 0d;
-    if (size() > 0) {
-      for (StringList ngram : NGramUtils.getNGrams(tokens, n)) {
-        double score = stupidBackoff(ngram);
-        probability += StrictMath.log(score);
-        if (Double.isNaN(probability)) {
-          probability = 0d;
-          break;
-        }
-      }
-      probability = StrictMath.exp(probability);
-    }
-    return probability;
-  }
-
   @Override
   public double calculateProbability(String... tokens) {
     double probability = 0d;
@@ -91,6 +111,7 @@ public class NGramLanguageModel extends NGramModel implements LanguageModel {
   }
 
   @Override
+  @Deprecated
   public StringList predictNextTokens(StringList tokens) {
     double maxProb = Double.NEGATIVE_INFINITY;
     StringList token = null;
@@ -114,6 +135,22 @@ public class NGramLanguageModel extends NGramModel implements LanguageModel {
     return token;
   }
 
+  private double calculateProbability(StringList tokens) {
+    double probability = 0d;
+    if (size() > 0) {
+      for (StringList ngram : NGramUtils.getNGrams(tokens, n)) {
+        double score = stupidBackoff(ngram);
+        probability += StrictMath.log(score);
+        if (Double.isNaN(probability)) {
+          probability = 0d;
+          break;
+        }
+      }
+      probability = StrictMath.exp(probability);
+    }
+    return probability;
+  }
+
   @Override
   public String[] predictNextTokens(String... tokens) {
     double maxProb = Double.NEGATIVE_INFINITY;
@@ -121,9 +158,7 @@ public class NGramLanguageModel extends NGramModel implements LanguageModel {
 
     for (StringList ngram : this) {
       String[] sequence = new String[ngram.size() + tokens.length];
-      for (int i = 0; i < tokens.length; i++) {
-        sequence[i] = tokens[i];
-      }
+      System.arraycopy(tokens, 0, sequence, 0, tokens.length);
       for (int i = 0; i < ngram.size(); i++) {
         sequence[i + tokens.length] = ngram.getToken(i);
       }