Posted to commits@opennlp.apache.org by jz...@apache.org on 2022/12/12 13:03:05 UTC

[opennlp] branch master updated: OPENNLP-1408 Enhance JavaDoc in opennlp.tools.doccat package (#451)

This is an automated email from the ASF dual-hosted git repository.

jzemerick pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp.git


The following commit(s) were added to refs/heads/master by this push:
     new 6e9b3b4b OPENNLP-1408 Enhance JavaDoc in opennlp.tools.doccat package (#451)
6e9b3b4b is described below

commit 6e9b3b4b8f94f5e20754edbee88612ea59fb6814
Author: Martin Wiesner <ma...@users.noreply.github.com>
AuthorDate: Mon Dec 12 14:02:59 2022 +0100

    OPENNLP-1408 Enhance JavaDoc in opennlp.tools.doccat package (#451)
    
    - adds missing JavaDoc
    - improves existing documentation for clarity
    - removes superfluous text
    - adds 'final' modifier where useful and applicable
    - adds 'Override' annotation where useful and applicable
    - fixes several typos
---
 .../tools/doccat/BagOfWordsFeatureGenerator.java   |  8 +++
 .../opennlp/tools/doccat/DoccatCrossValidator.java | 35 +++++-----
 .../tools/doccat/DoccatEvaluationMonitor.java      |  2 +-
 .../java/opennlp/tools/doccat/DoccatFactory.java   | 33 ++++++++--
 .../java/opennlp/tools/doccat/DoccatModel.java     | 42 ++++++++++++
 .../opennlp/tools/doccat/DocumentCategorizer.java  | 59 +++++++++--------
 .../DocumentCategorizerContextGenerator.java       | 11 +++-
 .../tools/doccat/DocumentCategorizerEvaluator.java | 27 ++++----
 .../doccat/DocumentCategorizerEventStream.java     | 31 +++++----
 .../tools/doccat/DocumentCategorizerME.java        | 74 ++++++++++------------
 .../java/opennlp/tools/doccat/DocumentSample.java  | 23 ++++++-
 .../opennlp/tools/doccat/DocumentSampleStream.java | 16 ++++-
 .../opennlp/tools/doccat/FeatureGenerator.java     |  8 +--
 .../tools/doccat/NGramFeatureGenerator.java        | 23 +++----
 14 files changed, 246 insertions(+), 146 deletions(-)

diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/BagOfWordsFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/BagOfWordsFeatureGenerator.java
index 51a3277e..fc598c9f 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/BagOfWordsFeatureGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/BagOfWordsFeatureGenerator.java
@@ -32,10 +32,18 @@ public class BagOfWordsFeatureGenerator implements FeatureGenerator {
 
   private final boolean useOnlyAllLetterTokens;
 
+  /**
+   * Instantiates a default {@link BagOfWordsFeatureGenerator} instance.
+   */
   public BagOfWordsFeatureGenerator() {
     this(false);
   }
 
+  /**
+   * Instantiates a {@link BagOfWordsFeatureGenerator} instance.
+   *
+   * @param useOnlyAllLetterTokens Whether to use only all-letter tokens, or not.
+   */
   BagOfWordsFeatureGenerator(boolean useOnlyAllLetterTokens) {
     this.useOnlyAllLetterTokens = useOnlyAllLetterTokens;
   }
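
For context, a minimal sketch of how the public default constructor documented above might be used; the tokens and the empty extra-information map are made up for illustration:

import java.util.Collection;
import java.util.Collections;
import opennlp.tools.doccat.BagOfWordsFeatureGenerator;
import opennlp.tools.doccat.FeatureGenerator;

public class BagOfWordsExample {
  public static void main(String[] args) {
    FeatureGenerator fg = new BagOfWordsFeatureGenerator();
    // One feature is emitted per token; no extra information is needed here.
    Collection<String> features =
        fg.extractFeatures(new String[] {"OpenNLP", "is", "fun"}, Collections.emptyMap());
    features.forEach(System.out::println);
  }
}
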
diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatCrossValidator.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatCrossValidator.java
index 106b82d2..ebfe6a09 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatCrossValidator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatCrossValidator.java
@@ -25,7 +25,7 @@ import opennlp.tools.util.eval.CrossValidationPartitioner;
 import opennlp.tools.util.eval.Mean;
 
 /**
- * Cross validator for document categorization
+ * Cross validator for {@link DocumentCategorizer}.
  */
 public class DoccatCrossValidator {
 
@@ -33,16 +33,21 @@ public class DoccatCrossValidator {
 
   private final TrainingParameters params;
 
-  private Mean documentAccuracy = new Mean();
+  private final Mean documentAccuracy = new Mean();
 
-  private DoccatEvaluationMonitor[] listeners;
+  private final DoccatEvaluationMonitor[] listeners;
 
-  private DoccatFactory factory;
+  private final DoccatFactory factory;
 
 
   /**
-   * Creates a {@link DoccatCrossValidator} with the given
-   * {@link FeatureGenerator}s.
+   * Instantiates a {@link DoccatCrossValidator} with the
+   * given {@link FeatureGenerator generators}.
+   *
+   * @param languageCode An ISO-conformant language code.
+   * @param mlParams The {@link TrainingParameters} for the context of cross validation.
+   * @param factory The {@link DoccatFactory} for creating related objects.
+   * @param listeners The {@link DoccatEvaluationMonitor evaluation listeners}.
    */
   public DoccatCrossValidator(String languageCode, TrainingParameters mlParams,
       DoccatFactory factory, DoccatEvaluationMonitor ... listeners) {
@@ -55,12 +60,10 @@ public class DoccatCrossValidator {
   /**
    * Starts the evaluation.
    *
-   * @param samples
-   *          the data to train and test
-   * @param nFolds
-   *          number of folds
+   * @param samples The {@link ObjectStream} of {@link DocumentSample samples} to train and test with.
+   * @param nFolds Number of folds. It must be greater than zero.
    *
-   * @throws IOException
+   * @throws IOException Thrown if IO errors occurred.
    */
   public void evaluate(ObjectStream<DocumentSample> samples, int nFolds)
       throws IOException {
@@ -88,19 +91,15 @@ public class DoccatCrossValidator {
   }
 
   /**
-   * Retrieves the accuracy for all iterations.
-   *
-   * @return the word accuracy
+   * @return Retrieves the accuracy for all iterations.
    */
   public double getDocumentAccuracy() {
     return documentAccuracy.mean();
   }
 
   /**
-   * Retrieves the number of words which where validated over all iterations.
-   * The result is the amount of folds multiplied by the total number of words.
-   *
-   * @return the word count
+   * @return Retrieves the number of words which were validated over all iterations.
+   *         The result is the number of folds multiplied by the total number of words.
    */
   public long getDocumentCount() {
     return documentAccuracy.count();
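
A rough sketch of how the cross validator documented above might be driven; the sample stream is assumed to be provided elsewhere (for instance a DocumentSampleStream over training data) and the 10-fold split is arbitrary:

import java.io.IOException;
import opennlp.tools.doccat.DoccatCrossValidator;
import opennlp.tools.doccat.DoccatFactory;
import opennlp.tools.doccat.DocumentSample;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.TrainingParameters;

public class CrossValidationExample {
  static void crossValidate(ObjectStream<DocumentSample> samples) throws IOException {
    DoccatCrossValidator cv = new DoccatCrossValidator(
        "en", TrainingParameters.defaultParams(), new DoccatFactory());
    cv.evaluate(samples, 10);  // train and test on 10 folds
    System.out.println("accuracy = " + cv.getDocumentAccuracy()
        + " over " + cv.getDocumentCount() + " documents");
  }
}
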
diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatEvaluationMonitor.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatEvaluationMonitor.java
index f7b5a6f5..951f8d0f 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatEvaluationMonitor.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatEvaluationMonitor.java
@@ -20,7 +20,7 @@ package opennlp.tools.doccat;
 import opennlp.tools.util.eval.EvaluationMonitor;
 
 /**
- * {@link EvaluationMonitor} for doccat.
+ * A marker interface for evaluating {@link DocumentCategorizer doccat}.
  */
 public interface DoccatEvaluationMonitor extends
     EvaluationMonitor<DocumentSample> {
diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatFactory.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatFactory.java
index babab7cd..9d2fb946 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatFactory.java
@@ -27,7 +27,7 @@ import opennlp.tools.util.InvalidFormatException;
 import opennlp.tools.util.ext.ExtensionLoader;
 
 /**
- * The factory that provides Doccat default implementations and resources
+ * The factory that provides Doccat default implementations and resources.
  */
 public class DoccatFactory extends BaseToolFactory {
 
@@ -36,13 +36,19 @@ public class DoccatFactory extends BaseToolFactory {
   private FeatureGenerator[] featureGenerators;
 
   /**
-   * Creates a {@link DoccatFactory} that provides the default implementation of
+   * Instantiates a {@link DoccatFactory} that provides the default implementation of
    * the resources.
    */
   public DoccatFactory() {}
 
+  /**
+   * Instantiates a {@link DoccatFactory} that provides the default implementation of
+   * the resources.
+   *
+   * @param featureGenerators The {@link FeatureGenerator featureGenerators} to use.
+   */
   public DoccatFactory(final FeatureGenerator[] featureGenerators) {
-    this.featureGenerators = featureGenerators;
+    init(featureGenerators);
   }
 
   protected void init(FeatureGenerator[] featureGenerators) {
@@ -78,6 +84,17 @@ public class DoccatFactory extends BaseToolFactory {
     // nothing to validate
   }
 
+  /**
+   * Factory method the framework uses to create a new {@link DoccatFactory}.
+   *
+   * @param subclassName The name of the class implementing the {@link DoccatFactory}.
+   * @param featureGenerators The {@link FeatureGenerator featureGenerators} to use.
+   *
+   * @return A valid {@link DoccatFactory} instance.
+   *
+   * @throws InvalidFormatException Thrown if the {@link ExtensionLoader} mechanism failed to
+   *                                create the factory associated with {@code subclassName}.
+   */
   public static DoccatFactory create(String subclassName, FeatureGenerator[] featureGenerators)
       throws InvalidFormatException {
     if (subclassName == null) {
@@ -91,9 +108,7 @@ public class DoccatFactory extends BaseToolFactory {
       return theFactory;
     } catch (Exception e) {
       String msg = "Could not instantiate the " + subclassName
-          + ". The initialization throw an exception.";
-      System.err.println(msg);
-      e.printStackTrace();
+          + ". The initialization threw an exception.";
       throw new InvalidFormatException(msg, e);
     }
   }
@@ -109,6 +124,9 @@ public class DoccatFactory extends BaseToolFactory {
     return fgs;
   }
 
+  /**
+   * @return Retrieves the {@link FeatureGenerator generators} used.
+   */
   public FeatureGenerator[] getFeatureGenerators() {
     if (featureGenerators == null) {
       if (artifactProvider != null) {
@@ -126,6 +144,9 @@ public class DoccatFactory extends BaseToolFactory {
     return featureGenerators;
   }
 
+  /**
+   * @param featureGenerators The {@link FeatureGenerator featureGenerators} to use.
+   */
   public void setFeatureGenerators(FeatureGenerator[] featureGenerators) {
     this.featureGenerators = featureGenerators;
   }
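
The factory's feature generators can be customized as sketched below; the combination of bag-of-words and bigram features is only an example:

import opennlp.tools.doccat.BagOfWordsFeatureGenerator;
import opennlp.tools.doccat.DoccatFactory;
import opennlp.tools.doccat.FeatureGenerator;
import opennlp.tools.doccat.NGramFeatureGenerator;
import opennlp.tools.util.InvalidFormatException;

public class FactoryExample {
  public static void main(String[] args) throws InvalidFormatException {
    FeatureGenerator[] generators = {
        new BagOfWordsFeatureGenerator(),
        new NGramFeatureGenerator(2, 2)   // bigram features
    };
    DoccatFactory factory = new DoccatFactory(generators);
    System.out.println(factory.getFeatureGenerators().length + " feature generators configured");
  }
}
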
diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatModel.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatModel.java
index 1b5c1640..26776eae 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatModel.java
@@ -38,6 +38,14 @@ public class DoccatModel extends BaseModel {
   private static final String COMPONENT_NAME = "DocumentCategorizerME";
   private static final String DOCCAT_MODEL_ENTRY_NAME = "doccat.model";
 
+  /**
+   * Initializes a {@link DoccatModel} instance via the given parameters.
+   *
+   * @param languageCode An ISO-conformant language code.
+   * @param doccatModel A valid {@link MaxentModel} to be used.
+   * @param manifestInfoEntries Additional information kept in the manifest.
+   * @param factory The {@link DoccatFactory} to be used.
+   */
   public DoccatModel(String languageCode, MaxentModel doccatModel,
       Map<String, String> manifestInfoEntries, DoccatFactory factory) {
     super(COMPONENT_NAME, languageCode, manifestInfoEntries, factory);
@@ -46,18 +54,46 @@ public class DoccatModel extends BaseModel {
     checkArtifactMap();
   }
 
+  /**
+   * Initializes a {@link DoccatModel} instance via a valid {@link InputStream}.
+   *
+   * @param in The {@link InputStream} used for loading the model.
+   *
+   * @throws IOException Thrown if IO errors occurred during initialization.
+   */
   public DoccatModel(InputStream in) throws IOException {
     super(COMPONENT_NAME, in);
   }
 
+  /**
+   * Initializes a {@link DoccatModel} instance via a valid {@link File}.
+   *
+   * @param modelFile The {@link File} used for loading the model.
+   *
+   * @throws IOException Thrown if IO errors occurred during initialization.
+   */
   public DoccatModel(File modelFile) throws IOException {
     super(COMPONENT_NAME, modelFile);
   }
 
+  /**
+   * Initializes a {@link DoccatModel} instance via a valid {@link Path}.
+   *
+   * @param modelPath The {@link Path} used for loading the model.
+   *
+   * @throws IOException Thrown if IO errors occurred during initialization.
+   */
   public DoccatModel(Path modelPath) throws IOException {
     this(modelPath.toFile());
   }
 
+  /**
+   * Initializes a {@link DoccatModel} instance via a valid {@link URL}.
+   *
+   * @param modelURL The {@link URL} used for loading the model.
+   *
+   * @throws IOException Thrown if IO errors occurred during initialization.
+   */
   public DoccatModel(URL modelURL) throws IOException {
     super(COMPONENT_NAME, modelURL);
   }
@@ -71,6 +107,9 @@ public class DoccatModel extends BaseModel {
     }
   }
 
+  /**
+   * @return Retrieves the active {@link DoccatFactory}.
+   */
   public DoccatFactory getFactory() {
     return (DoccatFactory) this.toolFactory;
   }
@@ -80,6 +119,9 @@ public class DoccatModel extends BaseModel {
     return DoccatFactory.class;
   }
 
+  /**
+   * @return Retrieves the active {@link MaxentModel}.
+   */
   public MaxentModel getMaxentModel() {
     return (MaxentModel) artifactMap.get(DOCCAT_MODEL_ENTRY_NAME);
   }
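
A short sketch of loading a model through one of the constructors documented above; the model file name is hypothetical:

import java.io.File;
import java.io.IOException;
import opennlp.tools.doccat.DoccatModel;
import opennlp.tools.doccat.DocumentCategorizerME;

public class LoadModelExample {
  public static void main(String[] args) throws IOException {
    // Equivalent constructors exist for InputStream, Path and URL.
    DoccatModel model = new DoccatModel(new File("en-doccat.bin"));
    DocumentCategorizerME categorizer = new DocumentCategorizerME(model);
    System.out.println(categorizer.getNumberOfCategories() + " categories in model");
  }
}
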
diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizer.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizer.java
index b180549b..3ef92c4c 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizer.java
@@ -27,75 +27,78 @@ import java.util.SortedMap;
 public interface DocumentCategorizer {
 
   /**
-   * Categorize the given text provided as tokens along with
-   * the provided extra information
+   * Categorizes the given {@code text} provided as tokens along with
+   * the provided {@code extraInformation}.
    *
-   * @param text the tokens of text to categorize
-   * @param extraInformation extra information
-   * @return per category probabilities
+   * @param text The tokens of text to categorize.
+   * @param extraInformation The extra information used for this context.
+   * @return The per category probabilities.
    */
   double[] categorize(String[] text, Map<String, Object> extraInformation);
 
   /**
-   * Categorizes the given text, provided in separate tokens.
-   * @param text the tokens of text to categorize
-   * @return per category probabilities
+   * Categorizes the given {@code text}, provided in separate tokens.
+   * 
+   * @param text The tokens of text to categorize.
+   * @return The per category probabilities.
    */
   double[] categorize(String[] text);
 
   /**
-   * get the best category from previously generated outcome probabilities
+   * Retrieves the best category from previously generated {@code outcome} probabilities.
    *
-   * @param outcome a vector of outcome probabilities
-   * @return the best category String
+   * @param outcome An array of computed outcome probabilities.
+   * @return The best category represented as String.
    */
   String getBestCategory(double[] outcome);
 
   /**
-   * get the index of a certain category
+   * Retrieves the index of a certain category.
    *
-   * @param category the category
-   * @return an index
+   * @param category The category for which the {@code index} is to be found.
+   * @return The index.
    */
   int getIndex(String category);
 
   /**
-   * get the category at a given index
+   * Retrieves the category at a given {@code index}.
    *
-   * @param index the index
-   * @return a category
+   * @param index The index for which the {@code category} shall be found.
+   * @return The category represented as String.
    */
   String getCategory(int index);
 
   /**
-   * get the number of categories
+   * Retrieves the number of categories.
    *
-   * @return the no. of categories
+   * @return The number of categories.
    */
   int getNumberOfCategories();
 
   /**
-   * get the name of the category associated with the given probabilties
+   * Retrieves the name of the category associated with the given probabilities.
    *
-   * @param results the probabilities of each category
-   * @return the name of the outcome
+   * @param results The probabilities of each category.
+   * @return The name of the outcome.
    */
   String getAllResults(double[] results);
 
   /**
-   * Returns a map in which the key is the category name and the value is the score
+   * Retrieves a {@link Map} in which the key is the category name and the value is the score.
    *
-   * @param text the input text to classify
-   * @return a map with the score as a key. The value is a Set of categories with the score.
+   * @param text The tokenized input text to classify.
+   * @return A {@link Map} with the category name as key and the score as value.
    */
   Map<String, Double> scoreMap(String[] text);
 
   /**
-   * Get a map of the scores sorted in ascending aorder together with their associated categories.
-   * Many categories can have the same score, hence the Set as value
+   * Retrieves a {@link SortedMap} of the scores sorted in ascending order,
+   * together with their associated categories.
+   * <p> 
+   * Many categories can have the same score, hence the {@link Set} as value.
    *
    * @param text the input text to classify
-   * @return a map with the score as a key. The value is a Set of categories with the score.
+   * @return A {@link SortedMap} with the score as a key.
    */
   SortedMap<Double, Set<String>> sortedScoreMap(String[] text);
 
diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerContextGenerator.java
index e12f16bf..24dc288f 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerContextGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerContextGenerator.java
@@ -22,12 +22,19 @@ import java.util.LinkedList;
 import java.util.Map;
 
 /**
- * Context generator for document categorizer
+ * Context generator for {@link DocumentCategorizer}.
+ *
+ * @see DocumentCategorizer
  */
 class DocumentCategorizerContextGenerator {
 
-  private FeatureGenerator[] mFeatureGenerators;
+  private final FeatureGenerator[] mFeatureGenerators;
 
+  /**
+   * Instantiates a {@link DocumentCategorizerContextGenerator} instance.
+   *
+   * @param featureGenerators The {@link FeatureGenerator featureGenerators} to use.
+   */
   DocumentCategorizerContextGenerator(FeatureGenerator... featureGenerators) {
     mFeatureGenerators = featureGenerators;
   }
diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEvaluator.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEvaluator.java
index c501280f..a417f974 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEvaluator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEvaluator.java
@@ -24,21 +24,23 @@ import opennlp.tools.util.eval.Mean;
 /**
  * The {@link DocumentCategorizerEvaluator} measures the performance of
  * the given {@link DocumentCategorizer} with the provided reference
- * {@link DocumentSample}s.
+ * {@link DocumentSample samples}.
  *
  * @see DocumentCategorizer
  * @see DocumentSample
+ * @see Evaluator
  */
 public class DocumentCategorizerEvaluator extends Evaluator<DocumentSample> {
 
-  private DocumentCategorizer categorizer;
+  private final DocumentCategorizer categorizer;
 
-  private Mean accuracy = new Mean();
+  private final Mean accuracy = new Mean();
 
   /**
-   * Initializes the current instance.
+   * Initializes a {@link DocumentCategorizerEvaluator} instance.
    *
-   * @param categorizer the document categorizer instance
+   * @param categorizer the {@link DocumentCategorizer} instance.
+   * @param listeners the {@link DoccatEvaluationMonitor evaluation listeners}.
    */
   public DocumentCategorizerEvaluator(DocumentCategorizer categorizer,
       DoccatEvaluationMonitor ... listeners) {
@@ -47,13 +49,14 @@ public class DocumentCategorizerEvaluator extends Evaluator<DocumentSample> {
   }
 
   /**
-   * Evaluates the given reference {@link DocumentSample} object.
-   *
+   * Evaluates the given reference {@link DocumentSample sample}.
+   * <p>
    * This is done by categorizing the document from the provided
    * {@link DocumentSample}. The detected category is then used
    * to calculate and update the score.
    *
-   * @param sample the reference {@link TokenSample}.
+   * @param sample The reference {@link DocumentSample}.
+   * @return The processed {@link DocumentSample}.
    */
   public DocumentSample processSample(DocumentSample sample) {
 
@@ -74,11 +77,9 @@ public class DocumentCategorizerEvaluator extends Evaluator<DocumentSample> {
   }
 
   /**
-   * Retrieves the accuracy of provided {@link DocumentCategorizer}.
-   *
-   * accuracy = correctly categorized documents / total documents
+   * {@code accuracy = correctly categorized documents / total documents}
    *
-   * @return the accuracy
+   * @return Retrieves the accuracy of the provided {@link DocumentCategorizer}.
    */
   public double getAccuracy() {
     return accuracy.mean();
@@ -89,7 +90,7 @@ public class DocumentCategorizerEvaluator extends Evaluator<DocumentSample> {
   }
 
   /**
-   * Represents this objects as human readable {@link String}.
+   * Represents this object as human-readable {@link String}.
    */
   @Override
   public String toString() {
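
A sketch of how the evaluator might be applied to held-out data; evaluate(ObjectStream) is assumed to be inherited from Evaluator, and the test sample stream is provided elsewhere:

import java.io.IOException;
import opennlp.tools.doccat.DocumentCategorizer;
import opennlp.tools.doccat.DocumentCategorizerEvaluator;
import opennlp.tools.doccat.DocumentSample;
import opennlp.tools.util.ObjectStream;

public class EvaluatorExample {
  static void evaluate(DocumentCategorizer categorizer,
                       ObjectStream<DocumentSample> testSamples) throws IOException {
    DocumentCategorizerEvaluator evaluator = new DocumentCategorizerEvaluator(categorizer);
    evaluator.evaluate(testSamples);  // calls processSample(..) for each reference sample
    System.out.println("accuracy = " + evaluator.getAccuracy());
  }
}
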
diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEventStream.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEventStream.java
index 3fbe63d7..723a73e8 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEventStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerEventStream.java
@@ -28,46 +28,45 @@ import opennlp.tools.util.ObjectStream;
  */
 public class DocumentCategorizerEventStream extends AbstractEventStream<DocumentSample> {
 
-  private DocumentCategorizerContextGenerator mContextGenerator;
+  private final DocumentCategorizerContextGenerator mContextGenerator;
 
   /**
-   * Initializes the current instance via samples and feature generators.
+   * Initializes a {@link DocumentCategorizerEventStream} via samples and
+   * {@link FeatureGenerator feature generators}.
    *
-   * @param data {@link ObjectStream} of {@link DocumentSample}s
-   *
-   * @param featureGenerators the feature generators
+   * @param samples {@link ObjectStream} of {@link DocumentSample samples}.
+   * @param featureGenerators One or more {@link FeatureGenerator} to use.
    */
-  public DocumentCategorizerEventStream(ObjectStream<DocumentSample> data,
+  public DocumentCategorizerEventStream(ObjectStream<DocumentSample> samples,
       FeatureGenerator... featureGenerators) {
-    super(data);
-
-    mContextGenerator =
-        new DocumentCategorizerContextGenerator(featureGenerators);
+    super(samples);
+    mContextGenerator = new DocumentCategorizerContextGenerator(featureGenerators);
   }
 
   /**
-   * Initializes the current instance.
+   * Initializes a {@link DocumentCategorizerEventStream} via samples.
+   * A {@link BagOfWordsFeatureGenerator} is used as the feature generator.
    *
-   * @param samples {@link ObjectStream} of {@link DocumentSample}s
+   * @param samples {@link ObjectStream} of {@link DocumentSample samples}.
    */
   public DocumentCategorizerEventStream(ObjectStream<DocumentSample> samples) {
     super(samples);
-
-    mContextGenerator =
-        new DocumentCategorizerContextGenerator(new BagOfWordsFeatureGenerator());
+    mContextGenerator = new DocumentCategorizerContextGenerator(new BagOfWordsFeatureGenerator());
   }
 
   @Override
   protected Iterator<Event> createEvents(final DocumentSample sample) {
 
-    return new Iterator<Event>() {
+    return new Iterator<>() {
 
       private boolean isVirgin = true;
 
+      @Override
       public boolean hasNext() {
         return isVirgin;
       }
 
+      @Override
       public Event next() {
 
         isVirgin = false;
diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java
index 9dc41d74..8d2155d7 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java
@@ -33,23 +33,18 @@ import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.TrainingParameters;
 
 /**
- * Maxent implementation of {@link DocumentCategorizer}.
+ * A Max-Ent based implementation of {@link DocumentCategorizer}.
  */
 public class DocumentCategorizerME implements DocumentCategorizer {
+  
+  private final DoccatModel model;
+  private final DocumentCategorizerContextGenerator mContextGenerator;
 
   /**
-   * Shared default thread safe feature generator.
-   */
-  private static FeatureGenerator defaultFeatureGenerator = new BagOfWordsFeatureGenerator();
-
-  private DoccatModel model;
-  private DocumentCategorizerContextGenerator mContextGenerator;
-
-  /**
-   * Initializes the current instance with a doccat model. Default feature
-   * generation is used.
+   * Initializes a {@link DocumentCategorizerME} instance with a doccat model.
+   * Default feature generation is used.
    *
-   * @param model the doccat model
+   * @param model the {@link DoccatModel} to be used for categorization.
    */
   public DocumentCategorizerME(DoccatModel model) {
     this.model = model;
@@ -58,34 +53,24 @@ public class DocumentCategorizerME implements DocumentCategorizer {
   }
 
   /**
-   * Categorize the given text provided as tokens along with
-   * the provided extra information
+   * Categorizes the given {@code text} provided as tokens along with
+   * the provided extra information.
    *
-   * @param text text tokens to categorize
-   * @param extraInformation additional information
+   * @param text The text tokens to categorize.
+   * @param extraInformation Additional information for context to be used by the feature generator.
+   * @return The per category probabilities.
    */
   @Override
   public double[] categorize(String[] text, Map<String, Object> extraInformation) {
     return model.getMaxentModel().eval(
         mContextGenerator.getContext(text, extraInformation));
   }
-
-  /**
-   * Categorizes the given text.
-   *
-   * @param text the text to categorize
-   */
+  
   @Override
   public double[] categorize(String[] text) {
     return this.categorize(text, Collections.emptyMap());
   }
 
-  /**
-   * Returns a map in which the key is the category name and the value is the score
-   *
-   * @param text the input text to classify
-   * @return the score map
-   */
   @Override
   public Map<String, Double> scoreMap(String[] text) {
     Map<String, Double> probDist = new HashMap<>();
@@ -98,15 +83,7 @@ public class DocumentCategorizerME implements DocumentCategorizer {
     }
     return probDist;
   }
-
-  /**
-   * Returns a map with the score as a key in ascending order.
-   * The value is a Set of categories with the score.
-   * Many categories can have the same score, hence the Set as value
-   *
-   * @param text the input text to classify
-   * @return the sorted score map
-   */
+  
   @Override
   public SortedMap<Double, Set<String>> sortedScoreMap(String[] text) {
     SortedMap<Double, Set<String>> descendingMap = new TreeMap<>();
@@ -126,29 +103,44 @@ public class DocumentCategorizerME implements DocumentCategorizer {
     return descendingMap;
   }
 
+  @Override
   public String getBestCategory(double[] outcome) {
     return model.getMaxentModel().getBestOutcome(outcome);
   }
 
+  @Override
   public int getIndex(String category) {
     return model.getMaxentModel().getIndex(category);
   }
 
+  @Override
   public String getCategory(int index) {
     return model.getMaxentModel().getOutcome(index);
   }
 
+  @Override
   public int getNumberOfCategories() {
     return model.getMaxentModel().getNumOutcomes();
   }
 
+  @Override
   public String getAllResults(double[] results) {
     return model.getMaxentModel().getAllOutcomes(results);
   }
 
-  public static DoccatModel train(String languageCode, ObjectStream<DocumentSample> samples,
-      TrainingParameters mlParams, DoccatFactory factory)
-          throws IOException {
+  /**
+   * Starts the training of a {@link DoccatModel} with the given parameters.
+   *
+   * @param lang The ISO-conformant language code.
+   * @param samples The {@link ObjectStream} of {@link DocumentSample} used as input for training.
+   * @param mlParams The {@link TrainingParameters} for the context of the training.
+   * @param factory The {@link DoccatFactory} for creating related objects defined via {@code mlParams}.
+   *
+   * @return A valid, trained {@link DoccatModel} instance.
+   * @throws IOException Thrown if IO errors occurred.
+   */
+  public static DoccatModel train(String lang, ObjectStream<DocumentSample> samples,
+      TrainingParameters mlParams, DoccatFactory factory) throws IOException {
 
     Map<String, String> manifestInfoEntries = new HashMap<>();
 
@@ -158,6 +150,6 @@ public class DocumentCategorizerME implements DocumentCategorizer {
     MaxentModel model = trainer.train(
         new DocumentCategorizerEventStream(samples, factory.getFeatureGenerators()));
 
-    return new DoccatModel(languageCode, model, manifestInfoEntries, factory);
+    return new DoccatModel(lang, model, manifestInfoEntries, factory);
   }
 }
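
Putting the pieces together, a hedged end-to-end sketch of train(..) as documented above; the training file name is hypothetical and its contents are expected in the DocumentSampleStream format (category, tab, whitespace-separated tokens):

import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import opennlp.tools.doccat.DoccatFactory;
import opennlp.tools.doccat.DoccatModel;
import opennlp.tools.doccat.DocumentCategorizerME;
import opennlp.tools.doccat.DocumentSample;
import opennlp.tools.doccat.DocumentSampleStream;
import opennlp.tools.util.MarkableFileInputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.TrainingParameters;

public class TrainingExample {
  public static void main(String[] args) throws IOException {
    try (ObjectStream<String> lines = new PlainTextByLineStream(
             new MarkableFileInputStreamFactory(new File("doccat.train")), StandardCharsets.UTF_8);
         ObjectStream<DocumentSample> samples = new DocumentSampleStream(lines)) {

      DoccatModel model = DocumentCategorizerME.train(
          "en", samples, TrainingParameters.defaultParams(), new DoccatFactory());

      DocumentCategorizerME categorizer = new DocumentCategorizerME(model);
      double[] outcomes = categorizer.categorize(new String[] {"Stocks", "rallied", "today"});
      System.out.println(categorizer.getBestCategory(outcomes));
    }
  }
}
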
diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSample.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSample.java
index 560a9b62..f06efff6 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSample.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSample.java
@@ -36,10 +36,23 @@ public class DocumentSample implements Sample {
   private final List<String> text;
   private final Map<String, Object> extraInformation;
 
+  /**
+   * Initializes a {@link DocumentSample} instance.
+   *
+   * @param category The category to be used. Must not be {@code null}.
+   * @param text The plain text in a tokenized form. Must not be {@code null}.
+   */
   public DocumentSample(String category, String[] text) {
     this(category, text, null);
   }
 
+  /**
+   * Initializes a {@link DocumentSample} instance.
+   *
+   * @param category The category to be used. Must not be {@code null}.
+   * @param text The plain text in a tokenized form. Must not be {@code null}.
+   * @param extraInformation Additional information for context.
+   */
   public DocumentSample(String category, String[] text, Map<String, Object> extraInformation) {
     Objects.requireNonNull(text, "text must not be null");
 
@@ -53,14 +66,23 @@ public class DocumentSample implements Sample {
     }
   }
 
+  /**
+   * @return Retrieves the category.
+   */
   public String getCategory() {
     return category;
   }
 
+  /**
+   * @return Retrieves the text in a tokenized form.
+   */
   public String[] getText() {
     return text.toArray(new String[text.size()]);
   }
 
+  /**
+   * @return Retrieves contextual extra information.
+   */
   public Map<String, Object> getExtraInformation() {
     return extraInformation;
   }
@@ -69,7 +91,6 @@ public class DocumentSample implements Sample {
   public String toString() {
 
     StringBuilder sampleString = new StringBuilder();
-
     sampleString.append(category).append('\t');
 
     for (String s : text) {
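
For completeness, a tiny sketch constructing a sample as documented above; the category label and tokens are invented:

import opennlp.tools.doccat.DocumentSample;

public class SampleExample {
  public static void main(String[] args) {
    DocumentSample sample =
        new DocumentSample("sports", new String[] {"The", "keeper", "saved", "a", "penalty"});
    System.out.println(sample.getCategory() + ": " + String.join(" ", sample.getText()));
  }
}
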
diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSampleStream.java
index 9054eb7b..da8d9501 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentSampleStream.java
@@ -24,21 +24,31 @@ import opennlp.tools.util.FilterObjectStream;
 import opennlp.tools.util.ObjectStream;
 
 /**
- * This class reads in string encoded training samples, parses them and
+ * Reads in string encoded training samples, parses them and
  * outputs {@link DocumentSample} objects.
  * <p>
  * Format:<br>
  * Each line contains one sample document.<br>
  * The category is the first string in the line followed by a tab and whitespace
- * separated document tokens.<br>
- * Sample line: category-string tab-char whitespace-separated-tokens line-break-char(s)<br>
+ * separated document tokens.
+ * <p>
+ * Sample line:
+ * {@code category-string tab-char whitespace-separated-tokens line-break-char(s)}
+ *
+ * @see DocumentSample
  */
 public class DocumentSampleStream extends FilterObjectStream<String, DocumentSample> {
 
+  /**
+   * Initializes a {@link DocumentSampleStream} instance.
+   *
+   * @param samples A plain text {@link ObjectStream line stream}.
+   */
   public DocumentSampleStream(ObjectStream<String> samples) {
     super(samples);
   }
 
+  @Override
   public DocumentSample read() throws IOException {
     String sampleString = samples.read();
 
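
A sketch of the documented line format being parsed; ObjectStreamUtils is assumed here merely as a convenient way to wrap literal lines into an ObjectStream:

import java.io.IOException;
import opennlp.tools.doccat.DocumentSample;
import opennlp.tools.doccat.DocumentSampleStream;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.ObjectStreamUtils;

public class SampleStreamExample {
  public static void main(String[] args) throws IOException {
    // Each line: category, a tab, then whitespace-separated tokens.
    ObjectStream<String> lines = ObjectStreamUtils.createObjectStream(
        "sports\tReal Madrid won the final",
        "economy\tMarkets closed higher on Friday");
    try (ObjectStream<DocumentSample> samples = new DocumentSampleStream(lines)) {
      DocumentSample sample;
      while ((sample = samples.read()) != null) {
        System.out.println(sample.getCategory() + " -> " + sample.getText().length + " tokens");
      }
    }
  }
}
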
diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/FeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/FeatureGenerator.java
index 2ed5a30a..ebb8fa0c 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/FeatureGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/FeatureGenerator.java
@@ -27,11 +27,11 @@ import java.util.Map;
 public interface FeatureGenerator {
 
   /**
-   * Extract features from given text fragments
+   * Extracts features from given {@code text} fragments.
    *
-   * @param text             the text fragments to extract features from
-   * @param extraInformation optional extra information to be used by the feature generator
-   * @return a collection of features
+   * @param text             The text fragments to extract features from.
+   * @param extraInformation Optional extra information to be used by the {@link FeatureGenerator}.
+   * @return A collection of features.
    */
   Collection<String> extractFeatures(String[] text, Map<String, Object> extraInformation);
 }
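
Since this interface is the extension point for custom features, a hypothetical implementation might look like the sketch below; the "suffix=" feature scheme is made up for illustration:

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import opennlp.tools.doccat.FeatureGenerator;

public class SuffixFeatureGenerator implements FeatureGenerator {

  @Override
  public Collection<String> extractFeatures(String[] text, Map<String, Object> extraInformation) {
    List<String> features = new ArrayList<>();
    for (String token : text) {
      // Emit the last three characters of each token as a feature.
      int start = Math.max(0, token.length() - 3);
      features.add("suffix=" + token.substring(start).toLowerCase());
    }
    return features;
  }
}
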
diff --git a/opennlp-tools/src/main/java/opennlp/tools/doccat/NGramFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/doccat/NGramFeatureGenerator.java
index 15accdf9..54909064 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/doccat/NGramFeatureGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/doccat/NGramFeatureGenerator.java
@@ -27,7 +27,8 @@ import opennlp.tools.util.InvalidFormatException;
 
 /**
  * Generates ngram features for a document.
- * n-gram {@link FeatureGenerator}
+ *
+ * @see FeatureGenerator
  */
 public class NGramFeatureGenerator implements FeatureGenerator {
 
@@ -35,11 +36,11 @@ public class NGramFeatureGenerator implements FeatureGenerator {
   private final int maxGram;
 
   /**
-   * Constructor for ngrams.
+   * Instantiates an {@link NGramFeatureGenerator} instance with configurable ngram parameters.
    *
-   * @param minGram minGram value - which means minimum words in ngram features
-   * @param maxGram maxGram value - which means maximum words in ngram features
-   * @throws InvalidFormatException
+   * @param minGram The minimum words in ngram features.
+   * @param maxGram The maximum words in ngram features.
+   * @throws InvalidFormatException Thrown if parameter values are invalid or inconsistent.
    */
   public NGramFeatureGenerator(int minGram, int maxGram) throws InvalidFormatException {
     if (minGram > 0 && maxGram > 0) {
@@ -57,19 +58,15 @@ public class NGramFeatureGenerator implements FeatureGenerator {
   }
 
   /**
-   * Default constructor for Bi grams
+   * Instantiates an {@link NGramFeatureGenerator} instance with a bigram configuration.
+   *
+   * @throws InvalidFormatException Thrown if parameter values are invalid or inconsistent.
    */
   public NGramFeatureGenerator() throws InvalidFormatException {
     this(2, 2);
   }
 
-  /**
-   * Extract ngram features from given text fragments
-   *
-   * @param text      the text fragments to extract features from
-   * @param extraInfo optional extra information
-   * @return a collection of n gram features
-   */
+  @Override
   public Collection<String> extractFeatures(String[] text, Map<String, Object> extraInfo) {
     Objects.requireNonNull(text, "text must not be null");
     List<String> features = new ArrayList<>();