You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jz...@apache.org on 2022/12/10 14:19:30 UTC
[opennlp] branch master updated: OPENNLP-1406 Enhance JavaDoc in opennlp.tools.parser package (#449)
This is an automated email from the ASF dual-hosted git repository.
jzemerick pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp.git
The following commit(s) were added to refs/heads/master by this push:
new 9cc6b59d OPENNLP-1406 Enhance JavaDoc in opennlp.tools.parser package (#449)
9cc6b59d is described below
commit 9cc6b59d49348ffb4133c6052beca53f2882e876
Author: Martin Wiesner <ma...@users.noreply.github.com>
AuthorDate: Sat Dec 10 15:19:25 2022 +0100
OPENNLP-1406 Enhance JavaDoc in opennlp.tools.parser package (#449)
- adds missing JavaDoc
- improves existing documentation for clarity
- removes superfluous text
- removes orphaned (commented) code fragments in `..parser.treeinsert.ParserEventStream`
- fixes a missing variable assignment in `ParserCrossValidator` (a hidden bug)
- adds 'final' modifier where useful and applicable
- adds 'Override' annotation where useful and applicable
- fixes several typos
- corrects some inconsistencies in the `opennlp.tools.chunker` and `opennlp.tools.langdetect` package
---
.../tools/chunker/ChunkerCrossValidator.java | 2 +-
.../opennlp/tools/chunker/ChunkerEvaluator.java | 6 +-
.../java/opennlp/tools/chunker/ChunkerFactory.java | 16 +-
.../tools/langdetect/LanguageDetectorFactory.java | 4 +-
.../tools/parser/AbstractBottomUpParser.java | 95 +++++------
.../tools/parser/AbstractContextGenerator.java | 66 +++++---
.../tools/parser/AbstractParserEventStream.java | 20 ++-
.../tools/parser/ChunkContextGenerator.java | 18 +-
.../opennlp/tools/parser/ChunkSampleStream.java | 7 +-
.../src/main/java/opennlp/tools/parser/Cons.java | 4 +-
.../java/opennlp/tools/parser/Constituent.java | 5 +-
.../main/java/opennlp/tools/parser/GapLabeler.java | 13 +-
.../main/java/opennlp/tools/parser/HeadRules.java | 16 +-
.../src/main/java/opennlp/tools/parser/Parse.java | 188 +++++++++------------
.../opennlp/tools/parser/ParseSampleStream.java | 6 +
.../src/main/java/opennlp/tools/parser/Parser.java | 24 ++-
.../parser/ParserChunkerSequenceValidator.java | 5 +-
.../opennlp/tools/parser/ParserCrossValidator.java | 28 ++-
.../tools/parser/ParserEvaluationMonitor.java | 3 +
.../java/opennlp/tools/parser/ParserEvaluator.java | 43 ++---
.../opennlp/tools/parser/ParserEventTypeEnum.java | 3 +-
.../java/opennlp/tools/parser/ParserFactory.java | 28 +++
.../java/opennlp/tools/parser/ParserModel.java | 144 ++++++++++++++--
.../main/java/opennlp/tools/parser/ParserType.java | 7 +
.../java/opennlp/tools/parser/PosSampleStream.java | 8 +-
.../parser/chunking/BuildContextGenerator.java | 24 ++-
.../parser/chunking/CheckContextGenerator.java | 13 +-
.../java/opennlp/tools/parser/chunking/Parser.java | 92 +++++++---
.../tools/parser/chunking/ParserEventStream.java | 55 +++---
.../opennlp/tools/parser/lang/en/HeadRules.java | 37 ++--
.../parser/lang/es/AncoraSpanishHeadRules.java | 33 ++--
.../parser/treeinsert/AttachContextGenerator.java | 11 +-
.../parser/treeinsert/BuildContextGenerator.java | 16 +-
.../parser/treeinsert/CheckContextGenerator.java | 30 +++-
.../opennlp/tools/parser/treeinsert/Parser.java | 153 ++++++++++++-----
.../tools/parser/treeinsert/ParserEventStream.java | 54 +++---
.../java/opennlp/tools/util/BaseToolFactory.java | 3 +-
37 files changed, 845 insertions(+), 435 deletions(-)
diff --git a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerCrossValidator.java b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerCrossValidator.java
index 41b7c0d6..a18cd7d9 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerCrossValidator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerCrossValidator.java
@@ -37,7 +37,7 @@ public class ChunkerCrossValidator {
private final ChunkerFactory chunkerFactory;
/**
- * Initializes a {@link ChunkerModel} instance via given parameters.
+ * Initializes a {@link ChunkerCrossValidator} instance via given parameters.
*
* @param languageCode An ISO conform language code.
* @param factory The {@link ChunkerFactory} for creating related objects.
diff --git a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerEvaluator.java b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerEvaluator.java
index 3c8fa3ed..beca5bb4 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerEvaluator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerEvaluator.java
@@ -39,10 +39,10 @@ public class ChunkerEvaluator extends Evaluator<ChunkSample> {
private final Chunker chunker;
/**
- * Initializes the current instance with the given {@link Chunker}.
+ * Initializes a {@link ChunkerEvaluator} instance with the given {@link Chunker}.
*
- * @param chunker the {@link Chunker} to evaluate.
- * @param listeners the {@link ChunkerEvaluationMonitor evaluation listeners}.
+ * @param chunker The {@link Chunker} to evaluate.
+ * @param listeners The {@link ChunkerEvaluationMonitor evaluation listeners}.
*/
public ChunkerEvaluator(Chunker chunker, ChunkerEvaluationMonitor... listeners) {
super(listeners);
diff --git a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerFactory.java b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerFactory.java
index 961a7381..b3a8e28c 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerFactory.java
@@ -32,6 +32,18 @@ public class ChunkerFactory extends BaseToolFactory {
public ChunkerFactory() {
}
+ /**
+ * Instantiates a {@link ChunkerFactory} via a given {@code subclassName}.
+ *
+ * @param subclassName The class name used for instantiation. If {@code null}, an
+ * instance of {@link ChunkerFactory} will be returned
+ * per default. Otherwise, the {@link ExtensionLoader} mechanism
+ * is applied to load the requested {@code subclassName}.
+ *
+ * @return A valid {@link ChunkerFactory} instance.
+ * @throws InvalidFormatException Thrown if the {@link ExtensionLoader} mechanism failed to
+ * create the factory associated with {@code subclassName}.
+ */
public static ChunkerFactory create(String subclassName)
throws InvalidFormatException {
if (subclassName == null) {
@@ -42,9 +54,7 @@ public class ChunkerFactory extends BaseToolFactory {
return ExtensionLoader.instantiateExtension(ChunkerFactory.class, subclassName);
} catch (Exception e) {
String msg = "Could not instantiate the " + subclassName
- + ". The initialization throw an exception.";
- System.err.println(msg);
- e.printStackTrace();
+ + ". The initialization threw an exception.";
throw new InvalidFormatException(msg, e);
}
}
diff --git a/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorFactory.java b/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorFactory.java
index a397f4ac..0c947120 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorFactory.java
@@ -65,6 +65,8 @@ public class LanguageDetectorFactory extends BaseToolFactory {
* is applied to load the requested {@code subclassName}.
*
* @return A valid {@link LanguageDetectorFactory} instance.
+ * @throws InvalidFormatException Thrown if the {@link ExtensionLoader} mechanism failed to
+ * create the factory associated with {@code subclassName}.
*/
public static LanguageDetectorFactory create(String subclassName)
throws InvalidFormatException {
@@ -79,7 +81,7 @@ public class LanguageDetectorFactory extends BaseToolFactory {
return theFactory;
} catch (Exception e) {
String msg = "Could not instantiate the " + subclassName
- + ". The initialization throw an exception.";
+ + ". The initialization threw an exception.";
throw new InvalidFormatException(msg, e);
}
}
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractBottomUpParser.java b/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractBottomUpParser.java
index bcec9ddc..8bee7fb8 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractBottomUpParser.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractBottomUpParser.java
@@ -74,17 +74,17 @@ public abstract class AbstractBottomUpParser implements Parser {
*/
public static final double defaultAdvancePercentage = 0.95;
- /**
+ /*
* Completed parses.
*/
- private SortedSet<Parse> completeParses;
+ private final SortedSet<Parse> completeParses;
- /**
+ /*
* Incomplete parses which will be advanced.
*/
private SortedSet<Parse> odh;
- /**
+ /*
* Incomplete parses which have been advanced.
*/
private SortedSet<Parse> ndh;
@@ -116,11 +116,6 @@ public abstract class AbstractBottomUpParser implements Parser {
*/
public static final String TOK_NODE = "TK";
- /**
- * The integer 0.
- */
- public static final Integer ZERO = 0;
-
/**
* Prefix for outcomes starting a constituent.
*/
@@ -190,7 +185,9 @@ public abstract class AbstractBottomUpParser implements Parser {
/**
* Specifies whether the parser should report when it was unable to find a parse for
* a particular sentence.
- * @param errorReporting If true then un-parsed sentences are reported, false otherwise.
+ *
+ * @param errorReporting {@code true} if un-parsed sentences should be reported,
+ * {@code false} otherwise.
*/
public void setErrorReporting(boolean errorReporting) {
this.reportFailedParse = errorReporting;
@@ -199,7 +196,8 @@ public abstract class AbstractBottomUpParser implements Parser {
/**
* Assigns parent references for the specified parse so that they
* are consistent with the children references.
- * @param p The parse whose parent references need to be assigned.
+ *
+ * @param p The {@link Parse} whose parent references need to be assigned.
*/
public static void setParents(Parse p) {
Parse[] children = p.getChildren();
@@ -210,13 +208,13 @@ public abstract class AbstractBottomUpParser implements Parser {
}
/**
- * Removes the punctuation from the specified set of chunks, adds it to the parses
- * adjacent to the punctuation is specified, and returns a new array of parses with
- * the punctuation removed.
+ * Removes the punctuation from the specified set of {@code chunks}, adds it to the
+ * parses adjacent to the punctuation, and returns a new array of parses
+ * with the punctuation removed.
*
- * @param chunks A set of parses.
- * @param punctSet The set of punctuation which is to be removed.
- * @return An array of parses which is a subset of chunks with punctuation removed.
+ * @param chunks A set of {@link Parse parses}.
+ * @param punctSet The set of punctuation to be removed.
+ * @return Array of {@link Parse parses} which is a subset of chunks with punctuation removed.
*/
public static Parse[] collapsePunctuation(Parse[] chunks, Set<String> punctSet) {
List<Parse> collapsedParses = new ArrayList<>(chunks.length);
@@ -251,21 +249,23 @@ public abstract class AbstractBottomUpParser implements Parser {
/**
- * Advances the specified parse and returns the an array advanced parses whose
+ * Advances the specified {@link Parse} and returns an array of advanced parses whose
* probability accounts for more than the specified amount of probability mass.
*
- * @param p The parse to advance.
+ * @param p The {@link Parse} to advance.
* @param probMass The amount of probability mass that should be accounted for
* by the advanced parses.
*/
protected abstract Parse[] advanceParses(final Parse p, double probMass);
/**
- * Adds the "TOP" node to the specified parse.
- * @param p The complete parse.
+ * Adds the {@link #TOP_NODE} to the specified parse.
+ *
+ * @param p The complete {@link Parse}.
*/
protected abstract void advanceTop(Parse p);
+ @Override
public Parse[] parse(Parse tokens, int numParses) {
if (createDerivationString) tokens.setDerivation(new StringBuffer(100));
odh.clear();
@@ -367,6 +367,7 @@ public abstract class AbstractBottomUpParser implements Parser {
}
}
+ @Override
public Parse parse(Parse tokens) {
if (tokens.getChildCount() > 0) {
@@ -380,10 +381,11 @@ public abstract class AbstractBottomUpParser implements Parser {
}
/**
- * Returns the top chunk sequences for the specified parse.
- * @param p A pos-tag assigned parse.
+ * Returns the top chunk sequences for the specified {@link Parse}.
+ *
+ * @param p A pos-tag assigned {@link Parse}.
* @param minChunkScore A minimum score below which chunks should not be advanced.
- * @return The top chunk assignments to the specified parse.
+ * @return The top chunk assignments to the specified {@link Parse}.
*/
protected Parse[] advanceChunks(final Parse p, double minChunkScore) {
// chunk
@@ -408,7 +410,6 @@ public abstract class AbstractBottomUpParser implements Parser {
int start = -1;
int end = 0;
String type = null;
- //System.err.print("sequence "+si+" ");
for (int j = 0; j <= tags.length; j++) {
// if (j != tags.length) {System.err.println(words[j]+" "
// +ptags[j]+" "+tags[j]+" "+probs.get(j));}
@@ -421,7 +422,6 @@ public abstract class AbstractBottomUpParser implements Parser {
}
else { //make previous constituent if it exists
if (type != null) {
- //System.err.println("inserting tag "+tags[j]);
Parse p1 = p.getChildren()[start];
Parse p2 = p.getChildren()[end];
// System.err.println("Putting "+type+" at "+start+","+end+" for "
@@ -460,8 +460,9 @@ public abstract class AbstractBottomUpParser implements Parser {
}
/**
- * Advances the parse by assigning it POS tags and returns multiple tag sequences.
- * @param p The parse to be tagged.
+ * Advances the {@link Parse} by assigning it POS tags and returns multiple tag sequences.
+ *
+ * @param p The {@link Parse} to be tagged.
* @return Parses with different POS-tag sequence assignments.
*/
protected Parse[] advanceTags(final Parse p) {
@@ -480,7 +481,6 @@ public abstract class AbstractBottomUpParser implements Parser {
if (createDerivationString) newParses[i].getDerivation().append(i).append(".");
for (int j = 0; j < words.length; j++) {
Parse word = children[j];
- //System.err.println("inserting tag "+tags[j]);
double prob = probs[j];
newParses[i].insert(new Parse(word.getText(), word.getSpan(), tags[j], prob,j));
newParses[i].addProb(StrictMath.log(prob));
@@ -490,11 +490,12 @@ public abstract class AbstractBottomUpParser implements Parser {
}
/**
- * Determines the mapping between the specified index into the specified parses without punctuation to
- * the corresponding index into the specified parses.
- * @param index An index into the parses without punctuation.
- * @param nonPunctParses The parses without punctuation.
- * @param parses The parses wit punctuation.
+ * Determines the mapping between the specified {@code index} into the specified {@link Parse parses}
+ * without punctuation to the corresponding index into the specified {@link Parse parses}.
+ *
+ * @param index An index into the {@link Parse parses} without punctuation.
+ * @param nonPunctParses The {@link Parse parses} without punctuation.
+ * @param parses The {@link Parse parses} with punctuation.
* @return An index into the specified parses which corresponds to the same node the specified index
* into the parses with punctuation.
*/
@@ -516,14 +517,15 @@ public abstract class AbstractBottomUpParser implements Parser {
}
/**
- * Creates a n-gram dictionary from the specified data stream using the specified
+ * Creates an n-gram {@link Dictionary} from the specified data stream using the specified
* head rule and specified cut-off.
*
- * @param data The data stream of parses.
- * @param rules The head rules for the parses.
- * @param params can contain a cutoff, the minimum number of entries required for the
- * n-gram to be saved as part of the dictionary.
- * @return A dictionary object.
+ * @param data The data stream of {@link Parse parses}.
+ * @param rules The {@link HeadRules} for the parses.
+ * @param params The {@link TrainingParameters} which can contain a {@code cutoff},
+ * the minimum number of entries required for the n-gram to be saved as
+ * part of the {@link Dictionary}.
+ * @return A {@link Dictionary} instance.
*/
public static Dictionary buildDictionary(ObjectStream<Parse> data, HeadRules rules,
TrainingParameters params) throws IOException {
@@ -542,7 +544,7 @@ public abstract class AbstractBottomUpParser implements Parser {
}
mdict.add(new StringList(words), 1, 1);
- //add tri-grams and bi-grams for inital sequence
+ //add tri-grams and bi-grams for initial sequence
Parse[] chunks = collapsePunctuation(ParserEventStream.getInitialChunks(p),
rules.getPunctuationTags());
String[] cwords = new String[chunks.length];
@@ -594,20 +596,19 @@ public abstract class AbstractBottomUpParser implements Parser {
ci++;
}
}
- //System.err.println("gas,and="+mdict.getCount((new TokenList(new String[] {"gas","and"}))));
mdict.cutoff(cutoff, Integer.MAX_VALUE);
return mdict.toDictionary(true);
}
/**
- * Creates a n-gram dictionary from the specified data stream using the specified
- * head rule and specified cut-off.
+ * Creates an n-gram {@link Dictionary} from the specified data stream using {@link HeadRules}
+ * and specified cut-off.
*
- * @param data The data stream of parses.
- * @param rules The head rules for the parses.
+ * @param data The data stream of {@link Parse parses}.
+ * @param rules The {@link HeadRules} for the {@link Parse parses}.
* @param cutoff The minimum number of entries required for the n-gram to be
* saved as part of the dictionary.
- * @return A dictionary object.
+ * @return A {@link Dictionary} instance.
*/
public static Dictionary buildDictionary(ObjectStream<Parse> data, HeadRules rules, int cutoff)
throws IOException {
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractContextGenerator.java
index 0948a07b..e72525ef 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractContextGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractContextGenerator.java
@@ -36,24 +36,26 @@ public abstract class AbstractContextGenerator {
/**
* Creates punctuation feature for the specified punctuation at the specified index
- * based on the punctuation mark.
+ * {@code i} based on the {@code punctuation} mark.
+ *
* @param punct The punctuation which is in context.
- * @param i The index of the punctuation with relative to the parse.
- * @return Punctuation feature for the specified parse and the specified punctuation at the specfied index.
+ * @param i The index of the {@code punctuation} relative to the parse.
+ * @return Punctuation feature for the parse and the punctuation at the specified index.
*/
protected String punct(Parse punct, int i) {
- return String.valueOf(i) + "=" + punct.getCoveredText();
+ return i + "=" + punct.getCoveredText();
}
/**
- * Creates punctuation feature for the specified punctuation at the specfied index
- * based on the punctuation's tag.
+ * Creates punctuation feature for the specified punctuation at the specified index
+ * {@code i} based on the {@code punctuation}'s tag.
+ *
* @param punct The punctuation which is in context.
- * @param i The index of the punctuation relative to the parse.
- * @return Punctuation feature for the specified parse and the specified punctuation at the specfied index.
+ * @param i The index of the {@code punctuation} relative to the parse.
+ * @return Punctuation feature for the parse and the punctuation at the specified index.
*/
protected String punctbo(Parse punct, int i) {
- return String.valueOf(i) + "=" + punct.getType();
+ return i + "=" + punct.getType();
}
protected String cons(Parse p, int i) {
@@ -88,10 +90,13 @@ public abstract class AbstractContextGenerator {
/**
* Generates a string representing the grammar rule production that the specified parse
- * is starting. The rule is of the form p.type -> c.children[0..n].type.
- * @param p The parse which stats teh production.
+ * is starting.
+ * <p>
+ * The rule is of the form {@code p.type -> c.children[0..n].type}.
+ *
+ * @param p The {@link Parse} which starts the production.
* @param includePunctuation Whether punctuation should be included in the production.
- * @return a string representing the grammar rule production that the specified parse
+ * @return A string representing the grammar rule production that the specified parse
* is starting.
*/
protected String production(Parse p, boolean includePunctuation) {
@@ -150,17 +155,19 @@ public abstract class AbstractContextGenerator {
/**
* Creates cons features involving the 3 specified nodes and adds them to the specified feature list.
+ *
* @param features The list of features.
- * @param c0 The first node.
- * @param c1 The second node.
- * @param c2 The third node.
- * @param punct1s The punctuation between the first and second node.
- * @param punct2s The punctuation between the second and third node.
- * @param trigram Specifies whether lexical tri-gram features between these nodes should be generated.
- * @param bigram1 Specifies whether lexical bi-gram features between the first and second
- * node should be generated.
- * @param bigram2 Specifies whether lexical bi-gram features between the second and third
- * node should be generated.
+ * @param c0 The first {@link Cons node}.
+ * @param c1 The second {@link Cons node}.
+ * @param c2 The third {@link Cons node}.
+ * @param punct1s The punctuation between {@code c0} and {@code c1}.
+ * @param punct2s The punctuation between {@code c1} and {@code c2}.
+ * @param trigram Specifies whether lexical tri-gram features between these nodes
+ * should be generated.
+ * @param bigram1 Specifies whether lexical bi-gram features between {@code c0} and {@code c1}
+ * should be generated.
+ * @param bigram2 Specifies whether lexical bi-gram features between {@code c1} and {@code c2}
+ * should be generated.
*/
protected void cons3(List<String> features, Cons c0, Cons c1, Cons c2, Collection<Parse> punct1s,
Collection<Parse> punct2s, boolean trigram, boolean bigram1, boolean bigram2) {
@@ -298,9 +305,10 @@ public abstract class AbstractContextGenerator {
}
/**
- * Generates features for nodes surrounding a completed node of the specified type.
- * @param node A surrounding node.
- * @param i The index of the surrounding node with respect to the completed node.
+ * Generates features for nodes surrounding a completed node of the specified {@code type}.
+ *
+ * @param node A surrounding {@link Parse node}.
+ * @param i The index of the surrounding {@code node} with respect to the completed node.
* @param type The type of the completed node.
* @param punctuation The punctuation adjacent and between the specified surrounding node.
* @param features A list to which features are added.
@@ -358,8 +366,9 @@ public abstract class AbstractContextGenerator {
/**
* Produces features to determine whether the specified child node is part of
* a complete constituent of the specified type and adds those features to the
- * specfied list.
- * @param child The parse node to consider.
+ * specified list.
+ *
+ * @param child The {@link Parse node} to consider.
* @param i A string indicating the position of the child node.
* @param type The type of constituent being built.
* @param features List to add features to.
@@ -397,7 +406,8 @@ public abstract class AbstractContextGenerator {
/**
* Populates specified nodes array with left-most right frontier
* node with a unique head. If the right frontier doesn't contain
- * enough nodes, then nulls are placed in the array elements.
+ * enough nodes, then {@code nulls} are placed in the array elements.
+ *
* @param rf The current right frontier.
* @param nodes The array to be populated.
*/
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractParserEventStream.java b/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractParserEventStream.java
index 4716fd5e..6a9f8109 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractParserEventStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/AbstractParserEventStream.java
@@ -127,10 +127,11 @@ public abstract class AbstractParserEventStream extends opennlp.tools.util.Abstr
}
/**
- * Produces all events for the specified sentence chunks
- * and adds them to the specified list.
- * @param newEvents A list of events to be added to.
- * @param chunks Pre-chunked constituents of a sentence.
+ * Produces all events for the specified sentence {@code chunks}
+ * and adds them to the specified {@code newEvents} list.
+ *
+ * @param newEvents A list of {@link Event events} to be added to.
+ * @param chunks Pre-chunked {@link Parse constituents} of a sentence.
*/
protected abstract void addParseEvents(List<Event> newEvents, Parse[] chunks);
@@ -188,10 +189,13 @@ public abstract class AbstractParserEventStream extends opennlp.tools.util.Abstr
}
/**
- * Returns true if the specified child is the last child of the specified parent.
- * @param child The child parse.
- * @param parent The parent parse.
- * @return true if the specified child is the last child of the specified parent; false otherwise.
+ * Returns {@code true} if the {@link Parse child} is the last child of the specified
+ * {@link Parse parent}.
+ *
+ * @param child The child {@link Parse}.
+ * @param parent The parent {@link Parse}.
+ * @return {@code true} if the specified child is the last child of the specified parent,
+ * {@code false} otherwise.
*/
protected boolean lastChild(Parse child, Parse parent) {
Parse[] kids = AbstractBottomUpParser.collapsePunctuation(parent.getChildren(),punctSet);
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkContextGenerator.java
index e32cead8..076ee68e 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkContextGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkContextGenerator.java
@@ -25,7 +25,7 @@ import opennlp.tools.util.Cache;
import opennlp.tools.util.TokenTag;
/**
- * Creates predivtive context for the pre-chunking phases of parsing.
+ * Creates predictive context for the pre-chunking phases of parsing.
*/
public class ChunkContextGenerator implements ChunkerContextGenerator {
@@ -33,11 +33,18 @@ public class ChunkContextGenerator implements ChunkerContextGenerator {
private Cache<String, String[]> contextsCache;
private Object wordsKey;
-
+ /**
+ * Initializes a {@link ChunkContextGenerator} instance.
+ */
public ChunkContextGenerator() {
this(0);
}
+ /**
+ * Initializes a {@link ChunkContextGenerator} instance with a custom {@code cacheSize}.
+ *
+ * @param cacheSize The cache size. Must be greater than {@code 0} to have an effect.
+ */
public ChunkContextGenerator(int cacheSize) {
super();
if (cacheSize > 0) {
@@ -45,17 +52,24 @@ public class ChunkContextGenerator implements ChunkerContextGenerator {
}
}
+ /**
+ * @deprecated Use {@link #getContext(int, String[], String[], String[])} instead.
+ */
@Deprecated
public String[] getContext(Object o) {
Object[] data = (Object[]) o;
return getContext((Integer) data[0], (String[]) data[1], (String[]) data[2], (String[]) data[3]);
}
+ /**
+ * @deprecated Use {@link #getContext(int, String[], String[], String[])} instead.
+ */
@Deprecated
public String[] getContext(int i, String[] words, String[] prevDecisions, Object[] ac) {
return getContext(i,words,(String[]) ac[0],prevDecisions);
}
+ @Override
public String[] getContext(int i, String[] words, String[] tags, String[] preds) {
List<String> features = new ArrayList<>(19);
int x_2 = i - 2;
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkSampleStream.java
index 675ac233..e7ee6634 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkSampleStream.java
@@ -28,6 +28,11 @@ import opennlp.tools.util.ObjectStream;
public class ChunkSampleStream extends FilterObjectStream<Parse, ChunkSample> {
+ /**
+ * Initializes a {@link ChunkSampleStream} instance.
+ *
+ * @param in A {@link ObjectStream stream} used as input.
+ */
public ChunkSampleStream(ObjectStream<Parse> in) {
super(in);
}
@@ -62,8 +67,8 @@ public class ChunkSampleStream extends FilterObjectStream<Parse, ChunkSample> {
return chunks.toArray(new Parse[chunks.size()]);
}
+ @Override
public ChunkSample read() throws IOException {
-
Parse parse = samples.read();
if (parse != null) {
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/Cons.java b/opennlp-tools/src/main/java/opennlp/tools/parser/Cons.java
index 2bc7491f..533e7517 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/Cons.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/Cons.java
@@ -18,7 +18,9 @@
package opennlp.tools.parser;
/**
- * Class to hold feature information about a specific parse node.
+ * Holds feature information about a specific {@link Parse} node.
+ *
+ * @see Parse
*/
public class Cons {
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/Constituent.java b/opennlp-tools/src/main/java/opennlp/tools/parser/Constituent.java
index 006864ab..2b042090 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/Constituent.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/Constituent.java
@@ -15,13 +15,14 @@
* limitations under the License.
*/
-
package opennlp.tools.parser;
import opennlp.tools.util.Span;
/**
- * Class used to hold constituents when reading parses.
+ * Holds constituents when reading {@link Parse parses}.
+ *
+ * @see Parse
*/
public class Constituent {
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/GapLabeler.java b/opennlp-tools/src/main/java/opennlp/tools/parser/GapLabeler.java
index dbc8c45f..f8e84473 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/GapLabeler.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/GapLabeler.java
@@ -21,13 +21,18 @@ package opennlp.tools.parser;
import java.util.Stack;
/**
- * Interface for labeling nodes which contain traces so that these traces can be predicted
- * by the parser.
+ * Represents a labeler for nodes which contain traces so that these traces can be predicted
+ * by a {@link Parser}.
+ *
+ * @see Parser
*/
public interface GapLabeler {
+
/**
- * Labels the constituents found in the stack with gap labels if appropriate.
- * @param stack The stack of un-completed constituents.
+ * Labels {@link Constituent constituents} found in the {@code stack} with gap labels
+ * if appropriate.
+ *
+ * @param stack The {@link Stack} of un-completed {@link Constituent constituents}.
*/
void labelGaps(Stack<Constituent> stack);
}
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/HeadRules.java b/opennlp-tools/src/main/java/opennlp/tools/parser/HeadRules.java
index bf4c0699..10f6911c 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/HeadRules.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/HeadRules.java
@@ -21,23 +21,23 @@ package opennlp.tools.parser;
import java.util.Set;
/**
- * Interface for encoding the head rules associated with parsing.
+ * Encoder for head rules associated with parsing.
*/
public interface HeadRules {
/**
- * Returns the head constituent for the specified constituents of the specified type.
+ * Retrieves the head {@link Parse constituent} for the specified constituents of given {@code type}.
*
- * @param constituents The constituents which make up a constituent of the specified type.
- * @param type The type of a constituent which is made up of the specified constituents.
- * @return The constituent which is the head.
+ * @param constituents The {@link Parse constituents} which make up a constituent of the
+ * specified {@code type}.
+ * @param type The type of a constituent which is made up of the {@code constituents}.
+ * @return The {@link Parse constituent} which represents the head.
*/
Parse getHead(Parse[] constituents, String type);
/**
- * Returns the set of punctuation tags. Attachment decisions for these tags will not be modeled.
- *
- * @return the set of punctuation tags.
+ * @return Retrieves the set of punctuation tags.
+ * Attachment decisions for these tags will not be modeled.
*/
Set<String> getPunctuationTags();
}
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/Parse.java b/opennlp-tools/src/main/java/opennlp/tools/parser/Parse.java
index af2924ab..185ec93f 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/Parse.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/Parse.java
@@ -48,7 +48,7 @@ public class Parse implements Cloneable, Comparable<Parse> {
* The text string on which this parse is based.
* This object is shared among all parses for the same sentence.
*/
- private String text;
+ private final String text;
/**
* The character offsets into the text for this constituent.
@@ -106,17 +106,17 @@ public class Parse implements Cloneable, Comparable<Parse> {
* The pattern used to find the base constituent label of a
* Penn Treebank labeled constituent.
*/
- private static Pattern typePattern = Pattern.compile("^([^ =-]+)");
+ private static final Pattern typePattern = Pattern.compile("^([^ =-]+)");
/**
* The pattern used to find the function tags.
*/
- private static Pattern funTypePattern = Pattern.compile("^[^ =-]+-([^ =-]+)");
+ private static final Pattern funTypePattern = Pattern.compile("^[^ =-]+-([^ =-]+)");
/**
* The patter used to identify tokens in Penn Treebank labeled constituents.
*/
- private static Pattern tokenPattern = Pattern.compile("^[^ ()]+ ([^ ()]+)\\s*\\)");
+ private static final Pattern tokenPattern = Pattern.compile("^[^ ()]+ ([^ ()]+)\\s*\\)");
/**
* The set of punctuation parses which are between this parse and the previous parse.
@@ -136,13 +136,13 @@ public class Parse implements Cloneable, Comparable<Parse> {
private static boolean useFunctionTags;
/**
- * Creates a new parse node for this specified text and span of the specified type
- * with the specified probability and the specified head index.
+ * Initializes a {@link Parse node} for this specified {@code text} and {@code span} of the
+ * specified {@code type} with probability {@code p} and the head {@code index}.
*
* @param text The text of the sentence for which this node is a part of.
- * @param span The character offsets for this node within the specified text.
+ * @param span The {@link Span character offsets} for this node within the specified {@code text}.
* @param type The constituent label of this node.
- * @param p The probability of this parse.
+ * @param p The probability of this {@link Parse}.
* @param index The token index of the head of this parse.
*/
public Parse(String text, Span span, String type, double p, int index) {
@@ -158,11 +158,11 @@ public class Parse implements Cloneable, Comparable<Parse> {
}
/**
- * Creates a new parse node for this specified text and span of the specified type with
- * the specified probability and the specified head and head index.
+ * Initializes a {@link Parse node} for this specified {@code text} and {@code span} of the
+ * specified {@code type} with probability {@code p} and the head {@code index}.
*
* @param text The text of the sentence for which this node is a part of.
- * @param span The character offsets for this node within the specified text.
+ * @param span The {@link Span character offsets} for this node within the specified {@code text}.
* @param type The constituent label of this node.
* @param p The probability of this parse.
* @param h The head token of this parse.
@@ -190,9 +190,9 @@ public class Parse implements Cloneable, Comparable<Parse> {
}
/**
- * Clones the right frontier of parse up to the specified node.
+ * Clones the right frontier of {@link Parse} up to the specified {@code node}.
*
- * @param node The last node in the right frontier of the parse tree which should be cloned.
+ * @param node The last {@code node} in the right frontier of the parse tree to be cloned.
* @return A clone of this parse and its right frontier up to and including the specified node.
*/
public Parse clone(Parse node) {
@@ -208,10 +208,10 @@ public class Parse implements Cloneable, Comparable<Parse> {
}
/**
- * Clones the right frontier of this root parse up to and including the specified node.
+ * Clones the right frontier of this root {@link Parse} up to and including the specified node.
*
- * @param node The last node in the right frontier of the parse tree which should be cloned.
- * @param parseIndex The child index of the parse for this root node.
+ * @param node The last {@code node} in the right frontier of the parse tree to be cloned.
+ * @param parseIndex The child index of the parse for this root {@code node}.
* @return A clone of this root parse and its right frontier up to and including the specified node.
*/
public Parse cloneRoot(Parse node, int parseIndex) {
@@ -224,7 +224,7 @@ public class Parse implements Cloneable, Comparable<Parse> {
/**
* Specifies whether function tags should be included as part of the constituent type.
*
- * @param uft true is they should be included; false otherwise.
+ * @param uft {@code true} if they should be included, {@code false} otherwise.
*/
public static void useFunctionTags(boolean uft) {
useFunctionTags = uft;
@@ -241,18 +241,15 @@ public class Parse implements Cloneable, Comparable<Parse> {
}
/**
- * Returns the constituent label for this node of the parse.
- *
- * @return The constituent label for this node of the parse.
+ * @return Retrieves the constituent label for this node of the parse.
*/
public String getType() {
return type;
}
/**
- * Returns the set of punctuation parses that occur immediately before this parse.
- *
- * @return the set of punctuation parses that occur immediately before this parse.
+ * @return Retrieves the set of punctuation {@link Parse parses} that occur
+ * immediately before this parse.
*/
public Collection<Parse> getPreviousPunctuationSet() {
return prevPunctSet;
@@ -261,7 +258,7 @@ public class Parse implements Cloneable, Comparable<Parse> {
/**
* Designates that the specified punctuation should is prior to this parse.
*
- * @param punct The punctuation.
+ * @param punct The {@link Parse punctuation} to be added.
*/
public void addPreviousPunctuation(Parse punct) {
if (this.prevPunctSet == null) {
@@ -271,9 +268,8 @@ public class Parse implements Cloneable, Comparable<Parse> {
}
/**
- * Returns the set of punctuation parses that occur immediately after this parse.
- *
- * @return the set of punctuation parses that occur immediately after this parse.
+ * @return Retrieves the set of punctuation {@link Parse parses} that occur
+ * immediately after this parse.
*/
public Collection<Parse> getNextPunctuationSet() {
return nextPunctSet;
@@ -282,7 +278,7 @@ public class Parse implements Cloneable, Comparable<Parse> {
/**
* Designates that the specified punctuation follows this parse.
*
- * @param punct The punctuation set.
+ * @param punct The {@link Parse punctuation} set.
*/
public void addNextPunctuation(Parse punct) {
if (this.nextPunctSet == null) {
@@ -292,28 +288,28 @@ public class Parse implements Cloneable, Comparable<Parse> {
}
/**
- * Sets the set of punctuation tags which follow this parse.
+ * Sets the {@link Parse punctuation tags} which follow this parse.
*
- * @param punctSet The set of punctuation tags which follow this parse.
+ * @param punctSet The punctuation tags which follow this parse.
*/
public void setNextPunctuation(Collection<Parse> punctSet) {
this.nextPunctSet = punctSet;
}
/**
- * Sets the set of punctuation tags which preceed this parse.
+ * Sets the {@link Parse punctuation tags} which precede this parse.
*
- * @param punctSet The set of punctuation tags which preceed this parse.
+ * @param punctSet The punctuation tags which precede this parse.
*/
public void setPrevPunctuation(Collection<Parse> punctSet) {
this.prevPunctSet = punctSet;
}
/**
- * Inserts the specified constituent into this parse based on its text span.This
- * method assumes that the specified constituent can be inserted into this parse.
+ * Inserts the specified constituent into this parse based on its text span.
+ * This method assumes that the specified constituent can be inserted into this parse.
*
- * @param constituent The constituent to be inserted.
+ * @param constituent The {@link Parse constituent} to be inserted.
*/
public void insert(final Parse constituent) {
Span ic = constituent.span;
@@ -356,9 +352,9 @@ public class Parse implements Cloneable, Comparable<Parse> {
}
/**
- * Appends the specified string buffer with a string representation of this parse.
+ * Fills the specified {@link StringBuffer} with a string representation of this parse.
*
- * @param sb A string buffer into which the parse string can be appended.
+ * @param sb A {@link StringBuffer} into which the parse string can be appended.
*/
public void show(StringBuffer sb) {
int start;
@@ -388,7 +384,7 @@ public class Parse implements Cloneable, Comparable<Parse> {
}
/**
- * Displays this parse using Penn Treebank-style formatting.
+ * Prints this parse using Penn Treebank-style formatting.
*/
public void show() {
StringBuffer sb = new StringBuffer(text.length() * 4);
@@ -396,11 +392,9 @@ public class Parse implements Cloneable, Comparable<Parse> {
System.out.println(sb);
}
-
/**
- * Returns the probability associated with the pos-tag sequence assigned to this parse.
- *
- * @return The probability associated with the pos-tag sequence assigned to this parse.
+ * @return Retrieves the probability associated with the pos-tag sequence assigned
+ * to this parse.
*/
public double getTagSequenceProb() {
//System.err.println("Parse.getTagSequenceProb: "+type+" "+this);
@@ -422,9 +416,8 @@ public class Parse implements Cloneable, Comparable<Parse> {
}
/**
- * Returns whether this parse is complete.
- *
- * @return Returns true if the parse contains a single top-most node.
+ * @return {@code true} if the parse contains a single top-most node (=complete),
+ * {@code false} otherwise.
*/
public boolean complete() {
return (parts.size() == 1);
@@ -435,7 +428,7 @@ public class Parse implements Cloneable, Comparable<Parse> {
}
/**
- * Represents this parse in a human readable way.
+ * Represents this {@link Parse} in a human-readable way.
*/
@Override
public String toString() {
@@ -449,28 +442,21 @@ public class Parse implements Cloneable, Comparable<Parse> {
}
/**
- * Returns the text of the sentence over which this parse was formed.
- *
- * @return The text of the sentence over which this parse was formed.
+ * @return Retrieves the text of the sentence over which this parse was formed.
*/
public String getText() {
return text;
}
/**
- * Returns the character offsets for this constituent.
- *
- * @return The character offsets for this constituent.
+ * @return Retrieves the {@link Span character offsets} for this constituent.
*/
public Span getSpan() {
return span;
}
/**
- * Returns the log of the product of the probability associated with all the
- * decisions which formed this constituent.
- *
- * @return The log of the product of the probability associated with all the
+ * @return Retrieves the {@code log} of the product of the probability associated with all the
* decisions which formed this constituent.
*/
public double getProb() {
@@ -478,7 +464,7 @@ public class Parse implements Cloneable, Comparable<Parse> {
}
/**
- * Adds the specified probability log to this current log for this parse.
+ * Adds the specified {@code logProb} to this current log for this parse.
*
* @param logProb The probability of an action performed on this parse.
*/
@@ -487,9 +473,7 @@ public class Parse implements Cloneable, Comparable<Parse> {
}
/**
- * Returns the child constituents of this constituent
- * .
- * @return The child constituents of this constituent.
+ * @return Retrieves the {@link Parse child constituents} of this constituent.
*/
public Parse[] getChildren() {
return parts.toArray(new Parse[parts.size()]);
@@ -542,11 +526,11 @@ public class Parse implements Cloneable, Comparable<Parse> {
/**
* Sister adjoins this node's last child and the specified sister node and returns their
- * new parent node. The new parent node replace this nodes last child.
+ * new parent node. The new parent node replaces this node's last child.
*
- * @param sister The node to be adjoined.
- * @param rules The head rules for the parser.
- * @return The new parent node of this node and the specified sister node.
+ * @param sister The {@link Parse node} to be adjoined.
+ * @param rules The {@link HeadRules} for the parser.
+ * @return The new {@link Parse parent node} of this node and the specified sister node.
*/
public Parse adjoin(Parse sister, HeadRules rules) {
Parse lastChild = parts.get(parts.size() - 1);
@@ -587,45 +571,38 @@ public class Parse implements Cloneable, Comparable<Parse> {
}
/**
- * Returns the number of children for this parse node.
- *
- * @return the number of children for this parse node.
+ * @return Retrieves the number of children for this parse node.
*/
public int getChildCount() {
return parts.size();
}
/**
- * Returns the index of this specified child.
- *
* @param child A child of this parse.
*
- * @return the index of this specified child or -1 if the specified child is not a child of this parse.
+ * @return Retrieves the index of this specified child or {@code -1}
+ * if the specified child is not a child of this parse.
*/
public int indexOf(Parse child) {
return parts.indexOf(child);
}
/**
- * Returns the head constituent associated with this constituent.
- *
- * @return The head constituent associated with this constituent.
+ * @return Retrieves the head constituent associated with this constituent.
*/
public Parse getHead() {
return head;
}
/**
- * Returns the index within a sentence of the head token for this parse.
- *
- * @return The index within a sentence of the head token for this parse.
+ * @return Retrieves the index within a sentence of the head token for this parse.
*/
public int getHeadIndex() {
return headIndex;
}
/**
- * Returns the label assigned to this parse node during parsing
+ * Retrieves the label assigned to this parse node during parsing
* which specifies how this node will be formed into a constituent.
*
* @return The outcome label assigned to this node during parsing.
@@ -635,7 +612,7 @@ public class Parse implements Cloneable, Comparable<Parse> {
}
/**
- * Assigns this parse the specified label. This is used by parsing schemes to
+ * Assigns this parse the specified label. This is used by parsing schemes to
* tag parsing nodes while building.
*
* @param label A label indicating something about the stage of building for this parse node.
@@ -731,13 +708,10 @@ public class Parse implements Cloneable, Comparable<Parse> {
}
/**
- * Returns the string containing the token for the specified portion of the parse string or
- * null if the portion of the parse string does not represent a token.
- *
* @param rest The portion of the parse string remaining to be processed.
*
- * @return The string containing the token for the specified portion of the parse string or
- * null if the portion of the parse string does not represent a token.
+ * @return Retrieves the string containing the token for the specified portion of the parse
+ * string or {@code null} if the portion of the parse string does not represent a token.
*/
private static String getToken(String rest) {
Matcher tokenMatcher = tokenPattern.matcher(rest);
@@ -751,7 +725,7 @@ public class Parse implements Cloneable, Comparable<Parse> {
* Computes the head parses for this parse and its sub-parses and stores this information
* in the parse data structure.
*
- * @param rules The head rules which determine how the head of the parse is computed.
+ * @param rules The {@link HeadRules} which determine how the head of the parse is computed.
*/
public void updateHeads(HeadRules rules) {
if (parts != null && parts.size() != 0) {
@@ -778,7 +752,7 @@ public class Parse implements Cloneable, Comparable<Parse> {
/**
* Prune the specified sentence parse of vacuous productions.
*
- * @param parse
+ * @param parse The sentence {@link Parse}.
*/
public static void pruneParse(Parse parse) {
List<Parse> nodes = new LinkedList<>();
@@ -822,24 +796,25 @@ public class Parse implements Cloneable, Comparable<Parse> {
/**
- * Parses the specified tree-bank style parse string and return a Parse structure for that string.
+ * Parses the specified tree-bank style parse string and returns a {@link Parse} structure
+ * for that string.
*
- * @param parse A tree-bank style parse string.
+ * @param parse A tree-bank style {@link Parse} string.
*
- * @return a Parse structure for the specified tree-bank style parse string.
+ * @return A {@link Parse} structure for the specified tree-bank style parse string.
*/
public static Parse parseParse(String parse) {
return parseParse(parse,null);
}
/**
- * Parses the specified tree-bank style parse string and return a Parse structure
+ * Parses the specified tree-bank style {@link Parse} string and returns a {@link Parse} structure
* for that string.
*
- * @param parse A tree-bank style parse string.
- * @param gl The gap labeler.
+ * @param parse A tree-bank style {@link Parse} string.
+ * @param gl The {@link GapLabeler} to be used.
*
- * @return a Parse structure for the specified tree-bank style parse string.
+ * @return A {@link Parse} structure for the specified tree-bank style parse string.
*/
public static Parse parseParse(String parse, GapLabeler gl) {
StringBuilder text = new StringBuilder();
@@ -896,9 +871,7 @@ public class Parse implements Cloneable, Comparable<Parse> {
}
/**
- * Returns the parent parse node of this constituent.
- *
- * @return The parent parse node of this constituent.
+ * @return Retrieves the parent parse node of this constituent.
*/
public Parse getParent() {
return parent;
@@ -916,7 +889,7 @@ public class Parse implements Cloneable, Comparable<Parse> {
/**
* Indicates whether this parse node is a pos-tag.
*
- * @return true if this node is a pos-tag, false otherwise.
+ * @return {@code true} if this node is a pos-tag, {@code false} otherwise.
*/
public boolean isPosTag() {
return (parts.size() == 1 &&
@@ -924,9 +897,9 @@ public class Parse implements Cloneable, Comparable<Parse> {
}
/**
- * Returns true if this constituent contains no sub-constituents.
+ * Indicates whether this parse node contains no sub-constituents.
*
- * @return true if this constituent contains no sub-constituents; false otherwise.
+ * @return {@code true} if this constituent contains no sub-constituents; {@code false} otherwise.
*/
public boolean isFlat() {
boolean flat = true;
@@ -945,9 +918,7 @@ public class Parse implements Cloneable, Comparable<Parse> {
}
/**
- * Returns the parse nodes which are children of this node and which are pos tags.
- *
- * @return the parse nodes which are children of this node and which are pos tags.
+ * @return Retrieves the parse nodes which are children of this node and which are pos tags.
*/
public Parse[] getTagNodes() {
List<Parse> tags = new LinkedList<>();
@@ -982,7 +953,7 @@ public class Parse implements Cloneable, Comparable<Parse> {
/**
* Returns the deepest shared parent of this node and the specified node.
* If the nodes are identical then their parent is returned.
- * If one node is the parent of the other then the parent node is returned.
+ * If one node is the parent of the other then the parent node is returned.
*
* @param node The node from which parents are compared to this node's parents.
*
@@ -1034,9 +1005,8 @@ public class Parse implements Cloneable, Comparable<Parse> {
}
/**
- * Returns the derivation string for this parse if one has been created.
- *
- * @return the derivation string for this parse or null if no derivation string has been created.
+ * @return Retrieves the derivation string for this parse or {@code null}
+ * if no derivation string has been created.
*/
public StringBuffer getDerivation() {
return derivation;
@@ -1078,11 +1048,11 @@ public class Parse implements Cloneable, Comparable<Parse> {
}
/**
- * Utility method to inserts named entities.
+ * Utility method to insert named entities.
*
- * @param tag
- * @param names
- * @param tokens
+ * @param tag A token representing a tag.
+ * @param names An array of {@link Span names}.
+ * @param tokens An array of {@link Parse tokens}.
*/
public static void addNames(String tag, Span[] names, Parse[] tokens) {
for (Span nameTokenSpan : names) {
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/ParseSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/parser/ParseSampleStream.java
index 4bd27775..82cfe0a7 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/ParseSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/ParseSampleStream.java
@@ -24,10 +24,16 @@ import opennlp.tools.util.ObjectStream;
public class ParseSampleStream extends FilterObjectStream<String, Parse> {
+ /**
+ * Initializes a {@link ParseSampleStream instance}.
+ *
+ * @param in A plain text {@link ObjectStream stream} used as input.
+ */
public ParseSampleStream(ObjectStream<String> in) {
super(in);
}
+ @Override
public Parse read() throws IOException {
String parse = samples.read();
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/Parser.java b/opennlp-tools/src/main/java/opennlp/tools/parser/Parser.java
index 688921b8..8b5d3a7e 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/Parser.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/Parser.java
@@ -18,29 +18,35 @@
package opennlp.tools.parser;
/**
- * Interface for full-syntactic parsers.
+ * Defines common methods for full-syntactic parsers.
*/
public interface Parser {
/**
- * Returns the specified number of parses or fewer for the specified tokens. <br>
+ * Returns the specified number of parses or fewer for the specified tokens.
+ * <p>
+ *
* <b>Note:</b> The nodes within
* the returned parses are shared with other parses and therefore their parent node references
- * will not be consistent with their child node reference. {@link Parse#setParent(Parse)}
- * can be used to make the parents consistent with a particular parse, but subsequent calls
- * to <code>setParents</code> can invalidate the results of earlier calls.<br>
- * @param tokens A parse containing the tokens with a single parent node.
+ * will not be consistent with their child node reference.
+ * <p>
+ *
+ * {@link Parse#setParent(Parse)} can be used to make the parents consistent with a
+ * particular parse, but subsequent calls to <code>setParents</code> can invalidate the
+ * results of earlier calls.<br>
+ *
+ * @param tokens A {@link Parse} containing the tokens with a single parent node.
* @param numParses The number of parses desired.
- * @return the specified number of parses for the specified tokens.
+ * @return the specified number of {@link Parse parses} for the specified {@code tokens}.
*/
Parse[] parse(Parse tokens, int numParses);
/**
- * Returns a parse for the specified parse of tokens.
+ * Returns a {@link Parse} for the specified {@link Parse} of {@code tokens}.
*
* @param tokens The root node of a flat parse containing only tokens.
* @return A full parse of the specified tokens or the flat chunks of the tokens if a
- * fullparse could not be found.
+ * full parse could not be found.
*/
Parse parse(Parse tokens);
}
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerSequenceValidator.java b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerSequenceValidator.java
index 340aead2..233cb36f 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerSequenceValidator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerSequenceValidator.java
@@ -24,9 +24,12 @@ import opennlp.tools.parser.chunking.Parser;
import opennlp.tools.util.SequenceValidator;
import opennlp.tools.util.TokenTag;
+/**
+ * The parser chunker {@link SequenceValidator} implementation.
+ */
public class ParserChunkerSequenceValidator implements SequenceValidator<TokenTag> {
- private Map<String, String> continueStartMap;
+ private final Map<String, String> continueStartMap;
public ParserChunkerSequenceValidator(String[] outcomes) {
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserCrossValidator.java b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserCrossValidator.java
index c45f7021..c3577e33 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserCrossValidator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserCrossValidator.java
@@ -24,6 +24,9 @@ import opennlp.tools.util.TrainingParameters;
import opennlp.tools.util.eval.CrossValidationPartitioner;
import opennlp.tools.util.eval.FMeasure;
+/**
+ * Cross validator for a {@link Parser}.
+ */
public class ParserCrossValidator {
private final String languageCode;
@@ -34,18 +37,37 @@ public class ParserCrossValidator {
private final FMeasure fmeasure = new FMeasure();
- private ParserType parserType;
+ private final ParserType parserType;
- private ParserEvaluationMonitor[] monitors;
+ private final ParserEvaluationMonitor[] monitors;
+ /**
+ * Initializes a {@link ParserCrossValidator} instance via given parameters.
+ *
+ * @param languageCode An ISO-conformant language code.
+ * @param params The {@link TrainingParameters} for the context of cross validation.
+ * @param rules The {@link HeadRules} for the context of cross validation.
+ * @param parserType The {@link ParserType} for the context of cross validation.
+ * @param monitors the {@link ParserEvaluationMonitor evaluation listeners}.
+ */
public ParserCrossValidator(String languageCode, TrainingParameters params,
HeadRules rules, ParserType parserType, ParserEvaluationMonitor... monitors) {
this.languageCode = languageCode;
this.params = params;
this.rules = rules;
this.parserType = parserType;
+ this.monitors = monitors;
}
+ /**
+ * Starts the evaluation.
+ *
+ * @param samples The {@link ObjectStream} of {@link Parse samples} to train and test with.
+ * @param nFolds Number of folds. It must be greater than zero.
+ *
+ * @throws IOException Thrown if IO errors occurred.
+ * @throws IllegalStateException Thrown if the currently active {@link ParserType} is not supported.
+ */
public void evaluate(ObjectStream<Parse> samples, int nFolds) throws IOException {
CrossValidationPartitioner<Parse> partitioner = new CrossValidationPartitioner<>(samples, nFolds);
@@ -54,7 +76,6 @@ public class ParserCrossValidator {
CrossValidationPartitioner.TrainingSampleStream<Parse> trainingSampleStream = partitioner.next();
ParserModel model;
-
if (ParserType.CHUNKING.equals(parserType)) {
model = opennlp.tools.parser.chunking.Parser.train(languageCode, samples, rules, params);
}
@@ -66,7 +87,6 @@ public class ParserCrossValidator {
}
ParserEvaluator evaluator = new ParserEvaluator(ParserFactory.create(model), monitors);
-
evaluator.evaluate(trainingSampleStream.getTestSampleStream());
fmeasure.mergeInto(evaluator.getFMeasure());
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserEvaluationMonitor.java b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserEvaluationMonitor.java
index 50cd2267..e78dba4a 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserEvaluationMonitor.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserEvaluationMonitor.java
@@ -19,5 +19,8 @@ package opennlp.tools.parser;
import opennlp.tools.util.eval.EvaluationMonitor;
+/**
+ * A marker interface for evaluating {@link Parser parsers}.
+ */
public interface ParserEvaluationMonitor extends EvaluationMonitor<Parse> {
}
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserEvaluator.java b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserEvaluator.java
index 54a11858..e8fc6db8 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserEvaluator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserEvaluator.java
@@ -27,30 +27,35 @@ import opennlp.tools.util.eval.Evaluator;
import opennlp.tools.util.eval.FMeasure;
/**
- * Class for {@link Evaluator<Parse>}.
- * This ParserEvaluator behaves like EVALB with no exceptions, e.g,
- * without removing punctuation tags, or equality between ADVP and PRT
- * (as in COLLINS convention). To follow parsing evaluation conventions
- * (Bikel, Collins, Charniak, etc.) as in EVALB, options are to be added
- * to the {@code ParserEvaluatorTool}.
+ * This implementation of {@link Evaluator<Parse>} behaves like {@code EVALB} with no exceptions,
+ * e.g., without removing punctuation tags, or equality between {@code ADVP} and {@code PRT}, as
+ * in <a href="https://direct.mit.edu/coli/article/30/4/479/1858/Intricacies-of-Collins-Parsing-Model">
+ * COLLINS convention</a>.
+ * <p>
+ * To follow parsing evaluation conventions (Bikel, Collins, Charniak, etc.) as in {@code EVALB},
+ * options are to be added to the {@code ParserEvaluatorTool}.
*
+ * @see Parser
+ * @see Evaluator
+ * @see Parse
*/
public class ParserEvaluator extends Evaluator<Parse> {
- /**
+ /*
* Holds the evaluation results for the last run of {@link #processSample}.
*/
private final FMeasure fmeasure = new FMeasure();
- /**
+
+ /*
* The parser to evaluate.
*/
private final Parser parser;
-
+
/**
- * Construct a {@link Parser} with some evaluation monitors.
- *
- * @param aParser A valid {@link Parser} instance.
- * @param monitors the evaluation monitors
+ * Initializes a {@link ParserEvaluator} instance with the given {@link Parser}.
+ *
+ * @param aParser The {@link Parser} to evaluate.
+ * @param monitors The {@link ParserEvaluationMonitor evaluation listeners}.
*/
public ParserEvaluator(final Parser aParser, final ParserEvaluationMonitor... monitors) {
super(monitors);
@@ -58,9 +63,11 @@ public class ParserEvaluator extends Evaluator<Parse> {
}
/**
- * Obtain {@code Span}s for every parse in the sentence.
- * @param parse the parse from which to obtain the spans
- * @return an array containing every span for the parse
+ * Obtains {@link Span spans} for every parse in a sentence.
+ *
+ * @param parse The parse from which to obtain {@link Span spans}.
+ *
+ * @return An array of {@link Span spans} for the parse
*/
private static Span[] getConstituencySpans(final Parse parse) {
@@ -108,10 +115,6 @@ public class ParserEvaluator extends Evaluator<Parse> {
return prediction;
}
- /**
- * It returns the fmeasure result.
- * @return the fmeasure value
- */
public final FMeasure getFMeasure() {
return fmeasure;
}
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserEventTypeEnum.java b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserEventTypeEnum.java
index b67c1e72..325f06eb 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserEventTypeEnum.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserEventTypeEnum.java
@@ -19,13 +19,14 @@
package opennlp.tools.parser;
/**
- * Enumerated type of event types for the parser.
+ * Enumeration of event types for a {@link Parser}.
*/
public enum ParserEventTypeEnum {
BUILD,
CHECK,
+ // TODO Add reason why those enum values are deprecated
@Deprecated
CHUNK,
@Deprecated
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserFactory.java b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserFactory.java
index b7d476f9..91ed0955 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserFactory.java
@@ -22,6 +22,22 @@ public class ParserFactory {
private ParserFactory() {
}
+ /**
+ * Instantiates a {@link Parser} via a given {@code model} and
+ * other configuration parameters.
+ *
+ * @param model The {@link ParserModel} to use.
+ * @param beamSize The number of different parses kept during parsing.
+ * @param advancePercentage The minimal amount of probability mass which advanced outcomes
+ * must represent. Only outcomes which contribute to the top
+ * {@code advancePercentage} will be explored.
+ *
+ * @return A valid {@link Parser} instance.
+ * @throws IllegalStateException Thrown if the {@link ParserType} is not supported.
+ *
+ * @see Parser
+ * @see ParserModel
+ */
public static Parser create(ParserModel model, int beamSize, double advancePercentage) {
if (ParserType.CHUNKING.equals(model.getParserType())) {
@@ -36,6 +52,18 @@ public class ParserFactory {
}
}
+ /**
+ * Instantiates a {@link Parser} via a given {@code model} and
+ * default configuration parameters (see: {@link AbstractBottomUpParser}).
+ *
+ * @param model The {@link ParserModel} to use.
+ *
+ * @return A valid {@link Parser} instance.
+ * @throws IllegalStateException Thrown if the {@link ParserType} is not supported.
+ *
+ * @see Parser
+ * @see AbstractBottomUpParser
+ */
public static Parser create(ParserModel model) {
return create(model, AbstractBottomUpParser.defaultBeamSize,
AbstractBottomUpParser.defaultAdvancePercentage);
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserModel.java b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserModel.java
index 86ea2f02..465457f8 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserModel.java
@@ -41,16 +41,15 @@ import opennlp.tools.util.model.ChunkerModelSerializer;
import opennlp.tools.util.model.POSModelSerializer;
/**
- * This is an abstract base class for {@link ParserModel} implementations.
+ * This is the default {@link ParserModel} implementation.
*/
-// TODO: Model should validate the artifact map
public class ParserModel extends BaseModel {
private static class HeadRulesSerializer implements
ArtifactSerializer<opennlp.tools.parser.lang.en.HeadRules> {
public opennlp.tools.parser.lang.en.HeadRules create(InputStream in)
- throws IOException, InvalidFormatException {
+ throws IOException {
return new opennlp.tools.parser.lang.en.HeadRules(new BufferedReader(
new InputStreamReader(in, StandardCharsets.UTF_8)));
}
@@ -77,10 +76,22 @@ public class ParserModel extends BaseModel {
private static final String PARSER_TYPE = "parser-type";
+ /**
+ * Initializes a {@link ParserModel} instance via given parameters.
+ *
+ * @param languageCode An ISO conform language code.
+ * @param buildModel A valid {@link MaxentModel} used to build.
+ * @param checkModel A valid {@link MaxentModel} used to check.
+ * @param attachModel A valid {@link MaxentModel} used to attach.
+ * @param parserTagger A valid {@link POSModel} to parse.
+ * @param chunkerTagger A valid {@link ChunkerModel} to chunk.
+ * @param headRules The {@link HeadRules} to use for parsing.
+ * @param modelType The {@link ParserType} to use.
+ * @param manifestInfoEntries Additional information kept in the manifest.
+ */
public ParserModel(String languageCode, MaxentModel buildModel, MaxentModel checkModel,
- MaxentModel attachModel, POSModel parserTagger,
- ChunkerModel chunkerTagger, opennlp.tools.parser.HeadRules headRules,
- ParserType modelType, Map<String, String> manifestInfoEntries) {
+ MaxentModel attachModel, POSModel parserTagger, ChunkerModel chunkerTagger,
+ HeadRules headRules, ParserType modelType, Map<String, String> manifestInfoEntries) {
super(COMPONENT_NAME, languageCode, manifestInfoEntries);
@@ -110,34 +121,83 @@ public class ParserModel extends BaseModel {
checkArtifactMap();
}
+ /**
+ * Initializes a {@link ParserModel} instance via given parameters.
+ *
+ * @param languageCode An ISO conform language code.
+ * @param buildModel A valid {@link MaxentModel} used to build.
+ * @param checkModel A valid {@link MaxentModel} used to check.
+ * @param parserTagger A valid {@link POSModel} to parse.
+ * @param chunkerTagger A valid {@link ChunkerModel} to chunk.
+ * @param headRules The {@link HeadRules} to use for parsing.
+ * @param modelType The {@link ParserType} to use.
+ */
public ParserModel(String languageCode, MaxentModel buildModel, MaxentModel checkModel,
- MaxentModel attachModel, POSModel parserTagger,
- ChunkerModel chunkerTagger, opennlp.tools.parser.HeadRules headRules,
- ParserType modelType) {
+ MaxentModel attachModel, POSModel parserTagger, ChunkerModel chunkerTagger,
+ HeadRules headRules, ParserType modelType) {
this (languageCode, buildModel, checkModel, attachModel, parserTagger,
chunkerTagger, headRules, modelType, null);
}
+ /**
+ * Initializes a {@link ParserModel} instance via given parameters.
+ *
+ * @param languageCode An ISO conform language code.
+ * @param buildModel A valid {@link MaxentModel} used to build.
+ * @param checkModel A valid {@link MaxentModel} used to check.
+ * @param parserTagger A valid {@link POSModel} to parse.
+ * @param chunkerTagger A valid {@link ChunkerModel} to chunk.
+ * @param headRules The {@link HeadRules} to use for parsing.
+ * @param type The {@link ParserType} to use.
+ * @param manifestInfoEntries Additional information kept in the manifest.
+ */
public ParserModel(String languageCode, MaxentModel buildModel, MaxentModel checkModel,
- POSModel parserTagger, ChunkerModel chunkerTagger,
- opennlp.tools.parser.HeadRules headRules, ParserType type,
- Map<String, String> manifestInfoEntries) {
+ POSModel parserTagger, ChunkerModel chunkerTagger, HeadRules headRules,
+ ParserType type, Map<String, String> manifestInfoEntries) {
this (languageCode, buildModel, checkModel, null, parserTagger,
chunkerTagger, headRules, type, manifestInfoEntries);
}
+ /**
+ * Initializes a {@link ParserModel} instance via a valid {@link InputStream}.
+ *
+ * @param in The {@link InputStream} used for loading the model.
+ *
+ * @throws IOException Thrown if IO errors occurred during initialization.
+ */
public ParserModel(InputStream in) throws IOException {
super(COMPONENT_NAME, in);
}
+ /**
+ * Initializes a {@link ParserModel} instance via a valid {@link File}.
+ *
+ * @param modelFile The {@link File} used for loading the model.
+ *
+ * @throws IOException Thrown if IO errors occurred during initialization.
+ */
public ParserModel(File modelFile) throws IOException {
super(COMPONENT_NAME, modelFile);
}
+ /**
+ * Initializes a {@link ParserModel} instance via a valid {@link Path}.
+ *
+ * @param modelPath The {@link Path} used for loading the model.
+ *
+ * @throws IOException Thrown if IO errors occurred during initialization.
+ */
public ParserModel(Path modelPath) throws IOException {
this(modelPath.toFile());
}
+ /**
+ * Initializes a {@link ParserModel} instance via a valid {@link URL}.
+ *
+ * @param modelURL The {@link URL} used for loading the model.
+ *
+ * @throws IOException Thrown if IO errors occurred during initialization.
+ */
public ParserModel(URL modelURL) throws IOException {
super(COMPONENT_NAME, modelURL);
}
@@ -148,10 +208,10 @@ public class ParserModel extends BaseModel {
super.createArtifactSerializers(serializers);
- // In 1.6.x the headrules artifact is serialized with the new API
- // which uses the Serializeable interface
+ // In 1.6.x the head rules artifact is serialized with the new API
+ // which uses the Serializable interface
// This change is not backward compatible with the 1.5.x models.
- // In order to laod 1.5.x model the English headrules serializer must be
+ // In order to load 1.5.x model the English head rules serializer must be
// put on the serializer map.
if (getVersion().getMajor() == 1 && getVersion().getMinor() == 5) {
@@ -162,53 +222,103 @@ public class ParserModel extends BaseModel {
serializers.put("chunker", new ChunkerModelSerializer());
}
+ /**
+ * @return Retrieves the {@link ParserType} as configured in the manifest.
+ */
public ParserType getParserType() {
return ParserType.parse(getManifestProperty(PARSER_TYPE));
}
+ /**
+ * @return Retrieves the {@link MaxentModel build model} as configured in the manifest.
+ */
public MaxentModel getBuildModel() {
return (MaxentModel) artifactMap.get(BUILD_MODEL_ENTRY_NAME);
}
+ /**
+ * @return Retrieves the {@link MaxentModel check model} as configured in the manifest.
+ */
public MaxentModel getCheckModel() {
return (MaxentModel) artifactMap.get(CHECK_MODEL_ENTRY_NAME);
}
+ /**
+ * @return Retrieves the {@link MaxentModel attach model} as configured in the manifest.
+ */
public MaxentModel getAttachModel() {
return (MaxentModel) artifactMap.get(ATTACH_MODEL_ENTRY_NAME);
}
+ /**
+ * @return Retrieves the {@link POSModel} as configured in the manifest.
+ */
public POSModel getParserTaggerModel() {
return (POSModel) artifactMap.get(PARSER_TAGGER_MODEL_ENTRY_NAME);
}
+ /**
+ * @return Retrieves the {@link ChunkerModel} as configured in the manifest.
+ */
public ChunkerModel getParserChunkerModel() {
return (ChunkerModel) artifactMap.get(CHUNKER_TAGGER_MODEL_ENTRY_NAME);
}
- public opennlp.tools.parser.HeadRules getHeadRules() {
+ /**
+ * @return Retrieves the {@link HeadRules} as configured in the manifest.
+ */
+ public HeadRules getHeadRules() {
return (opennlp.tools.parser.HeadRules)
artifactMap.get(HEAD_RULES_MODEL_ENTRY_NAME);
}
- // TODO: Update model methods should make sure properties are copied correctly ...
+ // TODO: (All!) Update model methods should make sure properties are copied correctly ...
+
+ /**
+ * Instantiates a new {@link ParserModel} instance from the existing configuration
+ * with the specified {@code buildModel} for exchange.
+ *
+ * @param buildModel A valid {@link MaxentModel} used to build.
+ * @return A valid {@link ParserModel}.
+ */
public ParserModel updateBuildModel(MaxentModel buildModel) {
return new ParserModel(getLanguage(), buildModel, getCheckModel(), getAttachModel(),
getParserTaggerModel(), getParserChunkerModel(),
getHeadRules(), getParserType());
}
+ /**
+ * Instantiates a new {@link ParserModel} instance from the existing configuration
+ * with the specified {@code checkModel} for exchange.
+ *
+ * @param checkModel A valid {@link MaxentModel} used to check.
+ * @return A valid {@link ParserModel}.
+ */
public ParserModel updateCheckModel(MaxentModel checkModel) {
return new ParserModel(getLanguage(), getBuildModel(), checkModel,
getAttachModel(), getParserTaggerModel(),
getParserChunkerModel(), getHeadRules(), getParserType());
}
+ /**
+ * Instantiates a new {@link ParserModel} instance from the existing configuration
+ * with the specified {@code taggerModel} for exchange.
+ *
+ * @param taggerModel A valid {@link POSModel} used to tag.
+ * @return A valid {@link ParserModel}.
+ */
public ParserModel updateTaggerModel(POSModel taggerModel) {
return new ParserModel(getLanguage(), getBuildModel(), getCheckModel(), getAttachModel(),
taggerModel, getParserChunkerModel(), getHeadRules(), getParserType());
}
+ /**
+ * Instantiates a new {@link ParserModel} instance from the existing configuration
+ * with the specified {@code chunkModel} for exchange.
+ *
+ * @param chunkModel A valid {@link ChunkerModel} used to chunk.
+ * @return A valid {@link ParserModel}.
+ */
public ParserModel updateChunkerModel(ChunkerModel chunkModel) {
return new ParserModel(getLanguage(), getBuildModel(), getCheckModel(), getAttachModel(),
getParserTaggerModel(), chunkModel, getHeadRules(), getParserType());
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserType.java b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserType.java
index e924a944..8539d356 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserType.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserType.java
@@ -17,10 +17,17 @@
package opennlp.tools.parser;
+/**
+ * Enumeration of supported {@link Parser} types.
+ */
public enum ParserType {
CHUNKING,
TREEINSERT;
+ /**
+ * @param type The string representation of the requested {@link ParserType}.
+ * @return The {@link ParserType} matching {@code type}, {@code null} otherwise.
+ */
public static ParserType parse(String type) {
if (ParserType.CHUNKING.name().equals(type)) {
return ParserType.CHUNKING;
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/PosSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/parser/PosSampleStream.java
index 259d9f42..1c77aec3 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/PosSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/PosSampleStream.java
@@ -25,18 +25,22 @@ import opennlp.tools.util.ObjectStream;
public class PosSampleStream extends FilterObjectStream<Parse, POSSample> {
+ /**
+ * Initializes a {@link PosSampleStream instance}.
+ *
+ * @param in An {@link ObjectStream} of {@link Parse parses} used as input.
+ */
public PosSampleStream(ObjectStream<Parse> in) {
super(in);
}
+ @Override
public POSSample read() throws IOException {
Parse parse = samples.read();
if (parse != null) {
-
Parse[] nodes = parse.getTagNodes();
-
String[] toks = new String[nodes.length];
String[] preds = new String[nodes.length];
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/BuildContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/BuildContextGenerator.java
index bff687df..c5930604 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/BuildContextGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/BuildContextGenerator.java
@@ -28,7 +28,9 @@ import opennlp.tools.parser.Parse;
import opennlp.tools.util.StringList;
/**
- * Class to generator predictive contexts for deciding how constituents should be combined together.
+ * Generates predictive contexts for deciding how constituents should be combined.
+ *
+ * @see AbstractContextGenerator
*/
public class BuildContextGenerator extends AbstractContextGenerator {
@@ -38,8 +40,7 @@ public class BuildContextGenerator extends AbstractContextGenerator {
private String[] trigram;
/**
- * Creates a new context generator for making decisions about combining constitients togehter.
- *
+ * Instantiates a {@link BuildContextGenerator} for making decisions about combining constituents.
*/
public BuildContextGenerator() {
super();
@@ -47,6 +48,12 @@ public class BuildContextGenerator extends AbstractContextGenerator {
useLabel = true;
}
+ /**
+ * Instantiates a {@link BuildContextGenerator} for making decisions about combining constituents
+ * using a {@link Dictionary}.
+ *
+ * @param dict A {@link Dictionary} to be used during context generation.
+ */
public BuildContextGenerator(Dictionary dict) {
this();
this.dict = dict;
@@ -61,11 +68,12 @@ public class BuildContextGenerator extends AbstractContextGenerator {
}
/**
- * Returns the predictive context used to determine how constituent at the specified index
- * should be combined with other contisuents.
- * @param constituents The constituents which have yet to be combined into new constituents.
- * @param index The index of the constituent whcihi is being considered.
- * @return the context for building constituents at the specified index.
+ * Finds the predictive context used to determine how constituent at the specified {@code index}
+ * should be combined with other constituents.
+ *
+ * @param constituents The {@link Parse constituents} which have yet to be combined into new constituents.
+ * @param index The index of the constituent which is being considered.
+ * @return The context for building constituents at the specified {@code index}.
*/
public String[] getContext(Parse[] constituents, int index) {
List<String> features = new ArrayList<>(100);
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/CheckContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/CheckContextGenerator.java
index 1e0b730e..8a94dcb7 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/CheckContextGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/CheckContextGenerator.java
@@ -25,12 +25,14 @@ import opennlp.tools.parser.AbstractContextGenerator;
import opennlp.tools.parser.Parse;
/**
- * Class for generating predictive context for deciding when a constituent is complete.
+ * Generates predictive context for deciding when a constituent is complete.
+ *
+ * @see AbstractContextGenerator
*/
public class CheckContextGenerator extends AbstractContextGenerator {
/**
- * Creates a new context generator for generating predictive context for deciding
+ * Instantiates a {@link CheckContextGenerator} for generating predictive context for deciding
* when a constituent is complete.
*/
public CheckContextGenerator() {
@@ -43,10 +45,11 @@ public class CheckContextGenerator extends AbstractContextGenerator {
}
/**
- * Returns predictive context for deciding whether the specified constituents between the
- * specified start and end index can be combined to form a new constituent of the specified type.
+ * Finds predictive context for deciding whether the specified constituents between the
+ * specified {@code start} and {@code end} index can be combined to form a
+ * new constituent of the specified {@code type}.
*
- * @param constituents The constituents which have yet to be combined into new constituents.
+ * @param constituents The {@link Parse constituents} which have yet to be combined into new constituents.
* @param type The type of the new constituent proposed.
* @param start The first constituent of the proposed constituent.
* @param end The last constituent of the proposed constituent.
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/Parser.java b/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/Parser.java
index 9ad84280..0bf89b5b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/Parser.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/Parser.java
@@ -51,27 +51,48 @@ import opennlp.tools.util.Span;
import opennlp.tools.util.TrainingParameters;
/**
- * Class for a shift reduce style parser based on Adwait Ratnaparkhi's 1998 thesis.
+ * A shift reduce style {@link opennlp.tools.parser.Parser} implementation
+ * based on Adwait Ratnaparkhi's 1998 thesis.
+ *
+ * @see AbstractBottomUpParser
+ * @see opennlp.tools.parser.Parser
*/
public class Parser extends AbstractBottomUpParser {
- private MaxentModel buildModel;
- private MaxentModel checkModel;
+ private final MaxentModel buildModel;
+ private final MaxentModel checkModel;
- private BuildContextGenerator buildContextGenerator;
- private CheckContextGenerator checkContextGenerator;
+ private final BuildContextGenerator buildContextGenerator;
+ private final CheckContextGenerator checkContextGenerator;
- private double[] bprobs;
- private double[] cprobs;
+ private final double[] bprobs;
+ private final double[] cprobs;
private static final String TOP_START = START + TOP_NODE;
- private int topStartIndex;
- private Map<String, String> startTypeMap;
- private Map<String, String> contTypeMap;
+ private final int topStartIndex;
+ private final Map<String, String> startTypeMap;
+ private final Map<String, String> contTypeMap;
- private int completeIndex;
- private int incompleteIndex;
+ private final int completeIndex;
+ private final int incompleteIndex;
+ /**
+ * Instantiates a {@link Parser} via a given {@code model} and
+ * other configuration parameters. Uses the default implementations of
+ * {@link POSTaggerME} and {@link ChunkerME}.
+ *
+ * @param model The {@link ParserModel} to use.
+ * @param beamSize The number of different parses kept during parsing.
+ * @param advancePercentage The minimal amount of probability mass which advanced outcomes
+ * must represent. Only outcomes which contribute to the top
+ * {@code advancePercentage} will be explored.
+ *
+ * @throws IllegalStateException Thrown if the {@link ParserType} is not supported.
+ *
+ * @see ParserModel
+ * @see POSTaggerME
+ * @see ChunkerME
+ */
public Parser(ParserModel model, int beamSize, double advancePercentage) {
this(model.getBuildModel(), model.getCheckModel(),
new POSTaggerME(model.getParserTaggerModel()),
@@ -79,21 +100,37 @@ public class Parser extends AbstractBottomUpParser {
model.getHeadRules(), beamSize, advancePercentage);
}
+ /**
+ * Instantiates a {@link Parser} via a given {@code model}.
+ * Uses the default implementations of {@link POSTaggerME} and {@link ChunkerME}
+ * and default values for {@code beamSize} and {@code advancePercentage}.
+ *
+ * @param model The {@link ParserModel} to use.
+ *
+ * @throws IllegalStateException Thrown if the {@link ParserType} is not supported.
+ *
+ * @see ParserModel
+ * @see POSTaggerME
+ * @see ChunkerME
+ */
public Parser(ParserModel model) {
this(model, defaultBeamSize, defaultAdvancePercentage);
}
/**
- * Creates a new parser using the specified models and head rules using the specified beam
- * size and advance percentage.
- * @param buildModel The model to assign constituent labels.
- * @param checkModel The model to determine a constituent is complete.
- * @param tagger The model to assign pos-tags.
- * @param chunker The model to assign flat constituent labels.
- * @param headRules The head rules for head word perculation.
+ * Instantiates a {@link Parser} via the given models, components and other configuration parameters.
+ *
+ * @param buildModel A valid {@link MaxentModel} used to build.
+ * @param checkModel A valid {@link MaxentModel} used to check.
+ * @param tagger A valid {@link POSTagger} used to tag.
+ * @param chunker A valid {@link Chunker} used to chunk.
+ * @param headRules The {@link HeadRules} for head word percolation.
* @param beamSize The number of different parses kept during parsing.
- * @param advancePercentage The minimal amount of probability mass which advanced outcomes must represent.
- * Only outcomes which contribute to the top "advancePercentage" will be explored.
+ * @param advancePercentage The minimal amount of probability mass which advanced outcomes
+ * must represent. Only outcomes which contribute to the top
+ * {@code advancePercentage} will be explored.
+ * @see POSTagger
+ * @see Chunker
*/
private Parser(MaxentModel buildModel, MaxentModel checkModel, POSTagger tagger, Chunker chunker,
HeadRules headRules, int beamSize, double advancePercentage) {
@@ -267,6 +304,16 @@ public class Parser extends AbstractBottomUpParser {
}
}
+ /**
+ * Starts a training of a {@link ParserModel}.
+ *
+ * @param languageCode An ISO conform language code.
+ * @param parseSamples The {@link ObjectStream} of {@link Parse} samples used as input.
+ * @param rules The {@link HeadRules} to use.
+ * @param mlParams The {@link TrainingParameters parameters} for training.
+ * @return A valid {@link ParserModel}.
+ * @throws IOException Thrown if IO errors occurred during training.
+ */
public static ParserModel train(String languageCode, ObjectStream<Parse> parseSamples,
HeadRules rules, TrainingParameters mlParams)
throws IOException {
@@ -299,13 +346,11 @@ public class Parser extends AbstractBottomUpParser {
POSModel posModel = POSTaggerME.train(languageCode, new PosSampleStream(parseSamples),
mlParams.getParameters("tagger"), new POSTaggerFactory());
-
parseSamples.reset();
// chunk
ChunkerModel chunkModel = ChunkerME.train(languageCode,
new ChunkSampleStream(parseSamples), mlParams.getParameters("chunker"), new ParserChunkerFactory());
-
parseSamples.reset();
// check
@@ -317,7 +362,6 @@ public class Parser extends AbstractBottomUpParser {
MaxentModel checkModel = checkTrainer.train(kes);
mergeReportIntoManifest(manifestInfoEntries, checkReportMap, "check");
- // TODO: Remove cast for HeadRules
return new ParserModel(languageCode, buildModel, checkModel,
posModel, chunkModel, rules,
ParserType.CHUNKING, manifestInfoEntries);
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/ParserEventStream.java b/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/ParserEventStream.java
index d99a1fa7..d876c0bc 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/ParserEventStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/ParserEventStream.java
@@ -29,8 +29,8 @@ import opennlp.tools.parser.ParserEventTypeEnum;
import opennlp.tools.util.ObjectStream;
/**
- * Wrapper class for one of four parser event streams. The particular event stream is specified
- * at construction.
+ * Wrapper class for one of four {@link Parser shift-reduce parser} event streams.
+ * The particular {@link ParserEventTypeEnum event type} is specified at construction.
*/
public class ParserEventStream extends AbstractParserEventStream {
@@ -38,18 +38,35 @@ public class ParserEventStream extends AbstractParserEventStream {
protected CheckContextGenerator kcg;
/**
- * Create an event stream based on the specified data stream of the specified type using
- * the specified head rules.
+ * Instantiates a {@link ParserEventStream} based on the specified data stream
+ * of the {@link ParserEventTypeEnum type} using {@link HeadRules head rules}.
+ *
* @param d A 1-parse-per-line Penn Treebank Style parse.
- * @param rules The head rules.
- * @param etype The type of events desired (tag, chunk, build, or check).
- * @param dict A tri-gram dictionary to reduce feature generation.
+ * @param rules The {@link HeadRules head rules} to use.
+ * @param etype The {@link ParserEventTypeEnum type} of events desired.
+ * @param dict A tri-gram {@link Dictionary} to reduce feature generation.
+ *
+ * @see ParserEventTypeEnum
*/
public ParserEventStream(ObjectStream<Parse> d, HeadRules rules,
ParserEventTypeEnum etype, Dictionary dict) {
super(d,rules,etype,dict);
}
+ /**
+ * Instantiates a {@link ParserEventStream} based on the specified data stream
+ * of the {@link ParserEventTypeEnum type} using {@link HeadRules head rules}.
+ *
+ * @param d A 1-parse-per-line Penn Treebank Style parse.
+ * @param rules The {@link HeadRules head rules} to use.
+ * @param etype The {@link ParserEventTypeEnum type} of events desired.
+ *
+ * @see ParserEventTypeEnum
+ */
+ public ParserEventStream(ObjectStream<Parse> d, HeadRules rules, ParserEventTypeEnum etype) {
+ this (d,rules,etype,null);
+ }
+
@Override
protected void init() {
if (etype == ParserEventTypeEnum.BUILD) {
@@ -60,17 +77,12 @@ public class ParserEventStream extends AbstractParserEventStream {
}
}
-
-
- public ParserEventStream(ObjectStream<Parse> d, HeadRules rules, ParserEventTypeEnum etype) {
- this (d,rules,etype,null);
- }
-
/**
- * Returns true if the specified child is the first child of the specified parent.
- * @param child The child parse.
- * @param parent The parent parse.
- * @return true if the specified child is the first child of the specified parent; false otherwise.
+ * @param child The child {@link Parse}.
+ * @param parent The parent {@link Parse}.
+ *
+ * @return {@code true} if the specified {@code child} is the first child of the
+ * specified {@code parent}, {@code false} otherwise.
*/
protected boolean firstChild(Parse child, Parse parent) {
return AbstractBottomUpParser.collapsePunctuation(parent.getChildren(), punctSet)[0] == child;
@@ -111,10 +123,11 @@ public class ParserEventStream extends AbstractParserEventStream {
}
/**
- * Adds events for parsing (post tagging and chunking to the specified list of events for
- * the specified parse chunks.
- * @param parseEvents The events for the specified chunks.
- * @param chunks The incomplete parses to be parsed.
+ * Adds {@link Event events} for parsing (post tagging and chunking)
+ * to the specified list of events for the specified parse chunks.
+ *
+ * @param parseEvents The {@link Event events} for the specified chunks.
+ * @param chunks The incomplete {@link Parse parses} to be parsed.
*/
@Override
protected void addParseEvents(List<Event> parseEvents, Parse[] chunks) {
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/lang/en/HeadRules.java b/opennlp-tools/src/main/java/opennlp/tools/parser/lang/en/HeadRules.java
index 6784204e..5cbb986d 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/lang/en/HeadRules.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/lang/en/HeadRules.java
@@ -45,7 +45,7 @@ import opennlp.tools.util.model.ArtifactSerializer;
import opennlp.tools.util.model.SerializableArtifact;
/**
- * Class for storing the English head rules associated with parsing.
+ * Class for storing the English {@link opennlp.tools.parser.HeadRules} associated with parsing.
*/
public class HeadRules implements opennlp.tools.parser.HeadRules, GapLabeler, SerializableArtifact {
@@ -98,26 +98,28 @@ public class HeadRules implements opennlp.tools.parser.HeadRules, GapLabeler, Se
}
private Map<String, HeadRule> headRules;
- private Set<String> punctSet;
+ private final Set<String> punctSet;
/**
- * Creates a new set of head rules based on the specified head rules file.
+ * Creates a new set of head rules based on the specified {@code ruleFile}.
*
- * @param ruleFile the head rules file.
+ * @param ruleFile A string representation for a head rules file.
*
- * @throws IOException if the head rules file can not be read.
+ * @throws IOException Thrown if the head rules file can not be read.
+ *
+ * @deprecated Use {@link #HeadRules(Reader)} instead.
*/
@Deprecated
public HeadRules(String ruleFile) throws IOException {
- this(new BufferedReader(new FileReader(ruleFile)));
+ this(new FileReader(ruleFile));
}
/**
* Creates a new set of head rules based on the specified reader.
*
- * @param rulesReader the head rules reader.
+ * @param rulesReader A {@link Reader} for a head rules file.
*
- * @throws IOException if the head rules reader can not be read.
+ * @throws IOException Thrown if the head rules reader can not be read.
*/
public HeadRules(Reader rulesReader) throws IOException {
BufferedReader in = new BufferedReader(rulesReader);
@@ -131,10 +133,12 @@ public class HeadRules implements opennlp.tools.parser.HeadRules, GapLabeler, Se
//punctSet.add(":");
}
+ @Override
public Set<String> getPunctuationTags() {
return punctSet;
}
+ @Override
public Parse getHead(Parse[] constituents, String type) {
if (Parser.TOK_NODE.equals(constituents[0].getType())) {
return null;
@@ -218,6 +222,7 @@ public class HeadRules implements opennlp.tools.parser.HeadRules, GapLabeler, Se
}
}
+ @Override
public void labelGaps(Stack<Constituent> stack) {
if (stack.size() > 4) {
//Constituent con0 = (Constituent) stack.get(stack.size()-1);
@@ -245,15 +250,17 @@ public class HeadRules implements opennlp.tools.parser.HeadRules, GapLabeler, Se
}
/**
- * Writes the head rules to the writer in a format suitable for loading
- * the head rules again with the constructor. The encoding must be
- * taken into account while working with the writer and reader.
+ * Serializes the head rules via a {@link Writer} in a format suitable for loading
+ * the head rules again. The encoding must be taken into account while
+ * working with the writer and reader.
+ * <p>
+ * Once the entries have been written, the {@code writer} is flushed.
* <p>
- * After the entries have been written, the writer is flushed.
- * The writer remains open after this method returns.
+ * Note:
+ * The {@code writer} remains open after this method returns.
*
- * @param writer
- * @throws IOException
+ * @param writer The {@link Writer} to write the head rules to.
+ * @throws IOException Thrown if IO errors occurred during write operation.
*/
public void serialize(Writer writer) throws IOException {
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/lang/es/AncoraSpanishHeadRules.java b/opennlp-tools/src/main/java/opennlp/tools/parser/lang/es/AncoraSpanishHeadRules.java
index 6083f351..e59e0438 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/lang/es/AncoraSpanishHeadRules.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/lang/es/AncoraSpanishHeadRules.java
@@ -112,16 +112,14 @@ public class AncoraSpanishHeadRules implements HeadRules, GapLabeler, Serializab
}
private Map<String, HeadRule> headRules;
- private Set<String> punctSet;
-
-
+ private final Set<String> punctSet;
/**
* Creates a new set of head rules based on the specified reader.
*
- * @param rulesReader the head rules reader.
+ * @param rulesReader A {@link Reader} for a head rules file.
*
- * @throws IOException if the head rules reader can not be read.
+ * @throws IOException Thrown if the head rules reader can not be read.
*/
public AncoraSpanishHeadRules(Reader rulesReader) throws IOException {
BufferedReader in = new BufferedReader(rulesReader);
@@ -135,10 +133,12 @@ public class AncoraSpanishHeadRules implements HeadRules, GapLabeler, Serializab
//punctSet.add(":");
}
+ @Override
public Set<String> getPunctuationTags() {
return punctSet;
}
+ @Override
public Parse getHead(Parse[] constituents, String type) {
if (Parser.TOK_NODE.equals(constituents[0].getType())) {
return null;
@@ -154,9 +154,9 @@ public class AncoraSpanishHeadRules implements HeadRules, GapLabeler, Serializab
}
}
}
- for (int ci = 0; ci < constituents.length; ci++) {
- if (constituents[ci].getType().equals("SN") || constituents[ci].getType().equals("GRUP.NOM")) {
- return constituents[ci];
+ for (Parse constituent : constituents) {
+ if (constituent.getType().equals("SN") || constituent.getType().equals("GRUP.NOM")) {
+ return constituent;
}
}
String[] tags2 = {"\\$","GRUP\\.A","SA"};
@@ -223,6 +223,7 @@ public class AncoraSpanishHeadRules implements HeadRules, GapLabeler, Serializab
}
}
+ @Override
public void labelGaps(Stack<Constituent> stack) {
if (stack.size() > 4) {
//Constituent con0 = (Constituent) stack.get(stack.size()-1);
@@ -250,15 +251,17 @@ public class AncoraSpanishHeadRules implements HeadRules, GapLabeler, Serializab
}
/**
- * Writes the head rules to the writer in a format suitable for loading
- * the head rules again with the constructor. The encoding must be
- * taken into account while working with the writer and reader.
+ * Serializes the head rules via a {@link Writer} in a format suitable for loading
+ * the head rules again. The encoding must be taken into account while
+ * working with the writer and reader.
+ * <p>
+ * Once the entries have been written, the {@code writer} is flushed.
* <p>
- * After the entries have been written, the writer is flushed.
- * The writer remains open after this method returns.
+ * Note:
+ * The {@code writer} remains open after this method returns.
*
- * @param writer
- * @throws IOException
+ * @param writer The {@link Writer} to write the head rules to.
+ * @throws IOException Thrown if IO errors occurred during write operation.
*/
public void serialize(Writer writer) throws IOException {
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/AttachContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/AttachContextGenerator.java
index 4af1d290..28fdac9c 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/AttachContextGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/AttachContextGenerator.java
@@ -27,8 +27,16 @@ import opennlp.tools.parser.AbstractContextGenerator;
import opennlp.tools.parser.Cons;
import opennlp.tools.parser.Parse;
+/**
+ * Generates predictive contexts for deciding how constituents should be attached.
+ *
+ * @see AbstractContextGenerator
+ */
public class AttachContextGenerator extends AbstractContextGenerator {
+ /**
+ * Instantiates an {@link AttachContextGenerator} for making decisions about attachments.
+ */
public AttachContextGenerator(Set<String> punctSet) {
this.punctSet = punctSet;
}
@@ -50,8 +58,9 @@ public class AttachContextGenerator extends AbstractContextGenerator {
}
/**
+ * Finds the predictive contextual features about an attachment.
*
- * @param constituents The constituents as they have been constructed so far.
+ * @param constituents The {@link Parse constituents} as they have been constructed so far.
* @param index The constituent index of the node being attached.
* @param rightFrontier The nodes which have been not attach to so far.
* @return A set of contextual features about this attachment.
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/BuildContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/BuildContextGenerator.java
index 52c124fc..067f1b29 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/BuildContextGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/BuildContextGenerator.java
@@ -32,11 +32,16 @@ import opennlp.tools.parser.Parse;
* Creates the features or contexts for the building phase of parsing.
* This phase builds constituents from the left-most node of these
* constituents.
+ *
+ * @see AbstractContextGenerator
*/
public class BuildContextGenerator extends AbstractContextGenerator {
- private Parse[] leftNodes;
+ private final Parse[] leftNodes;
+ /**
+ * Instantiates a {@link BuildContextGenerator} for making decisions.
+ */
public BuildContextGenerator() {
super();
leftNodes = new Parse[2];
@@ -48,11 +53,12 @@ public class BuildContextGenerator extends AbstractContextGenerator {
}
/**
- * Returns the contexts/features for the decision to build a new constituent for the specified parse
- * at the specified index.
- * @param constituents The constituents of the parse so far.
+ * Finds the contexts/features for the decision to build a new constituent for the specified parse
+ * at the specified {@code index}.
+ *
+ * @param constituents The {@link Parse constituents} of the parse so far.
* @param index The index of the constituent where a build decision is being made.
- * @return the contexts/features for the decision to build a new constituent.
+ * @return The contexts/features for the decision to build a new constituent.
*/
public String[] getContext(Parse[] constituents, int index) {
int ps = constituents.length;
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/CheckContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/CheckContextGenerator.java
index 821f5780..f0345eaa 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/CheckContextGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/CheckContextGenerator.java
@@ -27,20 +27,42 @@ import java.util.Set;
import opennlp.tools.parser.AbstractContextGenerator;
import opennlp.tools.parser.Parse;
+/**
+ * Generates predictive context for deciding when a constituent is complete.
+ *
+ * @see AbstractContextGenerator
+ */
public class CheckContextGenerator extends AbstractContextGenerator {
- private Parse[] leftNodes;
+ private final Parse[] leftNodes;
+ /**
+ * Instantiates a {@link CheckContextGenerator} for making decisions using a {@code punctSet}.
+ *
+ * @param punctSet A set of punctuation symbols to be used during context generation.
+ */
public CheckContextGenerator(Set<String> punctSet) {
this.punctSet = punctSet;
leftNodes = new Parse[2];
}
- public String[] getContext(Object arg0) {
- // TODO Auto-generated method stub
- return null;
+ public String[] getContext(Object o) {
+ Object[] params = (Object[]) o;
+ return getContext((Parse) params[0], (Parse[]) params[1], (Integer) params[2], (Boolean) params[3]);
}
+
+ /**
+ * Finds the predictive context used to determine how constituent at the specified {@code index}
+ * should be combined with a {@code parent} constituent.
+ *
+ * @param parent The {@link Parse parent} element.
+ * @param constituents The {@link Parse constituents} which have yet to be combined into new constituents.
+ * @param index The index of the constituent which is being considered.
+ * @param trimFrontier Whether the frontier should be trimmed, or not.
+ *
+ * @return The context for deciding whether a new constituent should be created.
+ */
public String[] getContext(Parse parent, Parse[] constituents, int index, boolean trimFrontier) {
List<String> features = new ArrayList<>(100);
//default
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java
index 781d6029..5227f189 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/Parser.java
@@ -50,16 +50,22 @@ import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.TrainingParameters;
/**
- * Built/attach parser. Nodes are built when their left-most
- * child is encountered. Subsequent children are attached as
- * daughters. Attachment is based on node in the right-frontier
- * of the tree. After each attachment or building, nodes are
- * assesed as either complete or incomplete. Complete nodes
- * are no longer elligable for daughter attachment.
+ * A built-attach {@link opennlp.tools.parser.Parser} implementation.
+ * <p>
+ * Nodes are built when their left-most child is encountered.
+ * Subsequent children are attached as daughters.
+ * Attachment is based on node in the right-frontier
+ * of the tree. After each attachment or building, nodes are
+ * assessed as either complete or incomplete. Complete nodes
+ * are no longer eligible for daughter attachment.
+ * <p>
* Complex modifiers which produce additional node
* levels of the same type are attached with sister-adjunction.
* Attachment can not take place higher in the right-frontier
* than an incomplete node.
+ *
+ * @see AbstractBottomUpParser
+ * @see opennlp.tools.parser.Parser
*/
public class Parser extends AbstractBottomUpParser {
@@ -75,40 +81,83 @@ public class Parser extends AbstractBottomUpParser {
/** Label used to distinguish build nodes from non-built nodes. */
public static final String BUILT = "built";
- private MaxentModel buildModel;
- private MaxentModel attachModel;
- private MaxentModel checkModel;
+ private final MaxentModel buildModel;
+ private final MaxentModel attachModel;
+ private final MaxentModel checkModel;
static boolean checkComplete = false;
- private BuildContextGenerator buildContextGenerator;
- private AttachContextGenerator attachContextGenerator;
- private CheckContextGenerator checkContextGenerator;
+ private final BuildContextGenerator buildContextGenerator;
+ private final AttachContextGenerator attachContextGenerator;
+ private final CheckContextGenerator checkContextGenerator;
- private double[] bprobs;
- private double[] aprobs;
+ private final double[] bprobs;
+ private final double[] aprobs;
private double[] cprobs;
- private int doneIndex;
- private int sisterAttachIndex;
- private int daughterAttachIndex;
- private int nonAttachIndex;
- private int completeIndex;
+ private final int doneIndex;
+ private final int sisterAttachIndex;
+ private final int daughterAttachIndex;
+ private final int nonAttachIndex;
+ private final int completeIndex;
- private int[] attachments;
+ private final int[] attachments;
+ /**
+ * Instantiates a {@link Parser} via a given {@code model} and
+ * other configuration parameters. Uses the default implementations of
+ * {@link POSTaggerME} and {@link ChunkerME}.
+ *
+ * @param model The {@link ParserModel} to use.
+ * @param beamSize The number of different parses kept during parsing.
+ * @param advancePercentage The minimal amount of probability mass which advanced outcomes
+ * must represent. Only outcomes which contribute to the top
+ * {@code advancePercentage} will be explored.
+ *
+ * @throws IllegalStateException Thrown if the {@link ParserType} is not supported.
+ *
+ * @see ParserModel
+ * @see POSTaggerME
+ * @see ChunkerME
+ */
public Parser(ParserModel model, int beamSize, double advancePercentage) {
this(model.getBuildModel(), model.getAttachModel(), model.getCheckModel(),
- new POSTaggerME(model.getParserTaggerModel()),
- new ChunkerME(model.getParserChunkerModel()),
- model.getHeadRules(),
- beamSize, advancePercentage);
+ new POSTaggerME(model.getParserTaggerModel()), new ChunkerME(model.getParserChunkerModel()),
+ model.getHeadRules(), beamSize, advancePercentage);
}
+ /**
+ * Instantiates a {@link Parser} via a given {@code model}.
+ * Uses the default implementations of {@link POSTaggerME} and {@link ChunkerME}
+ * and default values for {@code beamSize} and {@code advancePercentage}.
+ *
+ * @param model The {@link ParserModel} to use.
+ *
+ * @throws IllegalStateException Thrown if the {@link ParserType} is not supported.
+ *
+ * @see ParserModel
+ * @see POSTaggerME
+ * @see ChunkerME
+ */
public Parser(ParserModel model) {
this(model, defaultBeamSize, defaultAdvancePercentage);
}
+ /**
+ * Instantiates a {@link Parser} via a given {@code model} and other configuration parameters.
+ *
+ * @param buildModel A valid {@link MaxentModel} used to build.
+ * @param checkModel A valid {@link MaxentModel} used to check.
+ * @param tagger A valid {@link POSTagger} used to tag.
+ * @param chunker A valid {@link Chunker} used to chunk.
+ * @param headRules The {@link HeadRules} for head word percolation.
+ * @param beamSize The number of different parses kept during parsing.
+ * @param advancePercentage The minimal amount of probability mass which advanced outcomes
+ * must represent. Only outcomes which contribute to the top
+ * {@code advancePercentage} will be explored.
+ * @see POSTagger
+ * @see Chunker
+ */
private Parser(MaxentModel buildModel, MaxentModel attachModel, MaxentModel checkModel,
POSTagger tagger, Chunker chunker, HeadRules headRules, int beamSize,
double advancePercentage) {
@@ -134,12 +183,14 @@ public class Parser extends AbstractBottomUpParser {
}
/**
- * Returns the right frontier of the specified parse tree with nodes ordered from deepest
+ * Returns the right frontier of the specified {@link Parse tree} with nodes ordered from deepest
* to shallowest.
- * @param root The root of the parse tree.
+ *
+ * @param root The {@link Parse root} of the parse tree.
+ * @param punctSet A set of punctuation symbols to be used.
* @return The right frontier of the specified parse tree.
*/
- public static List<Parse> getRightFrontier(Parse root,Set<String> punctSet) {
+ public static List<Parse> getRightFrontier(Parse root, Set<String> punctSet) {
List<Parse> rf = new LinkedList<>();
Parse top;
if (AbstractBottomUpParser.TOP_NODE.equals(root.getType()) ||
@@ -436,9 +487,18 @@ public class Parser extends AbstractBottomUpParser {
p.setType(TOP_NODE);
}
- public static ParserModel train(String languageCode,
- ObjectStream<Parse> parseSamples, HeadRules rules, TrainingParameters mlParams)
- throws IOException {
+ /**
+ * Starts a training of a {@link ParserModel}.
+ *
+ * @param languageCode An ISO conform language code.
+ * @param parseSamples The {@link ObjectStream<Parse> samples} as input.
+ * @param rules The {@link HeadRules} to use.
+ * @param mlParams The {@link TrainingParameters parameters} for training.
+ * @return A valid {@link ParserModel}.
+ * @throws IOException Thrown if IO errors occurred during training.
+ */
+ public static ParserModel train(String languageCode, ObjectStream<Parse> parseSamples,
+ HeadRules rules, TrainingParameters mlParams) throws IOException {
Map<String, String> manifestInfoEntries = new HashMap<>();
@@ -498,26 +558,33 @@ public class Parser extends AbstractBottomUpParser {
opennlp.tools.parser.chunking.Parser.mergeReportIntoManifest(
manifestInfoEntries, attachReportMap, "attach");
- // TODO: Remove cast for HeadRules
- return new ParserModel(languageCode, buildModel, checkModel,
- attachModel, posModel, chunkModel,
- rules, ParserType.TREEINSERT, manifestInfoEntries);
+ return new ParserModel(languageCode, buildModel, checkModel, attachModel,
+ posModel, chunkModel, rules, ParserType.TREEINSERT, manifestInfoEntries);
}
- public static ParserModel train(String languageCode,
- ObjectStream<Parse> parseSamples, HeadRules rules, int iterations, int cut)
- throws IOException {
+ /**
+ * Starts a training of a {@link ParserModel}.
+ *
+ * @param languageCode An ISO conform language code.
+ * @param parseSamples The {@link ObjectStream<Parse> samples} as input.
+ * @param rules The {@link HeadRules} to use.
+ * @param iterations The number of iterations to be conducted.
+ * @param cutoff The cut-off parameter to be used.
+ * @return A valid {@link ParserModel}.
+ * @throws IOException Thrown if IO errors occurred during training.
+ */
+ public static ParserModel train(String languageCode, ObjectStream<Parse> parseSamples,
+ HeadRules rules, int iterations, int cutoff) throws IOException {
TrainingParameters params = new TrainingParameters();
- params.put("dict", TrainingParameters.CUTOFF_PARAM, cut);
-
- params.put("tagger", TrainingParameters.CUTOFF_PARAM, cut);
+ params.put("dict", TrainingParameters.CUTOFF_PARAM, cutoff);
+ params.put("tagger", TrainingParameters.CUTOFF_PARAM, cutoff);
params.put("tagger", TrainingParameters.ITERATIONS_PARAM, iterations);
- params.put("chunker", TrainingParameters.CUTOFF_PARAM, cut);
+ params.put("chunker", TrainingParameters.CUTOFF_PARAM, cutoff);
params.put("chunker", TrainingParameters.ITERATIONS_PARAM, iterations);
- params.put("check", TrainingParameters.CUTOFF_PARAM, cut);
+ params.put("check", TrainingParameters.CUTOFF_PARAM, cutoff);
params.put("check", TrainingParameters.ITERATIONS_PARAM, iterations);
- params.put("build", TrainingParameters.CUTOFF_PARAM, cut);
+ params.put("build", TrainingParameters.CUTOFF_PARAM, cutoff);
params.put("build", TrainingParameters.ITERATIONS_PARAM, iterations);
return train(languageCode, parseSamples, rules, params);
diff --git a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/ParserEventStream.java b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/ParserEventStream.java
index b4f6ac58..07a2a36c 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/ParserEventStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/ParserEventStream.java
@@ -32,6 +32,10 @@ import opennlp.tools.parser.ParserEventTypeEnum;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.Span;
+/**
+ * Wrapper class for one of four {@link Parser built-attach parser} event streams.
+ * The particular {@link ParserEventTypeEnum event type} is specified at construction.
+ */
public class ParserEventStream extends AbstractParserEventStream {
protected AttachContextGenerator attachContextGenerator;
@@ -40,11 +44,36 @@ public class ParserEventStream extends AbstractParserEventStream {
private static final boolean debug = false;
+ /**
+ * Instantiates a {@link ParserEventStream} based on the specified data stream
+ * of the {@link ParserEventTypeEnum type} using {@link HeadRules head rules}.
+ *
+ * @param d A 1-parse-per-line Penn Treebank Style parse.
+ * @param rules The {@link HeadRules head rules} to use.
+ * @param etype The {@link ParserEventTypeEnum type} of events desired.
+ * @param dict A tri-gram {@link Dictionary} to reduce feature generation.
+ *
+ * @see ParserEventTypeEnum
+ */
public ParserEventStream(ObjectStream<Parse> d, HeadRules rules,
ParserEventTypeEnum etype, Dictionary dict) {
super(d, rules, etype, dict);
}
+ /**
+ * Instantiates a {@link ParserEventStream} based on the specified data stream
+ * of the {@link ParserEventTypeEnum type} using {@link HeadRules head rules}.
+ *
+ * @param d A 1-parse-per-line Penn Treebank Style parse.
+ * @param rules The {@link HeadRules head rules} to use.
+ * @param etype The {@link ParserEventTypeEnum type} of events desired.
+ *
+ * @see ParserEventTypeEnum
+ */
+ public ParserEventStream(ObjectStream<Parse> d, HeadRules rules, ParserEventTypeEnum etype) {
+ super(d, rules, etype);
+ }
+
@Override
public void init() {
buildContextGenerator = new BuildContextGenerator();
@@ -52,16 +81,13 @@ public class ParserEventStream extends AbstractParserEventStream {
checkContextGenerator = new CheckContextGenerator(punctSet);
}
- public ParserEventStream(ObjectStream<Parse> d, HeadRules rules, ParserEventTypeEnum etype) {
- super(d, rules, etype);
- }
-
/**
- * Returns a set of parent nodes which consist of the immediate
- * parent of the specified node and any of its parent which
+ * Returns a map of parent nodes which consist of the immediate
+ * parent of the specified {@link Parse node} and any of its parent which
* share the same syntactic type.
- * @param node The node whose parents are to be returned.
- * @return a set of parent nodes.
+ *
+ * @param node The {@link Parse node} whose parents are to be returned.
+ * @return A {@link Map} of parent {@link Parse nodes}.
*/
private Map<Parse, Integer> getNonAdjoinedParent(Parse node) {
Map<Parse, Integer> parents = new HashMap<>();
@@ -90,18 +116,6 @@ public class ParserEventStream extends AbstractParserEventStream {
private int nonPunctChildCount(Parse node) {
return Parser.collapsePunctuation(node.getChildren(),punctSet).length;
}
- /*
- private Set getNonAdjoinedParent(Parse node) {
- Set parents = new HashSet();
- Parse parent = node.getParent();
- do {
- parents.add(parent);
- parent = parent.getParent();
- }
- while(parent.getType().equals(node.getType()));
- return parents;
- }
- */
@Override
protected boolean lastChild(Parse child, Parse parent) {
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/BaseToolFactory.java b/opennlp-tools/src/main/java/opennlp/tools/util/BaseToolFactory.java
index 130bf079..7c86068f 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/BaseToolFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/BaseToolFactory.java
@@ -30,8 +30,7 @@ import opennlp.tools.util.model.ArtifactSerializer;
* Extensions of this class should:
* <ul>
* <li>implement an empty constructor (TODO is it necessary?)
- * <li>implement a constructor that takes the {@link ArtifactProvider} and
- * calls {@link BaseToolFactory(Map)}
+ * <li>implement a constructor that takes the {@link ArtifactProvider}
* <li>override {@link #createArtifactMap()} and
* {@link #createArtifactSerializersMap()} methods if necessary.
* </ul>