You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/06/01 02:51:33 UTC
[38/94] [abbrv] [partial] incubator-joshua git commit: Pulled
JOSHUA-252 changes and Resolved Merge Conflicts
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java b/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
index 9e6135e..71d3df9 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
@@ -28,13 +28,13 @@ import org.apache.joshua.decoder.hypergraph.HGNode;
import org.apache.joshua.decoder.hypergraph.HyperEdge;
/**
- * Represents a hypothesis, a translation of some coverage of the input. Extends {@link HGNode},
- * through a bit of a hack. Whereas (i,j) represents the span of an {@link HGNode}, i here is not used,
+ * Represents a hypothesis, a translation of some coverage of the input. Extends {@link org.apache.joshua.decoder.hypergraph.HGNode},
+ * through a bit of a hack. Whereas (i,j) represents the span of an {@link org.apache.joshua.decoder.hypergraph.HGNode}, i here is not used,
* and j is overloaded to denote the span of the phrase being applied. The complete coverage vector
* can be obtained by looking at the tail pointer and casting it.
*
* @author Kenneth Heafield
- * @author Matt Post <po...@cs.jhu.edu>
+ * @author Matt Post post@cs.jhu.edu
*/
public class Hypothesis extends HGNode implements Comparable<Hypothesis> {
@@ -86,7 +86,7 @@ public class Hypothesis extends HGNode implements Comparable<Hypothesis> {
* HGNodes (designed for chart parsing) maintain a span (i,j). We overload j
* here to record the index of the last translated source word.
*
- * @return
+ * @return the int 'j' which is overloaded to denote the span of the phrase being applied
*/
public int LastSourceIndex() {
return j;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/phrase/PhraseChart.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/PhraseChart.java b/src/main/java/org/apache/joshua/decoder/phrase/PhraseChart.java
index 7e194a8..676a6a8 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/PhraseChart.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/PhraseChart.java
@@ -49,8 +49,10 @@ public class PhraseChart {
* applicable against the current input sentence. These phrases are extracted
* from all available grammars.
*
- * @param tables
- * @param source
+ * @param tables input array of {@link org.apache.joshua.decoder.phrase.PhraseTable}'s
+ * @param features {@link java.util.List} of {@link org.apache.joshua.decoder.ff.FeatureFunction}'s
+ * @param source input to {@link org.apache.joshua.lattice.Lattice}
+ * @param num_options number of translation options (typically set to 20)
*/
public PhraseChart(PhraseTable[] tables, List<FeatureFunction> features, Sentence source,
int num_options) {
@@ -123,8 +125,8 @@ public class PhraseChart {
/**
* Maps two-dimensional span into a one-dimensional array.
*
- * @param i
- * @param j
+ * @param i beginning of span
+ * @param j end of span
* @return offset into private list of TargetPhrases
*/
private int offset(int i, int j) {
@@ -134,9 +136,9 @@ public class PhraseChart {
/**
* Returns phrases from all grammars that match the span.
*
- * @param begin
- * @param end
- * @return
+ * @param begin beginning of span
+ * @param end end of span
+ * @return the {@link org.apache.joshua.decoder.phrase.TargetPhrases} at the specified position in this list.
*/
public TargetPhrases getRange(int begin, int end) {
int index = offset(begin, end);
@@ -156,9 +158,9 @@ public class PhraseChart {
/**
* Add a set of phrases from a grammar to the current span.
*
- * @param begin
- * @param end
- * @param to
+ * @param begin beginning of span
+ * @param end end of span
+ * @param to a {@link org.apache.joshua.decoder.ff.tm.RuleCollection} to be used in scoring and sorting.
*/
private void addToRange(int begin, int end, RuleCollection to) {
if (to != null) {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java b/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java
index c937b50..733e1e1 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java
@@ -47,12 +47,13 @@ public class PhraseTable implements Grammar {
* Chain to the super with a number of defaults. For example, we only use a single nonterminal,
* and there is no span limit.
*
- * @param grammarFile
- * @param owner
- * @param config
- * @throws IOException
+ * @param grammarFile file path parent directory
+ * @param owner used to set phrase owners
+ * @param type the grammar specification keyword (e.g., "thrax" or "moses")
+ * @param config a populated {@link org.apache.joshua.decoder.JoshuaConfiguration}
+ * @throws IOException if there is an error reading the grammar file
*/
- public PhraseTable(String grammarFile, String owner, String type, JoshuaConfiguration config, int maxSource)
+ public PhraseTable(String grammarFile, String owner, String type, JoshuaConfiguration config)
throws IOException {
this.config = config;
int spanLimit = 0;
@@ -81,7 +82,7 @@ public class PhraseTable implements Grammar {
* since the grammar includes the nonterminal. For {@link PackedGrammar}s, the value was either
* in the packed config file (Joshua 6.0.2+) or was passed in via the TM config line.
*
- * @return
+ * @return the longest source phrase read.
*/
@Override
public int getMaxSourcePhraseLength() {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Stack.java b/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
index 1ed2705..3e9e011 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
@@ -58,9 +58,9 @@ public class Stack extends ArrayList<Hypothesis> {
/**
* Create a new stack. Stacks are organized one for each number of source words that are covered.
*
- * @param featureFunctions
- * @param sentence
- * @param config
+ * @param featureFunctions {@link java.util.List} of {@link org.apache.joshua.decoder.ff.FeatureFunction}'s
+ * @param sentence input for a {@link org.apache.joshua.lattice.Lattice}
+ * @param config populated {@link org.apache.joshua.decoder.JoshuaConfiguration}
*/
public Stack(List<FeatureFunction> featureFunctions, Sentence sentence, JoshuaConfiguration config) {
this.featureFunctions = featureFunctions;
@@ -76,6 +76,8 @@ public class Stack extends ArrayList<Hypothesis> {
/**
* A Stack is an ArrayList; here, we intercept the add so we can maintain a list of the items
* stored under each distinct coverage vector
+ * @param hyp a {@link org.apache.joshua.decoder.phrase.Hypothesis} to add to the {@link org.apache.joshua.decoder.phrase.Stack}
+ * @return true if the {@link org.apache.joshua.decoder.phrase.Hypothesis} is appended to the list
*/
@Override
public boolean add(Hypothesis hyp) {
@@ -106,6 +108,7 @@ public class Stack extends ArrayList<Hypothesis> {
/**
* Returns the set of coverages contained in this stack. This is used to iterate over them
* in the main decoding loop in Stacks.java.
+ * @return a {@link java.util.Set} of {@link org.apache.joshua.decoder.phrase.Coverage}'s
*/
public Set<Coverage> getCoverages() {
return coverages.keySet();
@@ -114,8 +117,8 @@ public class Stack extends ArrayList<Hypothesis> {
/**
* Get all items with the same coverage vector.
*
- * @param cov
- * @return
+ * @param cov the {@link org.apache.joshua.decoder.phrase.Coverage} vector to get
+ * @return an {@link java.util.ArrayList} of {@link org.apache.joshua.decoder.phrase.Hypothesis}'s
*/
public ArrayList<Hypothesis> get(Coverage cov) {
ArrayList<Hypothesis> list = coverages.get(cov);
@@ -126,15 +129,14 @@ public class Stack extends ArrayList<Hypothesis> {
/**
* Receives a partially-initialized translation candidate and places it on the
* priority queue after scoring it with all of the feature functions. In this
- * respect it is like {@link CubePruneState} (it could make use of that class with
+ * respect it is like {@link org.apache.joshua.decoder.chart_parser.CubePruneState} (it could make use of that class with
* a little generalization of spans / coverage).
*
* This function is also used to (fairly concisely) implement constrained decoding. Before
* adding a candidate, we ensure that the sequence of English words match the sentence. If not,
* the code extends the dot in the cube-pruning chart to the next phrase, since that one might
* be a match.
- *
- * @param cand
+ * @param cand a partially-initialized translation {@link org.apache.joshua.decoder.phrase.Candidate}
*/
public void addCandidate(Candidate cand) {
if (visitedStates.contains(cand))
@@ -168,9 +170,6 @@ public class Stack extends ArrayList<Hypothesis> {
* Cube pruning. Repeatedly pop the top candidate, creating a new hyperedge from it, adding it to
* the k-best list, and then extending the list of candidates with extensions of the current
* candidate.
- *
- * @param context
- * @param output
*/
public void search() {
int to_pop = config.pop_limit;
@@ -197,6 +196,7 @@ public class Stack extends ArrayList<Hypothesis> {
/**
* Adds a popped candidate to the chart / main stack. This is a candidate we have decided to
* keep around.
+ * @param complete a completely-initialized translation {@link org.apache.joshua.decoder.phrase.Candidate}
*
*/
public void addHypothesis(Candidate complete) {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java b/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
index 533d2fa..7d4b025 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
@@ -32,7 +32,7 @@ package org.apache.joshua.decoder.phrase;
* ensures that the coverage vector is consistent but the resulting hypergraph may not be projective,
* which is different from the CKY algorithm, which does produce projective derivations.
*
- * Lattice decoding is not yet supported (March 2015).
+ * TODO Lattice decoding is not yet supported (March 2015).
*/
import java.util.ArrayList;
@@ -71,10 +71,10 @@ public class Stacks {
* Entry point. Initialize everything. Create pass-through (OOV) phrase table and glue phrase
* table (with start-of-sentence and end-of-sentence rules).
*
- * @param sentence
- * @param featureFunctions
- * @param grammars
- * @param config
+ * @param sentence input to {@link org.apache.joshua.lattice.Lattice}
+ * @param featureFunctions {@link java.util.List} of {@link org.apache.joshua.decoder.ff.FeatureFunction}'s
+ * @param grammars an array of {@link org.apache.joshua.decoder.ff.tm.Grammar}'s
+ * @param config a populated {@link org.apache.joshua.decoder.JoshuaConfiguration}
*/
public Stacks(Sentence sentence, List<FeatureFunction> featureFunctions, Grammar[] grammars,
JoshuaConfiguration config) {
@@ -106,7 +106,7 @@ public class Stacks {
/**
* The main algorithm. Returns a hypergraph representing the search space.
*
- * @return
+ * @return a {@link org.apache.joshua.decoder.hypergraph.HyperGraph} representing the search space
*/
public HyperGraph search() {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/phrase/TargetPhrases.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/TargetPhrases.java b/src/main/java/org/apache/joshua/decoder/phrase/TargetPhrases.java
index cf43dda..05a4b0a 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/TargetPhrases.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/TargetPhrases.java
@@ -45,7 +45,7 @@ public class TargetPhrases extends ArrayList<Rule> {
/**
* Initialize with a collection of rules.
*
- * @param list
+ * @param list a {@link java.util.List} of {@link org.apache.joshua.decoder.ff.tm.Rule}'s
*/
public TargetPhrases(List<Rule> list) {
super();
@@ -59,6 +59,9 @@ public class TargetPhrases extends ArrayList<Rule> {
* Score the rules and sort them. Scoring is necessary because rules are only scored if they
* are used, in an effort to make reading in rules more efficient. This is starting to create
* some trouble and should probably be reworked.
+ * @param features a {@link java.util.List} of {@link org.apache.joshua.decoder.ff.FeatureFunction}'s
+ * @param weights a populated {@link org.apache.joshua.decoder.ff.FeatureVector}
+ * @param num_options the number of options
*/
public void finish(List<FeatureFunction> features, FeatureVector weights, int num_options) {
for (Rule rule: this) {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/segment_file/ConstraintRule.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/segment_file/ConstraintRule.java b/src/main/java/org/apache/joshua/decoder/segment_file/ConstraintRule.java
index ecb274b..5146e2c 100644
--- a/src/main/java/org/apache/joshua/decoder/segment_file/ConstraintRule.java
+++ b/src/main/java/org/apache/joshua/decoder/segment_file/ConstraintRule.java
@@ -26,39 +26,42 @@ import javax.swing.text.Segment;
* <p>
* The {@link Segment}, {@link ConstraintSpan}, and {@link ConstraintRule} interfaces are for
* defining an interchange format between a SegmentFileParser and the Chart class. These interfaces
- * <emph>should not</emph> be used internally by the Chart. The objects returned by a
+ * <b>should not</b> be used internally by the Chart. The objects returned by a
* SegmentFileParser will not be optimal for use during decoding. The Chart should convert each of
* these objects into its own internal representation during construction. That is the contract
* described by these interfaces.
*
- * @see Type
+ * @see org.apache.joshua.decoder.segment_file.ConstraintRule.Type
*
- * @author wren ng thornton <wr...@users.sourceforge.net>
+ * @author wren ng thornton wren@users.sourceforge.net
* @version $LastChangedDate: 2009-03-26 15:06:57 -0400 (Thu, 26 Mar 2009) $
*/
public interface ConstraintRule {
/**
- * There are three types of ConstraintRule. The RULE type returns non-null values for all methods.
+ * <p>There are three types of ConstraintRule. The RULE type returns non-null values for all methods.
* The LHS type provides a (non-null) value for the lhs method, but returns null for everything
* else. And the RHS type provides a (non-null) value for nativeRhs and foreignRhs but returns
- * null for the lhs and features.
+ * null for the lhs and features.</p>
* <p>
* The interpretation of a RULE is that it adds a new rule to the grammar which only applies to
* the associated span. If the associated span is hard, then the set of rules for that span will
- * override the regular grammar.
+ * override the regular grammar.</p>
* <p>
* The interpretation of a LHS is that it provides a hard constraint that the associated span be
- * treated as the nonterminal for that span, thus filtering the regular grammar.
+ * treated as the nonterminal for that span, thus filtering the regular grammar.</p>
* <p>
* The interpretation of a RHS is that it provides a hard constraint to filter the regular grammar
- * such that only rules generating the desired translation can be used.
+ * such that only rules generating the desired translation can be used.</p>
*/
public enum Type {
RULE, LHS, RHS
};
- /** Return the type of this ConstraintRule. */
+ /**
+ * Return the type of this ConstraintRule.
+ * @return the {@link org.apache.joshua.decoder.segment_file.ConstraintRule.Type}
+ */
Type type();
@@ -66,6 +69,7 @@ public interface ConstraintRule {
* Return the left hand side of the constraint rule. If this is null, then this object is
* specifying a translation for the span, but that translation may be derived from any
* nonterminal. The nonterminal here must be one used by the regular grammar.
+ * @return the left hand side of the constraint rule
*/
String lhs();
@@ -73,6 +77,7 @@ public interface ConstraintRule {
/**
* Return the native right hand side of the constraint rule. If this is null, then the regular
* grammar will be used to fill in the derivation from the lhs.
+ * @return the native right hand side of the constraint rule
*/
String nativeRhs();
@@ -80,6 +85,7 @@ public interface ConstraintRule {
/**
* Return the foreign right hand side of the constraint rule. This must be consistent with the
* sentence for the associated span, and is provided as a convenience method.
+ * @return the foreign right hand side of the constraint rule
*/
String foreignRhs();
@@ -87,7 +93,8 @@ public interface ConstraintRule {
/**
* Return the grammar feature values for the RULE. The length of this array must be the same as
* for the regular grammar. We cannot enforce this requirement, but the
- * {@link joshua.decoder.chart_parser.Chart} must throw an error if there is a mismatch.
+ * {@link org.apache.joshua.decoder.chart_parser.Chart} must throw an error if there is a mismatch.
+ * @return an array of floating feature values for the RULE
*/
float[] features();
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/segment_file/ConstraintSpan.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/segment_file/ConstraintSpan.java b/src/main/java/org/apache/joshua/decoder/segment_file/ConstraintSpan.java
index 9d758f0..9863fa6 100644
--- a/src/main/java/org/apache/joshua/decoder/segment_file/ConstraintSpan.java
+++ b/src/main/java/org/apache/joshua/decoder/segment_file/ConstraintSpan.java
@@ -38,29 +38,32 @@ import javax.swing.text.Segment;
* <p>
* The {@link Segment}, {@link ConstraintSpan}, and {@link ConstraintRule} interfaces are for
* defining an interchange format between a SegmentFileParser and the Chart class. These interfaces
- * <emph>should not</emph> be used internally by the Chart. The objects returned by a
+ * <b>should not</b> be used internally by the Chart. The objects returned by a
* SegmentFileParser will not be optimal for use during decoding. The Chart should convert each of
* these objects into its own internal representation during construction. That is the contract
* described by these interfaces.
*
- * @author wren ng thornton <wr...@users.sourceforge.net>
+ * @author wren ng thornton wren@users.sourceforge.net
*/
public interface ConstraintSpan {
/**
* Return the starting index of the span covered by this constraint.
+ * @return the starting index of the span covered by this constraint
*/
int start();
/**
* Return the ending index of the span covered by this constraint. Clients may assume
* <code>this.end() >= 1 + this.start()</code>.
+ * @return the ending index of the span covered by this constraint
*/
int end();
/**
* Return whether this is a hard constraint which should override the grammar. This value only
* really matters for sets of <code>RULE</code> type constraints.
+ * @return true if a hard constraint exists which should override the grammar
*/
boolean isHard();
@@ -71,6 +74,7 @@ public interface ConstraintSpan {
* {@link java.util.Iterator} instead in order to reduce the coupling between this class and
* Chart. See the note above about the fact that this interface should not be used internally by
* the Chart class because it will not be performant.
+ * @return a {@link java.util.List} of {@link org.apache.joshua.decoder.segment_file.ConstraintRule}'s
*/
List<ConstraintRule> rules();
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/segment_file/Sentence.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/segment_file/Sentence.java b/src/main/java/org/apache/joshua/decoder/segment_file/Sentence.java
index d3a5e79..9673022 100644
--- a/src/main/java/org/apache/joshua/decoder/segment_file/Sentence.java
+++ b/src/main/java/org/apache/joshua/decoder/segment_file/Sentence.java
@@ -19,7 +19,6 @@
package org.apache.joshua.decoder.segment_file;
import static org.apache.joshua.util.FormatUtils.addSentenceMarkers;
-import static org.apache.joshua.util.FormatUtils.escapeSpecialSymbols;
import java.util.ArrayList;
import java.util.HashSet;
@@ -44,9 +43,11 @@ import org.apache.joshua.util.Regex;
* This class represents lattice input. The lattice is contained on a single line and is represented
* in PLF (Python Lattice Format), e.g.,
*
+ * <pre>
* ((('ein',0.1,1),('dieses',0.2,1),('haus',0.4,2),),(('haus',0.8,1),),)
+ * </pre>
*
- * @author Matt Post <po...@cs.jhu.edu>
+ * @author Matt Post post@cs.jhu.edu
*/
public class Sentence {
@@ -77,8 +78,9 @@ public class Sentence {
* Constructor. Receives a string representing the input sentence. This string may be a
* string-encoded lattice or a plain text string for decoding.
*
- * @param inputString
- * @param id
+ * @param inputString representing the input sentence
+ * @param id ID to associate with the input string
+ * @param joshuaConfiguration a populated {@link org.apache.joshua.decoder.JoshuaConfiguration}
*/
public Sentence(String inputString, int id, JoshuaConfiguration joshuaConfiguration) {
@@ -135,7 +137,7 @@ public class Sentence {
/**
* Returns the length of the sentence. For lattices, the length is the shortest path through the
- * lattice. The length includes the <s> and </s> sentence markers.
+ * lattice. The length includes the &lt;s&gt; and &lt;/s&gt; sentence markers.
*
* @return number of input tokens + 2 (for start and end of sentence markers)
*/
@@ -255,13 +257,13 @@ public class Sentence {
}
/**
- * If the input sentence is too long (not counting the <s> and </s> tokens), it is truncated to
+ * If the input sentence is too long (not counting the &lt;s&gt; and &lt;/s&gt; tokens), it is truncated to
* the maximum length, specified with the "maxlen" parameter.
*
* Note that this code assumes the underlying representation is a sentence, and not a lattice. Its
* behavior is undefined for lattices.
*
- * @param length
+ * @param length int representing the length to truncate the sentence to
*/
protected void adjustForLength(int length) {
int size = this.getLattice().size() - 2; // subtract off the start- and end-of-sentence tokens
@@ -292,6 +294,7 @@ public class Sentence {
/**
* Returns the raw source-side input string.
+ * @return the raw source-side input string
*/
public String rawSource() {
return source;
@@ -300,7 +303,7 @@ public class Sentence {
/**
* Returns the source-side string with annotations --- if any --- stripped off.
*
- * @return
+ * @return the source-side string with annotations --- if any --- stripped off
*/
public String source() {
StringBuilder str = new StringBuilder();
@@ -332,7 +335,7 @@ public class Sentence {
*
* If the parameter parse=true is set, parsing will be triggered, otherwise constrained decoding.
*
- * @return
+ * @return target side of sentence translation
*/
public String target() {
return target;
@@ -368,7 +371,7 @@ public class Sentence {
* Returns the sequence of tokens comprising the sentence. This assumes you've done the checking
* to make sure the input string (the source side) isn't a PLF waiting to be parsed.
*
- * @return
+ * @return a {@link java.util.List} of {@link org.apache.joshua.decoder.segment_file.Token}'s comprising the sentence
*/
public List<Token> getTokens() {
assert isLinearChain();
@@ -382,6 +385,7 @@ public class Sentence {
/**
* Returns the sequence of word IDs comprising the input sentence. Assumes this is not a general
* lattice, but a linear chain.
+ * @return an int[] comprising all word ID's
*/
public int[] getWordIDs() {
List<Token> tokens = getTokens();
@@ -395,7 +399,7 @@ public class Sentence {
* Returns the sequence of word ids comprising the sentence. Assumes this is a sentence and
* not a lattice.
*
- * @return
+ * @return the sequence of word ids comprising the sentence
*/
public Lattice<String> stringLattice() {
assert isLinearChain();
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/segment_file/Token.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/segment_file/Token.java b/src/main/java/org/apache/joshua/decoder/segment_file/Token.java
index 04c1da4..7ba13ed 100644
--- a/src/main/java/org/apache/joshua/decoder/segment_file/Token.java
+++ b/src/main/java/org/apache/joshua/decoder/segment_file/Token.java
@@ -44,24 +44,30 @@ public class Token {
private JoshuaConfiguration joshuaConfiguration;
/**
- * Constructor : Creates a Token object from a raw word
+ * <p>Constructor : Creates a Token object from a raw word
* Extracts and assigns an annotation when available.
* Any word can be marked with annotations, which are arbitrary semicolon-delimited
- * key[=value] pairs (the value is optional) listed in brackets after a word, e.g.,
+ * key[=value] pairs (the value is optional) listed in brackets after a word, e.g.,</p>
+ * <pre>
+ * Je[ref=Samuel;PRO] voudrais[FUT;COND]
+ * </pre>
*
- * Je[ref=Samuel;PRO] voudrais[FUT;COND] ...
+ * <p>This will create a dictionary annotation on the word of the following form for "Je"</p>
*
- * This will create a dictionary annotation on the word of the following form for "Je"
+ * <pre>
+ * ref -> Samuel
+ * PRO -> PRO
+ * </pre>
*
- * ref -> Samuel
- * PRO -> PRO
- *
- * and the following for "voudrais":
+ * <p>and the following for "voudrais":</p>
*
- * FUT -> FUT
- * COND -> COND
+ * <pre>
+ * FUT -> FUT
+ * COND -> COND
+ * </pre>
*
* @param rawWord A word with annotation information (possibly)
+ * @param config a populated {@link org.apache.joshua.decoder.JoshuaConfiguration}
*
*/
public Token(String rawWord, JoshuaConfiguration config) {
@@ -135,7 +141,8 @@ public class Token {
/**
* Returns the annotationID (vocab ID)
* associated with this token
- * @return int A type ID
+ * @param key A type ID
+ * @return the annotationID (vocab ID)
*/
public String getAnnotation(String key) {
if (annotations.containsKey(key)) {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/segment_file/package-info.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/segment_file/package-info.java b/src/main/java/org/apache/joshua/decoder/segment_file/package-info.java
new file mode 100644
index 0000000..a615030
--- /dev/null
+++ b/src/main/java/org/apache/joshua/decoder/segment_file/package-info.java
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/**
+ * Provides common interfaces for parsing segment files
+ * (aka test corpora to be translated). In order to support
+ * constraint annotations, we provide a general API for
+ * use by JoshuaDecoder and Chart.
+ */
+package org.apache.joshua.decoder.segment_file;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/segment_file/package.html
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/segment_file/package.html b/src/main/java/org/apache/joshua/decoder/segment_file/package.html
deleted file mode 100644
index 8f06ebc..0000000
--- a/src/main/java/org/apache/joshua/decoder/segment_file/package.html
+++ /dev/null
@@ -1,17 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
-<html>
-<head></head>
-<body bgcolor="white">
-
-<!--
-##### THIS IS THE TEMPLATE FOR THE PACKAGE DOC COMMENTS. #####
-##### TYPE YOUR PACKAGE COMMENTS HERE. BEGIN WITH A #####
-##### ONE-SENTENCE SUMMARY STARTING WITH A VERB LIKE: #####
--->
-
-Provides common interfaces for parsing segment files (aka test corpora to be translated). In order to support constraint annotations, we provide a general API for use by JoshuaDecoder and Chart.
-
-<!-- Put @see and @since tags down here. -->
-
-</body>
-</html>
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/lattice/Lattice.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/lattice/Lattice.java b/src/main/java/org/apache/joshua/lattice/Lattice.java
index 1adefa8..d89c948 100644
--- a/src/main/java/org/apache/joshua/lattice/Lattice.java
+++ b/src/main/java/org/apache/joshua/lattice/Lattice.java
@@ -39,10 +39,9 @@ import org.apache.joshua.util.ChartSpan;
* A lattice representation of a directed graph.
*
* @author Lane Schwartz
- * @author Matt Post <po...@cs.jhu.edu>
+ * @author Matt Post post@cs.jhu.edu
* @since 2008-07-08
*
- * @param Label Type of label associated with an arc.
*/
public class Lattice<Value> implements Iterable<Node<Value>> {
@@ -73,6 +72,7 @@ public class Lattice<Value> implements Iterable<Node<Value>> {
* order, the behavior of the lattice is not defined.
*
* @param nodes A list of nodes which must be in topological order.
+ * @param config a populated {@link org.apache.joshua.decoder.JoshuaConfiguration}
*/
public Lattice(List<Node<Value>> nodes, JoshuaConfiguration config) {
this.nodes = nodes;
@@ -92,6 +92,7 @@ public class Lattice<Value> implements Iterable<Node<Value>> {
* Instantiates a lattice from a linear chain of values, i.e., a sentence.
*
* @param linearChain a sequence of Value objects
+ * @param config a populated {@link org.apache.joshua.decoder.JoshuaConfiguration}
*/
public Lattice(Value[] linearChain, JoshuaConfiguration config) {
this.latticeHasAmbiguity = false;
@@ -126,9 +127,8 @@ public class Lattice<Value> implements Iterable<Node<Value>> {
* Computes the shortest distance between two nodes, which is used (perhaps among other places) in
* computing which rules can apply over which spans of the input
*
- * @param tail
- * @param head
- * @return the distance, a positive number, or -1 if there is no path between the nodes
+ * @param arc an {@link org.apache.joshua.lattice.Arc} of values
+ * @return the shortest distance between two nodes
*/
public int distance(Arc<Value> arc) {
return this.getShortestPath(arc.getTail().getNumber(), arc.getHead().getNumber());
@@ -141,7 +141,8 @@ public class Lattice<Value> implements Iterable<Node<Value>> {
/**
* Convenience method to get a lattice from a linear sequence of {@link Token} objects.
*
- * @param linearChain
+ * @param source input string from which to create a {@link org.apache.joshua.lattice.Lattice}
+ * @param config a populated {@link org.apache.joshua.decoder.JoshuaConfiguration}
* @return Lattice representation of the linear chain.
*/
public static Lattice<Token> createTokenLatticeFromString(String source, JoshuaConfiguration config) {
@@ -245,6 +246,7 @@ public class Lattice<Value> implements Iterable<Node<Value>> {
* Constructs a lattice from a given string representation.
*
* @param data String representation of a lattice.
+ * @param config a populated {@link org.apache.joshua.decoder.JoshuaConfiguration}
* @return A lattice that corresponds to the given string.
*/
public static Lattice<String> createStringLatticeFromString(String data, JoshuaConfiguration config) {
@@ -327,7 +329,7 @@ public class Lattice<Value> implements Iterable<Node<Value>> {
/**
* Gets the shortest distance through the lattice.
- *
+ * @return int representing the shortest distance through the lattice
*/
public int getShortestDistance() {
if (distances == null)
@@ -444,9 +446,9 @@ public class Lattice<Value> implements Iterable<Node<Value>> {
* Replaced the arc from node i to j with the supplied lattice. This is used to do OOV
* segmentation of words in a lattice.
*
- * @param i
- * @param j
- * @param lattice
+ * @param i start node of arc
+ * @param j end node of arc
+ * @param newNodes new nodes used within the replacement operation
*/
public void insert(int i, int j, List<Node<Value>> newNodes) {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/lattice/Node.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/lattice/Node.java b/src/main/java/org/apache/joshua/lattice/Node.java
index 09fb150..ecff22e 100644
--- a/src/main/java/org/apache/joshua/lattice/Node.java
+++ b/src/main/java/org/apache/joshua/lattice/Node.java
@@ -55,6 +55,7 @@ public class Node<Label> {
/**
* Constructs a new node with the specified numeric identifier.
+ * @param id the new node identifier
*/
public Node(int id) {
this.id = id;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/lattice/package-info.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/lattice/package-info.java b/src/main/java/org/apache/joshua/lattice/package-info.java
new file mode 100644
index 0000000..6b44542
--- /dev/null
+++ b/src/main/java/org/apache/joshua/lattice/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/**
+ * Provides implementations of lattice and related data structures.
+ */
+package org.apache.joshua.lattice;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/lattice/package.html
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/lattice/package.html b/src/main/java/org/apache/joshua/lattice/package.html
deleted file mode 100644
index a479be8..0000000
--- a/src/main/java/org/apache/joshua/lattice/package.html
+++ /dev/null
@@ -1,18 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
-<html>
-<head></head>
-<body bgcolor="white">
-
-<!--
-##### THIS IS THE TEMPLATE FOR THE PACKAGE DOC COMMENTS. #####
-##### TYPE YOUR PACKAGE COMMENTS HERE. BEGIN WITH A #####
-##### ONE-SENTENCE SUMMARY STARTING WITH A VERB LIKE: #####
--->
-
-Provides implementations of lattice and related data structures.
-
-
-<!-- Put @see and @since tags down here. -->
-
-</body>
-</html>
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/metrics/BLEU.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/metrics/BLEU.java b/src/main/java/org/apache/joshua/metrics/BLEU.java
index 1d14d36..4458ac6 100644
--- a/src/main/java/org/apache/joshua/metrics/BLEU.java
+++ b/src/main/java/org/apache/joshua/metrics/BLEU.java
@@ -147,6 +147,9 @@ public class BLEU extends EvaluationMetric {
/**
* Computes the BLEU sufficient statistics on a hypothesis.
+ * @param cand_str todo
+ * @param i todo
+ * @return int[] representing statistics on a hypothesis
*/
public int[] suffStats(String cand_str, int i) {
int[] stats = new int[suffStatsCount];
@@ -172,9 +175,9 @@ public class BLEU extends EvaluationMetric {
/**
* Computes the precision sufficient statistics, clipping counts.
*
- * @param stats
- * @param words
- * @param i
+ * @param stats int[] representing statistics on a hypothesis.
+ * @param words String[] of input terms
+ * @param i todo
*/
public void set_prec_suffStats(int[] stats, String[] words, int i) {
HashMap<String, Integer>[] candCountsArray = getNgramCountsArray(words);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/metrics/EvaluationMetric.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/metrics/EvaluationMetric.java b/src/main/java/org/apache/joshua/metrics/EvaluationMetric.java
index cce51f8..08efdeb 100644
--- a/src/main/java/org/apache/joshua/metrics/EvaluationMetric.java
+++ b/src/main/java/org/apache/joshua/metrics/EvaluationMetric.java
@@ -213,9 +213,9 @@ public abstract class EvaluationMetric {
/**
* Calculates sufficient statistics on each sentence in the corpus, returning them as arrays.
*
- * @param cand_strings
- * @param cand_indices
- * @return
+ * @param cand_strings todo
+ * @param cand_indices todo
+ * @return todo
*/
public int[][] suffStats(String[] cand_strings, int[] cand_indices) {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/oracle/OracleExtractionHG.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/oracle/OracleExtractionHG.java b/src/main/java/org/apache/joshua/oracle/OracleExtractionHG.java
index 184a14a..0660e8a 100644
--- a/src/main/java/org/apache/joshua/oracle/OracleExtractionHG.java
+++ b/src/main/java/org/apache/joshua/oracle/OracleExtractionHG.java
@@ -42,7 +42,7 @@ import org.apache.joshua.util.io.LineReader;
* maintain different states for different hyp length (3) brief penalty is calculated based on the
* avg ref length (4) using sentence-level BLEU, instead of doc-level BLEU
*
- * @author Zhifei Li, <zh...@gmail.com> (Johns Hopkins University)
+ * @author Zhifei Li, zhifei.work@gmail.com (Johns Hopkins University)
*/
public class OracleExtractionHG extends SplitHg {
static String BACKOFF_LEFT_LM_STATE_SYM = "<lzfbo>";
@@ -58,7 +58,7 @@ public class OracleExtractionHG extends SplitHg {
protected int src_sent_len = 0;
protected int ref_sent_len = 0;
protected int g_lm_order = 4; // only used for decide whether to get the LM state by this class or
- // not in compute_state
+ // not in compute_state
static protected boolean do_local_ngram_clip = false;
static protected boolean maitain_length_state = false;
static protected int g_bleu_order = 4;
@@ -76,8 +76,8 @@ public class OracleExtractionHG extends SplitHg {
protected HashMap<String, Integer> tbl_ref_ngrams = new HashMap<String, Integer>();
static boolean always_maintain_seperate_lm_state = true; // if true: the virtual item maintain its
- // own lm state regardless whether
- // lm_order>=g_bleu_order
+ // own lm state regardless whether
+ // lm_order>=g_bleu_order
int lm_feat_id = 0; // the baseline LM feature id
@@ -88,7 +88,7 @@ public class OracleExtractionHG extends SplitHg {
* It seems that the symbol table here should only need to represent monolingual terminals, plus
* nonterminals.
*
- * @param lm_feat_id_
+ * @param lm_feat_id_ a language model feature identifier
*/
public OracleExtractionHG(int lm_feat_id_) {
this.lm_feat_id = lm_feat_id_;
@@ -111,7 +111,7 @@ public class OracleExtractionHG extends SplitHg {
*/
if (6 != args.length) {
System.out
- .println("Usage: java Decoder f_hypergraphs f_rule_tbl f_ref_files f_orc_out lm_order orc_extract_nbest");
+ .println("Usage: java Decoder f_hypergraphs f_rule_tbl f_ref_files f_orc_out lm_order orc_extract_nbest");
System.out.println("num of args is " + args.length);
for (int i = 0; i < args.length; i++) {
System.out.println("arg is: " + args[i]);
@@ -123,12 +123,9 @@ public class OracleExtractionHG extends SplitHg {
String f_ref_files = args[2].trim();
String f_orc_out = args[3].trim();
int lm_order = Integer.parseInt(args[4].trim());
- boolean orc_extract_nbest = Boolean.valueOf(args[5].trim()); // oracle extraction from nbest or
- // hg
+ boolean orc_extract_nbest = Boolean.valueOf(args[5].trim()); // oracle extraction from nbest or hg
- // ??????????????????????????????????????
int baseline_lm_feat_id = 0;
- // ??????????????????????????????????????
KBestExtractor kbest_extractor = null;
int topN = 300;// TODO
@@ -280,7 +277,7 @@ public class OracleExtractionHG extends SplitHg {
DPStateOracle dps = compute_state(parent_item, cur_dt, l_ant_virtual_item, tbl_ref_ngrams,
do_local_ngram_clip, g_lm_order, avg_ref_len, bleu_score, tbl_suffix, tbl_prefix);
VirtualDeduction t_dt = new VirtualDeduction(cur_dt, l_ant_virtual_item, -bleu_score[0]);// cost:
- // -best_bleu
+ // -best_bleu
g_num_virtual_deductions++;
add_deduction(parent_item, virtual_item_sigs, t_dt, dps, true);
}
@@ -355,11 +352,11 @@ public class OracleExtractionHG extends SplitHg {
// ################## deductions *not* under "goal item"
HashMap<String, Integer> new_ngram_counts = new HashMap<String, Integer>();// new ngrams created
- // due to the
- // combination
+ // due to the
+ // combination
HashMap<String, Integer> old_ngram_counts = new HashMap<String, Integer>();// the ngram that has
- // already been
- // computed
+ // already been
+ // computed
int total_hyp_len = 0;
int[] num_ngram_match = new int[g_bleu_order];
int[] en_words = dt.getRule().getEnglish();
@@ -378,7 +375,7 @@ public class OracleExtractionHG extends SplitHg {
left_state_sequence = new ArrayList<Integer>();
right_state_sequence = new ArrayList<Integer>();
correct_lm_order = g_bleu_order; // if lm_order is smaller than g_bleu_order, we will get the
- // lm state by ourself
+ // lm state by ourself
}
// #### get left_state_sequence, right_state_sequence, total_hyp_len, num_ngram_match
@@ -438,7 +435,7 @@ public class OracleExtractionHG extends SplitHg {
// BUG: Whoa, is that an actual hard-coded ID in there? :)
if (final_count < 0) {
throw new RuntimeException("negative count for ngram: " + Vocabulary.word(11844)
- + "; new: " + new_ngram_counts.get(ngram) + "; old: " + old_ngram_counts.get(ngram));
+ + "; new: " + new_ngram_counts.get(ngram) + "; old: " + old_ngram_counts.get(ngram));
}
}
if (final_count > 0) { // TODO: not correct/global ngram clip
@@ -625,7 +622,13 @@ public class OracleExtractionHG extends SplitHg {
}
}
- /** accumulate ngram counts into tbl. */
+ /**
+ * accumulate ngram counts into tbl.
+ * @param tbl a {@link java.util.HashMap} which is used to store ngram counts
+ * @param order todo
+ * @param wrds an {@link java.util.ArrayList} containing {@link java.lang.Integer} word representations
+ * @param ignore_null_equiv_symbol set to true to skip some nGrams
+ */
public void get_ngrams(HashMap<String, Integer> tbl, int order, ArrayList<Integer> wrds,
boolean ignore_null_equiv_symbol) {
for (int i = 0; i < wrds.size(); i++) {
@@ -686,9 +689,9 @@ public class OracleExtractionHG extends SplitHg {
} else {
// BUG: use joshua.util.Regex.spaces.split(...)
num_ngram_match[ngram.split("\\s+").length - 1] += (Integer) hyp_ngram_tbl.get(ngram);// without
- // ngram
- // count
- // clipping
+ // ngram
+ // count
+ // clipping
}
}
}
@@ -772,7 +775,7 @@ public class OracleExtractionHG extends SplitHg {
}
}
}
-
+
@SuppressWarnings("unused")
public boolean contain_ngram(ArrayList<Integer> wrds, int start_pos, int end_pos) {
if (end_pos < start_pos)
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/oracle/SplitHg.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/oracle/SplitHg.java b/src/main/java/org/apache/joshua/oracle/SplitHg.java
index 054e9b7..9fcdd35 100644
--- a/src/main/java/org/apache/joshua/oracle/SplitHg.java
+++ b/src/main/java/org/apache/joshua/oracle/SplitHg.java
@@ -30,7 +30,7 @@ import org.apache.joshua.decoder.hypergraph.HyperGraph;
* This class implements general ways of splitting the hypergraph based on coarse-to-fine idea input
* is a hypergraph output is another hypergraph that has changed state structures.
*
- * @author Zhifei Li, <zh...@gmail.com> (Johns Hopkins University)
+ * @author Zhifei Li, zhifei.work@gmail.com (Johns Hopkins University)
*/
public abstract class SplitHg {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/oracle/package-info.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/oracle/package-info.java b/src/main/java/org/apache/joshua/oracle/package-info.java
new file mode 100644
index 0000000..ae14e82
--- /dev/null
+++ b/src/main/java/org/apache/joshua/oracle/package-info.java
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/**
+ * Provides for extracting the target string from a hypergraph
+ * that most closely matches a reference sentence. Much of the
+ * code in this package is based on descriptions in Adam
+ * Lopez's <a href="http://homepages.inf.ed.ac.uk/alopez/papers/adam.lopez.dissertation.pdf">
+ * doctoral thesis</a>.
+ */
+package org.apache.joshua.oracle;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/oracle/package.html
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/oracle/package.html b/src/main/java/org/apache/joshua/oracle/package.html
deleted file mode 100644
index 0f670d3..0000000
--- a/src/main/java/org/apache/joshua/oracle/package.html
+++ /dev/null
@@ -1,24 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
-<html>
-<head></head>
-<body bgcolor="white">
-
-<!--
-##### THIS IS THE TEMPLATE FOR THE PACKAGE DOC COMMENTS. #####
-##### TYPE YOUR PACKAGE COMMENTS HERE. BEGIN WITH A #####
-##### ONE-SENTENCE SUMMARY STARTING WITH A VERB LIKE: #####
--->
-Provides for extracting the target string from a hypergraph that most closely matches a reference sentence.
-
-<!--
-<h2>Related Documentation</h2>
-
-<ul>
- <li>Much of the code in this package is based on descriptions in Adam Lopez's <a href="http://homepages.inf.ed.ac.uk/alopez/papers/adam.lopez.dissertation.pdf">doctoral thesis</a>.
-</ul>
--->
-
-<!-- Put @see and @since tags down here. -->
-
-</body>
-</html>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/server/ServerThread.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/server/ServerThread.java b/src/main/java/org/apache/joshua/server/ServerThread.java
index 5f42be6..28c47ed 100644
--- a/src/main/java/org/apache/joshua/server/ServerThread.java
+++ b/src/main/java/org/apache/joshua/server/ServerThread.java
@@ -51,6 +51,7 @@ public class ServerThread extends Thread implements HttpHandler {
*
* @param socket the socket representing the input/output streams
* @param decoder the configured decoder that handles performing translations
+ * @param joshuaConfiguration a populated {@link org.apache.joshua.decoder.JoshuaConfiguration}
*/
public ServerThread(Socket socket, Decoder decoder, JoshuaConfiguration joshuaConfiguration) {
this.joshuaConfiguration = joshuaConfiguration;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/server/TcpServer.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/server/TcpServer.java b/src/main/java/org/apache/joshua/server/TcpServer.java
index 36dc957..d9663ee 100644
--- a/src/main/java/org/apache/joshua/server/TcpServer.java
+++ b/src/main/java/org/apache/joshua/server/TcpServer.java
@@ -41,9 +41,6 @@ public class TcpServer {
/**
* Listens on a port for new socket connections. Concurrently handles multiple socket connections.
- *
- * @param args configuration options
- * @throws IOException
*/
public void start() {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/subsample/AlignedSubsampler.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/subsample/AlignedSubsampler.java b/src/main/java/org/apache/joshua/subsample/AlignedSubsampler.java
index 88a0960..2915685 100644
--- a/src/main/java/org/apache/joshua/subsample/AlignedSubsampler.java
+++ b/src/main/java/org/apache/joshua/subsample/AlignedSubsampler.java
@@ -32,8 +32,8 @@ import org.apache.commons.cli.Options;
* A subsampler which takes in word-alignments as well as the F and E files. To remove redundant
* code, this class uses callback techniques in order to "override" the superclass methods.
*
- * @see joshua.subsample.Subsampler
- * @author wren ng thornton <wr...@users.sourceforge.net>
+ * @see org.apache.joshua.subsample.Subsampler
+ * @author wren ng thornton wren@users.sourceforge.net
* @version $LastChangedDate$
*/
public class AlignedSubsampler extends Subsampler {
@@ -53,6 +53,7 @@ public class AlignedSubsampler extends Subsampler {
* @param epath path to source E files
* @param apath path to source alignment files
* @param output basename for output files (will append extensions)
+ * @throws IOException if there is an error reading the input file(s)
*/
public void subsample(String filelist, float targetFtoERatio, String extf, String exte,
String exta, String fpath, String epath, String apath, String output) throws IOException {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/subsample/Alignment.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/subsample/Alignment.java b/src/main/java/org/apache/joshua/subsample/Alignment.java
index 2372bdd..6a11a5e 100644
--- a/src/main/java/org/apache/joshua/subsample/Alignment.java
+++ b/src/main/java/org/apache/joshua/subsample/Alignment.java
@@ -12,10 +12,8 @@ package org.apache.joshua.subsample;
* two-dimensional bit vector, though for our purposes we could just keep the original string around
* (which would save lots of time parsing and reconstructing the string).
*
- * @see joshua.corpus.alignment.Alignments
- *
* @author UMD (Jimmy Lin, Chris Dyer, et al.)
- * @author wren ng thornton <wr...@users.sourceforge.net>
+ * @author wren ng thornton wren@users.sourceforge.net
* @version $LastChangedDate$
*/
public class Alignment {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/subsample/BiCorpus.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/subsample/BiCorpus.java b/src/main/java/org/apache/joshua/subsample/BiCorpus.java
index c2959fa..31e51e3 100644
--- a/src/main/java/org/apache/joshua/subsample/BiCorpus.java
+++ b/src/main/java/org/apache/joshua/subsample/BiCorpus.java
@@ -25,28 +25,32 @@ import org.apache.joshua.corpus.Phrase;
* <code>subsample(String, float, PhraseWriter, BiCorpusFactory)</code> method.
*
* @author UMD (Jimmy Lin, Chris Dyer, et al.)
- * @author wren ng thornton <wr...@users.sourceforge.net>
+ * @author wren ng thornton wren@users.sourceforge.net
* @version $LastChangedDate$
*/
public class BiCorpus implements Iterable<PhrasePair> {
- // Making these final requires Java6, doesn't work in Java5
protected final String foreignFileName;
protected final String nativeFileName;
protected final String alignmentFileName;
- // ===============================================================
- // Constructors
- // ===============================================================
/**
* Constructor for unaligned BiCorpus.
+ * @param foreignFileName todo
+ * @param nativeFileName todo
+ * @throws IOException todo
*/
public BiCorpus(String foreignFileName, String nativeFileName) throws IOException {
this(foreignFileName, nativeFileName, null);
}
-
/**
* Constructor for word-aligned BiCorpus.
+ * @param foreignFileName todo
+ * @param nativeFileName todo
+ * @param alignmentFileName todo
+ * @throws IOException todo
+ * @throws IllegalArgumentException todo
+ * @throws IndexOutOfBoundsException todo
*/
public BiCorpus(String foreignFileName, String nativeFileName, String alignmentFileName)
throws IOException, IllegalArgumentException, IndexOutOfBoundsException {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/subsample/BiCorpusFactory.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/subsample/BiCorpusFactory.java b/src/main/java/org/apache/joshua/subsample/BiCorpusFactory.java
index 226090d..eda3bf5 100644
--- a/src/main/java/org/apache/joshua/subsample/BiCorpusFactory.java
+++ b/src/main/java/org/apache/joshua/subsample/BiCorpusFactory.java
@@ -26,7 +26,7 @@ import java.io.IOException;
* {@link AlignedSubsampler} in order to "override" methods of {@link Subsampler}, minimizing code
* duplication.
*
- * @author wren ng thornton <wr...@users.sourceforge.net>
+ * @author wren ng thornton wren@users.sourceforge.net
* @version $LastChangedDate$
*/
public class BiCorpusFactory {
@@ -51,17 +51,32 @@ public class BiCorpusFactory {
}
- /** Generate unaligned BiCorpus by default. */
+ /**
+ * Generate unaligned {@link org.apache.joshua.subsample.BiCorpus} by default.
+ * @param f base file name, passed on unchanged to build the unaligned corpus
+ * @return an unaligned {@link org.apache.joshua.subsample.BiCorpus}
+ * @throws IOException if there is an error reading input file
+ */
public BiCorpus fromFiles(String f) throws IOException {
return this.unalignedFromFiles(f);
}
- /** Generate unaligned BiCorpus. */
+ /**
+ * Generate unaligned BiCorpus.
+ * @param f base file name, placed between the F and E path prefixes and their extensions
+ * @return an unaligned {@link org.apache.joshua.subsample.BiCorpus}
+ * @throws IOException if there is an error reading input file
+ */
public BiCorpus unalignedFromFiles(String f) throws IOException {
return new BiCorpus(fpath + f + extf, epath + f + exte);
}
- /** Generate aligned BiCorpus. */
+ /**
+ * Generate aligned BiCorpus.
+ * @param f base file name, placed between the F, E and alignment path prefixes and their extensions
+ * @return an aligned {@link org.apache.joshua.subsample.BiCorpus}
+ * @throws IOException if there is an error reading input file
+ */
public BiCorpus alignedFromFiles(String f) throws IOException {
return new BiCorpus(fpath + f + extf, epath + f + exte, apath + f + exta);
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/subsample/PhrasePair.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/subsample/PhrasePair.java b/src/main/java/org/apache/joshua/subsample/PhrasePair.java
index 125cac2..b958540 100644
--- a/src/main/java/org/apache/joshua/subsample/PhrasePair.java
+++ b/src/main/java/org/apache/joshua/subsample/PhrasePair.java
@@ -15,7 +15,7 @@ import org.apache.joshua.corpus.Phrase;
* word-alignments. This is primarily for maintaining sentence-alignment.
*
* @author UMD (Jimmy Lin, Chris Dyer, et al.)
- * @author wren ng thornton <wr...@users.sourceforge.net>
+ * @author wren ng thornton wren@users.sourceforge.net
* @version $LastChangedDate$
*/
public class PhrasePair {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/subsample/PhraseReader.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/subsample/PhraseReader.java b/src/main/java/org/apache/joshua/subsample/PhraseReader.java
index f35288c..61136ba 100644
--- a/src/main/java/org/apache/joshua/subsample/PhraseReader.java
+++ b/src/main/java/org/apache/joshua/subsample/PhraseReader.java
@@ -17,7 +17,7 @@ import org.apache.joshua.corpus.BasicPhrase;
* Wrapper class to read in each line as a BasicPhrase.
*
* @author UMD (Jimmy Lin, Chris Dyer, et al.)
- * @author wren ng thornton <wr...@users.sourceforge.net>
+ * @author wren ng thornton wren@users.sourceforge.net
* @version $LastChangedDate$
*/
public class PhraseReader extends BufferedReader {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/subsample/PhraseWriter.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/subsample/PhraseWriter.java b/src/main/java/org/apache/joshua/subsample/PhraseWriter.java
index cda99e6..11bbf08 100644
--- a/src/main/java/org/apache/joshua/subsample/PhraseWriter.java
+++ b/src/main/java/org/apache/joshua/subsample/PhraseWriter.java
@@ -27,7 +27,7 @@ import java.io.IOException;
* of this, to have zero-overhead while not duplicating code. Alas, Java's not that cool. The
* "final" could help on JIT at least.
*
- * @author wren ng thornton <wr...@users.sourceforge.net>
+ * @author wren ng thornton wren@users.sourceforge.net
* @version $LastChangedDate$
*/
final public class PhraseWriter {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/subsample/Subsampler.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/subsample/Subsampler.java b/src/main/java/org/apache/joshua/subsample/Subsampler.java
index d56c529..d3cfd89 100644
--- a/src/main/java/org/apache/joshua/subsample/Subsampler.java
+++ b/src/main/java/org/apache/joshua/subsample/Subsampler.java
@@ -27,7 +27,7 @@ import org.apache.joshua.corpus.Phrase;
* Papineni.
*
* @author UMD (Jimmy Lin, Chris Dyer, et al.)
- * @author wren ng thornton <wr...@users.sourceforge.net>
+ * @author wren ng thornton wren@users.sourceforge.net
* @version $LastChangedDate$
*/
public class Subsampler {
@@ -81,6 +81,7 @@ public class Subsampler {
* @param fpath path to source F files
* @param epath path to source E files
* @param output basename for output files (will append extensions)
+ * @throws IOException if there is an issue reading one of the input files
*/
public void subsample(String filelist, float targetFtoERatio, String extf, String exte,
String fpath, String epath, String output) throws IOException {
@@ -92,7 +93,13 @@ public class Subsampler {
}
/**
- * The main wrapper for the subsample worker. Closes the PhraseWriter before exiting.
+ * The main wrapper for the subsample worker. Closes the
+ * {@link org.apache.joshua.subsample.PhraseWriter} before exiting.
+ * @param filelist list of source files to subsample from
+ * @param targetFtoERatio goal for ratio of output F length to output E length
+ * @param out a {@link org.apache.joshua.subsample.PhraseWriter} to flush data to
+ * @param bcFactory used to generate a sentence-aligned {@link org.apache.joshua.subsample.BiCorpus}
+ * @throws IOException if there is an issue reading one of the input files
*/
protected void subsample(String filelist, float targetFtoERatio, PhraseWriter out,
BiCorpusFactory bcFactory) throws IOException {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/subsample/SubsamplerCLI.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/subsample/SubsamplerCLI.java b/src/main/java/org/apache/joshua/subsample/SubsamplerCLI.java
index 8303617..ecdb527 100644
--- a/src/main/java/org/apache/joshua/subsample/SubsamplerCLI.java
+++ b/src/main/java/org/apache/joshua/subsample/SubsamplerCLI.java
@@ -28,7 +28,7 @@ import org.apache.commons.cli.ParseException;
* one-use nature of subclasses of <code>SubsampleCLI</code>, they generally should be implemented
* as anonymous local classes.
*
- * @author wren ng thornton <wr...@users.sourceforge.net>
+ * @author wren ng thornton wren@users.sourceforge.net
* @version $LastChangedDate$
*/
@SuppressWarnings("static-access")
@@ -69,6 +69,7 @@ public class SubsamplerCLI {
/**
* Return all Options. The HelpFormatter will print them in sorted order, so it doesn't matter
* when we add them. Subclasses should override this method by adding more options.
+ * @return all of the {@link org.apache.commons.cli.Options}
*/
public Options getCliOptions() {
return new Options().addOption(ot).addOption(otest).addOption(of).addOption(oe)
@@ -76,7 +77,9 @@ public class SubsamplerCLI {
}
/**
- * This method should be overridden to return the class used in runSubsampler.
+ * This method should be overridden to return the class used in
+ * {@link org.apache.joshua.subsample.SubsamplerCLI#runSubsampler(String[], int, int, float)}.
+ * @return the class name of the {@link org.apache.joshua.subsample.Subsampler} implementation
*/
public String getClassName() {
return Subsampler.class.getName();
@@ -85,6 +88,11 @@ public class SubsamplerCLI {
/**
* Callback to run the subsampler. This function needs access to the variables holding each
* Option, thus all this closure nonsense.
+ * @param testFiles a String array of test files
+ * @param maxN todo
+ * @param targetCount todo
+ * @param ratio todo
+ * @throws IOException if there is an issue whilst reading input files
*/
public void runSubsampler(String[] testFiles, int maxN, int targetCount, float ratio)
throws IOException {
@@ -95,6 +103,7 @@ public class SubsamplerCLI {
/**
* Non-static version of main so that we can define anonymous local classes to override or extend
* the above.
+ * @param args a String array of input options
*/
public void runMain(String[] args) {
Options o = this.getCliOptions();
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/subsample/package-info.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/subsample/package-info.java b/src/main/java/org/apache/joshua/subsample/package-info.java
new file mode 100644
index 0000000..b7fe744
--- /dev/null
+++ b/src/main/java/org/apache/joshua/subsample/package-info.java
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/**
+ * Provides executables Subsampler and AlignedSubsampler,
+ * for subsampling from large training corpora based on a
+ * test corpus.
+ */
+package org.apache.joshua.subsample;
+
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/subsample/package.html
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/subsample/package.html b/src/main/java/org/apache/joshua/subsample/package.html
deleted file mode 100644
index bed439c..0000000
--- a/src/main/java/org/apache/joshua/subsample/package.html
+++ /dev/null
@@ -1,25 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
-<html>
-<head></head>
-<body bgcolor="white">
-
-<!--
-##### THIS IS THE TEMPLATE FOR THE PACKAGE DOC COMMENTS. #####
-##### TYPE YOUR PACKAGE COMMENTS HERE. BEGIN WITH A #####
-##### ONE-SENTENCE SUMMARY STARTING WITH A VERB LIKE: #####
--->
-
-Provides executables Subsampler and AlignedSubsampler, for subsampling from large training corpora based on a test corpus.
-
-<!--
-<h2>Related Documentation</h2>
-
-<ul>
- <li>Much of the code in this package is based on .....
-</ul>
--->
-
-<!-- Put @see and @since tags down here. -->
-
-</body>
-</html>
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/tools/GrammarPacker.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/tools/GrammarPacker.java b/src/main/java/org/apache/joshua/tools/GrammarPacker.java
index d32ab53..517b744 100644
--- a/src/main/java/org/apache/joshua/tools/GrammarPacker.java
+++ b/src/main/java/org/apache/joshua/tools/GrammarPacker.java
@@ -147,7 +147,7 @@ public class GrammarPacker {
/**
* Executes the packing.
*
- * @throws IOException
+ * @throws IOException if there is an error reading the grammar
*/
public void pack() throws IOException {
logger.info("Beginning exploration pass.");
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/tools/LabelPhrases.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/tools/LabelPhrases.java b/src/main/java/org/apache/joshua/tools/LabelPhrases.java
index b4a31c7..9dbf249 100644
--- a/src/main/java/org/apache/joshua/tools/LabelPhrases.java
+++ b/src/main/java/org/apache/joshua/tools/LabelPhrases.java
@@ -39,10 +39,9 @@ public class LabelPhrases {
* Main method.
*
* @param args names of the two grammars to be compared
- * @throws IOException
- * @throws NumberFormatException
+ * @throws IOException if there is an error reading the input grammars
*/
- public static void main(String[] args) throws NumberFormatException, IOException {
+ public static void main(String[] args) throws IOException {
if (args.length < 1 || args[0].equals("-h")) {
System.err.println("Usage: " + LabelPhrases.class.toString());
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/tools/TestSetFilter.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/tools/TestSetFilter.java b/src/main/java/org/apache/joshua/tools/TestSetFilter.java
index 6312266..1b3cf74 100644
--- a/src/main/java/org/apache/joshua/tools/TestSetFilter.java
+++ b/src/main/java/org/apache/joshua/tools/TestSetFilter.java
@@ -106,6 +106,8 @@ public class TestSetFilter {
/**
* Top-level filter, responsible for calling the fast or exact version. Takes the source side
* of a rule and determines whether there is any sentence in the test set that can match it.
+ * @param sourceSide the source side of a rule
+ * @return true if any sentence in the test set can match the source side
*/
public boolean inTestSet(String sourceSide) {
if (!sourceSide.equals(lastSourceSide)) {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/ui/package-info.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/ui/package-info.java b/src/main/java/org/apache/joshua/ui/package-info.java
new file mode 100644
index 0000000..1d69516
--- /dev/null
+++ b/src/main/java/org/apache/joshua/ui/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/**
+ * Provides classes for visualizing parts of the translation process.
+ */
+package org.apache.joshua.ui;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/ui/package.html
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/ui/package.html b/src/main/java/org/apache/joshua/ui/package.html
deleted file mode 100644
index 2dcc44e..0000000
--- a/src/main/java/org/apache/joshua/ui/package.html
+++ /dev/null
@@ -1,25 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
-<html>
-<head></head>
-<body bgcolor="white">
-
-<!--
-##### THIS IS THE TEMPLATE FOR THE PACKAGE DOC COMMENTS. #####
-##### TYPE YOUR PACKAGE COMMENTS HERE. BEGIN WITH A #####
-##### ONE-SENTENCE SUMMARY STARTING WITH A VERB LIKE: #####
--->
-
-Provides classes for visualizing parts of the translation process.
-
-<!--
-<h2>Related Documentation</h2>
-
-<ul>
- <li>Much of the code in this package is based on .....
-</ul>
--->
-
-<!-- Put @see and @since tags down here. -->
-
-</body>
-</html>
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/ui/tree_visualizer/browser/Browser.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/ui/tree_visualizer/browser/Browser.java b/src/main/java/org/apache/joshua/ui/tree_visualizer/browser/Browser.java
index 10913f6..ee22b94 100644
--- a/src/main/java/org/apache/joshua/ui/tree_visualizer/browser/Browser.java
+++ b/src/main/java/org/apache/joshua/ui/tree_visualizer/browser/Browser.java
@@ -74,7 +74,8 @@ public class Browser {
static final Color[] dataSetColors = { Color.red, Color.orange, Color.blue, Color.green };
/**
- * @param args the paths to the source, reference, and n-best files
+ * @param argv the paths to the source, reference, and n-best files
+ * @throws IOException if there is an error reading from standard input
*/
public static void main(String[] argv) throws IOException {
String sourcePath = argv.length > 0 ? argv[0] : null;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/ui/tree_visualizer/tree/Tree.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/ui/tree_visualizer/tree/Tree.java b/src/main/java/org/apache/joshua/ui/tree_visualizer/tree/Tree.java
index 9eb586e..662544b 100644
--- a/src/main/java/org/apache/joshua/ui/tree_visualizer/tree/Tree.java
+++ b/src/main/java/org/apache/joshua/ui/tree_visualizer/tree/Tree.java
@@ -48,232 +48,236 @@ import java.util.Comparator;
* Using a Tree the source sentence it was aligned to, we can create
* a DerivationTree object suitable for display.
*
- * @author Jonny Weese <jo...@cs.jhu.edu>
+ * @author Jonny Weese jonny@cs.jhu.edu
*/
public class Tree {
- /**
- * An array holding the label of each node of the tree, in depth-first order.
- * The label of a node means the NT label assigned to an internal node, or
- * the terminal symbol (English word) at a leaf.
- */
- private final String [] labels;
+ /**
+ * An array holding the label of each node of the tree, in depth-first order.
+ * The label of a node means the NT label assigned to an internal node, or
+ * the terminal symbol (English word) at a leaf.
+ */
+ private final String [] labels;
- /**
- * The number of children of each node of the tree, in depth-first order.
- */
- private final int [] numChildren;
+ /**
+ * The number of children of each node of the tree, in depth-first order.
+ */
+ private final int [] numChildren;
- /**
- * The smallest source-side index that each node covers, in depth-first order.
- * Note that we only have this information for internal nodes. For leaves,
- * this value will always be -1.
- */
- private final int [] sourceStartIndices;
+ /**
+ * The smallest source-side index that each node covers, in depth-first order.
+ * Note that we only have this information for internal nodes. For leaves,
+ * this value will always be -1.
+ */
+ private final int [] sourceStartIndices;
- /**
- * 1 + the largest source-side index that each node covers, in depth-first
- * order. Note that we only have this informaion for internal nodes. For
- * leaves, this value will always be -1.
- */
- private final int [] sourceEndIndices;
+ /**
+ * 1 + the largest source-side index that each node covers, in depth-first
+ * order. Note that we only have this information for internal nodes. For
+ * leaves, this value will always be -1.
+ */
+ private final int [] sourceEndIndices;
- /**
- * A pattern to match an aligned internal node and pull out its information.
- * This pattern matches:
- *
- * 1) start-of-string
- * 2) (
- * 3) an arbitrary sequence of non-whitespace characters (at least 1)
- * 4) {
- * 5) a decimal number
- * 6) -
- * 7) a decimal number
- * 8) }
- * 9) end-of-string
- *
- * That is, it matches something like "(FOO{32-55}". The string and two
- * decimal numbers (parts 3, 5, and 7) are captured in groups.
- */
- private static final Pattern NONTERMINAL_PATTERN =
- Pattern.compile("^\\((\\S+)\\{(\\d+)-(\\d+)\\}$");
+ /**
+ * A pattern to match an aligned internal node and pull out its information.
+ * This pattern matches:
+ *
+ * 1) start-of-string
+ * 2) (
+ * 3) an arbitrary sequence of non-whitespace characters (at least 1)
+ * 4) {
+ * 5) a decimal number
+ * 6) -
+ * 7) a decimal number
+ * 8) }
+ * 9) end-of-string
+ *
+ * That is, it matches something like "(FOO{32-55}". The string and two
+ * decimal numbers (parts 3, 5, and 7) are captured in groups.
+ */
+ private static final Pattern NONTERMINAL_PATTERN =
+ Pattern.compile("^\\((\\S+)\\{(\\d+)-(\\d+)\\}$");
- /**
- * Creates a Tree object from an input string in Penn treebank format with
- * source alignment annotations.
- */
- public Tree(String s) {
- final String [] tokens = s.replaceAll("\\)", " )").split("\\s+");
- int numNodes = 0;
- for (String t : tokens) {
- if (!t.equals(")")) {
- numNodes++;
- }
- }
- labels = new String[numNodes];
- numChildren = new int[numNodes];
- sourceStartIndices = new int[numNodes];
- sourceEndIndices = new int[numNodes];
- try {
- initialize(tokens);
- } catch (Exception e) {
- // This will catch most formatting errors.
- throw new IllegalArgumentException(
- String.format("couldn't create tree from string: \"%s\"", s),
- e);
- }
- }
+ /**
+ * Creates a Tree object from an input string in Penn treebank format with
+ * source alignment annotations.
+ * @param s an input string in Penn treebank format with source alignment annotations
+ */
+ public Tree(String s) {
+ final String [] tokens = s.replaceAll("\\)", " )").split("\\s+");
+ int numNodes = 0;
+ for (String t : tokens) {
+ if (!t.equals(")")) {
+ numNodes++;
+ }
+ }
+ labels = new String[numNodes];
+ numChildren = new int[numNodes];
+ sourceStartIndices = new int[numNodes];
+ sourceEndIndices = new int[numNodes];
+ try {
+ initialize(tokens);
+ } catch (Exception e) {
+ // This will catch most formatting errors.
+ throw new IllegalArgumentException(
+ String.format("couldn't create tree from string: \"%s\"", s),
+ e);
+ }
+ }
- private void initialize(String [] tokens) {
- final Stack<Integer> stack = new Stack<Integer>();
- int nodeIndex = 0;
- for (String token : tokens) {
- final Matcher matcher = NONTERMINAL_PATTERN.matcher(token);
- if (matcher.matches()) {
- // new non-terminal node
- labels[nodeIndex] = matcher.group(1);
- sourceStartIndices[nodeIndex] = Integer.parseInt(matcher.group(2));
- sourceEndIndices[nodeIndex] = Integer.parseInt(matcher.group(3));
- stack.push(nodeIndex);
- nodeIndex++;
- } else if (token.equals(")")) {
- // finished a subtree
- stack.pop();
- if (stack.empty()) {
- break;
- } else {
- numChildren[stack.peek()]++;
- }
- } else {
- // otherwise, it's a new leaf node
- labels[nodeIndex] = token;
- sourceStartIndices[nodeIndex] = -1;
- sourceEndIndices[nodeIndex] = -1;
- numChildren[stack.peek()]++;
- nodeIndex++;
- }
- }
- if (!stack.empty()) {
- // Not enough close-parentheses at the end of the tree.
- throw new IllegalArgumentException();
- }
- }
+ private void initialize(String [] tokens) {
+ final Stack<Integer> stack = new Stack<Integer>();
+ int nodeIndex = 0;
+ for (String token : tokens) {
+ final Matcher matcher = NONTERMINAL_PATTERN.matcher(token);
+ if (matcher.matches()) {
+ // new non-terminal node
+ labels[nodeIndex] = matcher.group(1);
+ sourceStartIndices[nodeIndex] = Integer.parseInt(matcher.group(2));
+ sourceEndIndices[nodeIndex] = Integer.parseInt(matcher.group(3));
+ stack.push(nodeIndex);
+ nodeIndex++;
+ } else if (token.equals(")")) {
+ // finished a subtree
+ stack.pop();
+ if (stack.empty()) {
+ break;
+ } else {
+ numChildren[stack.peek()]++;
+ }
+ } else {
+ // otherwise, it's a new leaf node
+ labels[nodeIndex] = token;
+ sourceStartIndices[nodeIndex] = -1;
+ sourceEndIndices[nodeIndex] = -1;
+ numChildren[stack.peek()]++;
+ nodeIndex++;
+ }
+ }
+ if (!stack.empty()) {
+ // Not enough close-parentheses at the end of the tree.
+ throw new IllegalArgumentException();
+ }
+ }
- /**
- * Return the number of nodes in this Tree.
- */
- public int size() {
- return labels.length;
- }
+ /**
+ * Return the number of nodes in this Tree.
+ * @return the number of nodes in this Tree
+ */
+ public int size() {
+ return labels.length;
+ }
- /**
- * Get the root Node of this Tree.
- */
- public Node root() {
- return new Node(0);
- }
+ /**
+ * Get the root Node of this Tree.
+ * @return the Node present at the root of this Tree
+ */
+ public Node root() {
+ return new Node(0);
+ }
- private List<Integer> childIndices(int index) {
- List<Integer> result = new ArrayList<Integer>();
- int remainingChildren = numChildren[index];
- int childIndex = index + 1;
- while (remainingChildren > 0) {
- result.add(childIndex);
- childIndex = nextSiblingIndex(childIndex);
- remainingChildren--;
- }
- return result;
- }
+ private List<Integer> childIndices(int index) {
+ List<Integer> result = new ArrayList<Integer>();
+ int remainingChildren = numChildren[index];
+ int childIndex = index + 1;
+ while (remainingChildren > 0) {
+ result.add(childIndex);
+ childIndex = nextSiblingIndex(childIndex);
+ remainingChildren--;
+ }
+ return result;
+ }
- private int nextSiblingIndex(int index) {
- int result = index + 1;
- int remainingChildren = numChildren[index];
- for (int i = 0; i < remainingChildren; i++) {
- result = nextSiblingIndex(result);
- }
- return result;
- }
+ private int nextSiblingIndex(int index) {
+ int result = index + 1;
+ int remainingChildren = numChildren[index];
+ for (int i = 0; i < remainingChildren; i++) {
+ result = nextSiblingIndex(result);
+ }
+ return result;
+ }
- public String yield() {
- String result = "";
- for (int i = 0; i < labels.length; i++) {
- if (numChildren[i] == 0) {
- if (!result.equals("")) {
- result += " ";
- }
- result += labels[i];
- }
- }
- return result;
- }
+ public String yield() {
+ String result = "";
+ for (int i = 0; i < labels.length; i++) {
+ if (numChildren[i] == 0) {
+ if (!result.equals("")) {
+ result += " ";
+ }
+ result += labels[i];
+ }
+ }
+ return result;
+ }
- @Override
- public String toString() {
- return root().toString();
- }
+ @Override
+ public String toString() {
+ return root().toString();
+ }
- /**
- * A class representing the Nodes of a tree.
- */
- public class Node {
+ /**
+ * A class representing the Nodes of a tree.
+ */
+ public class Node {
- /**
- * The index into the Tree class's internal arrays.
- */
- private final int index;
+ /**
+ * The index into the Tree class's internal arrays.
+ */
+ private final int index;
- private Node(int i) {
- index = i;
- }
+ private Node(int i) {
+ index = i;
+ }
- /**
- * Get the label for this node. If the node is internal to the tree, its
- * label is the non-terminal label assigned to it. If it is a leaf node,
- * the label is the English word at the leaf.
- */
- public String label() {
- return labels[index];
- }
+ /**
+ * Get the label for this node. If the node is internal to the tree, its
+ * label is the non-terminal label assigned to it. If it is a leaf node,
+ * the label is the English word at the leaf.
+ * @return a string representing the label for this node
+ */
+ public String label() {
+ return labels[index];
+ }
- public boolean isLeaf() {
- return numChildren[index] == 0;
- }
+ public boolean isLeaf() {
+ return numChildren[index] == 0;
+ }
- public int sourceStartIndex() {
- return sourceStartIndices[index];
- }
+ public int sourceStartIndex() {
+ return sourceStartIndices[index];
+ }
- public int sourceEndIndex() {
- return sourceEndIndices[index];
- }
+ public int sourceEndIndex() {
+ return sourceEndIndices[index];
+ }
- public List<Node> children() {
- List<Node> result = new ArrayList<Node>();
- for (int j : childIndices(index)) {
- result.add(new Node(j));
- }
- return result;
- }
+ public List<Node> children() {
+ List<Node> result = new ArrayList<Node>();
+ for (int j : childIndices(index)) {
+ result.add(new Node(j));
+ }
+ return result;
+ }
- @Override
- public String toString() {
- if (isLeaf()) {
- return label();
- }
- String result = String.format("(%s{%d-%d}",
- label(),
- sourceStartIndex(),
- sourceEndIndex());
- for (Node c : children()) {
- result += String.format(" %s", c);
- }
- return result + ")";
- }
- }
+ @Override
+ public String toString() {
+ if (isLeaf()) {
+ return label();
+ }
+ String result = String.format("(%s{%d-%d}",
+ label(),
+ sourceStartIndex(),
+ sourceEndIndex());
+ for (Node c : children()) {
+ result += String.format(" %s", c);
+ }
+ return result + ")";
+ }
+ }
- public static class NodeSourceStartComparator implements Comparator<Node> {
- public int compare(Node a, Node b) {
- return a.sourceStartIndex() - b.sourceStartIndex();
- }
- }
+ public static class NodeSourceStartComparator implements Comparator<Node> {
+ public int compare(Node a, Node b) {
+ return a.sourceStartIndex() - b.sourceStartIndex();
+ }
+ }
}