You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/06/01 02:51:34 UTC
[39/94] [abbrv] [partial] incubator-joshua git commit: Pulled
JOSHUA-252 changes and Resolved Merge Conflicts
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/tm/Grammar.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/Grammar.java b/src/main/java/org/apache/joshua/decoder/ff/tm/Grammar.java
index 57ec0a2..9748ba0 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/Grammar.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/Grammar.java
@@ -25,8 +25,8 @@ import org.apache.joshua.decoder.ff.FeatureFunction;
/**
* Grammar is a class for wrapping a trie of TrieGrammar in order to store holistic metadata.
*
- * @author wren ng thornton <wr...@users.sourceforge.net>
- * @author Zhifei Li, <zh...@gmail.com>
+ * @author wren ng thornton wren@users.sourceforge.net
+ * @author Zhifei Li, zhifei.work@gmail.com
*/
public interface Grammar {
@@ -45,7 +45,7 @@ public interface Grammar {
* <p>
* Cube-pruning requires that the grammar be sorted based on the latest feature functions.
*
- * @param weights The model weights.
+ * @param models list of {@link org.apache.joshua.decoder.ff.FeatureFunction}'s
*/
void sortGrammar(List<FeatureFunction> models);
@@ -73,6 +73,7 @@ public interface Grammar {
* @param pathLength Length of the input path in a source input lattice. If a source input phrase
* is used instead of a lattice, this value will likely be ignored by the underlying
* implementation, but would normally be defined as <code>endIndex-startIndex</code>
+ * @return true if there is a rule for this span
*/
boolean hasRuleForSpan(int startIndex, int endIndex, int pathLength);
@@ -93,6 +94,12 @@ public interface Grammar {
/**
* This is used to construct a manual rule supported from outside the grammar, but the owner
* should be the same as the grammar. Rule ID will the same as OOVRuleId, and no lattice cost
+ * @param lhs todo
+ * @param sourceWords todo
+ * @param targetWords todo
+ * @param scores todo
+ * @param arity todo
+ * @return the constructed {@link org.apache.joshua.decoder.ff.tm.Rule}
*/
@Deprecated
Rule constructManualRule(int lhs, int[] sourceWords, int[] targetWords, float[] scores, int arity);
@@ -100,7 +107,7 @@ public interface Grammar {
/**
* Dump the grammar to disk.
*
- * @param file
+ * @param file the file path to write to
*/
@Deprecated
void writeGrammarOnDisk(String file);
@@ -115,26 +122,28 @@ public interface Grammar {
/**
* Return the grammar's owner.
+ * @return grammar owner
*/
int getOwner();
/**
- * Return the maximum source phrase length (terminals + nonterminals).
+ * Return the maximum source phrase length (terminals + nonterminals)
+ * @return the maximum source phrase length
*/
int getMaxSourcePhraseLength();
/**
* Add an OOV rule for the requested word for the grammar.
*
- * @param word
- * @param featureFunctions
+ * @param word input word to add rules to
+ * @param featureFunctions a {@link java.util.List} of {@link org.apache.joshua.decoder.ff.FeatureFunction}'s
*/
void addOOVRules(int word, List<FeatureFunction> featureFunctions);
/**
* Add a rule to the grammar.
*
- * @param Rule the rule
+ * @param rule the {@link org.apache.joshua.decoder.ff.tm.Rule}
*/
void addRule(Rule rule);
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/tm/GrammarReader.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/GrammarReader.java b/src/main/java/org/apache/joshua/decoder/ff/tm/GrammarReader.java
index e340a85..5fc2576 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/GrammarReader.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/GrammarReader.java
@@ -97,7 +97,7 @@ public abstract class GrammarReader<R extends Rule> implements Iterable<R>, Iter
* For correct behavior <code>close</code> must be called on every GrammarReader, however this
* code attempts to avoid resource leaks.
*
- * @see joshua.util.io.LineReader
+ * @see org.apache.joshua.util.io.LineReader
*/
@Override
protected void finalize() throws Throwable {
@@ -173,7 +173,7 @@ public abstract class GrammarReader<R extends Rule> implements Iterable<R>, Iter
/**
* Removes square brackets (and index, if present) from nonterminal id
- * @param tokenID
+ * @param tokenID the int ID to clean
* @return cleaned ID
*/
public static int cleanNonTerminal(int tokenID) {
@@ -183,7 +183,7 @@ public abstract class GrammarReader<R extends Rule> implements Iterable<R>, Iter
/**
* Removes square brackets (and index, if present) from nonterminal id
- * @param token
+ * @param token the string ID to clean
* @return cleaned token
*/
public static String cleanNonTerminal(String token) {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/tm/MonolingualRule.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/MonolingualRule.java b/src/main/java/org/apache/joshua/decoder/ff/tm/MonolingualRule.java
index 812e669..e3aa999 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/MonolingualRule.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/MonolingualRule.java
@@ -29,7 +29,7 @@ import org.apache.joshua.decoder.ff.FeatureFunction;
/**
* this class implements MonolingualRule
*
- * @author Zhifei Li, <zh...@gmail.com>
+ * @author Zhifei Li, zhifei.work@gmail.com
* @version $LastChangedDate: 2010-02-10 09:59:38 -0600 (Wed, 10 Feb 2010) $
*/
public class MonolingualRule extends Rule {
@@ -90,9 +90,9 @@ public class MonolingualRule extends Rule {
* @param featureScores Feature value scores for the rule.
* @param arity Number of nonterminals in the source language
* right-hand side.
- * @param owner
- * @param latticeCost
- * @param ruleID
+ * @param owner todo
+ * @param latticeCost todo
+ * @param ruleID todo
*/
public MonolingualRule(int lhs, int[] sourceRhs, float[] featureScores, int arity, int owner, float latticeCost, int ruleID) {
this.lhs = lhs;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/tm/PhraseRule.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/PhraseRule.java b/src/main/java/org/apache/joshua/decoder/ff/tm/PhraseRule.java
index 1b8b871..f9097f8 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/PhraseRule.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/PhraseRule.java
@@ -22,17 +22,17 @@ import com.google.common.base.Supplier;
import com.google.common.base.Suppliers;
/***
- * A class for reading in rules from a Moses phrase table. Most of the conversion work is done
- * in {@link joshua.decoder.ff.tm.format.PhraseFormatReader}. This includes prepending every
+ * <p>A class for reading in rules from a Moses phrase table. Most of the conversion work is done
+ * in {@link org.apache.joshua.decoder.ff.tm.format.PhraseFormatReader}. This includes prepending every
* rule with a nonterminal, so that the phrase-based decoder can assume the same hypergraph
* format as the hierarchical decoder (by pretending to be a strictly left-branching grammar and
* dispensing with the notion of coverage spans). However, prepending the nonterminals means all
* the alignments are off by 1. We do not want to fix those when reading in due to the expense,
- * so instead we use this rule which adjust the alignments on the fly.
+ * so instead we use this rule which adjusts the alignments on the fly.</p>
*
- * Also, we only convert the Moses dense features on the fly, via this class.
+ * <p>Also, we only convert the Moses dense features on the fly, via this class.</p>
*
- * TODO: this class should also be responsible for prepending the nonterminals.
+ * <p>TODO: this class should also be responsible for prepending the nonterminals.</p>
*
* @author Matt Post
*
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java b/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java
index 06d4153..4db3972 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java
@@ -42,16 +42,11 @@ import org.apache.joshua.decoder.segment_file.Sentence;
* Note that not all features need to be negative log probs, but you should be aware that they
* will be negated, so if you want a positive count, it should come in as negative.
*
- * @author Zhifei Li, <zh...@gmail.com>
- */
-
-
-/**
* Normally, the feature score in the rule should be *cost* (i.e., -LogP), so that the feature
* weight should be positive
*
- * @author Zhifei Li, <zh...@gmail.com>
- * @author Matt Post <po...@cs.jhu.edu>
+ * @author Zhifei Li, zhifei.work@gmail.com
+ * @author Matt Post post@cs.jhu.edu
*/
public class Rule implements Comparator<Rule>, Comparable<Rule> {
@@ -100,7 +95,7 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
* @param targetRhs Target language right-hand side of the rule.
* @param sparseFeatures Feature value scores for the rule.
* @param arity Number of nonterminals in the source language right-hand side.
- * @param owner
+ * @param owner todo
*/
public Rule(int lhs, int[] sourceRhs, int[] targetRhs, String sparseFeatures, int arity, int owner) {
this.lhs = lhs;
@@ -114,7 +109,13 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
}
/**
- * Constructor used by PackedGrammar's sortRules().
+ * Constructor used by PackedGrammar's sortRules()
+ * @param lhs todo
+ * @param sourceRhs todo
+ * @param targetRhs todo
+ * @param features todo
+ * @param arity todo
+ * @param owner todo
*/
public Rule(int lhs, int[] sourceRhs, int[] targetRhs, FeatureVector features, int arity, int owner) {
this.lhs = lhs;
@@ -130,6 +131,11 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
/**
* Constructor used for SamtFormatReader and GrammarBuilderWalkerFunction's getRuleWithSpans()
* Owner set to -1
+ * @param lhs todo
+ * @param sourceRhs todo
+ * @param targetRhs todo
+ * @param sparseFeatures todo
+ * @param arity todo
*/
public Rule(int lhs, int[] sourceRhs, int[] targetRhs, String sparseFeatures, int arity) {
this(lhs, sourceRhs, targetRhs, sparseFeatures, arity, -1);
@@ -137,6 +143,12 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
/**
* Constructor used for addOOVRules(), HieroFormatReader and PhraseRule.
+ * @param lhs todo
+ * @param sourceRhs todo
+ * @param targetRhs todo
+ * @param sparseFeatures todo
+ * @param arity todo
+ * @param alignment todo
*/
public Rule(int lhs, int[] sourceRhs, int[] targetRhs, String sparseFeatures, int arity, String alignment) {
this(lhs, sourceRhs, targetRhs, sparseFeatures, arity);
@@ -283,6 +295,8 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
* This function returns the dense (phrasal) features discovered when the rule was loaded. Dense
* features are the list of unlabeled features that preceded labeled ones. They can also be
* specified as labeled features of the form "tm_OWNER_INDEX", but the former format is preferred.
+ *
+ * @return the {@link org.apache.joshua.decoder.ff.FeatureVector} for this rule
*/
public FeatureVector getFeatureVector() {
return featuresSupplier.get();
@@ -389,7 +403,7 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
/**
* Returns a version of the rule suitable for reading in from a text file.
*
- * @return
+ * @return string version of the rule
*/
public String textFormat() {
StringBuffer sb = new StringBuffer();
@@ -425,6 +439,8 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
/**
* Returns an alignment as a sequence of integers. The integers at positions i and i+1 are paired,
* with position i indexing the source and i+1 the target.
+ *
+ * @return a byte[] from the {@link com.google.common.base.Supplier}
*/
public byte[] getAlignment() {
return this.alignmentSupplier.get();
@@ -468,7 +484,7 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
/**
* Return the French (source) nonterminals as list of Strings
*
- * @return
+ * @return an int array of size getArity() holding the source nonterminals
*/
public int[] getForeignNonTerminals() {
int[] nts = new int[getArity()];
@@ -481,6 +497,8 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
/**
* Returns an array of size getArity() containing the source indeces of non terminals.
+ *
+ * @return an array of size getArity() containing the source indices of non terminals
*/
public int[] getNonTerminalSourcePositions() {
int[] nonTerminalPositions = new int[getArity()];
@@ -495,6 +513,8 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
/**
* Parses the Alignment byte[] into a Map from target to (possibly a list of) source positions.
* Used by the WordAlignmentExtractor.
+ *
+ * @return a {@link java.util.Map} of alignments
*/
public Map<Integer, List<Integer>> getAlignmentMap() {
byte[] alignmentArray = getAlignment();
@@ -515,7 +535,7 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
/**
* Return the English (target) nonterminals as list of Strings
*
- * @return
+ * @return an int array of size getArity() holding the target nonterminals
*/
public int[] getEnglishNonTerminals() {
int[] nts = new int[getArity()];
@@ -570,8 +590,8 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
/**
* Matches the string representation of the rule's source side against a sentence
*
- * @param sentence
- * @return
+ * @param sentence {@link org.apache.joshua.decoder.segment_file.Sentence} input
+ * @return true if there is a match
*/
public boolean matches(Sentence sentence) {
boolean match = getPattern().matcher(sentence.fullSource()).find();
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/tm/RuleCollection.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/RuleCollection.java b/src/main/java/org/apache/joshua/decoder/ff/tm/RuleCollection.java
index f527878..a45c41b 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/RuleCollection.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/RuleCollection.java
@@ -29,7 +29,7 @@ import org.apache.joshua.decoder.ff.FeatureFunction;
*
* @author Zhifei Li
* @author Lane Schwartz
- * @author Matt Post <po...@cs.jhu.edu>
+ * @author Matt Post post@cs.jhu.edu
*/
public interface RuleCollection {
@@ -37,18 +37,22 @@ public interface RuleCollection {
* Returns true if the rules are sorted. This is used to allow rules to be sorted in an amortized
* fashion; rather than sorting all trie nodes when the grammar is originally loaded, we sort them
* only as the decoder actually needs them.
+ * @return true if rules are sorted
*/
boolean isSorted();
/**
- * This returns a list of the rules, sorting them if necessary.
+ * This returns a list of the rules, sorting them if necessary.
*
- * Implementations of this function should be synchronized.
+ * Implementations of this function should be synchronized.
+ * @param models {@link java.util.List} of {@link org.apache.joshua.decoder.ff.FeatureFunction}'s
+ * @return the {@link java.util.List} of sorted rules
*/
List<Rule> getSortedRules(List<FeatureFunction> models);
/**
* Get the list of rules. There are no guarantees about whether they're sorted or not.
+ * @return the {@link java.util.List} of rules, there is no guarantee they will be sorted
*/
List<Rule> getRules();
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/tm/SentenceFilteredGrammar.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/SentenceFilteredGrammar.java b/src/main/java/org/apache/joshua/decoder/ff/tm/SentenceFilteredGrammar.java
index 0d1875b..ce04e15 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/SentenceFilteredGrammar.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/SentenceFilteredGrammar.java
@@ -32,7 +32,7 @@ import org.apache.joshua.decoder.segment_file.Sentence;
* trie, a subset of the original trie, that only contains trie paths that are reachable from
* traversals of the current sentence.
*
- * @author Matt Post <po...@cs.jhu.edu>
+ * @author Matt Post post@cs.jhu.edu
*/
public class SentenceFilteredGrammar extends MemoryBasedBatchGrammar {
private AbstractGrammar baseGrammar;
@@ -44,8 +44,8 @@ public class SentenceFilteredGrammar extends MemoryBasedBatchGrammar {
* Construct a new sentence-filtered grammar. The main work is done in the enclosed trie (obtained
* from the base grammar, which contains the complete grammar).
*
- * @param baseGrammar
- * @param sentence
+ * @param baseGrammar the {@link org.apache.joshua.decoder.ff.tm.AbstractGrammar} containing the complete grammar to filter
+ * @param sentence {@link org.apache.joshua.decoder.segment_file.Sentence} input
*/
SentenceFilteredGrammar(AbstractGrammar baseGrammar, Sentence sentence) {
super(baseGrammar.joshuaConfiguration);
@@ -90,8 +90,8 @@ public class SentenceFilteredGrammar extends MemoryBasedBatchGrammar {
/**
* A convenience function that counts the number of rules in a grammar's trie.
*
- * @param node
- * @return
+ * @param node the {@link org.apache.joshua.decoder.ff.tm.Trie} implementation for which to count rules
+ * @return the number of rules
*/
public int getNumRules(Trie node) {
int numRules = 0;
@@ -144,6 +144,7 @@ public class SentenceFilteredGrammar extends MemoryBasedBatchGrammar {
* subsequent ones would have to consume just one word. We then just have to record in the
* recursive call whether the last traversal was a nonterminal or not.
*
+ * @param unfilteredTrieRoot todo
* @return the root of the filtered trie
*/
private SentenceFilteredTrie filter(Trie unfilteredTrieRoot) {
@@ -246,6 +247,7 @@ public class SentenceFilteredGrammar extends MemoryBasedBatchGrammar {
* source side of each rule collection against the input sentence. Failed matches are discarded,
* and trie nodes extending from that position need not be explored.
*
+ * @param unfilteredTrie todo
* @return the root of the filtered trie if any rules were retained, otherwise null
*/
@SuppressWarnings("unused")
@@ -283,7 +285,7 @@ public class SentenceFilteredGrammar extends MemoryBasedBatchGrammar {
* Implements a filtered trie, by sitting on top of a base trie and annotating nodes that match
* the given input sentence.
*
- * @author Matt Post <po...@cs.jhu.edu>
+ * @author Matt Post post@cs.jhu.edu
*
*/
public class SentenceFilteredTrie implements Trie {
@@ -297,8 +299,7 @@ public class SentenceFilteredGrammar extends MemoryBasedBatchGrammar {
/**
* Constructor.
*
- * @param trieRoot
- * @param source
+ * @param unfilteredTrieNode todo
*/
public SentenceFilteredTrie(Trie unfilteredTrieNode) {
this.unfilteredTrieNode = unfilteredTrieNode;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/tm/Trie.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/Trie.java b/src/main/java/org/apache/joshua/decoder/ff/tm/Trie.java
index d2c54d9..51d2dd8 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/Trie.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/Trie.java
@@ -25,15 +25,15 @@ import java.util.Iterator;
/**
* An interface for trie-like data structures.
*
- * @author wren ng thornton <wr...@users.sourceforge.net>
- * @author Zhifei Li, <zh...@gmail.com>
+ * @author wren ng thornton wren@users.sourceforge.net
+ * @author Zhifei Li, zhifei.work@gmail.com
*/
public interface Trie {
/**
* Traverse one ply further down the trie. If there is no match, the result is null.
*
- * @param wordID
+ * @param wordID input word ID
* @return Child node of this trie
*/
Trie match(int wordID);
@@ -53,30 +53,30 @@ public interface Trie {
* null.
*
* @return A list of extended <code>Trie</code> nodes if this node has extensions,
- * <code>null<code>
+ * <code>null</code>
* otherwise
*/
Collection<? extends Trie> getExtensions();
/**
- * If the trie node has extensions, get a list of their labels.
+ * If the trie node has extensions, get a {@link java.util.HashMap} of their labels.
*
- * @return
+ * @return a {@link java.util.HashMap} of node extensions
*/
HashMap<Integer,? extends Trie> getChildren();
/**
* Returns an iterator over the trie node's extensions with terminal labels.
*
- * @return
+ * @return the {@link java.util.Iterator} created over the trie node's extensions with terminal labels
*/
Iterator<Integer> getTerminalExtensionIterator();
/**
* Returns an iterator over the trie node's extensions with nonterminal labels.
*
- * @return
+ * @return the {@link java.util.Iterator} created over the trie node's extensions with nonterminal labels
*/
Iterator<Integer> getNonterminalExtensionIterator();
@@ -100,6 +100,8 @@ public interface Trie {
* true.</li>
* <li>The collection must be sorted (at least as used by TMGrammar)</li>
* </ol>
+ * @return a {@link org.apache.joshua.decoder.ff.tm.RuleCollection} representing the rules
+ * at the current node/state
*/
RuleCollection getRuleCollection();
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/tm/format/HieroFormatReader.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/format/HieroFormatReader.java b/src/main/java/org/apache/joshua/decoder/ff/tm/format/HieroFormatReader.java
index a9507ad..04a206a 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/format/HieroFormatReader.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/format/HieroFormatReader.java
@@ -25,8 +25,7 @@ import org.apache.joshua.decoder.ff.tm.Rule;
/**
* This class implements reading files in the format defined by David Chiang for Hiero.
*
- * @author Unknown
- * @author Matt Post <po...@cs.jhu.edu>
+ * @author Matt Post post@cs.jhu.edu
*/
public class HieroFormatReader extends GrammarReader<Rule> {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/tm/format/PhraseFormatReader.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/format/PhraseFormatReader.java b/src/main/java/org/apache/joshua/decoder/ff/tm/format/PhraseFormatReader.java
index 4d37803..870683a 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/format/PhraseFormatReader.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/format/PhraseFormatReader.java
@@ -23,17 +23,17 @@ import org.apache.joshua.decoder.ff.tm.PhraseRule;
import org.apache.joshua.util.io.LineReader;
/***
- * This class reads in the Moses phrase table format, with support for the source and target side,
+ * <p>This class reads in the Moses phrase table format, with support for the source and target side,
* list of features, and word alignments. It works by simply casting the phrase-based rules to
- * left-branching hierarchical rules and passing them on to its parent class, {@HieroFormatReader}.
+ * left-branching hierarchical rules and passing them on to its parent class, {@link org.apache.joshua.decoder.ff.tm.format.HieroFormatReader}.</p>
*
- * There is also a tool to convert the grammars directly, so that they can be suitably packed. Usage:
+ * <p>There is also a tool to convert the grammars directly, so that they can be suitably packed. Usage:</p>
*
* <pre>
- * cat PHRASE_TABLE | java -cp $JOSHUA/class joshua.decoder.ff.tm.format.PhraseFormatReader > grammar
+ * cat PHRASE_TABLE | java -cp $JOSHUA/class org.apache.joshua.decoder.ff.tm.format.PhraseFormatReader > grammar
* </pre>
*
- * @author Matt Post <po...@cs.jhu.edu>
+ * @author Matt Post post@cs.jhu.edu
*
*/
@@ -116,7 +116,7 @@ public class PhraseFormatReader extends HieroFormatReader {
/**
* Converts a Moses phrase table to a Joshua grammar.
*
- * @param args
+ * @param args arguments required to do the conversion
*/
public static void main(String[] args) {
PhraseFormatReader reader = new PhraseFormatReader();
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/MemoryBasedBatchGrammar.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/MemoryBasedBatchGrammar.java b/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/MemoryBasedBatchGrammar.java
index 6ad6d50..f1078c4 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/MemoryBasedBatchGrammar.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/MemoryBasedBatchGrammar.java
@@ -44,8 +44,8 @@ import org.apache.joshua.util.FormatUtils;
* french sides so far (2) A HashMap of next-layer trie nodes, the next french word used as the key
* in HashMap
*
- * @author Zhifei Li <zh...@gmail.com>
- * @author Matt Post <post@cs.jhu.edu
+ * @author Zhifei Li zhifei.work@gmail.com
+ * @author Matt Post post@cs.jhu.edu
*/
public class MemoryBasedBatchGrammar extends AbstractGrammar {
@@ -254,8 +254,8 @@ public class MemoryBasedBatchGrammar extends AbstractGrammar {
/***
* Takes an input word and creates an OOV rule in the current grammar for that word.
*
- * @param sourceWord
- * @param featureFunctions
+ * @param sourceWord integer representation of word
+ * @param featureFunctions {@link java.util.List} of {@link org.apache.joshua.decoder.ff.FeatureFunction}'s
*/
@Override
public void addOOVRules(int sourceWord, List<FeatureFunction> featureFunctions) {
@@ -288,7 +288,7 @@ public class MemoryBasedBatchGrammar extends AbstractGrammar {
/**
* Adds a default set of glue rules.
*
- * @param featureFunctions
+ * @param featureFunctions an {@link java.util.ArrayList} of {@link org.apache.joshua.decoder.ff.FeatureFunction}'s
*/
public void addGlueRules(ArrayList<FeatureFunction> featureFunctions) {
HieroFormatReader reader = new HieroFormatReader();
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/MemoryBasedRuleBin.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/MemoryBasedRuleBin.java b/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/MemoryBasedRuleBin.java
index 2ab5843..f91df1e 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/MemoryBasedRuleBin.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/MemoryBasedRuleBin.java
@@ -24,7 +24,7 @@ import org.apache.joshua.decoder.ff.tm.Rule;
/**
* Stores a collection of all rules with the same french side (and thus same arity).
*
- * @author Zhifei Li, <zh...@gmail.com>
+ * @author Zhifei Li, zhifei.work@gmail.com
*/
public class MemoryBasedRuleBin extends BasicRuleCollection {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/MemoryBasedTrie.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/MemoryBasedTrie.java b/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/MemoryBasedTrie.java
index c14e54e..998688a 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/MemoryBasedTrie.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/MemoryBasedTrie.java
@@ -26,7 +26,7 @@ import org.apache.joshua.decoder.ff.tm.RuleCollection;
import org.apache.joshua.decoder.ff.tm.Trie;
/**
- * @author Zhifei Li, <zh...@gmail.com>
+ * @author Zhifei Li, zhifei.work@gmail.com
*/
public class MemoryBasedTrie implements Trie {
MemoryBasedRuleBin ruleBin = null;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/package-info.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/package-info.java b/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/package-info.java
new file mode 100644
index 0000000..695a0a4
--- /dev/null
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/package-info.java
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * Provides implementations of hierarchical phrase-based translation grammars.
+ */
+package org.apache.joshua.decoder.ff.tm.hash_based;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/package.html
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/package.html b/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/package.html
deleted file mode 100644
index 88ded5d..0000000
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/package.html
+++ /dev/null
@@ -1,17 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
-<html>
-<head></head>
-<body bgcolor="white">
-
-<!--
-##### THIS IS THE TEMPLATE FOR THE PACKAGE DOC COMMENTS. #####
-##### TYPE YOUR PACKAGE COMMENTS HERE. BEGIN WITH A #####
-##### ONE-SENTENCE SUMMARY STARTING WITH A VERB LIKE: #####
--->
-
-Provides implementations of hierarchical phrase-based translation grammars.
-
-<!-- Put @see and @since tags down here. -->
-
-</body>
-</html>
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/tm/package-info.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/package-info.java b/src/main/java/org/apache/joshua/decoder/ff/tm/package-info.java
new file mode 100644
index 0000000..b804db6
--- /dev/null
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/package-info.java
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * Defines interfaces and provides infrastructure for hierarchical
+ * phrase-based translation grammars.
+ */
+package org.apache.joshua.decoder.ff.tm;
+
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/tm/package.html
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/package.html b/src/main/java/org/apache/joshua/decoder/ff/tm/package.html
deleted file mode 100644
index bf99594..0000000
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/package.html
+++ /dev/null
@@ -1,17 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
-<html>
-<head></head>
-<body bgcolor="white">
-
-<!--
-##### THIS IS THE TEMPLATE FOR THE PACKAGE DOC COMMENTS. #####
-##### TYPE YOUR PACKAGE COMMENTS HERE. BEGIN WITH A #####
-##### ONE-SENTENCE SUMMARY STARTING WITH A VERB LIKE: #####
--->
-
-Defines interfaces and provides infrastructure for hierarchical phrase-based translation grammars.
-
-<!-- Put @see and @since tags down here. -->
-
-</body>
-</html>
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/tm/packed/PackedGrammar.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/packed/PackedGrammar.java b/src/main/java/org/apache/joshua/decoder/ff/tm/packed/PackedGrammar.java
index 77fb233..a90684e 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/packed/PackedGrammar.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/packed/PackedGrammar.java
@@ -192,6 +192,7 @@ public class PackedGrammar extends AbstractGrammar {
/**
* Computes the MD5 checksum of the vocabulary file.
* Can be used for comparing vocabularies across multiple packedGrammars.
+ * @return the computed checksum
*/
public String computeVocabularyChecksum() {
MessageDigest md;
@@ -801,7 +802,7 @@ public class PackedGrammar extends AbstractGrammar {
* to then put a nonterminal on the source and target sides to treat the phrase pairs like
* left-branching rules, which is how Joshua deals with phrase decoding.
*
- * @author Matt Post <po...@cs.jhu.edu>
+ * @author Matt Post post@cs.jhu.edu
*
*/
public final class PackedPhrasePair extends PackedRule {
@@ -856,7 +857,7 @@ public class PackedGrammar extends AbstractGrammar {
/**
* Take the English phrase of the underlying rule and prepend an [X].
*
- * @return
+ * @return the augmented English phrase
*/
@Override
public int[] getEnglish() {
@@ -866,7 +867,7 @@ public class PackedGrammar extends AbstractGrammar {
/**
* Take the French phrase of the underlying rule and prepend an [X].
*
- * @return
+ * @return the augmented French phrase
*/
@Override
public int[] getFrench() {
@@ -880,7 +881,7 @@ public class PackedGrammar extends AbstractGrammar {
/**
* Similarly the alignment array needs to be shifted over by one.
*
- * @return
+ * @return the byte[] alignment
*/
@Override
public byte[] getAlignment() {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/tm/packed/SliceAggregatingTrie.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/packed/SliceAggregatingTrie.java b/src/main/java/org/apache/joshua/decoder/ff/tm/packed/SliceAggregatingTrie.java
index 8054cda..c6d03a6 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/packed/SliceAggregatingTrie.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/packed/SliceAggregatingTrie.java
@@ -37,28 +37,29 @@ import org.apache.joshua.decoder.ff.tm.Trie;
import org.apache.joshua.decoder.ff.tm.hash_based.ExtensionIterator;
/**
- * SliceAggregatingTrie collapses multiple tries
- * with the same source root (i.e. tries from multiple packed slices).
+ * <p>SliceAggregatingTrie collapses multiple tries
+ * with the same source root (i.e. tries from multiple packed slices).</p>
*
- * Consider the example below.
+ * <p>Consider the example below.
* Without SliceAggregatingTries, the following grammar rules could only have been
- * partitioned by splitting rule lists when the first word of SOURCE changes. (">" markers).
+ * partitioned by splitting rule lists when the first word of SOURCE changes. (">" markers).</p>
*
- * Using a SliceAggregatingTrie allows splitting at changes of second SOURCE words (">>" marker).
+ * <p>Using a SliceAggregatingTrie allows splitting at changes of second SOURCE words (">>" marker).</p>
*
+ * <pre>
* EXAMPLE: (LHS ||| SOURCE ||| TARGET)
* [X] ||| - ||| -
- * >
+ * >
* [X] ||| [X] ||| [X]
- * >>
+ * >>
* [X] ||| [X] a ||| [X] A
* [X] ||| [X] a ||| [X] A
- * >>
+ * >>
* [X] ||| [X] b ||| [X] B
- * >
+ * >
* [X] ||| u ||| u
- *
- * A SliceAggregatingTrie node behaves just like a regular Trie node but subsumes a list of extensions/children.
+ * </pre>
+ * <p>A SliceAggregatingTrie node behaves just like a regular Trie node but subsumes a list of extensions/children.
* This class hides the complexity of having multiple tries with the same root
* from nodes one level up.
* Similar to PackedRoot, it maintains a lookup table of children's
@@ -70,7 +71,7 @@ import org.apache.joshua.decoder.ff.tm.hash_based.ExtensionIterator;
* must be found in exactly one of the subtries.
* (!) This assumption relies on the sort order of the packed grammar.
* If the grammar was incorrectly sorted and then packed, construction
- * of SliceAggregatingTrie nodes fails.
+ * of SliceAggregatingTrie nodes fails.</p>
*
* @author fhieber
*/
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/hypergraph/AlignedSourceTokens.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/hypergraph/AlignedSourceTokens.java b/src/main/java/org/apache/joshua/decoder/hypergraph/AlignedSourceTokens.java
index 6a4bed6..948001f 100644
--- a/src/main/java/org/apache/joshua/decoder/hypergraph/AlignedSourceTokens.java
+++ b/src/main/java/org/apache/joshua/decoder/hypergraph/AlignedSourceTokens.java
@@ -24,13 +24,17 @@ import java.util.ListIterator;
/**
* Class that represents a one to (possibly) many alignment from target to
* source. Extends from a LinkedList. Instances of this class are updated by the
- * WordAlignmentExtractor.substitute() method. The <shiftBy> method shifts the
+ * WordAlignmentExtractor.substitute() method.
+ * The {@link org.apache.joshua.decoder.hypergraph.AlignedSourceTokens#shiftBy(int, int)}
+ * method shifts the
* elements in the list by a scalar to reflect substitutions of non terminals in
* the rule. if indexes are final, i.e. the point instance has been substituted
- * into a parent WordAlignmentState once, <isFinal> is set to true. This is
+ * into a parent WordAlignmentState once,
+ * {@link org.apache.joshua.decoder.hypergraph.AlignedSourceTokens#isFinal} is set to true.
+ * This is
* necessary since the final source index of a point is known once we have
* substituted in a complete WordAlignmentState into its parent. If the index in
- * the list is a non terminal, <isNonTerminal> = true
+ * the list is a non terminal, {@link org.apache.joshua.decoder.hypergraph.AlignedSourceTokens#isNonTerminal} = true
*/
class AlignedSourceTokens extends LinkedList<Integer> {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/hypergraph/AllSpansWalker.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/hypergraph/AllSpansWalker.java b/src/main/java/org/apache/joshua/decoder/hypergraph/AllSpansWalker.java
index 1338414..1aad06f 100644
--- a/src/main/java/org/apache/joshua/decoder/hypergraph/AllSpansWalker.java
+++ b/src/main/java/org/apache/joshua/decoder/hypergraph/AllSpansWalker.java
@@ -27,7 +27,7 @@ import org.apache.joshua.corpus.Span;
* Uses {@link ForestWalker} to visit one {@link HGNode} per span of the chart. No guarantees are
* provided as to which HGNode will be visited in each span.
*
- * @author Matt Post <po...@cs.jhu.edu>
+ * @author Matt Post post@cs.jhu.edu
*
*/
@@ -42,8 +42,9 @@ public class AllSpansWalker {
* This function wraps a {@link ForestWalker}, preventing calls to its walker function for all but
* the first node reached for each span.
*
- * @param node
- * @param walker
+ * @param node the {@link org.apache.joshua.decoder.hypergraph.HGNode} we wish to walk
+ * @param walker the {@link org.apache.joshua.decoder.hypergraph.WalkerFunction}
+ * implementation to do the walking
*/
public void walk(HGNode node, final WalkerFunction walker) {
new ForestWalker().walk(node, new org.apache.joshua.decoder.hypergraph.WalkerFunction() {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/hypergraph/DefaultInsideOutside.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/hypergraph/DefaultInsideOutside.java b/src/main/java/org/apache/joshua/decoder/hypergraph/DefaultInsideOutside.java
index b429176..c6dae77 100644
--- a/src/main/java/org/apache/joshua/decoder/hypergraph/DefaultInsideOutside.java
+++ b/src/main/java/org/apache/joshua/decoder/hypergraph/DefaultInsideOutside.java
@@ -25,7 +25,7 @@ import java.util.HashMap;
* to use the functions here, one need to extend the class to provide a way to calculate the
* transitionLogP based on feature set
*
- * @author Zhifei Li, <zh...@gmail.com>
+ * @author Zhifei Li, zhifei.work@gmail.com
* @version $LastChangedDate$
*/
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/hypergraph/HGNode.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/hypergraph/HGNode.java b/src/main/java/org/apache/joshua/decoder/hypergraph/HGNode.java
index d9cd4a8..695cad5 100644
--- a/src/main/java/org/apache/joshua/decoder/hypergraph/HGNode.java
+++ b/src/main/java/org/apache/joshua/decoder/hypergraph/HGNode.java
@@ -28,8 +28,8 @@ import org.apache.joshua.decoder.ff.state_maintenance.DPState;
/**
* this class implement Hypergraph node (i.e., HGNode); also known as Item in parsing.
*
- * @author Zhifei Li, <zh...@gmail.com>
- * @author Juri Ganitkevitch, <ju...@cs.jhu.edu>
+ * @author Zhifei Li, zhifei.work@gmail.com
+ * @author Juri Ganitkevitch, juri@cs.jhu.edu
*/
// TODO: handle the case that the Hypergraph only maintains the one-best tree
@@ -93,6 +93,8 @@ public class HGNode {
* Adds the hyperedge to the list of incoming hyperedges (i.e., ways to form this node), creating
* the list if necessary. We then update the cache of the best incoming hyperedge via a call to
* the (obscurely named) semiringPlus().
+ * @param hyperEdge the {@link org.apache.joshua.decoder.hypergraph.HyperEdge} to add
+ * to the list of incoming hyperedges
*/
public void addHyperedgeInNode(HyperEdge hyperEdge) {
if (hyperEdge != null) {
@@ -106,6 +108,8 @@ public class HGNode {
/**
* Convenience function to add a list of hyperedges one at a time.
+ * @param hyperedges a {@link java.util.List} of {@link org.apache.joshua.decoder.hypergraph.HyperEdge}'s
+ * to add to the current HGNode.
*/
public void addHyperedgesInNode(List<HyperEdge> hyperedges) {
for (HyperEdge hyperEdge : hyperedges)
@@ -114,6 +118,7 @@ public class HGNode {
/**
* Updates the cache of the best incoming hyperedge.
+ * @param hyperEdge an incoming {@link org.apache.joshua.decoder.hypergraph.HyperEdge}
*/
public void semiringPlus(HyperEdge hyperEdge) {
if (null == bestHyperedge || bestHyperedge.getBestDerivationScore() < hyperEdge.getBestDerivationScore()) {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/hypergraph/HyperEdge.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/hypergraph/HyperEdge.java b/src/main/java/org/apache/joshua/decoder/hypergraph/HyperEdge.java
index 128ee68..d7bcc4d 100644
--- a/src/main/java/org/apache/joshua/decoder/hypergraph/HyperEdge.java
+++ b/src/main/java/org/apache/joshua/decoder/hypergraph/HyperEdge.java
@@ -26,8 +26,8 @@ import org.apache.joshua.decoder.ff.tm.Rule;
/**
* this class implement Hyperedge
*
- * @author Zhifei Li, <zh...@gmail.com>
- * @author Matt Post <po...@cs.jhu.edu>
+ * @author Zhifei Li, zhifei.work@gmail.com
+ * @author Matt Post post@cs.jhu.edu
*/
public class HyperEdge {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/hypergraph/HyperGraph.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/hypergraph/HyperGraph.java b/src/main/java/org/apache/joshua/decoder/hypergraph/HyperGraph.java
index e921027..516b347 100644
--- a/src/main/java/org/apache/joshua/decoder/hypergraph/HyperGraph.java
+++ b/src/main/java/org/apache/joshua/decoder/hypergraph/HyperGraph.java
@@ -39,7 +39,7 @@ import org.apache.joshua.decoder.segment_file.Sentence;
* Note: to seed the kbest extraction, each deduction should have the best_cost properly set. We do
* not require any list being sorted
*
- * @author Zhifei Li, <zh...@gmail.com>
+ * @author Zhifei Li, zhifei.work@gmail.com
*/
public class HyperGraph {
@@ -141,7 +141,8 @@ public class HyperGraph {
/**
* Dump the hypergraph to the specified file.
*
- * @param fileName
+ * @param fileName local file path
+ * @param model {@link java.util.List} of {@link org.apache.joshua.decoder.ff.FeatureFunction}'s
*/
public void dump(String fileName, List<FeatureFunction> model) {
try ( PrintWriter out = new PrintWriter(fileName, "UTF-8") ) {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/hypergraph/HyperGraphPruning.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/hypergraph/HyperGraphPruning.java b/src/main/java/org/apache/joshua/decoder/hypergraph/HyperGraphPruning.java
index ff44a25..27f5525 100644
--- a/src/main/java/org/apache/joshua/decoder/hypergraph/HyperGraphPruning.java
+++ b/src/main/java/org/apache/joshua/decoder/hypergraph/HyperGraphPruning.java
@@ -26,7 +26,7 @@ import org.apache.joshua.corpus.Vocabulary;
* during the pruning process, many Item/Deductions may not be explored at all due to the early-stop
* in pruning_deduction
*
- * @author Zhifei Li, <zh...@gmail.com>
+ * @author Zhifei Li, zhifei.work@gmail.com
* @version $LastChangedDate$
*/
public class HyperGraphPruning extends TrivialInsideOutside {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/hypergraph/KBestExtractor.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/hypergraph/KBestExtractor.java b/src/main/java/org/apache/joshua/decoder/hypergraph/KBestExtractor.java
index 324cf4c..9e7cbbb 100644
--- a/src/main/java/org/apache/joshua/decoder/hypergraph/KBestExtractor.java
+++ b/src/main/java/org/apache/joshua/decoder/hypergraph/KBestExtractor.java
@@ -47,49 +47,49 @@ import org.apache.joshua.util.FormatUtils;
import cern.colt.Arrays;
/**
- * This class implements lazy k-best extraction on a hyper-graph.
+ * <p>This class implements lazy k-best extraction on a hyper-graph.</p>
*
- * K-best extraction over hypergraphs is a little hairy, but is best understood in the following
+ * <p>K-best extraction over hypergraphs is a little hairy, but is best understood in the following
* manner. Imagine a hypergraph, which is composed of nodes connected by hyperedges. A hyperedge has
* exactly one parent node and 1 or more tail nodes, corresponding to the rank of the rule that gave
- * rise to the hyperedge. Each node has 1 or more incoming hyperedges.
+ * rise to the hyperedge. Each node has 1 or more incoming hyperedges.</p>
*
- * K-best extraction works in the following manner. A derivation is a set of nodes and hyperedges
+ * <p>K-best extraction works in the following manner. A derivation is a set of nodes and hyperedges
* that leads from the root node down and exactly covers the source-side sentence. To define a
* derivation, we start at the root node, choose one of its incoming hyperedges, and then recurse to
- * the tail (or antecedent) nodes of that hyperedge, where we continually make the same decision.
+ * the tail (or antecedent) nodes of that hyperedge, where we continually make the same decision.</p>
*
- * Each hypernode has its hyperedges sorted according to their model score. To get the best
+ * <p>Each hypernode has its hyperedges sorted according to their model score. To get the best
* (Viterbi) derivation, we simply recursively follow the best hyperedge coming in to each
- * hypernode.
+ * hypernode.</p>
*
- * How do we get the second-best derivation? It is defined by changing exactly one of the decisions
+ * <p>How do we get the second-best derivation? It is defined by changing exactly one of the decisions
* about which hyperedge to follow in the recursion. Somewhere, we take the second-best. Similarly,
* the third-best derivation makes a single change from the second-best: either making another
* (different) second-best choice somewhere along the 1-best derivation, or taking the third-best
- * choice at the same spot where the second-best derivation took the second-best choice. And so on.
+ * choice at the same spot where the second-best derivation took the second-best choice. And so on.</p>
*
- * This class uses two classes that encode the necessary meta-information. The first is the
+ * <p>This class uses two classes that encode the necessary meta-information. The first is the
* DerivationState class. It roughly corresponds to a hyperedge, and records, for each of that
* hyperedge's tail nodes, which-best to take. So for a hyperedge with three tail nodes, the 1-best
* derivation will be (1,1,1), the second-best will be one of (2,1,1), (1,2,1), or (1,1,2), the
- * third best will be one of
+ * third best will be one of</p>
*
- * (3,1,1), (2,2,1), (1,1,3)
+ * <code>(3,1,1), (2,2,1), (1,1,3)</code>
*
- * and so on.
+ * <p>and so on.</p>
*
- * The configuration parameter `output-format` controls what exactly is extracted from the forest.
+ * <p>The configuration parameter `output-format` controls what exactly is extracted from the forest.
* See documentation for that below. Note that Joshua does not store individual feature values while
* decoding, but only the cost of each edge (in the form of a float). Therefore, if you request
* the features values (`%f` in `output-format`), the feature functions must be replayed, which
- * is expensive.
+ * is expensive.</p>
*
- * The configuration parameter `top-n` controls how many items are returned. If this is set to 0,
- * k-best extraction should be turned off entirely.
+ * <p>The configuration parameter `top-n` controls how many items are returned. If this is set to 0,
+ * k-best extraction should be turned off entirely.</p>
*
- * @author Zhifei Li, <zh...@gmail.com>
- * @author Matt Post <po...@cs.jhu.edu>
+ * @author Zhifei Li, zhifei.work@gmail.com
+ * @author Matt Post post@cs.jhu.edu
*/
public class KBestExtractor {
private final JoshuaConfiguration joshuaConfiguration;
@@ -162,7 +162,12 @@ public class KBestExtractor {
* Compute the string that is output from the decoder, using the "output-format" config file
* parameter as a template.
*
- * You may need to reset_state() before you call this function for the first time.
+ * You may need to {@link org.apache.joshua.decoder.hypergraph.KBestExtractor#resetState()}
+ * before you call this function for the first time.
+ *
+ * @param node todo
+ * @param k todo
+ * @return todo
*/
public String getKthHyp(HGNode node, int k) {
@@ -228,9 +233,9 @@ public class KBestExtractor {
* If requested, projects source-side lettercase to target, and appends the alignment from
* to the source-side sentence in ||s.
*
- * @param hypothesis
- * @param state
- * @return
+ * @param hypothesis todo
+ * @param state todo
+ * @return the hypothesis, with source-side lettercase projected to the target and the alignment to the source-side sentence appended in ||s, if requested
*/
private String maybeProjectCase(String hypothesis, DerivationState state) {
String output = hypothesis;
@@ -260,6 +265,9 @@ public class KBestExtractor {
/**
* Convenience function for k-best extraction that prints to STDOUT.
+ * @param hg the {@link org.apache.joshua.decoder.hypergraph.HyperGraph} from which to extract
+ * @param topN how many to extract
+ * @throws IOException if there is an error writing the extraction
*/
public void lazyKBestExtractOnHG(HyperGraph hg, int topN) throws IOException {
lazyKBestExtractOnHG(hg, topN, new BufferedWriter(new OutputStreamWriter(System.out)));
@@ -279,7 +287,7 @@ public class KBestExtractor {
* @param hg the hypergraph to extract from
* @param topN how many to extract
* @param out object to write to
- * @throws IOException
+ * @throws IOException if there is an error writing the extraction
*/
public void lazyKBestExtractOnHG(HyperGraph hg, int topN, BufferedWriter out) throws IOException {
@@ -308,11 +316,13 @@ public class KBestExtractor {
}
/**
- * Returns the VirtualNode corresponding to an HGNode. If no such VirtualNode exists, it is
- * created.
+ * Returns the {@link org.apache.joshua.decoder.hypergraph.KBestExtractor.VirtualNode}
+ * corresponding to an {@link org.apache.joshua.decoder.hypergraph.HGNode}.
+ * If no such VirtualNode exists, it is created.
*
- * @param hgnode
- * @return the corresponding VirtualNode
+ * @param hgnode from which we wish to create a
+ * {@link org.apache.joshua.decoder.hypergraph.KBestExtractor.VirtualNode}
+ * @return the corresponding {@link org.apache.joshua.decoder.hypergraph.KBestExtractor.VirtualNode}
*/
private VirtualNode getVirtualNode(HGNode hgnode) {
VirtualNode virtualNode = virtualNodesTable.get(hgnode);
@@ -330,7 +340,6 @@ public class KBestExtractor {
* k-best derivations from that point on, retaining the derivations computed so far and a priority
* queue of candidates.
*/
-
private class VirtualNode {
// The node being annotated.
@@ -357,7 +366,7 @@ public class KBestExtractor {
/**
* This returns a DerivationState corresponding to the kth-best derivation rooted at this node.
*
- * @param kbestExtractor
+ * @param kbestExtractor todo
* @param k (indexed from one)
* @return the k-th best (1-indexed) hypothesis, or null if there are no more.
*/
@@ -651,7 +660,7 @@ public class KBestExtractor {
* assumption that the total number of words in the hypothesis scales linearly with the input
* sentence span.
*
- * @return
+ * @return float representing {@link org.apache.joshua.decoder.BLEU} score
*/
public float computeBLEU() {
if (stats == null) {
@@ -678,7 +687,7 @@ public class KBestExtractor {
* Returns the model cost. This is obtained by subtracting off the incorporated BLEU score (if
* used).
*
- * @return
+ * @return float representing model cost
*/
public float getModelCost() {
return this.cost;
@@ -687,7 +696,7 @@ public class KBestExtractor {
/**
* Returns the model cost plus the BLEU score.
*
- * @return
+ * @return float representing model cost plus the BLEU score
*/
public float getCost() {
return cost - weights.getSparse("BLEU") * bleu;
@@ -725,6 +734,7 @@ public class KBestExtractor {
/**
* DerivationState objects are unique to each VirtualNode, so the unique identifying information
* only need contain the edge position and the ranks.
+ * @return hash of the edge position and ranks
*/
public int hashCode() {
int hash = edgePos;
@@ -738,6 +748,8 @@ public class KBestExtractor {
/**
* Visits every state in the derivation in a depth-first order.
+ * @param visitor todo
+ * @return todo
*/
private DerivationVisitor visit(DerivationVisitor visitor) {
return visit(visitor, 0, 0);
@@ -808,9 +820,9 @@ public class KBestExtractor {
* function looks up the VirtualNode corresponding to the HGNode pointed to by the edge's
* {tailNodeIndex}th tail node.
*
- * @param edge
- * @param tailNodeIndex
- * @return
+ * @param edge todo
+ * @param tailNodeIndex todo
+ * @return todo
*/
public DerivationState getChildDerivationState(HyperEdge edge, int tailNodeIndex) {
HGNode child = edge.getTailNodes().get(tailNodeIndex);
@@ -840,7 +852,7 @@ public class KBestExtractor {
* way to do different things to the tree (e.g., extract its words, assemble a derivation, and so
* on) without having to rewrite the node-visiting code.
*
- * @author Matt Post <po...@cs.jhu.edu>
+ * @author Matt Post post@cs.jhu.edu
*/
public interface DerivationVisitor {
/**
@@ -953,7 +965,7 @@ public class KBestExtractor {
* Assembles an informative version of the derivation. Each rule is printed as it is encountered.
* Don't try to parse this output; make something that writes out JSON or something, instead.
*
- * @author Matt Post <post@cs.jhu.edu
+ * @author Matt Post post@cs.jhu.edu
*/
public class DerivationExtractor implements DerivationVisitor {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/hypergraph/TrivialInsideOutside.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/hypergraph/TrivialInsideOutside.java b/src/main/java/org/apache/joshua/decoder/hypergraph/TrivialInsideOutside.java
index 4f1d950..67be0c1 100644
--- a/src/main/java/org/apache/joshua/decoder/hypergraph/TrivialInsideOutside.java
+++ b/src/main/java/org/apache/joshua/decoder/hypergraph/TrivialInsideOutside.java
@@ -19,7 +19,7 @@
package org.apache.joshua.decoder.hypergraph;
/**
- * @author Zhifei Li, <zh...@gmail.com>
+ * @author Zhifei Li, zhifei.work@gmail.com
* @version $LastChangedDate$
*/
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/hypergraph/ViterbiExtractor.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/hypergraph/ViterbiExtractor.java b/src/main/java/org/apache/joshua/decoder/hypergraph/ViterbiExtractor.java
index b6e7166..734e0aa 100644
--- a/src/main/java/org/apache/joshua/decoder/hypergraph/ViterbiExtractor.java
+++ b/src/main/java/org/apache/joshua/decoder/hypergraph/ViterbiExtractor.java
@@ -28,8 +28,8 @@ import org.apache.joshua.decoder.ff.FeatureVector;
import org.apache.joshua.decoder.segment_file.Sentence;
/**
- * @author Zhifei Li, <zh...@gmail.com>
- * @author Matt Post <po...@cs.jhu.edu>
+ * @author Zhifei Li, zhifei.work@gmail.com
+ * @author Matt Post post@cs.jhu.edu
*/
public class ViterbiExtractor {
@@ -61,25 +61,33 @@ public class ViterbiExtractor {
}
}
}
-
+
public static void viterbiWalk(final HGNode node, final WalkerFunction walker) {
viterbiWalk(node, walker, 0);
}
-
+
/**
* Returns the Viterbi translation of the Hypergraph (includes sentence markers)
+ * @param hg a {@link org.apache.joshua.decoder.hypergraph.HyperGraph} we wish to
+ * obtain a Viterbi translation for
+ * @return a String Viterbi translation
*/
public static String getViterbiString(final HyperGraph hg) {
if (hg == null)
return "";
-
+
final WalkerFunction viterbiOutputStringWalker = new OutputStringExtractor(false);
viterbiWalk(hg.goalNode, viterbiOutputStringWalker);
return viterbiOutputStringWalker.toString();
}
-
+
/**
* Returns the Viterbi feature vector
+ * @param hg a {@link org.apache.joshua.decoder.hypergraph.HyperGraph} we wish to
+ * obtain Viterbi features for
+ * @param featureFunctions a {@link java.util.List} of {@link org.apache.joshua.decoder.ff.FeatureFunction}'s
+ * @param sentence the input {@link org.apache.joshua.decoder.segment_file.Sentence}
+ * @return a Viterbi {@link org.apache.joshua.decoder.ff.FeatureVector}
*/
public static FeatureVector getViterbiFeatures(
final HyperGraph hg,
@@ -87,38 +95,46 @@ public class ViterbiExtractor {
final Sentence sentence) {
if (hg == null)
return new FeatureVector();
-
+
final FeatureVectorExtractor extractor = new FeatureVectorExtractor(
featureFunctions, sentence);
- viterbiWalk(hg.goalNode, extractor);
- return extractor.getFeatures();
+ viterbiWalk(hg.goalNode, extractor);
+ return extractor.getFeatures();
}
-
+
/**
* Returns the Viterbi Word Alignments as String.
+ * @param hg input {@link org.apache.joshua.decoder.hypergraph.HyperGraph}
+ * @return the Viterbi Word Alignments as String
*/
public static String getViterbiWordAlignments(final HyperGraph hg) {
if (hg == null)
return "";
-
+
final WordAlignmentExtractor wordAlignmentWalker = new WordAlignmentExtractor();
viterbiWalk(hg.goalNode, wordAlignmentWalker);
return wordAlignmentWalker.toString();
}
-
+
/**
* Returns the Viterbi Word Alignments as list of lists (target-side).
+ * @param hg input {@link org.apache.joshua.decoder.hypergraph.HyperGraph}
+ * @return a {@link java.util.List} of Viterbi Word Alignments
*/
public static List<List<Integer>> getViterbiWordAlignmentList(final HyperGraph hg) {
if (hg == null)
return emptyList();
-
+
final WordAlignmentExtractor wordAlignmentWalker = new WordAlignmentExtractor();
viterbiWalk(hg.goalNode, wordAlignmentWalker);
return wordAlignmentWalker.getFinalWordAlignments();
}
-
- /** find 1best hypergraph */
+
+ /**
+ * find 1best hypergraph
+ * @param hg_in input {@link org.apache.joshua.decoder.hypergraph.HyperGraph}
+ * @return new best {@link org.apache.joshua.decoder.hypergraph.HyperGraph}
+ */
public static HyperGraph getViterbiTreeHG(HyperGraph hg_in) {
HyperGraph res =
new HyperGraph(cloneNodeWithBestHyperedge(hg_in.goalNode), -1, -1, null);
@@ -152,7 +168,7 @@ public class ViterbiExtractor {
List<HGNode> antNodes = null;
if (null != inEdge.getTailNodes()) {
antNodes = new ArrayList<HGNode>(inEdge.getTailNodes());// l_ant_items will be changed in
- // get_1best_tree_item
+ // get_1best_tree_item
}
HyperEdge res =
new HyperEdge(inEdge.getRule(), inEdge.getBestDerivationScore(), inEdge.getTransitionLogP(false),
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/hypergraph/WalkerFunction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/hypergraph/WalkerFunction.java b/src/main/java/org/apache/joshua/decoder/hypergraph/WalkerFunction.java
index 67bcfc2..811521c 100644
--- a/src/main/java/org/apache/joshua/decoder/hypergraph/WalkerFunction.java
+++ b/src/main/java/org/apache/joshua/decoder/hypergraph/WalkerFunction.java
@@ -28,6 +28,9 @@ public interface WalkerFunction {
* Function that is applied to node at tail node index nodeIndex.
* nodeIndex indicates the index of node in the list of tailnodes for the
* outgoing edge.
+ * @param node the {@link org.apache.joshua.decoder.hypergraph.HGNode} we
+ * wish to apply some Walker Function to.
+ * @param nodeIndex node in the list of tailnodes for the outgoing edge
*/
void apply(HGNode node, int nodeIndex);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/hypergraph/WordAlignmentExtractor.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/hypergraph/WordAlignmentExtractor.java b/src/main/java/org/apache/joshua/decoder/hypergraph/WordAlignmentExtractor.java
index 98937c4..04d0897 100644
--- a/src/main/java/org/apache/joshua/decoder/hypergraph/WordAlignmentExtractor.java
+++ b/src/main/java/org/apache/joshua/decoder/hypergraph/WordAlignmentExtractor.java
@@ -96,6 +96,7 @@ public class WordAlignmentExtractor implements WalkerFunction, DerivationVisitor
/**
* Final word alignment without sentence markers
* or empty list if stack is empty.
+ * @return a final alignment list
*/
public List<List<Integer>> getFinalWordAlignments() {
if (stack.isEmpty()) {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/hypergraph/WordAlignmentState.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/hypergraph/WordAlignmentState.java b/src/main/java/org/apache/joshua/decoder/hypergraph/WordAlignmentState.java
index 39700d2..5140652 100644
--- a/src/main/java/org/apache/joshua/decoder/hypergraph/WordAlignmentState.java
+++ b/src/main/java/org/apache/joshua/decoder/hypergraph/WordAlignmentState.java
@@ -83,6 +83,7 @@ public class WordAlignmentState {
/**
* if there are no more NonTerminals to substitute,
* this state is said to be complete
+ * @return true if complete
*/
public boolean isComplete() {
return numNT == 0;
@@ -91,6 +92,7 @@ public class WordAlignmentState {
/**
* builds the final alignment string in the standard alignment format: src -
* trg. Sorted by trg indexes. Disregards the sentence markers.
+ * @return result string
*/
public String toFinalString() {
StringBuilder sb = new StringBuilder();
@@ -111,6 +113,7 @@ public class WordAlignmentState {
* builds the final alignment list.
* each entry in the list corresponds to a list of aligned source tokens.
* First and last item in trgPoints is skipped.
+ * @return a final alignment list
*/
public List<List<Integer>> toFinalList() {
assert (isComplete() == true);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/hypergraph/package-info.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/hypergraph/package-info.java b/src/main/java/org/apache/joshua/decoder/hypergraph/package-info.java
new file mode 100644
index 0000000..05e66e2
--- /dev/null
+++ b/src/main/java/org/apache/joshua/decoder/hypergraph/package-info.java
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * Provides implementations of hypergraph data structures
+ * and related algorithms used in extracting translation
+ * results in hierarchical phrase-based translation.
+ */
+package org.apache.joshua.decoder.hypergraph;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/hypergraph/package.html
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/hypergraph/package.html b/src/main/java/org/apache/joshua/decoder/hypergraph/package.html
deleted file mode 100644
index 6fdd043..0000000
--- a/src/main/java/org/apache/joshua/decoder/hypergraph/package.html
+++ /dev/null
@@ -1,18 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
-<html>
-<head></head>
-<body bgcolor="white">
-
-<!--
-##### THIS IS THE TEMPLATE FOR THE PACKAGE DOC COMMENTS. #####
-##### TYPE YOUR PACKAGE COMMENTS HERE. BEGIN WITH A #####
-##### ONE-SENTENCE SUMMARY STARTING WITH A VERB LIKE: #####
--->
-
-Provides implementations of hypergraph data structures and related algorithms
-used in extracting translation results in hierarchical phrase-based translation.
-
-<!-- Put @see and @since tags down here. -->
-
-</body>
-</html>
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/io/DeNormalize.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/io/DeNormalize.java b/src/main/java/org/apache/joshua/decoder/io/DeNormalize.java
index a90a7d2..cc6e839 100644
--- a/src/main/java/org/apache/joshua/decoder/io/DeNormalize.java
+++ b/src/main/java/org/apache/joshua/decoder/io/DeNormalize.java
@@ -26,18 +26,17 @@ import java.util.regex.Pattern;
* <UL>
* <LI>Capitalize the first character in the string</LI>
* <LI>Detokenize</LI>
- * <UL>
* <LI>Delete whitespace in front of periods and commas</LI>
* <LI>Join contractions</LI>
* <LI>Capitalize name titles (Mr Ms Miss Dr etc.)</LI>
- * <LI>TODO: Handle surrounding characters ([{<"''">}])</LI>
+ * <LI>TODO: Handle surrounding characters ([{<"''">}])</LI>
* <LI>TODO: Join multi-period abbreviations (e.g. M.Phil. i.e.)</LI>
* <LI>TODO: Handle ambiguities like "st.", which can be an abbreviation for both "Saint" and
* "street"</LI>
* <LI>TODO: Capitalize both the title and the name of a person, e.g. Mr. Morton (named entities
* should be demarcated).</LI>
* </UL>
- * </UL> <bold>N.B.</bold> These methods all assume that every translation result that will be
+ * <b>N.B.</b> These methods all assume that every translation result that will be
* denormalized has the following format:
* <UL>
* <LI>There is only one space between every pair of tokens</LI>
@@ -45,7 +44,6 @@ import java.util.regex.Pattern;
* <LI>There is no whitespace after the final token</LI>
* <LI>Standard spaces are the only type of whitespace</LI>
* </UL>
- * </UL>
*/
public class DeNormalize {
@@ -53,8 +51,8 @@ public class DeNormalize {
/**
* Apply all the denormalization methods to the normalized input line.
*
- * @param normalized
- * @return
+ * @param normalized a normalized input line
+ * @return the denormalized String
*/
public static String processSingleLine(String normalized) {
// The order in which the methods are applied could matter in some situations. E.g., a token to
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/io/TranslationRequestStream.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/io/TranslationRequestStream.java b/src/main/java/org/apache/joshua/decoder/io/TranslationRequestStream.java
index 32978e8..432f1fb 100644
--- a/src/main/java/org/apache/joshua/decoder/io/TranslationRequestStream.java
+++ b/src/main/java/org/apache/joshua/decoder/io/TranslationRequestStream.java
@@ -34,7 +34,7 @@ import org.apache.joshua.decoder.segment_file.Sentence;
* expects plain-text input, which can be plain sentences or PLF-encoded lattices. If
* '-input-type json' is passed to the decoder, it will instead read JSON objects from the input
* stream, with the following format:
- *
+ * <pre>
* {
* "data": {
* "translations": [
@@ -44,8 +44,8 @@ import org.apache.joshua.decoder.segment_file.Sentence;
* ]
* }
* }
- *
- * @author Matt Post <po...@cs.jhu.edu>
+ * </pre>
+ * @author Matt Post post@cs.jhu.edu
* @author orluke
*/
public class TranslationRequestStream {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/package-info.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/package-info.java b/src/main/java/org/apache/joshua/decoder/package-info.java
new file mode 100644
index 0000000..af1127b
--- /dev/null
+++ b/src/main/java/org/apache/joshua/decoder/package-info.java
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/**
+ * Provides infrastructure and wrapper code relevant to
+ * hierarchical phrase-based decoding for statistical machine
+ * translation. This package does not include an implementation
+ * of any actual decoding algorithm. Rather, such code is in
+ * child packages of this package.
+ */
+package org.apache.joshua.decoder;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/package.html
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/package.html b/src/main/java/org/apache/joshua/decoder/package.html
deleted file mode 100644
index fda252e..0000000
--- a/src/main/java/org/apache/joshua/decoder/package.html
+++ /dev/null
@@ -1,21 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
-<html>
-<head></head>
-<body bgcolor="white">
-
-<!--
-##### THIS IS THE TEMPLATE FOR THE PACKAGE DOC COMMENTS. #####
-##### TYPE YOUR PACKAGE COMMENTS HERE. BEGIN WITH A #####
-##### ONE-SENTENCE SUMMARY STARTING WITH A VERB LIKE: #####
--->
-
-Provides infrastructure and wrapper code used relevant to
-hierarchical phrase-based decoding for statistical machine translation.
-<p>
-This package does not include an implementation of any actual decoding algorithm.
-Rather, such code is in child packages of this package.
-
-<!-- Put @see and @since tags down here. -->
-
-</body>
-</html>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java b/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
index 69e1447..ee8a2a9 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
@@ -170,7 +170,7 @@ public class Candidate {
}
/**
- * This returns the target side {@link Phrase}, which is a {@link Rule} object. This is just a
+ * This returns the target side {@link org.apache.joshua.corpus.Phrase}, which is a {@link org.apache.joshua.decoder.ff.tm.Rule} object. This is just a
* convenience function that works by returning the phrase indexed in ranks[1].
*
* @return the phrase at position ranks[1]
@@ -194,7 +194,7 @@ public class Candidate {
/**
* Returns the bit vector of this hypothesis. The bit vector is computed by ORing the coverage
* vector of the tail node (hypothesis) and the source span of phrases in this candidate.
- * @return
+ * @return the bit vector of this hypothesis
*/
public Coverage getCoverage() {
Coverage cov = new Coverage(getHypothesis().getCoverage());
@@ -203,9 +203,9 @@ public class Candidate {
}
/**
- * Sets the result of a candidate (should just be moved to the constructor).
+ * Sets the result of a candidate (TODO should just be moved to the constructor).
*
- * @param result
+ * @param result the ComputeNodeResult to store in this candidate
*/
public void setResult(ComputeNodeResult result) {
this.result = result;
@@ -221,7 +221,7 @@ public class Candidate {
* The Future Cost item should probably just be implemented as another kind of feature function,
* but it would require some reworking of that interface, which isn't worth it.
*
- * @return
+ * @return the sum of the hypothesis score, the future cost delta, and the transition cost
*/
public float score() {
return getHypothesis().getScore() + future_delta + result.getTransitionCost();
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/phrase/Coverage.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Coverage.java b/src/main/java/org/apache/joshua/decoder/phrase/Coverage.java
index 9d8feb1..2c674fc 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Coverage.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Coverage.java
@@ -29,7 +29,7 @@ import org.apache.joshua.corpus.Span;
*/
public class Coverage {
-
+
// The index of the first uncovered word
private int firstZero;
@@ -45,12 +45,12 @@ public class Coverage {
firstZero = 0;
bits = new BitSet(INITIAL_LENGTH);
}
-
+
public Coverage(int firstZero) {
this.firstZero = firstZero;
bits = new BitSet(INITIAL_LENGTH);
}
-
+
/**
* Pretty-prints the coverage vector, making a guess about the length
*/
@@ -69,8 +69,7 @@ public class Coverage {
/**
* Initialize a coverage vector from another Coverage vector, creating a separate object.
*
- * @param firstZero
- * @param bits
+ * @param other an existing coverage vector from which to create a new coverage vector
*/
public Coverage(Coverage other) {
this.firstZero = other.firstZero;
@@ -81,14 +80,14 @@ public class Coverage {
* Turns on all bits from position start to position (end - 1), that is, in the range [start .. end).
* This is done relative to the current coverage vector, of course, which may not start at 0.
*
- * @param begin
- * @param end
+ * @param begin the first position to turn on (inclusive)
+ * @param end the position just past the last one to turn on (exclusive)
*/
public void set(int begin, int end) {
assert compatible(begin, end);
-// StringBuffer sb = new StringBuffer();
-// sb.append(String.format("SET(%d,%d) %s", begin, end, this));
+ // StringBuffer sb = new StringBuffer();
+ // sb.append(String.format("SET(%d,%d) %s", begin, end, this));
if (begin == firstZero) {
// A concatenation.
@@ -106,12 +105,13 @@ public class Coverage {
bits.or(pattern(begin, end));
}
-// sb.append(String.format(" -> %s", this));
-// System.err.println(sb);
+ // sb.append(String.format(" -> %s", this));
+ // System.err.println(sb);
}
-
+
/**
* Convenience function.
+ * @param span the span whose start and end are passed to {@code set(int, int)}
*/
public final void set(Span span) {
set(span.start, span.end);
@@ -134,7 +134,7 @@ public class Coverage {
}
return false;
}
-
+
/**
* Returns the source sentence index of the first uncovered word.
*
@@ -155,7 +155,7 @@ public class Coverage {
* Find the left bound of the gap in which the phrase [begin, ...) sits.
*
* @param begin the start index of the phrase being applied.
- * @return
+ * @return the left bound of the gap in which the phrase sits
*/
public int leftOpening(int begin) {
for (int i = begin - firstZero; i > 0; --i) {
@@ -173,12 +173,16 @@ public class Coverage {
/**
* LeftOpen() and RightOpen() find the larger gap in which a new source phrase pair sits.
* When using a phrase pair covering (begin, end), the pair
- *
+ * <pre>
* (LeftOpen(begin), RightOpen(end, sentence_length))
+ * </pre>
*
* provides this gap.
*
* Finds the right bound of the enclosing gap, or the end of sentence, whichever is less.
+ * @param end end of phrase pair
+ * @param sentenceLength length of sentence
+ * @return the right bound of the enclosing gap, or sentenceLength if no such bound is found
*/
public int rightOpening(int end, int sentenceLength) {
for (int i = end - firstZero; i < Math.min(64, sentenceLength - firstZero); i++) {
@@ -188,7 +192,7 @@ public class Coverage {
}
return sentenceLength;
}
-
+
/**
* Creates a bit vector with the same offset as the current coverage vector, flipping on
* bits begin..end.
@@ -198,7 +202,7 @@ public class Coverage {
* @return a bit vector (relative) with positions [begin..end) on
*/
public BitSet pattern(int begin, int end) {
-// System.err.println(String.format("pattern(%d,%d) %d %s %s", begin, end, firstZero, begin >= firstZero, toString()));
+ // System.err.println(String.format("pattern(%d,%d) %d %s %s", begin, end, firstZero, begin >= firstZero, toString()));
assert begin >= firstZero;
BitSet pattern = new BitSet(INITIAL_LENGTH);
pattern.set(begin - firstZero, end - firstZero);
@@ -208,12 +212,12 @@ public class Coverage {
/**
* Returns the underlying coverage bits.
*
- * @return
+ * @return {@link java.util.BitSet} vector of bits
*/
public BitSet getCoverage() {
return bits;
}
-
+
@Override
public boolean equals(Object obj) {
if (obj instanceof Coverage) {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/phrase/Future.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Future.java b/src/main/java/org/apache/joshua/decoder/phrase/Future.java
index 352a23e..aa24390 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Future.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Future.java
@@ -22,18 +22,19 @@ import org.apache.joshua.decoder.Decoder;
import org.apache.joshua.util.ChartSpan;
public class Future {
-
+
// Square matrix with half the values ignored.
private ChartSpan<Float> entries;
private int sentlen;
-
+
/**
* Computes bottom-up the best way to cover all spans of the input sentence, using the phrases
- * that have been assembled in a {@link PhraseChart}. Requires that there be a translation at least
- * for every word (which can be accomplished with a pass-through grammar).
+ * that have been assembled in a {@link org.apache.joshua.decoder.phrase.PhraseChart}.
+ * Requires that there be a translation at least for every word (which can be
+ * accomplished with a pass-through grammar).
*
- * @param chart
+ * @param chart an input {@link org.apache.joshua.decoder.phrase.PhraseChart}
*/
public Future(PhraseChart chart) {
@@ -50,7 +51,7 @@ public class Future {
// Insert phrases
int max_end = Math.min(begin + chart.MaxSourcePhraseLength(), chart.SentenceLength());
for (int end = begin + 1; end <= max_end; end++) {
-
+
// Moses doesn't include the cost of applying </s>, so force it to zero
if (begin == sentlen - 1 && end == sentlen)
setEntry(begin, end, 0.0f);
@@ -61,7 +62,7 @@ public class Future {
}
}
}
-
+
// All the phrases are in, now do minimum dynamic programming. Lengths 0 and 1 were already handled above.
for (int length = 2; length <= chart.SentenceLength(); length++) {
for (int begin = 1; begin <= chart.SentenceLength() - length; begin++) {
@@ -70,40 +71,44 @@ public class Future {
}
}
}
-
+
if (Decoder.VERBOSE >= 3) {
for (int i = 1; i < chart.SentenceLength(); i++)
for (int j = i + 1; j < chart.SentenceLength(); j++)
System.err.println(String.format("future cost from %d to %d is %.3f", i-1, j-2, getEntry(i, j)));
}
}
-
+
public float Full() {
-// System.err.println("Future::Full(): " + Entry(1, sentlen));
+ // System.err.println("Future::Full(): " + Entry(1, sentlen));
return getEntry(1, sentlen);
}
/**
* Calculate change in rest cost when the given coverage is to be covered.
- */
+ * @param coverage input {@link org.apache.joshua.decoder.phrase.Coverage} vector
+ * @param begin word at which to begin within a sentence
+ * @param end word at which to end within a sentence
+ * @return a float value representing a {@link Future} entry
+ */
public float Change(Coverage coverage, int begin, int end) {
int left = coverage.leftOpening(begin);
int right = coverage.rightOpening(end, sentlen);
-// System.err.println(String.format("Future::Change(%s, %d, %d) left %d right %d %.3f %.3f %.3f", coverage, begin, end, left, right,
-// Entry(left, begin), Entry(end, right), Entry(left, right)));
+ // System.err.println(String.format("Future::Change(%s, %d, %d) left %d right %d %.3f %.3f %.3f", coverage, begin, end, left, right,
+ // Entry(left, begin), Entry(end, right), Entry(left, right)));
return getEntry(left, begin) + getEntry(end, right) - getEntry(left, right);
}
-
+
private float getEntry(int begin, int end) {
assert end >= begin;
assert end < this.sentlen;
return entries.get(begin, end);
}
-
+
private void setEntry(int begin, int end, float value) {
assert end >= begin;
assert end < this.sentlen;
-// System.err.println(String.format("future cost from %d to %d is %.5f", begin, end, value));
+ // System.err.println(String.format("future cost from %d to %d is %.5f", begin, end, value));
entries.set(begin, end, value);
}
}