You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by le...@apache.org on 2016/05/18 05:31:50 UTC
[3/3] incubator-joshua git commit: JOSHUA-252 Make it possible to use
Maven to build Joshua
JOSHUA-252 Make it possible to use Maven to build Joshua
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/1fc0590e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/1fc0590e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/1fc0590e
Branch: refs/heads/JOSHUA-252
Commit: 1fc0590e9f6c77c2ff0efbe6453ddd39364141ad
Parents: ae47ca1
Author: Lewis John McGibbney <le...@gmail.com>
Authored: Tue May 17 22:36:57 2016 -0700
Committer: Lewis John McGibbney <le...@gmail.com>
Committed: Tue May 17 22:36:57 2016 -0700
----------------------------------------------------------------------
.../org/apache/joshua/corpus/BasicPhrase.java | 2 +-
.../java/org/apache/joshua/corpus/Phrase.java | 3 +
.../org/apache/joshua/corpus/SymbolTable.java | 3 +-
.../org/apache/joshua/corpus/Vocabulary.java | 2 +-
.../java/org/apache/joshua/decoder/BLEU.java | 35 ++++---
.../java/org/apache/joshua/decoder/Decoder.java | 9 +-
.../apache/joshua/decoder/DecoderThread.java | 4 +-
.../joshua/decoder/JoshuaConfiguration.java | 13 ++-
.../apache/joshua/decoder/JoshuaDecoder.java | 6 +-
.../joshua/decoder/NbestMinRiskReranker.java | 7 +-
.../joshua/decoder/StructuredTranslation.java | 5 +-
.../java/org/apache/joshua/decoder/Support.java | 2 +-
.../org/apache/joshua/decoder/Translation.java | 74 ++++++-------
.../org/apache/joshua/decoder/Translations.java | 6 +-
.../joshua/decoder/chart_parser/Chart.java | 6 +-
.../decoder/chart_parser/ComputeNodeResult.java | 18 +++-
.../chart_parser/ManualConstraintsHandler.java | 13 ++-
.../joshua/decoder/chart_parser/SourcePath.java | 2 +-
.../decoder/chart_parser/StateConstraint.java | 4 +-
.../joshua/decoder/ff/ArityPhrasePenalty.java | 4 +-
.../joshua/decoder/ff/FeatureFunction.java | 103 ++++++++++---------
.../apache/joshua/decoder/ff/FeatureVector.java | 32 ++++--
.../apache/joshua/decoder/ff/OOVPenalty.java | 2 +-
.../apache/joshua/decoder/ff/PhraseModel.java | 4 +-
.../apache/joshua/decoder/ff/SourcePathFF.java | 4 +-
.../apache/joshua/decoder/ff/StatefulFF.java | 4 +-
.../apache/joshua/decoder/ff/StatelessFF.java | 4 +-
.../apache/joshua/decoder/ff/TargetBigram.java | 2 +-
.../apache/joshua/decoder/ff/WordPenalty.java | 4 +-
.../decoder/ff/fragmentlm/FragmentLMFF.java | 49 +++++----
.../joshua/decoder/ff/fragmentlm/Tree.java | 54 +++++-----
.../joshua/decoder/ff/fragmentlm/Trees.java | 3 +
.../apache/joshua/decoder/ff/lm/AbstractLM.java | 15 +--
.../ff/lm/DefaultNGramLanguageModel.java | 10 +-
.../org/apache/joshua/decoder/ff/lm/KenLM.java | 16 +--
.../joshua/decoder/ff/lm/LanguageModelFF.java | 21 ++--
.../decoder/ff/lm/NGramLanguageModel.java | 20 ++--
.../ff/lm/StateMinimizingLanguageModel.java | 12 +--
.../BloomFilterLanguageModel.java | 1 +
.../joshua/decoder/ff/lm/buildin_lm/TrieLM.java | 6 +-
.../decoder/ff/state_maintenance/DPState.java | 4 +-
.../ff/state_maintenance/KenLMState.java | 4 +-
.../ff/state_maintenance/NgramDPState.java | 4 +-
.../joshua/decoder/ff/tm/AbstractGrammar.java | 16 +--
.../joshua/decoder/ff/tm/BilingualRule.java | 8 +-
.../apache/joshua/decoder/ff/tm/Grammar.java | 25 +++--
.../joshua/decoder/ff/tm/GrammarReader.java | 6 +-
.../joshua/decoder/ff/tm/MonolingualRule.java | 8 +-
.../apache/joshua/decoder/ff/tm/PhraseRule.java | 10 +-
.../org/apache/joshua/decoder/ff/tm/Rule.java | 48 ++++++---
.../joshua/decoder/ff/tm/RuleCollection.java | 10 +-
.../decoder/ff/tm/SentenceFilteredGrammar.java | 17 +--
.../org/apache/joshua/decoder/ff/tm/Trie.java | 18 ++--
.../decoder/ff/tm/format/HieroFormatReader.java | 3 +-
.../ff/tm/format/PhraseFormatReader.java | 12 +--
.../tm/hash_based/MemoryBasedBatchGrammar.java | 10 +-
.../ff/tm/hash_based/MemoryBasedRuleBin.java | 2 +-
.../ff/tm/hash_based/MemoryBasedTrie.java | 2 +-
.../decoder/ff/tm/packed/PackedGrammar.java | 9 +-
.../ff/tm/packed/SliceAggregatingTrie.java | 25 ++---
.../decoder/hypergraph/AlignedSourceTokens.java | 10 +-
.../decoder/hypergraph/AllSpansWalker.java | 7 +-
.../hypergraph/DefaultInsideOutside.java | 2 +-
.../joshua/decoder/hypergraph/HGNode.java | 9 +-
.../joshua/decoder/hypergraph/HyperEdge.java | 4 +-
.../joshua/decoder/hypergraph/HyperGraph.java | 5 +-
.../decoder/hypergraph/HyperGraphPruning.java | 2 +-
.../decoder/hypergraph/KBestExtractor.java | 88 +++++++++-------
.../hypergraph/TrivialInsideOutside.java | 2 +-
.../decoder/hypergraph/ViterbiExtractor.java | 48 ++++++---
.../decoder/hypergraph/WalkerFunction.java | 3 +
.../hypergraph/WordAlignmentExtractor.java | 1 +
.../decoder/hypergraph/WordAlignmentState.java | 3 +
.../apache/joshua/decoder/io/DeNormalize.java | 6 +-
.../decoder/io/TranslationRequestStream.java | 6 +-
.../apache/joshua/decoder/phrase/Candidate.java | 10 +-
.../apache/joshua/decoder/phrase/Coverage.java | 15 +--
.../joshua/decoder/phrase/Hypothesis.java | 6 +-
.../org/apache/joshua/decoder/phrase/Stack.java | 5 +-
.../decoder/segment_file/ConstraintRule.java | 18 ++--
.../decoder/segment_file/ConstraintSpan.java | 4 +-
.../joshua/decoder/segment_file/Sentence.java | 8 +-
.../joshua/decoder/segment_file/Token.java | 25 +++--
.../java/org/apache/joshua/lattice/Lattice.java | 12 +--
.../joshua/oracle/OracleExtractionHG.java | 2 +-
.../java/org/apache/joshua/oracle/SplitHg.java | 2 +-
.../org/apache/joshua/server/TcpServer.java | 3 -
.../joshua/subsample/AlignedSubsampler.java | 4 +-
.../org/apache/joshua/subsample/Alignment.java | 4 +-
.../org/apache/joshua/subsample/BiCorpus.java | 2 +-
.../joshua/subsample/BiCorpusFactory.java | 2 +-
.../org/apache/joshua/subsample/PhrasePair.java | 2 +-
.../apache/joshua/subsample/PhraseReader.java | 2 +-
.../apache/joshua/subsample/PhraseWriter.java | 2 +-
.../org/apache/joshua/subsample/Subsampler.java | 2 +-
.../apache/joshua/subsample/SubsamplerCLI.java | 2 +-
.../ui/tree_visualizer/browser/Browser.java | 2 +-
.../joshua/ui/tree_visualizer/tree/Tree.java | 2 +-
.../java/org/apache/joshua/util/ChartSpan.java | 10 +-
.../org/apache/joshua/util/ExtractTopCand.java | 10 +-
.../org/apache/joshua/util/FileUtility.java | 6 +-
.../org/apache/joshua/util/FormatUtils.java | 6 +-
.../org/apache/joshua/util/NullIterator.java | 2 +-
src/main/java/org/apache/joshua/util/Regex.java | 2 +-
.../org/apache/joshua/util/SocketUtility.java | 2 +-
.../apache/joshua/util/io/IndexedReader.java | 2 +-
.../org/apache/joshua/util/io/LineReader.java | 4 +-
.../org/apache/joshua/util/io/NullReader.java | 2 +-
.../joshua/util/io/ProgressInputStream.java | 2 +-
.../java/org/apache/joshua/util/io/Reader.java | 2 +-
110 files changed, 675 insertions(+), 532 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1fc0590e/src/main/java/org/apache/joshua/corpus/BasicPhrase.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/corpus/BasicPhrase.java b/src/main/java/org/apache/joshua/corpus/BasicPhrase.java
index f65f26f..f7f6be2 100644
--- a/src/main/java/org/apache/joshua/corpus/BasicPhrase.java
+++ b/src/main/java/org/apache/joshua/corpus/BasicPhrase.java
@@ -12,7 +12,7 @@ import java.util.ArrayList;
/**
* The simplest concrete implementation of Phrase.
*
- * @author wren ng thornton <wr...@users.sourceforge.net>
+ * @author wren ng thornton wren@users.sourceforge.net
* @version $LastChangedDate$
*/
public class BasicPhrase extends AbstractPhrase {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1fc0590e/src/main/java/org/apache/joshua/corpus/Phrase.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/corpus/Phrase.java b/src/main/java/org/apache/joshua/corpus/Phrase.java
index f22c8a5..5a06a8b 100644
--- a/src/main/java/org/apache/joshua/corpus/Phrase.java
+++ b/src/main/java/org/apache/joshua/corpus/Phrase.java
@@ -93,6 +93,9 @@ public interface Phrase extends Comparable<Phrase> {
* complete Phrase List.
*
* @see ArrayList#subList(int, int)
+ * @param start start position to begin new phrase
+ * @param end end position to end new phrase
+ * @return a new {@link org.apache.joshua.corpus.Phrase} object from the indexes provided.
*/
Phrase subPhrase(int start, int end);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1fc0590e/src/main/java/org/apache/joshua/corpus/SymbolTable.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/corpus/SymbolTable.java b/src/main/java/org/apache/joshua/corpus/SymbolTable.java
index d8b1694..b108477 100644
--- a/src/main/java/org/apache/joshua/corpus/SymbolTable.java
+++ b/src/main/java/org/apache/joshua/corpus/SymbolTable.java
@@ -164,8 +164,9 @@ public interface SymbolTable {
*
* @see #getUnknownWordID
* @return the unique integer identifier for wordString,
- * or the result of <code>getUnknownWordID<code>
+ * or the result of <code>getUnknownWordID</code>
* if wordString is not in the vocabulary
+ * @param wordString the word to retrieve the integer identifier
*/
int getID(String wordString);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1fc0590e/src/main/java/org/apache/joshua/corpus/Vocabulary.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/corpus/Vocabulary.java b/src/main/java/org/apache/joshua/corpus/Vocabulary.java
index bd91b31..f89a198 100644
--- a/src/main/java/org/apache/joshua/corpus/Vocabulary.java
+++ b/src/main/java/org/apache/joshua/corpus/Vocabulary.java
@@ -128,7 +128,7 @@ public class Vocabulary implements Externalizable {
* Get the id of the token if it already exists, new id is created otherwise.
*
* TODO: currently locks for every call. Separate constant (frozen) ids from
- * changing (e.g. OOV) ids. Constant ids could be immutable -> no locking.
+ * changing (e.g. OOV) ids. Constant ids could be immutable -&gt; no locking.
* Alternatively: could we use ConcurrentHashMap to not have to lock if
* actually contains it and only lock for modifications?
*/
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1fc0590e/src/main/java/org/apache/joshua/decoder/BLEU.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/BLEU.java b/src/main/java/org/apache/joshua/decoder/BLEU.java
index 43082a8..ec68f13 100644
--- a/src/main/java/org/apache/joshua/decoder/BLEU.java
+++ b/src/main/java/org/apache/joshua/decoder/BLEU.java
@@ -20,7 +20,6 @@ package org.apache.joshua.decoder;
import java.util.ArrayList;
import java.util.HashMap;
-import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
@@ -35,7 +34,7 @@ import org.apache.joshua.util.Regex;
/**
* this class implements: (1) sentence-level bleu, with smoothing
*
- * @author Zhifei Li, <zh...@gmail.com>
+ * @author Zhifei Li, zhifei.work@gmail.com
*/
public class BLEU {
// do_ngram_clip: consider global n-gram clip
@@ -47,11 +46,12 @@ public class BLEU {
// ====================multiple references
/**
*
- * @param refSents
- * @param hypSent
+ * @param refSents todo
+ * @param hypSent todo
* @param doNgramClip Should usually be true
* @param bleuOrder Should usually be 4
* @param useShortestRef Probably use false
+ * @return todo
*/
public static float computeSentenceBleu(String[] refSents, String hypSent, boolean doNgramClip,
int bleuOrder, boolean useShortestRef) {
@@ -92,6 +92,9 @@ public class BLEU {
/**
* words in the ngrams are using integer symbol ID
+ * @param refSents todo
+ * @param bleuOrder todo
+ * @return todo
* */
public static HashMap<String, Integer> constructMaxRefCountTable(String[] refSents, int bleuOrder) {
@@ -111,6 +114,8 @@ public class BLEU {
/**
* compute max_ref_count for each ngram in the reference sentences
+ * @param listRefNgramTbl todo
+ * @return todo
* */
public static HashMap<String, Integer> computeMaxRefCountTbl(
List<HashMap<String, Integer>> listRefNgramTbl) {
@@ -195,10 +200,7 @@ public class BLEU {
numNgramMatch[Regex.spaces.split(ngram).length - 1] += Support.findMin(
refNgramTbl.get(ngram), entry.getValue()); // ngram clip
} else {
- numNgramMatch[Regex.spaces.split(ngram).length - 1] += entry.getValue();// without
- // ngram
- // count
- // clipping
+ numNgramMatch[Regex.spaces.split(ngram).length - 1] += entry.getValue();// without ngram count clipping
}
}
}
@@ -257,6 +259,11 @@ public class BLEU {
/**
* speed consideration: assume hypNgramTable has a smaller size than referenceNgramTable does
+ * @param linearCorpusGainThetas todo
+ * @param hypLength todo
+ * @param hypNgramTable todo
+ * @param referenceNgramTable todo
+ * @return todo
*/
public static float computeLinearCorpusGain(float[] linearCorpusGainThetas, int hypLength,
Map<String, Integer> hypNgramTable, Map<String, Integer> referenceNgramTable) {
@@ -332,8 +339,10 @@ public class BLEU {
return res;
}
+ public static final int maxOrder = 4;
+
/**
- * Computes BLEU statistics incurred by a rule. This is (a) all ngram (n <= 4) for terminal rules
+ * Computes BLEU statistics incurred by a rule. This is (a) all ngram (n &lt;= 4) for terminal rules
* and (b) all ngrams overlying boundary points between terminals in the rule and ngram state from
* tail nodes.
*
@@ -347,13 +356,11 @@ public class BLEU {
*
* Of these, all but the first have a boundary point to consider.
*
- * @param rule the rule being applied
- * @param spanWidth the width of the span in the input sentence
+ * @param edge todo
+ * @param spanPct todo
* @param references the reference to compute statistics against
- * @return
+ * @return todo
*/
- public static final int maxOrder = 4;
-
public static Stats compute(HyperEdge edge, float spanPct, References references) {
Stats stats = new Stats();
// TODO: this should not be the span width, but the real ref scaled to the span percentage
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1fc0590e/src/main/java/org/apache/joshua/decoder/Decoder.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/Decoder.java b/src/main/java/org/apache/joshua/decoder/Decoder.java
index 43f845c..29ccce6 100644
--- a/src/main/java/org/apache/joshua/decoder/Decoder.java
+++ b/src/main/java/org/apache/joshua/decoder/Decoder.java
@@ -28,7 +28,6 @@ import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.lang.reflect.Constructor;
import java.util.ArrayList;
-import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
@@ -80,10 +79,10 @@ import org.apache.joshua.util.io.LineReader;
* Translations object). Translations itself is an iterator whose next() call blocks until the next
* translation is available.
*
- * @author Matt Post <po...@cs.jhu.edu>
- * @author Zhifei Li, <zh...@gmail.com>
- * @author wren ng thornton <wr...@users.sourceforge.net>
- * @author Lane Schwartz <do...@users.sourceforge.net>
+ * @author Matt Post post@cs.jhu.edu
+ * @author Zhifei Li, zhifei.work@gmail.com
+ * @author wren ng thornton wren@users.sourceforge.net
+ * @author Lane Schwartz dowobeha@users.sourceforge.net
*/
public class Decoder {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1fc0590e/src/main/java/org/apache/joshua/decoder/DecoderThread.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/DecoderThread.java b/src/main/java/org/apache/joshua/decoder/DecoderThread.java
index 4390a59..c5835fd 100644
--- a/src/main/java/org/apache/joshua/decoder/DecoderThread.java
+++ b/src/main/java/org/apache/joshua/decoder/DecoderThread.java
@@ -44,8 +44,8 @@ import org.apache.joshua.corpus.Vocabulary;
*
* The DecoderFactory class is responsible for launching the threads.
*
- * @author Matt Post <po...@cs.jhu.edu>
- * @author Zhifei Li, <zh...@gmail.com>
+ * @author Matt Post post@cs.jhu.edu
+ * @author Zhifei Li, zhifei.work@gmail.com
*/
public class DecoderThread extends Thread {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1fc0590e/src/main/java/org/apache/joshua/decoder/JoshuaConfiguration.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/JoshuaConfiguration.java b/src/main/java/org/apache/joshua/decoder/JoshuaConfiguration.java
index bf8cfb3..f498a56 100644
--- a/src/main/java/org/apache/joshua/decoder/JoshuaConfiguration.java
+++ b/src/main/java/org/apache/joshua/decoder/JoshuaConfiguration.java
@@ -43,8 +43,8 @@ import org.apache.joshua.util.io.LineReader;
* When adding new features to Joshua, any new configurable parameters should be added to this
* class.
*
- * @author Zhifei Li, <zh...@gmail.com>
- * @author Matt Post <po...@cs.jhu.edu>
+ * @author Zhifei Li, zhifei.work@gmail.com
+ * @author Matt Post post@cs.jhu.edu
*/
public class JoshuaConfiguration {
@@ -325,6 +325,8 @@ public class JoshuaConfiguration {
* To process command-line options, we write them to a file that looks like the config file, and
* then call readConfigFile() on it. It would be more general to define a class that sits on a
* stream and knows how to chop it up, but this was quicker to implement.
+ *
+ * @param options string array of command line options
*/
public void processCommandLineOptions(String[] options) {
try {
@@ -701,8 +703,13 @@ public class JoshuaConfiguration {
* equivalence classes on external use of parameter names, permitting arbitrary_under_scores and
* camelCasing in paramter names without forcing the user to memorize them all. Here are some
* examples of equivalent ways to refer to parameter names:
- *
+ * <pre>
* {pop-limit, poplimit, PopLimit, popLimit, pop_lim_it} {lmfile, lm-file, LM-FILE, lm_file}
+ * </pre>
+ *
+ * @param text the string to be normalized
+ * @return normalized key
+ *
*/
public static String normalize_key(String text) {
return text.replaceAll("[-_]", "").toLowerCase();
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1fc0590e/src/main/java/org/apache/joshua/decoder/JoshuaDecoder.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/JoshuaDecoder.java b/src/main/java/org/apache/joshua/decoder/JoshuaDecoder.java
index 8c0b10b..42b17d7 100644
--- a/src/main/java/org/apache/joshua/decoder/JoshuaDecoder.java
+++ b/src/main/java/org/apache/joshua/decoder/JoshuaDecoder.java
@@ -39,9 +39,9 @@ import org.apache.joshua.server.ServerThread;
* Implements decoder initialization, including interaction with <code>JoshuaConfiguration</code>
* and <code>DecoderThread</code>.
*
- * @author Zhifei Li, <zh...@gmail.com>
- * @author wren ng thornton <wr...@users.sourceforge.net>
- * @author Lane Schwartz <do...@users.sourceforge.net>
+ * @author Zhifei Li, zhifei.work@gmail.com
+ * @author wren ng thornton wren@users.sourceforge.net
+ * @author Lane Schwartz dowobeha@users.sourceforge.net
*/
public class JoshuaDecoder {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1fc0590e/src/main/java/org/apache/joshua/decoder/NbestMinRiskReranker.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/NbestMinRiskReranker.java b/src/main/java/org/apache/joshua/decoder/NbestMinRiskReranker.java
index b2126cb..e2061b0 100644
--- a/src/main/java/org/apache/joshua/decoder/NbestMinRiskReranker.java
+++ b/src/main/java/org/apache/joshua/decoder/NbestMinRiskReranker.java
@@ -42,7 +42,7 @@ import org.apache.joshua.util.Regex;
* uses a Viterbi approximation: the probability of a string is its best derivation probability So,
* if one want to deal with spurious ambiguity, he/she should do that before calling this class
*
- * @author Zhifei Li, <zh...@gmail.com>
+ * @author Zhifei Li, zhifei.work@gmail.com
*/
public class NbestMinRiskReranker {
@@ -182,7 +182,10 @@ public class NbestMinRiskReranker {
/**
* based on a list of log-probabilities in nbestLogProbs, obtain a normalized distribution, and
* put the normalized probability (real value in [0,1]) into nbestLogProbs
- * */
+ *
+ * @param nbestLogProbs a {@link java.util.List} of {@link java.lang.Double} representing nbestLogProbs
+ * @param scalingFactor double value representing scaling factor
+ */
// get a normalized distributeion and put it back to nbestLogProbs
static public void computeNormalizedProbs(List<Double> nbestLogProbs, double scalingFactor) {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1fc0590e/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java b/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java
index 75e6ab4..bdfde54 100644
--- a/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java
+++ b/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java
@@ -33,10 +33,9 @@ import org.apache.joshua.decoder.hypergraph.HyperGraph;
import org.apache.joshua.decoder.segment_file.Sentence;
/**
- * structuredTranslation provides a more structured access to translation
+ * <p>structuredTranslation provides a more structured access to translation
* results than the Translation class.
- * Members of instances of this class can be used upstream.
- * <br/>
+ * Members of instances of this class can be used upstream.</p>
* TODO:
* Enable K-Best extraction.
*
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1fc0590e/src/main/java/org/apache/joshua/decoder/Support.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/Support.java b/src/main/java/org/apache/joshua/decoder/Support.java
index 7c4a0b2..22bfe89 100644
--- a/src/main/java/org/apache/joshua/decoder/Support.java
+++ b/src/main/java/org/apache/joshua/decoder/Support.java
@@ -21,7 +21,7 @@ package org.apache.joshua.decoder;
import java.util.List;
/**
- * @author Zhifei Li, <zh...@gmail.com>
+ * @author Zhifei Li, zhifei.work@gmail.com
*/
public class Support {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1fc0590e/src/main/java/org/apache/joshua/decoder/Translation.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/Translation.java b/src/main/java/org/apache/joshua/decoder/Translation.java
index 10852e5..0b632ba 100644
--- a/src/main/java/org/apache/joshua/decoder/Translation.java
+++ b/src/main/java/org/apache/joshua/decoder/Translation.java
@@ -41,7 +41,7 @@ import org.apache.joshua.decoder.segment_file.Sentence;
* sentence and id and contains the decoded hypergraph. Translation objects are returned by
* DecoderThread instances to the InputHandler, where they are assembled in order for output.
*
- * @author Matt Post <po...@cs.jhu.edu>
+ * @author Matt Post post@cs.jhu.edu
*/
public class Translation {
@@ -54,17 +54,17 @@ public class Translation {
private String output = null;
private StructuredTranslation structuredTranslation = null;
-
+
public Translation(Sentence source, HyperGraph hypergraph,
List<FeatureFunction> featureFunctions, JoshuaConfiguration joshuaConfiguration) {
this.source = source;
-
+
if (joshuaConfiguration.use_structured_output) {
-
+
structuredTranslation = new StructuredTranslation(
source, hypergraph, featureFunctions);
this.output = structuredTranslation.getTranslationString();
-
+
} else {
StringWriter sw = new StringWriter();
@@ -81,15 +81,15 @@ public class Translation {
// We must put this weight as zero, otherwise we get an error when we try to retrieve it
// without checking
Decoder.weights.increment("BLEU", 0);
-
+
if (joshuaConfiguration.topN == 0) {
-
+
/* construct Viterbi output */
final String best = getViterbiString(hypergraph);
-
+
Decoder.LOG(1, String.format("Translation %d: %.3f %s", source.id(), hypergraph.goalNode.getScore(),
best));
-
+
/*
* Setting topN to 0 turns off k-best extraction, in which case we need to parse through
* the output-string, with the understanding that we can only substitute variables for the
@@ -100,21 +100,21 @@ public class Translation {
.replace("%S", DeNormalize.processSingleLine(best))
.replace("%c", String.format("%.3f", hypergraph.goalNode.getScore()))
.replace("%i", String.format("%d", source.id()));
-
+
if (joshuaConfiguration.outputFormat.contains("%a")) {
translation = translation.replace("%a", getViterbiWordAlignments(hypergraph));
}
-
+
if (joshuaConfiguration.outputFormat.contains("%f")) {
final FeatureVector features = getViterbiFeatures(hypergraph, featureFunctions, source);
translation = translation.replace("%f", joshuaConfiguration.moses ? features.mosesString() : features.toString());
}
-
+
out.write(translation);
out.newLine();
-
+
} else {
-
+
final KBestExtractor kBestExtractor = new KBestExtractor(
source, featureFunctions, Decoder.weights, false, joshuaConfiguration);
kBestExtractor.lazyKBestExtractOnHG(hypergraph, joshuaConfiguration.topN, out);
@@ -132,32 +132,32 @@ public class Translation {
Decoder.LOG(1, String.format("Input %d: %d-best extraction took %.3f seconds", id(),
joshuaConfiguration.topN, seconds));
- } else {
-
- // Failed translations and blank lines get empty formatted outputs
- // @formatter:off
- String outputString = joshuaConfiguration.outputFormat
- .replace("%s", source.source())
- .replace("%e", "")
- .replace("%S", "")
- .replace("%t", "()")
- .replace("%i", Integer.toString(source.id()))
- .replace("%f", "")
- .replace("%c", "0.000");
- // @formatter:on
-
- out.write(outputString);
- out.newLine();
- }
+ } else {
+
+ // Failed translations and blank lines get empty formatted outputs
+ // @formatter:off
+ String outputString = joshuaConfiguration.outputFormat
+ .replace("%s", source.source())
+ .replace("%e", "")
+ .replace("%S", "")
+ .replace("%t", "()")
+ .replace("%i", Integer.toString(source.id()))
+ .replace("%f", "")
+ .replace("%c", "0.000");
+ // @formatter:on
+
+ out.write(outputString);
+ out.newLine();
+ }
out.flush();
} catch (IOException e) {
e.printStackTrace();
System.exit(1);
}
-
+
this.output = sw.toString();
-
+
}
/*
@@ -170,7 +170,7 @@ public class Translation {
break;
}
}
-
+
}
public Sentence getSourceSentence() {
@@ -185,12 +185,12 @@ public class Translation {
public String toString() {
return output;
}
-
+
/**
* Returns the StructuredTranslation object
* if JoshuaConfiguration.construct_structured_output == True.
* @throws RuntimeException if StructuredTranslation object not set.
- * @return
+ * @return {@link org.apache.joshua.decoder.StructuredTranslation} object
*/
public StructuredTranslation getStructuredTranslation() {
if (structuredTranslation == null) {
@@ -198,5 +198,5 @@ public class Translation {
}
return structuredTranslation;
}
-
+
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1fc0590e/src/main/java/org/apache/joshua/decoder/Translations.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/Translations.java b/src/main/java/org/apache/joshua/decoder/Translations.java
index 7dd9086..0b91ff9 100644
--- a/src/main/java/org/apache/joshua/decoder/Translations.java
+++ b/src/main/java/org/apache/joshua/decoder/Translations.java
@@ -30,7 +30,7 @@ import org.apache.joshua.decoder.io.TranslationRequestStream;
* Translation in the right place. When the next translation in a sequence is available, next() is
* notified.
*
- * @author Matt Post <po...@cs.jhu.edu>
+ * @author Matt Post post@cs.jhu.edu
*/
public class Translations {
@@ -73,7 +73,7 @@ public class Translations {
* the ID of the translation is the same as the one being waited for (currentID). If so, the
* thread waiting for it is notified.
*
- * @param translation
+ * @param translation a translated input object
*/
public void record(Translation translation) {
synchronized (this) {
@@ -98,6 +98,8 @@ public class Translations {
/**
* Returns the next Translation, blocking if necessary until it's available, since the next
* Translation might not have been produced yet.
+ *
+ * @return first element from the list of {@link org.apache.joshua.decoder.Translation}'s
*/
public Translation next() {
synchronized (this) {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1fc0590e/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java b/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java
index 0825ccb..0bc2f9f 100644
--- a/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java
+++ b/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java
@@ -60,8 +60,8 @@ import org.apache.joshua.util.ChartSpan;
* index of sentences: start from zero index of cell: cell (i,j) represent span
* of words indexed [i,j-1] where i is in [0,n-1] and j is in [1,n]
*
- * @author Zhifei Li, <zh...@gmail.com>
- * @author Matt Post <po...@cs.jhu.edu>
+ * @author Zhifei Li, zhifei.work@gmail.com
+ * @author Matt Post post@cs.jhu.edu
*/
public class Chart {
@@ -728,7 +728,7 @@ public class Chart {
}
/***
- * Add a terminal production (X -> english phrase) to the hypergraph.
+ * Add a terminal production (X -&gt; english phrase) to the hypergraph.
*
* @param i the start index
* @param j stop index
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1fc0590e/src/main/java/org/apache/joshua/decoder/chart_parser/ComputeNodeResult.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/chart_parser/ComputeNodeResult.java b/src/main/java/org/apache/joshua/decoder/chart_parser/ComputeNodeResult.java
index eeb6366..ce0b190 100644
--- a/src/main/java/org/apache/joshua/decoder/chart_parser/ComputeNodeResult.java
+++ b/src/main/java/org/apache/joshua/decoder/chart_parser/ComputeNodeResult.java
@@ -35,8 +35,8 @@ import org.apache.joshua.decoder.segment_file.Sentence;
/**
* This class computes the cost of applying a rule.
*
- * @author Matt Post <po...@cs.jhu.edu>
- * @author Zhifei Li, <zh...@gmail.com>
+ * @author Matt Post post@cs.jhu.edu
+ * @author Zhifei Li, zhifei.work@gmail.com
*/
public class ComputeNodeResult {
@@ -130,7 +130,8 @@ public class ComputeNodeResult {
}
/**
- * This is called from Cell.java when making the final transition to the goal state.
+ * This is called from {@link org.apache.joshua.decoder.chart_parser.Cell}
+ * when making the final transition to the goal state.
* This is done to allow feature functions to correct for partial estimates, since
* they now have the knowledge that the whole sentence is complete. Basically, this
* is only used by LanguageModelFF, which does not score partial n-grams, and therefore
@@ -140,6 +141,14 @@ public class ComputeNodeResult {
* too: it makes search better (more accurate at the beginning, for example), and would
* also do away with the need for the computeFinal* class of functions (and hooks in
* the feature function interface).
+ *
+ * @param featureFunctions {@link java.util.List} of {@link org.apache.joshua.decoder.ff.FeatureFunction}'s
+ * @param tailNodes {@link java.util.List} of {@link org.apache.joshua.decoder.hypergraph.HGNode}'s
+ * @param i todo
+ * @param j todo
+ * @param sourcePath information about a path taken through the source lattice
+ * @param sentence the lattice input
+ * @return the final cost for the Node
*/
public static float computeFinalCost(List<FeatureFunction> featureFunctions,
List<HGNode> tailNodes, int i, int j, SourcePath sourcePath, Sentence sentence) {
@@ -176,6 +185,7 @@ public class ComputeNodeResult {
/**
* The complete cost of the Viterbi derivation at this point
+ * @return float representing cost
*/
public float getViterbiCost() {
return this.viterbiCost;
@@ -188,7 +198,7 @@ public class ComputeNodeResult {
/**
* The cost incurred by this edge alone
*
- * @return
+ * @return float representing cost
*/
public float getTransitionCost() {
return this.transitionCost;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1fc0590e/src/main/java/org/apache/joshua/decoder/chart_parser/ManualConstraintsHandler.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/chart_parser/ManualConstraintsHandler.java b/src/main/java/org/apache/joshua/decoder/chart_parser/ManualConstraintsHandler.java
index 38e9f4a..06a14ee 100644
--- a/src/main/java/org/apache/joshua/decoder/chart_parser/ManualConstraintsHandler.java
+++ b/src/main/java/org/apache/joshua/decoder/chart_parser/ManualConstraintsHandler.java
@@ -31,7 +31,7 @@ import org.apache.joshua.decoder.segment_file.ConstraintRule;
import org.apache.joshua.decoder.segment_file.ConstraintSpan;
/**
- * @author Zhifei Li, <zh...@gmail.com>
+ * @author Zhifei Li, zhifei.work@gmail.com
*/
public class ManualConstraintsHandler {
@@ -141,6 +141,11 @@ public class ManualConstraintsHandler {
/**
* if there are any LHS or RHS constraints for a span, then all the applicable grammar rules in
* that span will have to pass the filter.
+ *
+ * @param i LHS of span, used for generating the span signature
+ * @param j RHS of span, used for generating the span signature
+ * @param rulesIn {@link java.util.List} of {@link org.apache.joshua.decoder.ff.tm.Rule}'s
+ * @return filtered {@link java.util.List} of {@link org.apache.joshua.decoder.ff.tm.Rule}'s
*/
public List<Rule> filterRules(int i, int j, List<Rule> rulesIn) {
if (null == this.constraintSpansForFiltering) return rulesIn;
@@ -165,6 +170,9 @@ public class ManualConstraintsHandler {
/**
* should we filter out the gRule based on the manually provided constraint cRule
+ * @param cRule constraint rule
+ * @param gRule rule which may be filtered
+ * @return true if this gRule should survive
*/
public boolean shouldSurvive(ConstraintRule cRule, Rule gRule) {
@@ -189,6 +197,9 @@ public class ManualConstraintsHandler {
/**
* if a span is *within* the coverage of a *hard* rule constraint, then this span will be only
* allowed to use the manual rules
+ * @param startSpan beginning node (int) for span
+ * @param endSpan end node (int) for span
+ * @return true if this span contains a rule constraint
*/
public boolean containHardRuleConstraint(int startSpan, int endSpan) {
if (null != this.spansWithHardRuleConstraint) {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1fc0590e/src/main/java/org/apache/joshua/decoder/chart_parser/SourcePath.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/chart_parser/SourcePath.java b/src/main/java/org/apache/joshua/decoder/chart_parser/SourcePath.java
index 3fba257..1d96149 100644
--- a/src/main/java/org/apache/joshua/decoder/chart_parser/SourcePath.java
+++ b/src/main/java/org/apache/joshua/decoder/chart_parser/SourcePath.java
@@ -24,7 +24,7 @@ import org.apache.joshua.lattice.Arc;
/**
* This class represents information about a path taken through the source lattice.
*
- * @note This implementation only tracks the source path cost which is assumed to be a scalar value.
+ * <p>This implementation only tracks the source path cost which is assumed to be a scalar value.
* If you need multiple values, or want to recover more detailed path statistics, you'll need
* to update this code.
*/
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1fc0590e/src/main/java/org/apache/joshua/decoder/chart_parser/StateConstraint.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/chart_parser/StateConstraint.java b/src/main/java/org/apache/joshua/decoder/chart_parser/StateConstraint.java
index 7cd263d..d21ceca 100644
--- a/src/main/java/org/apache/joshua/decoder/chart_parser/StateConstraint.java
+++ b/src/main/java/org/apache/joshua/decoder/chart_parser/StateConstraint.java
@@ -29,7 +29,7 @@ import org.apache.joshua.decoder.ff.state_maintenance.NgramDPState;
* original motivation was to be used as a means of doing forced decoding, which is accomplished by
* forcing all n-gram states that are created to match the target string.
*
- * @author Matt Post <po...@cs.jhu.edu>
+ * @author Matt Post post@cs.jhu.edu
*
*/
public class StateConstraint {
@@ -43,7 +43,7 @@ public class StateConstraint {
* Determines if all of the states passed in are legal in light of the input that was passed
* earlier. Currently only defined for n-gram states.
*
- * @param dpStates
+ * @param dpStates {@link java.util.Collection} of {@link org.apache.joshua.decoder.ff.state_maintenance.DPState}'s
* @return whether the states are legal in light of the target side sentence
*/
public boolean isLegal(Collection<DPState> dpStates) {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1fc0590e/src/main/java/org/apache/joshua/decoder/ff/ArityPhrasePenalty.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/ArityPhrasePenalty.java b/src/main/java/org/apache/joshua/decoder/ff/ArityPhrasePenalty.java
index 25f363d..d4f9534 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/ArityPhrasePenalty.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/ArityPhrasePenalty.java
@@ -33,8 +33,8 @@ import org.apache.joshua.corpus.Vocabulary;
* arity within a specific range. It expects three parameters upon initialization: the owner, the
* minimum arity, and the maximum arity.
*
- * @author Matt Post <post@cs.jhu.edu
- * @author Zhifei Li <zh...@gmail.com>
+ * @author Matt Post post@cs.jhu.edu
+ * @author Zhifei Li zhifei.work@gmail.com
*/
public class ArityPhrasePenalty extends StatelessFF {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1fc0590e/src/main/java/org/apache/joshua/decoder/ff/FeatureFunction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/FeatureFunction.java b/src/main/java/org/apache/joshua/decoder/ff/FeatureFunction.java
index c6112e5..e5f0baa 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/FeatureFunction.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/FeatureFunction.java
@@ -32,26 +32,27 @@ import org.apache.joshua.decoder.hypergraph.HGNode;
import org.apache.joshua.decoder.segment_file.Sentence;
/**
- * This class defines Joshua's feature function interface, for both sparse and
+ * <p>This class defines Joshua's feature function interface, for both sparse and
* dense features. It is immediately inherited by StatelessFF and StatefulFF,
* which provide functionality common to stateless and stateful features,
* respectively. Any feature implementation should extend those classes, and not
* this one. The distinction between stateless and stateful features is somewhat
* narrow: all features have the opportunity to return an instance of a
- * {@link DPState} object, and stateless ones just return null.
+ * {@link DPState} object, and stateless ones just return null.</p>
*
- * Features in Joshua work like templates. Each feature function defines any
+ * <p>Features in Joshua work like templates. Each feature function defines any
* number of actual features, which are associated with weights. The task of the
* feature function is to compute the features that are fired in different
* circumstances and then return the inner product of those features with the
* weight vector. Feature functions can also produce estimates of their future
- * cost (via {@link estimateCost()}); these values are not used in computing the
+ * cost (via {@link org.apache.joshua.decoder.ff.FeatureFunction#estimateCost(Rule, Sentence)});
+ * these values are not used in computing the
* score, but are only used for sorting rules during cube pruning. The
* individual features produced by each template should have globally unique
* names; a good convention is to prefix each feature with the name of the
- * template that produced it.
+ * template that produced it.</p>
*
- * Joshua does not retain individual feature values while decoding, since this
+ * <p>Joshua does not retain individual feature values while decoding, since this
* requires keeping a sparse feature vector along every hyperedge, which can be
* expensive. Instead, it computes only the weighted cost of each edge. If the
* individual feature values are requested, the feature functions are replayed
@@ -59,10 +60,10 @@ import org.apache.joshua.decoder.segment_file.Sentence;
* a generic way by passing an {@link Accumulator} object to the compute()
* function. During decoding, the accumulator simply sums weighted features in a
* scalar. During k-best extraction, when individual feature values are needed,
- * a {@link FeatureAccumulator} is used to retain the individual values.
+ * a {@link FeatureAccumulator} is used to retain the individual values.</p>
*
- * @author Matt Post <po...@cs.jhu.edu>
- * @author Juri Ganitkevich <ju...@cs.jhu.edu>
+ * @author Matt Post post@cs.jhu.edu
+ * @author Juri Ganitkevich juri@cs.jhu.edu
*/
public abstract class FeatureFunction {
@@ -135,22 +136,23 @@ public abstract class FeatureFunction {
/**
* This is the main function for defining feature values. The implementor
- * should compute all the features along the hyperedge, calling acc.put(name,
- * value) for each feature. It then returns the newly-computed dynamic
+ * should compute all the features along the hyperedge, calling
+ * {@link org.apache.joshua.decoder.ff.FeatureFunction.Accumulator#add(String, float)}
+ * for each feature. It then returns the newly-computed dynamic
* programming state for this feature (for example, for the
- * {@link LanguageModelFF} feature, this returns the new language model
+ * {@link org.apache.joshua.decoder.ff.lm.LanguageModelFF} feature, this returns the new language model
* context). For stateless features, this value is null.
*
* Note that the accumulator accumulates *unweighted* feature values. The
* feature vector is multiplied times the weight vector later on.
*
- * @param rule
- * @param tailNodes
- * @param i
- * @param j
- * @param sourcePath
- * @param sentID
- * @param acc
+ * @param rule {@link org.apache.joshua.decoder.ff.tm.Rule} to be utilized within computation
+ * @param tailNodes {@link java.util.List} of {@link org.apache.joshua.decoder.hypergraph.HGNode} tail nodes
+ * @param i todo
+ * @param j todo
+ * @param sourcePath information about a path taken through the source {@link org.apache.joshua.lattice.Lattice}
+ * @param sentence {@link org.apache.joshua.lattice.Lattice} input
+ * @param acc {@link org.apache.joshua.decoder.ff.FeatureFunction.Accumulator} object permitting generalization of feature computation
* @return the new dynamic programming state (null for stateless features)
*/
public abstract DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j,
@@ -160,12 +162,12 @@ public abstract class FeatureFunction {
* Feature functions must override this. StatefulFF and StatelessFF provide
* reasonable defaults since most features do not fire on the goal node.
*
- * @param tailNode
- * @param i
- * @param j
- * @param sourcePath
- * @param sentID
- * @param acc
+ * @param tailNode single {@link org.apache.joshua.decoder.hypergraph.HGNode} representing tail node
+ * @param i todo
+ * @param j todo
+ * @param sourcePath information about a path taken through the source {@link org.apache.joshua.lattice.Lattice}
+ * @param sentence {@link org.apache.joshua.lattice.Lattice} input
+ * @param acc {@link org.apache.joshua.decoder.ff.FeatureFunction.Accumulator} object permitting generalization of feature computation
* @return the DPState (null if none)
*/
public abstract DPState computeFinal(HGNode tailNode, int i, int j, SourcePath sourcePath,
@@ -181,12 +183,12 @@ public abstract class FeatureFunction {
* incorporate the feature weights. This function is used in the kbest
* extraction code but could also be used in computing the cost.
*
- * @param rule
- * @param tailNodes
- * @param i
- * @param j
- * @param sourcePath
- * @param sentID
+ * @param rule {@link org.apache.joshua.decoder.ff.tm.Rule} to be utilized within computation
+ * @param tailNodes {@link java.util.List} of {@link org.apache.joshua.decoder.hypergraph.HGNode} tail nodes
+ * @param i todo
+ * @param j todo
+ * @param sourcePath information about a path taken through the source {@link org.apache.joshua.lattice.Lattice}
+ * @param sentence {@link org.apache.joshua.lattice.Lattice} input
* @return an *unweighted* feature delta
*/
public final FeatureVector computeFeatures(Rule rule, List<HGNode> tailNodes, int i, int j,
@@ -203,11 +205,11 @@ public abstract class FeatureFunction {
* return the *weighted* cost of applying the feature. Provided for backward
* compatibility.
*
- * @param tailNode
- * @param i
- * @param j
- * @param sourcePath
- * @param sentID
+ * @param tailNode single {@link org.apache.joshua.decoder.hypergraph.HGNode} representing tail node
+ * @param i todo
+ * @param j todo
+ * @param sourcePath information about a path taken through the source {@link org.apache.joshua.lattice.Lattice}
+ * @param sentence {@link org.apache.joshua.lattice.Lattice} input
* @return a *weighted* feature cost
*/
public final float computeFinalCost(HGNode tailNode, int i, int j, SourcePath sourcePath,
@@ -222,12 +224,12 @@ public abstract class FeatureFunction {
* Returns the *unweighted* feature delta for the final transition (e.g., for
* the language model feature function). Provided for backward compatibility.
*
- * @param tailNode
- * @param i
- * @param j
- * @param sourcePath
- * @param sentID
- * @return
+ * @param tailNode single {@link org.apache.joshua.decoder.hypergraph.HGNode} representing tail node
+ * @param i todo
+ * @param j todo
+ * @param sourcePath information about a path taken through the source {@link org.apache.joshua.lattice.Lattice}
+ * @param sentence {@link org.apache.joshua.lattice.Lattice} input
+ * @return an *unweighted* feature vector
*/
public final FeatureVector computeFinalFeatures(HGNode tailNode, int i, int j,
SourcePath sourcePath, Sentence sentence) {
@@ -247,6 +249,8 @@ public abstract class FeatureFunction {
* sorting. Later, the real cost of this feature function is called via
* compute();
*
+ * @param rule {@link org.apache.joshua.decoder.ff.tm.Rule} to be utilized within computation
+ * @param sentence {@link org.apache.joshua.lattice.Lattice} input
* @return the *weighted* cost of applying the feature.
*/
public abstract float estimateCost(Rule rule, Sentence sentence);
@@ -257,9 +261,9 @@ public abstract class FeatureFunction {
* score but is used in pruning decisions. Stateless features return 0.0f by
* default, but Stateful features might want to override this.
*
- * @param rule
- * @param state
- * @param sentence
+ * @param rule {@link org.apache.joshua.decoder.ff.tm.Rule} to be utilized within computation
+ * @param state todo
+ * @param sentence {@link org.apache.joshua.lattice.Lattice} input
* @return the *weighted* future cost estimate of applying this rule in
* context.
*/
@@ -271,7 +275,7 @@ public abstract class FeatureFunction {
* Any key without a value is added with an empty string as value Multiple values for the same key
* are not parsed. The first one is used.
*
- * @param rawArgs A string with the raw arguments and their names
+ * @param args A string with the raw arguments and their names
* @return A hash with the keys and the values of the string
*/
public static HashMap<String, String> parseArgs(String[] args) {
@@ -306,7 +310,11 @@ public abstract class FeatureFunction {
/**
* It is used when initializing translation grammars (for
* pruning purpose, and to get stateless logP for each rule).
- * This is also required to sort the rules (required by Cube-pruning).
+ * This is also required to sort the rules (required by Cube-pruning).
+ *
+ * @param rule {@link org.apache.joshua.decoder.ff.tm.Rule} to be utilized within computation
+ * @param sentID associated ID
+ * @return double value representing LogP
*/
public abstract double estimateLogP(Rule rule, int sentID);
@@ -318,7 +326,6 @@ public abstract class FeatureFunction {
* sum (for decoding). FeatureAccumulator records the named feature values
* (for k-best extraction).
*/
-
public interface Accumulator {
public void add(String name, float value);
public void add(int id, float value);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1fc0590e/src/main/java/org/apache/joshua/decoder/ff/FeatureVector.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/FeatureVector.java b/src/main/java/org/apache/joshua/decoder/ff/FeatureVector.java
index 65ed077..778997e 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/FeatureVector.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/FeatureVector.java
@@ -35,9 +35,11 @@ import java.util.Set;
* queries each of them for their sparse features via {@link registerDenseFeatures}. Those features
* returned by each decoder are then *removed* from the sparse feature hash and placed in the dense
* feature array. Therefore, when a feature registers a dense feature, it should take care to
- * query either {@link getDense()} or {@link getSparse} when asking for the feature values later on.
+ * query either {@link org.apache.joshua.decoder.ff.FeatureVector#getDense(int)} or
+ * {@link org.apache.joshua.decoder.ff.FeatureVector#getSparse(String)} when asking for the feature
+ * values later on.
*
- * @author Matt Post <po...@cs.jhu.edu>
+ * @author Matt Post post@cs.jhu.edu
*/
public class FeatureVector {
@@ -75,8 +77,8 @@ public class FeatureVector {
* **IMPORTANT** The feature values are inverted, for historical reasons, which leads to a lot
* of confusion. They have to be inverted here and when the score is actually computed. They
* are inverted here (which is used to build the feature vector representation of a rule's dense
- * features) and in {@link BilingualRule::estimateRuleCost()}, where the rule's precomputable
- * (weighted) score is cached.
+ * features) and in {@link org.apache.joshua.decoder.ff.tm.BilingualRule#estimateRuleCost(java.util.List)},
+ * where the rule's precomputable (weighted) score is cached.
*
* @param featureString, the string of labeled and unlabeled features (probably straight from the
* grammar text file)
@@ -138,8 +140,7 @@ public class FeatureVector {
* can infer them all). This *must* be called by every feature function wishing to register
* dense features!
*
- * @param names
- * @return
+ * @param featureFunctions {@link java.util.ArrayList} of {@link org.apache.joshua.decoder.ff.FeatureFunction}'s
*/
public void registerDenseFeatures(ArrayList<FeatureFunction> featureFunctions) {
for (FeatureFunction feature: featureFunctions) {
@@ -181,6 +182,8 @@ public class FeatureVector {
* Subtracts the weights in the other feature vector from this one. Note that this is not set
* subtraction; keys found in the other FeatureVector but not in this one will be initialized with
* a value of 0.0f before subtraction.
+ *
+ * @param other another {@link org.apache.joshua.decoder.ff.FeatureVector} from which to subtract its score
*/
public void subtract(FeatureVector other) {
for (int i = 0; i < denseFeatures.size(); i++)
@@ -195,6 +198,8 @@ public class FeatureVector {
/**
* Adds the weights in the other feature vector to this one. This is set union, with values shared
* between the two being summed.
+ *
+ * @param other another {@link org.apache.joshua.decoder.ff.FeatureVector} from which to add its score
*/
public void add(FeatureVector other) {
while (denseFeatures.size() < other.denseFeatures.size())
@@ -214,6 +219,8 @@ public class FeatureVector {
/**
* Return the weight of a feature by name, after checking to determine if it is sparse or dense.
*
+ * @param feature String name of some feature
+ * @return the feature's weight
*/
public float getWeight(String feature) {
for (int i = 0; i < DENSE_FEATURE_NAMES.size(); i++) {
@@ -227,7 +234,7 @@ public class FeatureVector {
/**
* Return the weight of a sparse feature, indexed by its name.
*
- * @param feature
+ * @param feature String name of some feature
* @return the sparse feature's weight, or 0 if not found.
*/
public float getSparse(String feature) {
@@ -244,7 +251,7 @@ public class FeatureVector {
* Return the weight of a dense feature, indexed by its feature index, or 0.0f, if the feature
* is not found. In other words, this is a safe way to query the dense feature vector.
*
- * @param id
+ * @param id int index of some dense feature
* @return the dense feature's value, or 0 if not found.
*/
public float getDense(int id) {
@@ -267,8 +274,8 @@ public class FeatureVector {
* Set the value of a feature. We need to first determine whether the feature is a dense or
* sparse one, then set accordingly.
*
- * @param feature
- * @param value
+ * @param feature String name of some feature
+ * @param value float value to set to the feature with the associated name
*/
public void set(String feature, float value) {
for (int i = 0; i < DENSE_FEATURE_NAMES.size(); i++) {
@@ -293,6 +300,9 @@ public class FeatureVector {
/**
* Computes the inner product between this feature vector and another one.
+ *
+ * @param other a {@link org.apache.joshua.decoder.ff.FeatureVector} with which to compute the inner product
+ * @return float value representing the computation
*/
public float innerProduct(FeatureVector other) {
float cost = 0.0f;
@@ -313,6 +323,8 @@ public class FeatureVector {
/***
* Moses distinguishes sparse features as those containing an underscore, so we have to fake it
* to be compatible with their tuners.
+ *
+ * @return trimmed Moses output string
*/
public String mosesString() {
StringBuilder outputString = new StringBuilder();
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1fc0590e/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java b/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java
index 0d0e0f7..69584dd 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java
@@ -39,7 +39,7 @@ import org.apache.joshua.decoder.chart_parser.SourcePath;
* "mark-oovs") . These rules are all stored in a grammar whose owner is "oov". The OOV feature
* function template then fires the "OOVPenalty" feature whenever it is asked to score an OOV rule.
*
- * @author Matt Post <po...@cs.jhu.edu>
+ * @author Matt Post post@cs.jhu.edu
*/
public class OOVPenalty extends StatelessFF {
private int ownerID = -1;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1fc0590e/src/main/java/org/apache/joshua/decoder/ff/PhraseModel.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/PhraseModel.java b/src/main/java/org/apache/joshua/decoder/ff/PhraseModel.java
index 62792dc..3eb0c2e 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/PhraseModel.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/PhraseModel.java
@@ -37,8 +37,8 @@ import org.apache.joshua.decoder.segment_file.Sentence;
* queries the weights for the set of features that are active for this grammar, storing them in an
* array.
*
- * @author Matt Post <po...@cs.jhu.edu>
- * @author Zhifei Li <zh...@gmail.com>
+ * @author Matt Post post@cs.jhu.edu
+ * @author Zhifei Li zhifei.work@gmail.com
*/
public class PhraseModel extends StatelessFF {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1fc0590e/src/main/java/org/apache/joshua/decoder/ff/SourcePathFF.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/SourcePathFF.java b/src/main/java/org/apache/joshua/decoder/ff/SourcePathFF.java
index d757303..d529559 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/SourcePathFF.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/SourcePathFF.java
@@ -32,8 +32,8 @@ import org.apache.joshua.decoder.segment_file.Sentence;
* This feature returns the scored path through the source lattice, which is recorded in a
* SourcePath object.
*
- * @author Chris Dyer <re...@umd.edu>
- * @author Matt Post <po...@cs.jhu.edu>
+ * @author Chris Dyer redpony@umd.edu
+ * @author Matt Post post@cs.jhu.edu
*/
public final class SourcePathFF extends StatelessFF {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1fc0590e/src/main/java/org/apache/joshua/decoder/ff/StatefulFF.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/StatefulFF.java b/src/main/java/org/apache/joshua/decoder/ff/StatefulFF.java
index 626eb3c..4678902 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/StatefulFF.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/StatefulFF.java
@@ -35,8 +35,8 @@ import org.apache.joshua.decoder.segment_file.Sentence;
* state-contributing objects in each HGNode. State can no longer be shared among different feature
* functions.
*
- * @author Matt Post <po...@cs.jhu.edu>
- * @author Juri Ganitkevich <ju...@cs.jhu.edu>
+ * @author Matt Post post@cs.jhu.edu
+ * @author Juri Ganitkevich juri@cs.jhu.edu
*/
public abstract class StatefulFF extends FeatureFunction {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1fc0590e/src/main/java/org/apache/joshua/decoder/ff/StatelessFF.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/StatelessFF.java b/src/main/java/org/apache/joshua/decoder/ff/StatelessFF.java
index 19f7050..e473c37 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/StatelessFF.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/StatelessFF.java
@@ -31,8 +31,8 @@ import org.apache.joshua.decoder.segment_file.Sentence;
* Stateless feature functions do not contribute any state. You need not implement this class to
* create a stateless feature function, but it provides a few convenience functions.
*
- * @author Matt Post <po...@cs.jhu.edu>
- * @author Juri Ganitkevich <ju...@cs.jhu.edu>
+ * @author Matt Post post@cs.jhu.edu
+ * @author Juri Ganitkevich juri@cs.jhu.edu
*/
public abstract class StatelessFF extends FeatureFunction {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1fc0590e/src/main/java/org/apache/joshua/decoder/ff/TargetBigram.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/TargetBigram.java b/src/main/java/org/apache/joshua/decoder/ff/TargetBigram.java
index 5661ce7..ee0f9d5 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/TargetBigram.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/TargetBigram.java
@@ -180,7 +180,7 @@ public class TargetBigram extends StatefulFF {
}
/**
- * There is nothing to be done here, since <s> and </s> are included in rules that are part
+ * There is nothing to be done here, since &lt;s&gt; and &lt;/s&gt; are included in rules that are part
* of the grammar. We simply return the DP state of the tail node.
*/
@Override
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1fc0590e/src/main/java/org/apache/joshua/decoder/ff/WordPenalty.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/WordPenalty.java b/src/main/java/org/apache/joshua/decoder/ff/WordPenalty.java
index 2a40088..62c889f 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/WordPenalty.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/WordPenalty.java
@@ -31,8 +31,8 @@ import org.apache.joshua.decoder.segment_file.Sentence;
/**
*
- * @author Zhifei Li <zh...@gmail.com>
- * @author Matt Post <po...@cs.jhu.edu>
+ * @author Zhifei Li zhifei.work@gmail.com
+ * @author Matt Post post@cs.jhu.edu
*/
public final class WordPenalty extends StatelessFF {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1fc0590e/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/FragmentLMFF.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/FragmentLMFF.java b/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/FragmentLMFF.java
index e438778..cec24f2 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/FragmentLMFF.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/FragmentLMFF.java
@@ -38,33 +38,33 @@ import org.apache.joshua.decoder.hypergraph.HyperEdge;
import org.apache.joshua.decoder.segment_file.Sentence;
/**
- * Feature function that reads in a list of language model fragments and matches them against the
+ * <p>Feature function that reads in a list of language model fragments and matches them against the
* hypergraph. This allows for language model fragment "glue" features, which fire when LM fragments
* (supplied as input) are assembled. These LM fragments are presumably useful in ensuring
- * grammaticality and can be independent of the translation model fragments.
+ * grammaticality and can be independent of the translation model fragments.</p>
*
- * Usage: in the Joshua Configuration file, put
+ * <p>Usage: in the Joshua Configuration file, put</p>
*
- * feature-function = FragmentLM -lm LM_FRAGMENTS_FILE -map RULE_FRAGMENTS_MAP_FILE
+ * <code>feature-function = FragmentLM -lm LM_FRAGMENTS_FILE -map RULE_FRAGMENTS_MAP_FILE</code>
*
- * LM_FRAGMENTS_FILE is a pointer to a file containing a list of fragments that it should look for.
- * The format of the file is one fragment per line in PTB format, e.g.:
+ * <p>LM_FRAGMENTS_FILE is a pointer to a file containing a list of fragments that it should look for.
+ * The format of the file is one fragment per line in PTB format, e.g.:</p>
*
- * (S NP (VP (VBD said) SBAR) (. .))
+ * <code>(S NP (VP (VBD said) SBAR) (. .))</code>
*
- * RULE_FRAGMENTS_MAP_FILE points to a file that maps fragments to the flattened SCFG rule format
+ * <p>RULE_FRAGMENTS_MAP_FILE points to a file that maps fragments to the flattened SCFG rule format
* that Joshua uses. This mapping is necessary because Joshua's rules have been flattened, meaning
* that their internal structure has been removed, yet this structure is needed for matching LM
- * fragments. The format of the file is
+ * fragments. The format of the file is</p>
*
- * FRAGMENT ||| RULE-TARGET-SIDE
+ * <code>FRAGMENT ||| RULE-TARGET-SIDE</code>
*
- * for example,
+ * <p>for example,</p>
*
- * (S (NP (DT the) (NN man)) VP .) ||| the man [VP,1] [.,2] (SBAR (IN that) (S (NP (PRP he)) (VP
- * (VBD was) (VB done)))) ||| that he was done (VP (VBD said) SBAR) ||| said SBAR
+ * <code>(S (NP (DT the) (NN man)) VP .) ||| the man [VP,1] [.,2] (SBAR (IN that) (S (NP (PRP he)) (VP
+ * (VBD was) (VB done)))) ||| that he was done (VP (VBD said) SBAR) ||| said SBAR</code>
*
- * @author Matt Post <po...@cs.jhu.edu>
+ * @author Matt Post post@cs.jhu.edu
*/
public class FragmentLMFF extends StatefulFF {
@@ -104,9 +104,9 @@ public class FragmentLMFF extends StatefulFF {
private String fragmentLMFile = "";
/**
- * @param weights
- * @param name
- * @param stateComputer
+ * @param weights a {@link org.apache.joshua.decoder.ff.FeatureVector} with weights
+ * @param args arguments passed to the feature function
+ * @param config the {@link org.apache.joshua.decoder.JoshuaConfiguration}
*/
public FragmentLMFF(FeatureVector weights, String[] args, JoshuaConfiguration config) {
super(weights, "FragmentLMFF", args, config);
@@ -139,7 +139,7 @@ public class FragmentLMFF extends StatefulFF {
/**
* Add the provided fragment to the language model, subject to some filtering.
*
- * @param fragment
+ * @param fragment a {@link org.apache.joshua.decoder.ff.fragmentlm.Tree} fragment
*/
public void addLMFragment(Tree fragment) {
if (lmFragments == null)
@@ -170,6 +170,15 @@ public class FragmentLMFF extends StatefulFF {
* that fire are any LM fragments that match the fragment associated with the current rule. LM
* fragments may recurse over the tail nodes, following 1-best backpointers until the fragment
* either matches or fails.
+ *
+ * @param rule {@link org.apache.joshua.decoder.ff.tm.Rule} to be utilized within computation
+ * @param tailNodes {@link java.util.List} of {@link org.apache.joshua.decoder.hypergraph.HGNode} tail nodes
+ * @param i todo
+ * @param j todo
+ * @param sourcePath information about a path taken through the source {@link org.apache.joshua.lattice.Lattice}
+ * @param sentence {@link org.apache.joshua.lattice.Lattice} input
+ * @param acc {@link org.apache.joshua.decoder.ff.FeatureFunction.Accumulator} object permitting generalization of feature computation
+ * @return the new dynamic programming state (null for stateless features)
*/
@Override
public DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j, SourcePath sourcePath,
@@ -321,8 +330,8 @@ public class FragmentLMFF extends StatefulFF {
/**
* Maintains a state pointer used by KenLM to implement left-state minimization.
*
- * @author Matt Post <po...@cs.jhu.edu>
- * @author Juri Ganitkevitch <ju...@cs.jhu.edu>
+ * @author Matt Post post@cs.jhu.edu
+ * @author Juri Ganitkevitch juri@cs.jhu.edu
*/
public class FragmentState extends DPState {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1fc0590e/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/Tree.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/Tree.java b/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/Tree.java
index 6214560..22fc160 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/Tree.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/Tree.java
@@ -38,7 +38,7 @@ import org.apache.joshua.util.io.LineReader;
* enclosed in double-quotes when read in.
*
* @author Dan Klein
- * @author Matt Post <po...@cs.jhu.edu>
+ * @author Matt Post post@cs.jhu.edu
*/
public class Tree implements Serializable {
@@ -111,7 +111,7 @@ public class Tree implements Serializable {
/**
* Computes the depth-one rule rooted at this node. If the node has no children, null is returned.
*
- * @return
+ * @return string representation of the rule
*/
public String getRule() {
if (isLeaf()) {
@@ -237,6 +237,8 @@ public class Tree implements Serializable {
* A tree is lexicalized if it has terminal nodes among the leaves of its frontier. For normal
* trees this is always true since they bottom out in terminals, but for fragments, this may or
* may not be true.
+ *
+ * @return true if the tree is lexicalized
*/
public boolean isLexicalized() {
if (this.numLexicalItems < 0) {
@@ -313,7 +315,7 @@ public class Tree implements Serializable {
* Removes the quotes around terminals. Note that the resulting tree could not be read back
* in by this class, since unquoted leaves are interpreted as nonterminals.
*
- * @return
+ * @return unquoted string
*/
public String unquotedString() {
return toString().replaceAll("\"", "");
@@ -450,8 +452,8 @@ public class Tree implements Serializable {
* models. The arguments have to be passed in to preserve Java generics, even though this is only
* ever used with String versions.
*
- * @param sos presumably "<s>"
- * @param eos presumably "</s>"
+ * @param sos presumably "&lt;s&gt;"
+ * @param eos presumably "&lt;/s&gt;"
*/
public void insertSentenceMarkers(String sos, String eos) {
insertSentenceMarker(sos, 0);
@@ -465,8 +467,8 @@ public class Tree implements Serializable {
/**
*
- * @param symbol
- * @param pos
+ * @param symbol the marker to insert
+ * @param pos the position at which to insert
*/
private void insertSentenceMarker(String symbol, int pos) {
@@ -487,6 +489,9 @@ public class Tree implements Serializable {
/**
* This is a convenience function for producing a fragment from its string representation.
+ *
+ * @param ptbStr input string from which to produce a fragment
+ * @return the fragment
*/
public static Tree fromString(String ptbStr) {
PennTreeReader reader = new PennTreeReader(new StringReader(ptbStr));
@@ -531,14 +536,13 @@ public class Tree implements Serializable {
* recursively visit the derivation state objects, following the route through the hypergraph
* defined by them.
*
- * This function is like the other buildTree() function, but that one simply follows the best
- * incoming hyperedge for each node.
+ * This function is like {@link org.apache.joshua.decoder.ff.fragmentlm.Tree#buildTree(DerivationState, int)},
+ * but that one simply follows the best incoming hyperedge for each node.
*
- * @param rule
- * @param tailNodes
- * @param derivation - should not be null
- * @param maxDepth
- * @return
+ * @param rule for which corresponding internal fragment can be used to initialize the tree
+ * @param derivationStates array of state objects
+ * @param maxDepth of route through the hypergraph
+ * @return the Tree
*/
public static Tree buildTree(Rule rule, DerivationState[] derivationStates, int maxDepth) {
Tree tree = getFragmentFromYield(rule.getEnglishWords());
@@ -603,19 +607,14 @@ public class Tree implements Serializable {
}
/**
- * Builds a tree from the kth-best derivation state. This is done by initializing the tree with
+ * <p>Builds a tree from the kth-best derivation state. This is done by initializing the tree with
* the internal fragment corresponding to the rule; this will be the top of the tree. We then
* recursively visit the derivation state objects, following the route through the hypergraph
- * defined by them.
- *
- * This function is like the other buildTree() function, but that one simply follows the best
- * incoming hyperedge for each node.
+ * defined by them.</p>
*
- * @param rule
- * @param tailNodes
- * @param derivation
- * @param maxDepth
- * @return
+ * @param derivationState the derivation state from which the tree is built
+ * @param maxDepth of route through the hypergraph
+ * @return the Tree
*/
public static Tree buildTree(DerivationState derivationState, int maxDepth) {
Rule rule = derivationState.edge.getRule();
@@ -676,9 +675,10 @@ public class Tree implements Serializable {
* This could be implemented by using the other buildTree() function and using the 1-best
* DerivationState.
*
- * @param rule
- * @param tailNodes
- * @return
+ * @param rule {@link org.apache.joshua.decoder.ff.tm.Rule} to be used whilst building the tree
+ * @param tailNodes {@link java.util.List} of {@link org.apache.joshua.decoder.hypergraph.HGNode}'s
+ * @param maxDepth to go in the tree
+ * @return shallow clone of the Tree object
*/
public static Tree buildTree(Rule rule, List<HGNode> tailNodes, int maxDepth) {
Tree tree = getFragmentFromYield(rule.getEnglishWords());
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1fc0590e/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/Trees.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/Trees.java b/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/Trees.java
index 439ba96..d06388c 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/Trees.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/Trees.java
@@ -187,6 +187,9 @@ public class Trees {
* preterminals onto one line of tags and words. Additional complexities are that conjunctions
* (tag CC) are not collapsed in this way, and that the unlabeled outer brackets are collapsed
* onto the same line as the next bracket down.
+ *
+ * @param tree you wish to render and print
+ * @return a rendered String representation of the tree
*/
public static String render(Tree tree) {
StringBuilder sb = new StringBuilder();
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1fc0590e/src/main/java/org/apache/joshua/decoder/ff/lm/AbstractLM.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/lm/AbstractLM.java b/src/main/java/org/apache/joshua/decoder/ff/lm/AbstractLM.java
index 79560fd..e8225dc 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/lm/AbstractLM.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/lm/AbstractLM.java
@@ -18,11 +18,7 @@
*/
package org.apache.joshua.decoder.ff.lm;
-import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.decoder.Support;
-import org.apache.joshua.corpus.SymbolTable;
-
-
import java.util.List;
/**
@@ -31,7 +27,7 @@ import java.util.List;
* methods are declared final, in an attempt to limit what subclasses
* may be defined.
*
- * @author Zhifei Li, <zh...@gmail.com>
+ * @author Zhifei Li, zhifei.work@gmail.com
* @version $LastChangedDate: 2009-12-30 10:10:38 -0600 (Wed, 30 Dec 2009) $
*/
public abstract class AbstractLM extends DefaultNGramLanguageModel {
@@ -40,7 +36,7 @@ public abstract class AbstractLM extends DefaultNGramLanguageModel {
super(symbolTable, order);
}
-
+ @SuppressWarnings("null")
public final double sentenceLogProbability(
List<Integer> sentence, int order, int startIndex
) {
@@ -48,12 +44,10 @@ public abstract class AbstractLM extends DefaultNGramLanguageModel {
return (Double) null;
}
-
public final float ngramLogProbability(int[] ngram) {
return super.ngramLogProbability(ngram);
}
-
public final float ngramLogProbability(int[] ngram, int order) {
if (ngram.length > order) {
throw new RuntimeException("ngram length is greather than the max order");
@@ -77,11 +71,6 @@ public abstract class AbstractLM extends DefaultNGramLanguageModel {
protected abstract float ngramLogProbability_helper(int[] ngram, int order);
-
- /**
- * @deprecated this function is much slower than the int[]
- * version
- */
@Deprecated
public final double logProbOfBackoffState(List<Integer> ngram, int order, int qtyAdditionalBackoffWeight) {
return logProbabilityOfBackoffState(
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1fc0590e/src/main/java/org/apache/joshua/decoder/ff/lm/DefaultNGramLanguageModel.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/lm/DefaultNGramLanguageModel.java b/src/main/java/org/apache/joshua/decoder/ff/lm/DefaultNGramLanguageModel.java
index 4ff8f59..759479f 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/lm/DefaultNGramLanguageModel.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/lm/DefaultNGramLanguageModel.java
@@ -27,12 +27,12 @@ import org.apache.joshua.corpus.Vocabulary;
/**
* This class provides a default implementation for the Equivalent LM State optimization (namely,
* don't back off anywhere). It also provides some default implementations for more general
- * functions on the interface to fall back to more specific ones (e.g. from ArrayList<Integer> to
- * int[]) and a default implementation for sentenceLogProbability which enumerates the n-grams and
- * calls calls ngramLogProbability for each of them.
+ * functions on the interface to fall back to more specific ones (e.g. from {@link java.util.ArrayList}
+ * of {@link java.lang.Integer}'s to int[]) and a default implementation for sentenceLogProbability
+ * which enumerates the n-grams and calls ngramLogProbability for each of them.
*
- * @author Zhifei Li, <zh...@gmail.com>
- * @author wren ng thornton <wr...@users.sourceforge.net>
+ * @author Zhifei Li, zhifei.work@gmail.com
+ * @author wren ng thornton wren@users.sourceforge.net
*/
public abstract class DefaultNGramLanguageModel implements NGramLanguageModel {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1fc0590e/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java b/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java
index 2c43712..2864e00 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java
@@ -29,7 +29,7 @@ import org.apache.joshua.decoder.ff.state_maintenance.KenLMState;
* state by itself and just passes in the ngrams for scoring.
*
* @author Kenneth Heafield
- * @author Matt Post <po...@cs.jhu.edu>
+ * @author Matt Post post@cs.jhu.edu
*/
public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
@@ -115,6 +115,8 @@ public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
/**
* Query for n-gram probability using strings.
+ * @param words a string array of words
+ * @return float value denoting probability
*/
public float prob(String[] words) {
return probForString(pointer, words);
@@ -127,14 +129,15 @@ public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
/**
* This function is the bridge to the interface in kenlm/lm/left.hh, which has KenLM score the
- * whole rule. It takes a list of words and states retrieved from tail nodes (nonterminals in the
+ * whole rule. It takes an array of words and states retrieved from tail nodes (nonterminals in the
* rule). Nonterminals have a negative value so KenLM can distinguish them. The sentence number is
* needed so KenLM knows which memory pool to use. When finished, it returns the updated KenLM
* state and the LM probability incurred along this rule.
*
- * @param words
- * @param sentId
- * @return
+ * @param words array of words
+ * @param poolPointer todo
+ * @return the updated {@link org.apache.joshua.decoder.ff.lm.KenLM.StateProbPair}, i.e. the
+ * KenLM state and the LM probability incurred along this rule
*/
public StateProbPair probRule(long[] words, long poolPointer) {
@@ -153,7 +156,7 @@ public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
* Public facing function that estimates the cost of a rule, which value is used for sorting
* rules during cube pruning.
*
- * @param words
+ * @param words array of words
* @return the estimated cost of the rule (the (partial) n-gram probabilities of all words in the rule)
*/
public float estimateRule(long[] words) {
@@ -170,6 +173,7 @@ public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
/**
* The start symbol for a KenLM is the Vocabulary.START_SYM.
+ * @return "&lt;s&gt;"
*/
public String getStartSymbol() {
return Vocabulary.START_SYM;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1fc0590e/src/main/java/org/apache/joshua/decoder/ff/lm/LanguageModelFF.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/lm/LanguageModelFF.java b/src/main/java/org/apache/joshua/decoder/ff/lm/LanguageModelFF.java
index f2daffd..ad05a37 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/lm/LanguageModelFF.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/lm/LanguageModelFF.java
@@ -45,14 +45,14 @@ import org.apache.joshua.decoder.segment_file.Sentence;
* This class performs the following:
* <ol>
* <li>Gets the additional LM score due to combinations of small items into larger ones by using
- * rules
- * <li>Gets the LM state
- * <li>Gets the left-side LM state estimation score
+ * rules</li>
+ * <li>Gets the LM state</li>
+ * <li>Gets the left-side LM state estimation score</li>
* </ol>
*
- * @author Matt Post <po...@cs.jhu.edu>
- * @author Juri Ganitkevitch <ju...@cs.jhu.edu>
- * @author Zhifei Li, <zh...@gmail.com>
+ * @author Matt Post post@cs.jhu.edu
+ * @author Juri Ganitkevitch juri@cs.jhu.edu
+ * @author Zhifei Li, zhifei.work@gmail.com
*/
public class LanguageModelFF extends StatefulFF {
@@ -65,13 +65,14 @@ public class LanguageModelFF extends StatefulFF {
* <ol>
* <li>We assume it is a backoff lm, and high-order ngram implies low-order ngram; absense of
* low-order ngram implies high-order ngram</li>
- * <li>For a ngram, existence of backoffweight => existence a probability Two ways of dealing with
+ * <li>For a ngram, existence of backoffweight =&gt; existence of a probability. Two ways of dealing with
* low counts:
* <ul>
* <li>SRILM: don't multiply zeros in for unknown words</li>
* <li>Pharaoh: cap at a minimum score exp(-10), including unknown words</li>
* </ul>
* </li>
+ * </ol>
*/
protected NGramLanguageModel languageModel;
@@ -160,10 +161,6 @@ public class LanguageModelFF extends StatefulFF {
/**
* Initializes the underlying language model.
- *
- * @param config
- * @param type
- * @param path
*/
protected void initializeLM() {
if (type.equals("kenlm")) {
@@ -265,7 +262,7 @@ public class LanguageModelFF extends StatefulFF {
/**
* Sets the class map if this is a class LM
- * @param classMap
+ * @param fileName
* @throws IOException
*/
public void setClassMap(String fileName) throws IOException {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1fc0590e/src/main/java/org/apache/joshua/decoder/ff/lm/NGramLanguageModel.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/lm/NGramLanguageModel.java b/src/main/java/org/apache/joshua/decoder/ff/lm/NGramLanguageModel.java
index 4043171..882424b 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/lm/NGramLanguageModel.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/lm/NGramLanguageModel.java
@@ -22,10 +22,10 @@ package org.apache.joshua.decoder.ff.lm;
* An interface for new language models to implement. An object of this type is passed to
* LanguageModelFF, which will handle all the dynamic programming and state maintenance.
*
- * @author wren ng thornton <wr...@users.sourceforge.net>
- * @author Zhifei Li, <zh...@gmail.com>
- * @author Matt Post <po...@cs.jhu.edu>
- * @author Juri Ganitkevitch <ju...@cs.jhu.edu>
+ * @author wren ng thornton wren@users.sourceforge.net
+ * @author Zhifei Li, zhifei.work@gmail.com
+ * @author Matt Post post@cs.jhu.edu
+ * @author Juri Ganitkevitch juri@cs.jhu.edu
*/
public interface NGramLanguageModel {
@@ -41,12 +41,12 @@ public interface NGramLanguageModel {
/**
* Language models may have their own private vocabulary mapping strings to integers; for example,
* if they make use of a compile format (as KenLM and BerkeleyLM do). This mapping is likely
- * different from the global mapping containing in joshua.corpus.Vocabulary, which is used to
+ * different from the global mapping contained in {@link org.apache.joshua.corpus.Vocabulary}, which is used to
* convert the input string and grammars. This function is used to tell the language model what
* the global mapping is, so that the language model can convert it into its own private mapping.
*
- * @param word
- * @param id
+ * @param token string token to be registered
+ * @param id to associate with this word
* @return Whether any collisions were detected.
*/
boolean registerWord(String token, int id);
@@ -63,9 +63,9 @@ public interface NGramLanguageModel {
/**
* Compute the probability of a single word given its context.
*
- * @param ngram
- * @param order
- * @return
+ * @param ngram the NGram for which we wish to compute the probability
+ * @param order NGram order/context
+ * @return float representing the probability
*/
float ngramLogProbability(int[] ngram, int order);