You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/06/01 02:51:35 UTC
[40/94] [abbrv] [partial] incubator-joshua git commit: Pulled
JOSHUA-252 changes and Resolved Merge Conflicts
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/chart_parser/ManualConstraintsHandler.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/chart_parser/ManualConstraintsHandler.java b/src/main/java/org/apache/joshua/decoder/chart_parser/ManualConstraintsHandler.java
index 38e9f4a..06a14ee 100644
--- a/src/main/java/org/apache/joshua/decoder/chart_parser/ManualConstraintsHandler.java
+++ b/src/main/java/org/apache/joshua/decoder/chart_parser/ManualConstraintsHandler.java
@@ -31,7 +31,7 @@ import org.apache.joshua.decoder.segment_file.ConstraintRule;
import org.apache.joshua.decoder.segment_file.ConstraintSpan;
/**
- * @author Zhifei Li, <zh...@gmail.com>
+ * @author Zhifei Li, zhifei.work@gmail.com
*/
public class ManualConstraintsHandler {
@@ -141,6 +141,11 @@ public class ManualConstraintsHandler {
/**
* if there are any LHS or RHS constraints for a span, then all the applicable grammar rules in
* that span will have to pass the filter.
+ *
+ * @param i LHS of span, used for generating the span signature
+ * @param j RHS of span, used for generating the span signature
+ * @param rulesIn {@link java.util.List} of {@link org.apache.joshua.decoder.ff.tm.Rule}'s
+ * @return filtered {@link java.util.List} of {@link org.apache.joshua.decoder.ff.tm.Rule}'s
*/
public List<Rule> filterRules(int i, int j, List<Rule> rulesIn) {
if (null == this.constraintSpansForFiltering) return rulesIn;
@@ -165,6 +170,9 @@ public class ManualConstraintsHandler {
/**
* should we filter out the gRule based on the manually provided constraint cRule
+ * @param cRule constraint rule
+ * @param gRule rule which may be filtered
+ * @return true if this gRule should survive
*/
public boolean shouldSurvive(ConstraintRule cRule, Rule gRule) {
@@ -189,6 +197,9 @@ public class ManualConstraintsHandler {
/**
* if a span is *within* the coverage of a *hard* rule constraint, then this span will be only
* allowed to use the mannual rules
+ * @param startSpan beginning node (int) for span
+ * @param endSpan end node (int) for span
+ * @return true if this span contains a rule constraint
*/
public boolean containHardRuleConstraint(int startSpan, int endSpan) {
if (null != this.spansWithHardRuleConstraint) {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/chart_parser/SourcePath.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/chart_parser/SourcePath.java b/src/main/java/org/apache/joshua/decoder/chart_parser/SourcePath.java
index 3fba257..1d96149 100644
--- a/src/main/java/org/apache/joshua/decoder/chart_parser/SourcePath.java
+++ b/src/main/java/org/apache/joshua/decoder/chart_parser/SourcePath.java
@@ -24,7 +24,7 @@ import org.apache.joshua.lattice.Arc;
/**
* This class represents information about a path taken through the source lattice.
*
- * @note This implementation only tracks the source path cost which is assumed to be a scalar value.
+ * <p>This implementation only tracks the source path cost which is assumed to be a scalar value.
* If you need multiple values, or want to recover more detailed path statistics, you'll need
* to update this code.
*/
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/chart_parser/StateConstraint.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/chart_parser/StateConstraint.java b/src/main/java/org/apache/joshua/decoder/chart_parser/StateConstraint.java
index 7cd263d..d21ceca 100644
--- a/src/main/java/org/apache/joshua/decoder/chart_parser/StateConstraint.java
+++ b/src/main/java/org/apache/joshua/decoder/chart_parser/StateConstraint.java
@@ -29,7 +29,7 @@ import org.apache.joshua.decoder.ff.state_maintenance.NgramDPState;
* original motivation was to be used as a means of doing forced decoding, which is accomplished by
* forcing all n-gram states that are created to match the target string.
*
- * @author Matt Post <po...@cs.jhu.edu>
+ * @author Matt Post post@cs.jhu.edu
*
*/
public class StateConstraint {
@@ -43,7 +43,7 @@ public class StateConstraint {
* Determines if all of the states passed in are legal in light of the input that was passed
* earlier. Currently only defined for n-gram states.
*
- * @param dpStates
+ * @param dpStates {@link java.util.Collection} of {@link org.apache.joshua.decoder.ff.state_maintenance.DPState}'s
* @return whether the states are legal in light of the target side sentence
*/
public boolean isLegal(Collection<DPState> dpStates) {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/chart_parser/package-info.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/chart_parser/package-info.java b/src/main/java/org/apache/joshua/decoder/chart_parser/package-info.java
new file mode 100644
index 0000000..8bf73ba
--- /dev/null
+++ b/src/main/java/org/apache/joshua/decoder/chart_parser/package-info.java
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/**
+ * Provides an implementation of a hierarchical phrase-based
+ * decoder for statistical machine translation. The code in
+ * this package is based largely on algorithms from Chiang (2007).
+ */
+package org.apache.joshua.decoder.chart_parser;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/chart_parser/package.html
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/chart_parser/package.html b/src/main/java/org/apache/joshua/decoder/chart_parser/package.html
deleted file mode 100644
index d7ca8f6..0000000
--- a/src/main/java/org/apache/joshua/decoder/chart_parser/package.html
+++ /dev/null
@@ -1,23 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
-<html>
-<head></head>
-<body bgcolor="white">
-
-<!--
-##### THIS IS THE TEMPLATE FOR THE PACKAGE DOC COMMENTS. #####
-##### TYPE YOUR PACKAGE COMMENTS HERE. BEGIN WITH A #####
-##### ONE-SENTENCE SUMMARY STARTING WITH A VERB LIKE: #####
--->
-
-Provides an implementation of a hierarchical phrase-based decoder for statistical machine translation.
-
-<h2>Related Documentation</h2>
-
-<ul>
- <li>The code in this package is based largely on algorithms from Chiang (2007).
-</ul>
-
-<!-- Put @see and @since tags down here. -->
-
-</body>
-</html>
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/ArityPhrasePenalty.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/ArityPhrasePenalty.java b/src/main/java/org/apache/joshua/decoder/ff/ArityPhrasePenalty.java
index 25f363d..d4f9534 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/ArityPhrasePenalty.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/ArityPhrasePenalty.java
@@ -33,8 +33,8 @@ import org.apache.joshua.corpus.Vocabulary;
* arity within a specific range. It expects three parameters upon initialization: the owner, the
* minimum arity, and the maximum arity.
*
- * @author Matt Post <post@cs.jhu.edu
- * @author Zhifei Li <zh...@gmail.com>
+ * @author Matt Post post@cs.jhu.edu
+ * @author Zhifei Li zhifei.work@gmail.com
*/
public class ArityPhrasePenalty extends StatelessFF {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/FeatureFunction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/FeatureFunction.java b/src/main/java/org/apache/joshua/decoder/ff/FeatureFunction.java
index c6112e5..e5f0baa 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/FeatureFunction.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/FeatureFunction.java
@@ -32,26 +32,27 @@ import org.apache.joshua.decoder.hypergraph.HGNode;
import org.apache.joshua.decoder.segment_file.Sentence;
/**
- * This class defines Joshua's feature function interface, for both sparse and
+ * <p>This class defines Joshua's feature function interface, for both sparse and
* dense features. It is immediately inherited by StatelessFF and StatefulFF,
* which provide functionality common to stateless and stateful features,
* respectively. Any feature implementation should extend those classes, and not
* this one. The distinction between stateless and stateful features is somewhat
* narrow: all features have the opportunity to return an instance of a
- * {@link DPState} object, and stateless ones just return null.
+ * {@link DPState} object, and stateless ones just return null.</p>
*
- * Features in Joshua work like templates. Each feature function defines any
+ * <p>Features in Joshua work like templates. Each feature function defines any
* number of actual features, which are associated with weights. The task of the
* feature function is to compute the features that are fired in different
* circumstances and then return the inner product of those features with the
* weight vector. Feature functions can also produce estimates of their future
- * cost (via {@link estimateCost()}); these values are not used in computing the
+ * cost (via {@link org.apache.joshua.decoder.ff.FeatureFunction#estimateCost(Rule, Sentence)});
+ * these values are not used in computing the
* score, but are only used for sorting rules during cube pruning. The
* individual features produced by each template should have globally unique
* names; a good convention is to prefix each feature with the name of the
- * template that produced it.
+ * template that produced it.</p>
*
- * Joshua does not retain individual feature values while decoding, since this
+ * <p>Joshua does not retain individual feature values while decoding, since this
* requires keeping a sparse feature vector along every hyperedge, which can be
* expensive. Instead, it computes only the weighted cost of each edge. If the
* individual feature values are requested, the feature functions are replayed
@@ -59,10 +60,10 @@ import org.apache.joshua.decoder.segment_file.Sentence;
* a generic way by passing an {@link Accumulator} object to the compute()
* function. During decoding, the accumulator simply sums weighted features in a
* scalar. During k-best extraction, when individual feature values are needed,
- * a {@link FeatureAccumulator} is used to retain the individual values.
+ * a {@link FeatureAccumulator} is used to retain the individual values.</p>
*
- * @author Matt Post <po...@cs.jhu.edu>
- * @author Juri Ganitkevich <ju...@cs.jhu.edu>
+ * @author Matt Post post@cs.jhu.edu
+ * @author Juri Ganitkevich juri@cs.jhu.edu
*/
public abstract class FeatureFunction {
@@ -135,22 +136,23 @@ public abstract class FeatureFunction {
/**
* This is the main function for defining feature values. The implementor
- * should compute all the features along the hyperedge, calling acc.put(name,
- * value) for each feature. It then returns the newly-computed dynamic
+ * should compute all the features along the hyperedge, calling
+ * {@link org.apache.joshua.decoder.ff.FeatureFunction.Accumulator#add(String, float)}
+ * for each feature. It then returns the newly-computed dynamic
* programming state for this feature (for example, for the
- * {@link LanguageModelFF} feature, this returns the new language model
+ * {@link org.apache.joshua.decoder.ff.lm.LanguageModelFF} feature, this returns the new language model
* context). For stateless features, this value is null.
*
* Note that the accumulator accumulates *unweighted* feature values. The
* feature vector is multiplied times the weight vector later on.
*
- * @param rule
- * @param tailNodes
- * @param i
- * @param j
- * @param sourcePath
- * @param sentID
- * @param acc
+ * @param rule {@link org.apache.joshua.decoder.ff.tm.Rule} to be utilized within computation
+ * @param tailNodes {@link java.util.List} of {@link org.apache.joshua.decoder.hypergraph.HGNode} tail nodes
+ * @param i todo
+ * @param j todo
+ * @param sourcePath information about a path taken through the source {@link org.apache.joshua.lattice.Lattice}
+ * @param sentence {@link org.apache.joshua.lattice.Lattice} input
+ * @param acc {@link org.apache.joshua.decoder.ff.FeatureFunction.Accumulator} object permitting generalization of feature computation
* @return the new dynamic programming state (null for stateless features)
*/
public abstract DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j,
@@ -160,12 +162,12 @@ public abstract class FeatureFunction {
* Feature functions must overrided this. StatefulFF and StatelessFF provide
* reasonable defaults since most features do not fire on the goal node.
*
- * @param tailNode
- * @param i
- * @param j
- * @param sourcePath
- * @param sentID
- * @param acc
+ * @param tailNode single {@link org.apache.joshua.decoder.hypergraph.HGNode} representing tail node
+ * @param i todo
+ * @param j todo
+ * @param sourcePath information about a path taken through the source {@link org.apache.joshua.lattice.Lattice}
+ * @param sentence {@link org.apache.joshua.lattice.Lattice} input
+ * @param acc {@link org.apache.joshua.decoder.ff.FeatureFunction.Accumulator} object permitting generalization of feature computation
* @return the DPState (null if none)
*/
public abstract DPState computeFinal(HGNode tailNode, int i, int j, SourcePath sourcePath,
@@ -181,12 +183,12 @@ public abstract class FeatureFunction {
* incorporate the feature weights. This function is used in the kbest
* extraction code but could also be used in computing the cost.
*
- * @param rule
- * @param tailNodes
- * @param i
- * @param j
- * @param sourcePath
- * @param sentID
+ * @param rule {@link org.apache.joshua.decoder.ff.tm.Rule} to be utilized within computation
+ * @param tailNodes {@link java.util.List} of {@link org.apache.joshua.decoder.hypergraph.HGNode} tail nodes
+ * @param i todo
+ * @param j todo
+ * @param sourcePath information about a path taken through the source {@link org.apache.joshua.lattice.Lattice}
+ * @param sentence {@link org.apache.joshua.lattice.Lattice} input
* @return an *unweighted* feature delta
*/
public final FeatureVector computeFeatures(Rule rule, List<HGNode> tailNodes, int i, int j,
@@ -203,11 +205,11 @@ public abstract class FeatureFunction {
* return the *weighted* cost of applying the feature. Provided for backward
* compatibility.
*
- * @param tailNode
- * @param i
- * @param j
- * @param sourcePath
- * @param sentID
+ * @param tailNode single {@link org.apache.joshua.decoder.hypergraph.HGNode} representing tail node
+ * @param i todo
+ * @param j todo
+ * @param sourcePath information about a path taken through the source {@link org.apache.joshua.lattice.Lattice}
+ * @param sentence {@link org.apache.joshua.lattice.Lattice} input
* @return a *weighted* feature cost
*/
public final float computeFinalCost(HGNode tailNode, int i, int j, SourcePath sourcePath,
@@ -222,12 +224,12 @@ public abstract class FeatureFunction {
* Returns the *unweighted* feature delta for the final transition (e.g., for
* the language model feature function). Provided for backward compatibility.
*
- * @param tailNode
- * @param i
- * @param j
- * @param sourcePath
- * @param sentID
- * @return
+ * @param tailNode single {@link org.apache.joshua.decoder.hypergraph.HGNode} representing tail node
+ * @param i todo
+ * @param j todo
+ * @param sourcePath information about a path taken through the source {@link org.apache.joshua.lattice.Lattice}
+ * @param sentence {@link org.apache.joshua.lattice.Lattice} input
+ * @return an *unweighted* feature vector
*/
public final FeatureVector computeFinalFeatures(HGNode tailNode, int i, int j,
SourcePath sourcePath, Sentence sentence) {
@@ -247,6 +249,8 @@ public abstract class FeatureFunction {
* sorting. Later, the real cost of this feature function is called via
* compute();
*
+ * @param rule {@link org.apache.joshua.decoder.ff.tm.Rule} to be utilized within computation
+ * @param sentence {@link org.apache.joshua.lattice.Lattice} input
* @return the *weighted* cost of applying the feature.
*/
public abstract float estimateCost(Rule rule, Sentence sentence);
@@ -257,9 +261,9 @@ public abstract class FeatureFunction {
* score but is used in pruning decisions. Stateless features return 0.0f by
* default, but Stateful features might want to override this.
*
- * @param rule
- * @param state
- * @param sentence
+ * @param rule {@link org.apache.joshua.decoder.ff.tm.Rule} to be utilized within computation
+ * @param state todo
+ * @param sentence {@link org.apache.joshua.lattice.Lattice} input
* @return the *weighted* future cost estimate of applying this rule in
* context.
*/
@@ -271,7 +275,7 @@ public abstract class FeatureFunction {
* Any key without a value is added with an empty string as value Multiple values for the same key
* are not parsed. The first one is used.
*
- * @param rawArgs A string with the raw arguments and their names
+ * @param args A string with the raw arguments and their names
* @return A hash with the keys and the values of the string
*/
public static HashMap<String, String> parseArgs(String[] args) {
@@ -306,7 +310,11 @@ public abstract class FeatureFunction {
/**
* It is used when initializing translation grammars (for
* pruning purpose, and to get stateless logP for each rule).
- * This is also required to sort the rules (required by Cube-pruning).
+ * This is also required to sort the rules (required by Cube-pruning).
+ *
+ * @param rule {@link org.apache.joshua.decoder.ff.tm.Rule} to be utilized within computation
+ * @param sentID associated ID
+ * @return double value representing LogP
*/
public abstract double estimateLogP(Rule rule, int sentID);
@@ -318,7 +326,6 @@ public abstract class FeatureFunction {
* sum (for decoding). FeatureAccumulator records the named feature values
* (for k-best extraction).
*/
-
public interface Accumulator {
public void add(String name, float value);
public void add(int id, float value);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/FeatureVector.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/FeatureVector.java b/src/main/java/org/apache/joshua/decoder/ff/FeatureVector.java
index 65ed077..778997e 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/FeatureVector.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/FeatureVector.java
@@ -35,9 +35,11 @@ import java.util.Set;
* queries each of them for their sparse features via {@link registerDenseFeatures}. Those features
* returned by each decoder are then *removed* from the sparse feature hash and placed in the dense
* feature array. Therefore, when a feature registers a dense feature, it should take care to
- * query either {@link getDense()} or {@link getSparse} when asking for the feature values later on.
+ * query either {@link org.apache.joshua.decoder.ff.FeatureVector#getDense(int)} or
+ * {@link org.apache.joshua.decoder.ff.FeatureVector#getSparse(String)} when asking for the feature
+ * values later on.
*
- * @author Matt Post <po...@cs.jhu.edu>
+ * @author Matt Post post@cs.jhu.edu
*/
public class FeatureVector {
@@ -75,8 +77,8 @@ public class FeatureVector {
* **IMPORTANT** The feature values are inverted, for historical reasons, which leads to a lot
* of confusion. They have to be inverted here and when the score is actually computed. They
* are inverted here (which is used to build the feature vector representation of a rule's dense
- * features) and in {@link BilingualRule::estimateRuleCost()}, where the rule's precomputable
- * (weighted) score is cached.
+ * features) and in {@link org.apache.joshua.decoder.ff.tm.BilingualRule#estimateRuleCost(java.util.List)}
+ * , where the rule's precomputable (weighted) score is cached.
*
* @param featureString, the string of labeled and unlabeled features (probably straight from the
* grammar text file)
@@ -138,8 +140,7 @@ public class FeatureVector {
* can infer them all). This *must* be called by every feature function wishing to register
* dense features!
*
- * @param names
- * @return
+ * @param featureFunctions {@link java.util.ArrayList} of {@link org.apache.joshua.decoder.ff.FeatureFunction}'s
*/
public void registerDenseFeatures(ArrayList<FeatureFunction> featureFunctions) {
for (FeatureFunction feature: featureFunctions) {
@@ -181,6 +182,8 @@ public class FeatureVector {
* Subtracts the weights in the other feature vector from this one. Note that this is not set
* subtraction; keys found in the other FeatureVector but not in this one will be initialized with
* a value of 0.0f before subtraction.
+ *
+ * @param other another {@link org.apache.joshua.decoder.ff.FeatureVector} from which to subtract its score
*/
public void subtract(FeatureVector other) {
for (int i = 0; i < denseFeatures.size(); i++)
@@ -195,6 +198,8 @@ public class FeatureVector {
/**
* Adds the weights in the other feature vector to this one. This is set union, with values shared
* between the two being summed.
+ *
+ * @param other another {@link org.apache.joshua.decoder.ff.FeatureVector} from which to add its score
*/
public void add(FeatureVector other) {
while (denseFeatures.size() < other.denseFeatures.size())
@@ -214,6 +219,8 @@ public class FeatureVector {
/**
* Return the weight of a feature by name, after checking to determine if it is sparse or dense.
*
+ * @param feature String name of some feature
+ * @return the feature's weight
*/
public float getWeight(String feature) {
for (int i = 0; i < DENSE_FEATURE_NAMES.size(); i++) {
@@ -227,7 +234,7 @@ public class FeatureVector {
/**
* Return the weight of a sparse feature, indexed by its name.
*
- * @param feature
+ * @param feature String name of some feature
* @return the sparse feature's weight, or 0 if not found.
*/
public float getSparse(String feature) {
@@ -244,7 +251,7 @@ public class FeatureVector {
* Return the weight of a dense feature, indexed by its feature index, or 0.0f, if the feature
* is not found. In other words, this is a safe way to query the dense feature vector.
*
- * @param id
+ * @param id int representing of some dense feature
* @return the dense feature's value, or 0 if not found.
*/
public float getDense(int id) {
@@ -267,8 +274,8 @@ public class FeatureVector {
* Set the value of a feature. We need to first determine whether the feature is a dense or
* sparse one, then set accordingly.
*
- * @param feature
- * @param value
+ * @param feature String name of some feature
+ * @param value float value to set to the feature with the associated name
*/
public void set(String feature, float value) {
for (int i = 0; i < DENSE_FEATURE_NAMES.size(); i++) {
@@ -293,6 +300,9 @@ public class FeatureVector {
/**
* Computes the inner product between this feature vector and another one.
+ *
+ * @param other a {@link org.apache.joshua.decoder.ff.FeatureVector} with which to compute the inner product
+ * @return float value representing the computation
*/
public float innerProduct(FeatureVector other) {
float cost = 0.0f;
@@ -313,6 +323,8 @@ public class FeatureVector {
/***
* Moses distinguishes sparse features as those containing an underscore, so we have to fake it
* to be compatible with their tuners.
+ *
+ * @return trimmed Moses output string
*/
public String mosesString() {
StringBuilder outputString = new StringBuilder();
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java b/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java
index 0d0e0f7..69584dd 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java
@@ -39,7 +39,7 @@ import org.apache.joshua.decoder.chart_parser.SourcePath;
* "mark-oovs") . These rules are all stored in a grammar whose owner is "oov". The OOV feature
* function template then fires the "OOVPenalty" feature whenever it is asked to score an OOV rule.
*
- * @author Matt Post <po...@cs.jhu.edu>
+ * @author Matt Post post@cs.jhu.edu
*/
public class OOVPenalty extends StatelessFF {
private int ownerID = -1;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/PhraseModel.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/PhraseModel.java b/src/main/java/org/apache/joshua/decoder/ff/PhraseModel.java
index 62792dc..3eb0c2e 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/PhraseModel.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/PhraseModel.java
@@ -37,8 +37,8 @@ import org.apache.joshua.decoder.segment_file.Sentence;
* queries the weights for the set of features that are active for this grammar, storing them in an
* array.
*
- * @author Matt Post <po...@cs.jhu.edu>
- * @author Zhifei Li <zh...@gmail.com>
+ * @author Matt Post post@cs.jhu.edu
+ * @author Zhifei Li zhifei.work@gmail.com
*/
public class PhraseModel extends StatelessFF {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/SourcePathFF.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/SourcePathFF.java b/src/main/java/org/apache/joshua/decoder/ff/SourcePathFF.java
index d757303..d529559 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/SourcePathFF.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/SourcePathFF.java
@@ -32,8 +32,8 @@ import org.apache.joshua.decoder.segment_file.Sentence;
* This feature returns the scored path through the source lattice, which is recorded in a
* SourcePath object.
*
- * @author Chris Dyer <re...@umd.edu>
- * @author Matt Post <po...@cs.jhu.edu>
+ * @author Chris Dyer redpony@umd.edu
+ * @author Matt Post post@cs.jhu.edu
*/
public final class SourcePathFF extends StatelessFF {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/StatefulFF.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/StatefulFF.java b/src/main/java/org/apache/joshua/decoder/ff/StatefulFF.java
index 626eb3c..4678902 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/StatefulFF.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/StatefulFF.java
@@ -35,8 +35,8 @@ import org.apache.joshua.decoder.segment_file.Sentence;
* state-contributing objects in each HGNode. State can no longer be shared among different feature
* functions.
*
- * @author Matt Post <po...@cs.jhu.edu>
- * @author Juri Ganitkevich <ju...@cs.jhu.edu>
+ * @author Matt Post post@cs.jhu.edu
+ * @author Juri Ganitkevich juri@cs.jhu.edu
*/
public abstract class StatefulFF extends FeatureFunction {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/StatelessFF.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/StatelessFF.java b/src/main/java/org/apache/joshua/decoder/ff/StatelessFF.java
index 19f7050..e473c37 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/StatelessFF.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/StatelessFF.java
@@ -31,8 +31,8 @@ import org.apache.joshua.decoder.segment_file.Sentence;
* Stateless feature functions do not contribute any state. You need not implement this class to
* create a stateless feature function, but it provides a few convenience functions.
*
- * @author Matt Post <po...@cs.jhu.edu>
- * @author Juri Ganitkevich <ju...@cs.jhu.edu>
+ * @author Matt Post post@cs.jhu.edu
+ * @author Juri Ganitkevich juri@cs.jhu.edu
*/
public abstract class StatelessFF extends FeatureFunction {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/TargetBigram.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/TargetBigram.java b/src/main/java/org/apache/joshua/decoder/ff/TargetBigram.java
index db70509..9e1b06c 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/TargetBigram.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/TargetBigram.java
@@ -180,7 +180,7 @@ public class TargetBigram extends StatefulFF {
}
/**
- * There is nothing to be done here, since <s> and </s> are included in rules that are part
+ * There is nothing to be done here, since <s> and </s> are included in rules that are part
* of the grammar. We simply return the DP state of the tail node.
*/
@Override
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/WordPenalty.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/WordPenalty.java b/src/main/java/org/apache/joshua/decoder/ff/WordPenalty.java
index 2a40088..62c889f 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/WordPenalty.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/WordPenalty.java
@@ -31,8 +31,8 @@ import org.apache.joshua.decoder.segment_file.Sentence;
/**
*
- * @author Zhifei Li <zh...@gmail.com>
- * @author Matt Post <po...@cs.jhu.edu>
+ * @author Zhifei Li zhifei.work@gmail.com
+ * @author Matt Post post@cs.jhu.edu
*/
public final class WordPenalty extends StatelessFF {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/FragmentLMFF.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/FragmentLMFF.java b/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/FragmentLMFF.java
index c935eba..f969396 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/FragmentLMFF.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/FragmentLMFF.java
@@ -38,33 +38,33 @@ import org.apache.joshua.decoder.hypergraph.HyperEdge;
import org.apache.joshua.decoder.segment_file.Sentence;
/**
- * Feature function that reads in a list of language model fragments and matches them against the
+ * <p>Feature function that reads in a list of language model fragments and matches them against the
* hypergraph. This allows for language model fragment "glue" features, which fire when LM fragments
* (supplied as input) are assembled. These LM fragments are presumably useful in ensuring
- * grammaticality and can be independent of the translation model fragments.
+ * grammaticality and can be independent of the translation model fragments.</p>
*
- * Usage: in the Joshua Configuration file, put
+ * <p>Usage: in the Joshua Configuration file, put</p>
*
- * feature-function = FragmentLM -lm LM_FRAGMENTS_FILE -map RULE_FRAGMENTS_MAP_FILE
+ * <code>feature-function = FragmentLM -lm LM_FRAGMENTS_FILE -map RULE_FRAGMENTS_MAP_FILE</code>
*
- * LM_FRAGMENTS_FILE is a pointer to a file containing a list of fragments that it should look for.
- * The format of the file is one fragment per line in PTB format, e.g.:
+ * <p>LM_FRAGMENTS_FILE is a pointer to a file containing a list of fragments that it should look for.
+ * The format of the file is one fragment per line in PTB format, e.g.:</p>
*
- * (S NP (VP (VBD said) SBAR) (. .))
+ * <code>(S NP (VP (VBD said) SBAR) (. .))</code>
*
- * RULE_FRAGMENTS_MAP_FILE points to a file that maps fragments to the flattened SCFG rule format
+ * <p>RULE_FRAGMENTS_MAP_FILE points to a file that maps fragments to the flattened SCFG rule format
* that Joshua uses. This mapping is necessary because Joshua's rules have been flattened, meaning
* that their internal structure has been removed, yet this structure is needed for matching LM
- * fragments. The format of the file is
+ * fragments. The format of the file is</p>
*
- * FRAGMENT ||| RULE-TARGET-SIDE
+ * <code>FRAGMENT ||| RULE-TARGET-SIDE</code>
*
- * for example,
+ * <p>for example,</p>
*
- * (S (NP (DT the) (NN man)) VP .) ||| the man [VP,1] [.,2] (SBAR (IN that) (S (NP (PRP he)) (VP
- * (VBD was) (VB done)))) ||| that he was done (VP (VBD said) SBAR) ||| said SBAR
+ * <code>(S (NP (DT the) (NN man)) VP .) ||| the man [VP,1] [.,2] (SBAR (IN that) (S (NP (PRP he)) (VP
+ * (VBD was) (VB done)))) ||| that he was done (VP (VBD said) SBAR) ||| said SBAR</code>
*
- * @author Matt Post <po...@cs.jhu.edu>
+ * @author Matt Post post@cs.jhu.edu
*/
public class FragmentLMFF extends StatefulFF {
@@ -104,9 +104,9 @@ public class FragmentLMFF extends StatefulFF {
private String fragmentLMFile = "";
/**
- * @param weights
- * @param name
- * @param stateComputer
+ * @param weights a {@link org.apache.joshua.decoder.ff.FeatureVector} with weights
+ * @param args arguments passed to the feature function
+ * @param config the {@link org.apache.joshua.decoder.JoshuaConfiguration}
*/
public FragmentLMFF(FeatureVector weights, String[] args, JoshuaConfiguration config) {
super(weights, "FragmentLMFF", args, config);
@@ -138,7 +138,7 @@ public class FragmentLMFF extends StatefulFF {
/**
* Add the provided fragment to the language model, subject to some filtering.
*
- * @param fragment
+ * @param fragment a {@link org.apache.joshua.decoder.ff.fragmentlm.Tree} fragment
*/
public void addLMFragment(Tree fragment) {
if (lmFragments == null)
@@ -169,6 +169,15 @@ public class FragmentLMFF extends StatefulFF {
* that fire are any LM fragments that match the fragment associated with the current rule. LM
* fragments may recurse over the tail nodes, following 1-best backpointers until the fragment
* either matches or fails.
+ *
+ * @param rule {@link org.apache.joshua.decoder.ff.tm.Rule} to be utilized within computation
+ * @param tailNodes {@link java.util.List} of {@link org.apache.joshua.decoder.hypergraph.HGNode} tail nodes
+ * @param i start index of the span
+ * @param j end index of the span
+ * @param sourcePath information about a path taken through the source {@link org.apache.joshua.lattice.Lattice}
+ * @param sentence {@link org.apache.joshua.lattice.Lattice} input
+ * @param acc {@link org.apache.joshua.decoder.ff.FeatureFunction.Accumulator} object permitting generalization of feature computation
+ * @return the new dynamic programming state (null for stateless features)
*/
@Override
public DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j, SourcePath sourcePath,
@@ -320,8 +329,8 @@ public class FragmentLMFF extends StatefulFF {
/**
* Maintains a state pointer used by KenLM to implement left-state minimization.
*
- * @author Matt Post <po...@cs.jhu.edu>
- * @author Juri Ganitkevitch <ju...@cs.jhu.edu>
+ * @author Matt Post post@cs.jhu.edu
+ * @author Juri Ganitkevitch juri@cs.jhu.edu
*/
public class FragmentState extends DPState {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/Tree.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/Tree.java b/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/Tree.java
index 02741e4..2ea9837 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/Tree.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/Tree.java
@@ -38,7 +38,7 @@ import org.apache.joshua.util.io.LineReader;
* enclosed in double-quotes when read in.
*
* @author Dan Klein
- * @author Matt Post <po...@cs.jhu.edu>
+ * @author Matt Post post@cs.jhu.edu
*/
public class Tree implements Serializable {
@@ -111,7 +111,7 @@ public class Tree implements Serializable {
/**
* Computes the depth-one rule rooted at this node. If the node has no children, null is returned.
*
- * @return
+ * @return string representation of the rule
*/
public String getRule() {
if (isLeaf()) {
@@ -237,6 +237,8 @@ public class Tree implements Serializable {
* A tree is lexicalized if it has terminal nodes among the leaves of its frontier. For normal
* trees this is always true since they bottom out in terminals, but for fragments, this may or
* may not be true.
+ *
+ * @return true if the tree is lexicalized
*/
public boolean isLexicalized() {
if (this.numLexicalItems < 0) {
@@ -313,7 +315,7 @@ public class Tree implements Serializable {
* Removes the quotes around terminals. Note that the resulting tree could not be read back
* in by this class, since unquoted leaves are interpreted as nonterminals.
*
- * @return
+ * @return unquoted string
*/
public String unquotedString() {
return toString().replaceAll("\"", "");
@@ -450,8 +452,8 @@ public class Tree implements Serializable {
* models. The arguments have to be passed in to preserve Java generics, even though this is only
* ever used with String versions.
*
- * @param sos presumably "<s>"
- * @param eos presumably "</s>"
+ * @param sos presumably "&lt;s&gt;"
+ * @param eos presumably "&lt;/s&gt;"
*/
public void insertSentenceMarkers(String sos, String eos) {
insertSentenceMarker(sos, 0);
@@ -465,8 +467,8 @@ public class Tree implements Serializable {
/**
*
- * @param symbol
- * @param pos
+ * @param symbol the marker to insert
+ * @param pos the position at which to insert
*/
private void insertSentenceMarker(String symbol, int pos) {
@@ -487,6 +489,9 @@ public class Tree implements Serializable {
/**
* This is a convenience function for producing a fragment from its string representation.
+ *
+ * @param ptbStr input string from which to produce a fragment
+ * @return the fragment
*/
public static Tree fromString(String ptbStr) {
PennTreeReader reader = new PennTreeReader(new StringReader(ptbStr));
@@ -530,14 +535,13 @@ public class Tree implements Serializable {
* recursively visit the derivation state objects, following the route through the hypergraph
* defined by them.
*
- * This function is like the other buildTree() function, but that one simply follows the best
- * incoming hyperedge for each node.
+ * This function is like Tree#buildTree(DerivationState, int),
+ * but that one simply follows the best incoming hyperedge for each node.
*
- * @param rule
- * @param tailNodes
- * @param derivation - should not be null
- * @param maxDepth
- * @return
+ * @param rule for which corresponding internal fragment can be used to initialize the tree
+ * @param derivationStates array of state objects
+ * @param maxDepth of route through the hypergraph
+ * @return the Tree
*/
public static Tree buildTree(Rule rule, DerivationState[] derivationStates, int maxDepth) {
Tree tree = getFragmentFromYield(rule.getEnglishWords());
@@ -602,19 +606,14 @@ public class Tree implements Serializable {
}
/**
- * Builds a tree from the kth-best derivation state. This is done by initializing the tree with
+ * <p>Builds a tree from the kth-best derivation state. This is done by initializing the tree with
* the internal fragment corresponding to the rule; this will be the top of the tree. We then
* recursively visit the derivation state objects, following the route through the hypergraph
- * defined by them.
- *
- * This function is like the other buildTree() function, but that one simply follows the best
- * incoming hyperedge for each node.
+ * defined by them.</p>
*
- * @param rule
- * @param tailNodes
- * @param derivation
- * @param maxDepth
- * @return
+ * @param derivationState array of state objects
+ * @param maxDepth of route through the hypergraph
+ * @return the Tree
*/
public static Tree buildTree(DerivationState derivationState, int maxDepth) {
Rule rule = derivationState.edge.getRule();
@@ -675,9 +674,10 @@ public class Tree implements Serializable {
* This could be implemented by using the other buildTree() function and using the 1-best
* DerivationState.
*
- * @param rule
- * @param tailNodes
- * @return
+ * @param rule {@link org.apache.joshua.decoder.ff.tm.Rule} to be used whilst building the tree
+ * @param tailNodes {@link java.util.List} of {@link org.apache.joshua.decoder.hypergraph.HGNode}'s
+ * @param maxDepth to go in the tree
+ * @return shallow clone of the Tree object
*/
public static Tree buildTree(Rule rule, List<HGNode> tailNodes, int maxDepth) {
Tree tree = getFragmentFromYield(rule.getEnglishWords());
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/Trees.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/Trees.java b/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/Trees.java
index 439ba96..d06388c 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/Trees.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/Trees.java
@@ -187,6 +187,9 @@ public class Trees {
* preterminals onto one line of tags and words. Additional complexities are that conjunctions
* (tag CC) are not collapsed in this way, and that the unlabeled outer brackets are collapsed
* onto the same line as the next bracket down.
+ *
+ * @param tree you wish to render and print
+ * @return a rendered String representation of the tree
*/
public static String render(Tree tree) {
StringBuilder sb = new StringBuilder();
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/lm/AbstractLM.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/lm/AbstractLM.java b/src/main/java/org/apache/joshua/decoder/ff/lm/AbstractLM.java
index 79560fd..e8225dc 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/lm/AbstractLM.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/lm/AbstractLM.java
@@ -18,11 +18,7 @@
*/
package org.apache.joshua.decoder.ff.lm;
-import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.decoder.Support;
-import org.apache.joshua.corpus.SymbolTable;
-
-
import java.util.List;
/**
@@ -31,7 +27,7 @@ import java.util.List;
* methods are declared final, in an attempt to limit what subclasses
* may be defined.
*
- * @author Zhifei Li, <zh...@gmail.com>
+ * @author Zhifei Li, zhifei.work@gmail.com
* @version $LastChangedDate: 2009-12-30 10:10:38 -0600 (Wed, 30 Dec 2009) $
*/
public abstract class AbstractLM extends DefaultNGramLanguageModel {
@@ -40,7 +36,7 @@ public abstract class AbstractLM extends DefaultNGramLanguageModel {
super(symbolTable, order);
}
-
+ @SuppressWarnings("null")
public final double sentenceLogProbability(
List<Integer> sentence, int order, int startIndex
) {
@@ -48,12 +44,10 @@ public abstract class AbstractLM extends DefaultNGramLanguageModel {
return (Double) null;
}
-
public final float ngramLogProbability(int[] ngram) {
return super.ngramLogProbability(ngram);
}
-
public final float ngramLogProbability(int[] ngram, int order) {
if (ngram.length > order) {
throw new RuntimeException("ngram length is greather than the max order");
@@ -77,11 +71,6 @@ public abstract class AbstractLM extends DefaultNGramLanguageModel {
protected abstract float ngramLogProbability_helper(int[] ngram, int order);
-
- /**
- * @deprecated this function is much slower than the int[]
- * version
- */
@Deprecated
public final double logProbOfBackoffState(List<Integer> ngram, int order, int qtyAdditionalBackoffWeight) {
return logProbabilityOfBackoffState(
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/lm/DefaultNGramLanguageModel.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/lm/DefaultNGramLanguageModel.java b/src/main/java/org/apache/joshua/decoder/ff/lm/DefaultNGramLanguageModel.java
index 4ff8f59..759479f 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/lm/DefaultNGramLanguageModel.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/lm/DefaultNGramLanguageModel.java
@@ -27,12 +27,12 @@ import org.apache.joshua.corpus.Vocabulary;
/**
* This class provides a default implementation for the Equivalent LM State optimization (namely,
* don't back off anywhere). It also provides some default implementations for more general
- * functions on the interface to fall back to more specific ones (e.g. from ArrayList<Integer> to
- * int[]) and a default implementation for sentenceLogProbability which enumerates the n-grams and
- * calls calls ngramLogProbability for each of them.
+ * functions on the interface to fall back to more specific ones (e.g. from {@link java.util.ArrayList}
+ * of {@link java.lang.Integer}'s to int[]) and a default implementation for sentenceLogProbability
+ * which enumerates the n-grams and calls ngramLogProbability for each of them.
*
- * @author Zhifei Li, <zh...@gmail.com>
- * @author wren ng thornton <wr...@users.sourceforge.net>
+ * @author Zhifei Li, zhifei.work@gmail.com
+ * @author wren ng thornton wren@users.sourceforge.net
*/
public abstract class DefaultNGramLanguageModel implements NGramLanguageModel {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java b/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java
index e012421..2e44396 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java
@@ -29,7 +29,7 @@ import org.apache.joshua.decoder.ff.state_maintenance.KenLMState;
* state by itself and just passes in the ngrams for scoring.
*
* @author Kenneth Heafield
- * @author Matt Post <po...@cs.jhu.edu>
+ * @author Matt Post post@cs.jhu.edu
*/
public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
@@ -91,6 +91,7 @@ public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
/**
* Constructor if order is not known.
* Order will be inferred from the model.
+ * @param file_name string path to an input file
*/
public KenLM(String file_name) {
pointer = construct(file_name);
@@ -116,6 +117,8 @@ public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
/**
* Query for n-gram probability using strings.
+ * @param words a string array of words
+ * @return float value denoting probability
*/
public float prob(String[] words) {
return probForString(pointer, words);
@@ -128,14 +131,15 @@ public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
/**
* This function is the bridge to the interface in kenlm/lm/left.hh, which has KenLM score the
- * whole rule. It takes a list of words and states retrieved from tail nodes (nonterminals in the
+ * whole rule. It takes an array of words and states retrieved from tail nodes (nonterminals in the
* rule). Nonterminals have a negative value so KenLM can distinguish them. The sentence number is
* needed so KenLM knows which memory pool to use. When finished, it returns the updated KenLM
* state and the LM probability incurred along this rule.
*
- * @param words
- * @param sentId
- * @return
+ * @param words array of words
+ * @param poolPointer pointer to the memory pool KenLM uses for the current sentence
+ * @return the updated {@link org.apache.joshua.decoder.ff.lm.KenLM.StateProbPair} e.g.
+ * KenLM state and the LM probability incurred along this rule
*/
public StateProbPair probRule(long[] words, long poolPointer) {
@@ -154,7 +158,7 @@ public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
* Public facing function that estimates the cost of a rule, which value is used for sorting
* rules during cube pruning.
*
- * @param words
+ * @param words array of words
* @return the estimated cost of the rule (the (partial) n-gram probabilities of all words in the rule)
*/
public float estimateRule(long[] words) {
@@ -170,6 +174,7 @@ public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
/**
* The start symbol for a KenLM is the Vocabulary.START_SYM.
+ * @return "&lt;s&gt;"
*/
public String getStartSymbol() {
return Vocabulary.START_SYM;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/lm/LanguageModelFF.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/lm/LanguageModelFF.java b/src/main/java/org/apache/joshua/decoder/ff/lm/LanguageModelFF.java
index 741fea6..a601d8c 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/lm/LanguageModelFF.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/lm/LanguageModelFF.java
@@ -45,14 +45,14 @@ import org.apache.joshua.decoder.segment_file.Sentence;
* This class performs the following:
* <ol>
* <li>Gets the additional LM score due to combinations of small items into larger ones by using
- * rules
- * <li>Gets the LM state
- * <li>Gets the left-side LM state estimation score
+ * rules</li>
+ * <li>Gets the LM state</li>
+ * <li>Gets the left-side LM state estimation score</li>
* </ol>
*
- * @author Matt Post <po...@cs.jhu.edu>
- * @author Juri Ganitkevitch <ju...@cs.jhu.edu>
- * @author Zhifei Li, <zh...@gmail.com>
+ * @author Matt Post post@cs.jhu.edu
+ * @author Juri Ganitkevitch juri@cs.jhu.edu
+ * @author Zhifei Li, zhifei.work@gmail.com
*/
public class LanguageModelFF extends StatefulFF {
@@ -65,13 +65,14 @@ public class LanguageModelFF extends StatefulFF {
* <ol>
* <li>We assume it is a backoff lm, and high-order ngram implies low-order ngram; absense of
* low-order ngram implies high-order ngram</li>
- * <li>For a ngram, existence of backoffweight => existence a probability Two ways of dealing with
+ * <li>For a ngram, existence of backoffweight =&gt; existence of a probability. Two ways of dealing with
* low counts:
* <ul>
* <li>SRILM: don't multiply zeros in for unknown words</li>
* <li>Pharaoh: cap at a minimum score exp(-10), including unknown words</li>
* </ul>
* </li>
+ * </ol>
*/
protected NGramLanguageModel languageModel;
@@ -90,7 +91,7 @@ public class LanguageModelFF extends StatefulFF {
/* Whether this is a class-based LM */
private boolean isClassLM;
private ClassMap classMap;
-
+
protected class ClassMap {
private final int OOV_id = Vocabulary.getUnknownId();
@@ -133,7 +134,7 @@ public class LanguageModelFF extends StatefulFF {
this.type = parsedArgs.get("lm_type");
this.ngramOrder = Integer.parseInt(parsedArgs.get("lm_order"));
this.path = parsedArgs.get("lm_file");
-
+
if (parsedArgs.containsKey("class_map"))
try {
this.isClassLM = true;
@@ -145,14 +146,14 @@ public class LanguageModelFF extends StatefulFF {
// The dense feature initialization hasn't happened yet, so we have to retrieve this as sparse
this.weight = weights.getSparse(name);
-
+
initializeLM();
}
-
+
@Override
public ArrayList<String> reportDenseFeatures(int index) {
denseFeatureIndex = index;
-
+
ArrayList<String> names = new ArrayList<String>();
names.add(name);
return names;
@@ -160,15 +161,11 @@ public class LanguageModelFF extends StatefulFF {
/**
* Initializes the underlying language model.
- *
- * @param config
- * @param type
- * @param path
*/
protected void initializeLM() {
if (type.equals("kenlm")) {
this.languageModel = new KenLM(ngramOrder, path);
-
+
} else if (type.equals("berkeleylm")) {
this.languageModel = new LMGrammarBerkeley(ngramOrder, path);
@@ -180,14 +177,14 @@ public class LanguageModelFF extends StatefulFF {
Vocabulary.registerLanguageModel(this.languageModel);
Vocabulary.id(config.default_non_terminal);
-
+
startSymbolId = Vocabulary.id(Vocabulary.START_SYM);
}
public NGramLanguageModel getLM() {
return this.languageModel;
}
-
+
public String logString() {
if (languageModel != null)
return String.format("%s, order %d (weight %.3f)", name, languageModel.getOrder(), weight);
@@ -220,9 +217,9 @@ public class LanguageModelFF extends StatefulFF {
newState = computeTransition(rule.getEnglish(), tailNodes, acc);
}
}
-
+
}
-
+
return newState;
}
@@ -230,15 +227,19 @@ public class LanguageModelFF extends StatefulFF {
* Input sentences can be tagged with information specific to the language model. This looks for
* such annotations by following a word's alignments back to the source words, checking for
* annotations, and replacing the surface word if such annotations are found.
- *
+ * @param rule the {@link org.apache.joshua.decoder.ff.tm.Rule} to use
+ * @param begin start of the source span covered by the rule
+ * @param end end of the source span covered by the rule
+ * @param sentence {@link org.apache.joshua.lattice.Lattice} input
+ * @return a copy of the rule's target-side tokens, with words replaced by any "class" annotations found on their aligned source words
*/
protected int[] getTags(Rule rule, int begin, int end, Sentence sentence) {
/* Very important to make a copy here, so the original rule is not modified */
int[] tokens = Arrays.copyOf(rule.getEnglish(), rule.getEnglish().length);
byte[] alignments = rule.getAlignment();
-// System.err.println(String.format("getTags() %s", rule.getRuleString()));
-
+ // System.err.println(String.format("getTags() %s", rule.getRuleString()));
+
/* For each target-side token, project it to each of its source-language alignments. If any of those
* are annotated, take the first annotation and quit.
*/
@@ -249,8 +250,8 @@ public class LanguageModelFF extends StatefulFF {
if (alignments[j] == i) {
String annotation = sentence.getAnnotation((int)alignments[i] + begin, "class");
if (annotation != null) {
-// System.err.println(String.format(" word %d source %d abs %d annotation %d/%s",
-// i, alignments[i], alignments[i] + begin, annotation, Vocabulary.word(annotation)));
+ // System.err.println(String.format(" word %d source %d abs %d annotation %d/%s",
+ // i, alignments[i], alignments[i] + begin, annotation, Vocabulary.word(annotation)));
tokens[i] = Vocabulary.id(annotation);
break;
}
@@ -259,22 +260,23 @@ public class LanguageModelFF extends StatefulFF {
}
}
}
-
+
return tokens;
}
-
+
/**
* Sets the class map if this is a class LM
- * @param classMap
- * @throws IOException
+ * @param fileName a string path to a file
+ * @throws IOException if there is an error reading the input file
*/
public void setClassMap(String fileName) throws IOException {
this.classMap = new ClassMap(fileName);
}
-
-
+
/**
* Replace each word in a rule with the target side classes.
+ * @param rule {@link org.apache.joshua.decoder.ff.tm.Rule} to use when obtaining tokens
+ * @return int[] of tokens
*/
protected int[] getClasses(Rule rule) {
if (this.classMap == null) {
@@ -371,7 +373,7 @@ public class LanguageModelFF extends StatefulFF {
int ccount = 0;
float transitionLogP = 0.0f;
int[] left_context = null;
-
+
for (int c = 0; c < enWords.length; c++) {
int curID = enWords[c];
@@ -392,7 +394,7 @@ public class LanguageModelFF extends StatefulFF {
if (ccount == this.ngramOrder) {
// Compute the current word probability, and remove it.
float prob = this.languageModel.ngramLogProbability(current, this.ngramOrder);
-// System.err.println(String.format("-> prob(%s) = %f", Vocabulary.getWords(current), prob));
+ // System.err.println(String.format("-> prob(%s) = %f", Vocabulary.getWords(current), prob));
transitionLogP += prob;
System.arraycopy(current, 1, shadow, 0, this.ngramOrder - 1);
int[] tmp = current;
@@ -411,7 +413,7 @@ public class LanguageModelFF extends StatefulFF {
if (ccount == this.ngramOrder) {
// Compute the current word probability, and remove it.s
float prob = this.languageModel.ngramLogProbability(current, this.ngramOrder);
-// System.err.println(String.format("-> prob(%s) = %f", Vocabulary.getWords(current), prob));
+ // System.err.println(String.format("-> prob(%s) = %f", Vocabulary.getWords(current), prob));
transitionLogP += prob;
System.arraycopy(current, 1, shadow, 0, this.ngramOrder - 1);
int[] tmp = current;
@@ -421,7 +423,7 @@ public class LanguageModelFF extends StatefulFF {
}
}
}
-// acc.add(name, transitionLogP);
+ // acc.add(name, transitionLogP);
acc.add(denseFeatureIndex, transitionLogP);
if (left_context != null) {
@@ -443,8 +445,8 @@ public class LanguageModelFF extends StatefulFF {
*/
private NgramDPState computeFinalTransition(NgramDPState state, Accumulator acc) {
-// System.err.println(String.format("LanguageModel::computeFinalTransition()"));
-
+ // System.err.println(String.format("LanguageModel::computeFinalTransition()"));
+
float res = 0.0f;
LinkedList<Integer> currentNgram = new LinkedList<Integer>();
int[] leftContext = state.getLeftLMStateWords();
@@ -464,14 +466,14 @@ public class LanguageModelFF extends StatefulFF {
}
// Tell the accumulator
-// acc.add(name, res);
+ // acc.add(name, res);
acc.add(denseFeatureIndex, res);
// State is the same
return new NgramDPState(leftContext, rightContext);
}
-
+
/**
* Compatibility method for {@link #scoreChunkLogP(int[], boolean, boolean)}
*/
@@ -479,7 +481,7 @@ public class LanguageModelFF extends StatefulFF {
boolean skipStart) {
return scoreChunkLogP(Ints.toArray(words), considerIncompleteNgrams, skipStart);
}
-
+
/**
* This function is basically a wrapper for NGramLanguageModel::sentenceLogProbability(). It
* computes the probability of a phrase ("chunk"), using lower-order n-grams for the first n-1
@@ -508,7 +510,7 @@ public class LanguageModelFF extends StatefulFF {
return score;
}
-
+
/**
* Public method to set LM_INDEX back to 0.
* Required if multiple instances of the JoshuaDecoder live in the same JVM.
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/lm/NGramLanguageModel.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/lm/NGramLanguageModel.java b/src/main/java/org/apache/joshua/decoder/ff/lm/NGramLanguageModel.java
index 4043171..882424b 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/lm/NGramLanguageModel.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/lm/NGramLanguageModel.java
@@ -22,10 +22,10 @@ package org.apache.joshua.decoder.ff.lm;
* An interface for new language models to implement. An object of this type is passed to
* LanguageModelFF, which will handle all the dynamic programming and state maintenance.
*
- * @author wren ng thornton <wr...@users.sourceforge.net>
- * @author Zhifei Li, <zh...@gmail.com>
- * @author Matt Post <po...@cs.jhu.edu>
- * @author Juri Ganitkevitch <ju...@cs.jhu.edu>
+ * @author wren ng thornton wren@users.sourceforge.net
+ * @author Zhifei Li, zhifei.work@gmail.com
+ * @author Matt Post post@cs.jhu.edu
+ * @author Juri Ganitkevitch juri@cs.jhu.edu
*/
public interface NGramLanguageModel {
@@ -41,12 +41,12 @@ public interface NGramLanguageModel {
/**
* Language models may have their own private vocabulary mapping strings to integers; for example,
* if they make use of a compile format (as KenLM and BerkeleyLM do). This mapping is likely
- * different from the global mapping containing in joshua.corpus.Vocabulary, which is used to
+ * different from the global mapping containing in {@link org.apache.joshua.corpus.Vocabulary}, which is used to
* convert the input string and grammars. This function is used to tell the language model what
* the global mapping is, so that the language model can convert it into its own private mapping.
*
- * @param word
- * @param id
+ * @param token string token to be registered
+ * @param id to associate with this word
* @return Whether any collisions were detected.
*/
boolean registerWord(String token, int id);
@@ -63,9 +63,9 @@ public interface NGramLanguageModel {
/**
* Compute the probability of a single word given its context.
*
- * @param ngram
- * @param order
- * @return
+ * @param ngram the NGram for which we wish to compute the probability
+ * @param order NGram order/context
+ * @return float representing the probability
*/
float ngramLogProbability(int[] ngram, int order);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java b/src/main/java/org/apache/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
index 9bf4cc7..6869def 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
@@ -37,8 +37,8 @@ import org.apache.joshua.decoder.segment_file.Sentence;
/**
* Wrapper for KenLM LMs with left-state minimization. We inherit from the regular
*
- * @author Matt Post <po...@cs.jhu.edu>
- * @author Juri Ganitkevitch <ju...@cs.jhu.edu>
+ * @author Matt Post post@cs.jhu.edu
+ * @author Juri Ganitkevitch juri@cs.jhu.edu
*/
public class StateMinimizingLanguageModel extends LanguageModelFF {
@@ -66,10 +66,6 @@ public class StateMinimizingLanguageModel extends LanguageModelFF {
/**
* Initializes the underlying language model.
- *
- * @param config
- * @param type
- * @param path
*/
@Override
public void initializeLM() {
@@ -162,10 +158,10 @@ public class StateMinimizingLanguageModel extends LanguageModelFF {
/**
* Destroys the pool created to allocate state for this sentence. Called from the
- * {@link joshua.decoder.Translation} class after outputting the sentence or k-best list. Hosting
+ * {@link org.apache.joshua.decoder.Translation} class after outputting the sentence or k-best list. Hosting
* this map here in KenLMFF statically allows pools to be shared across KenLM instances.
*
- * @param sentId
+ * @param sentId a key in the poolmap table to destroy
*/
public void destroyPool(int sentId) {
if (poolMap.containsKey(sentId))
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/lm/bloomfilter_lm/BloomFilterLanguageModel.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/lm/bloomfilter_lm/BloomFilterLanguageModel.java b/src/main/java/org/apache/joshua/decoder/ff/lm/bloomfilter_lm/BloomFilterLanguageModel.java
index 21dd819..958ea16 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/lm/bloomfilter_lm/BloomFilterLanguageModel.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/lm/bloomfilter_lm/BloomFilterLanguageModel.java
@@ -115,6 +115,7 @@ public class BloomFilterLanguageModel extends DefaultNGramLanguageModel implemen
*
* @param order the order of the language model
* @param filename path to the file where the language model is stored
+ * @throws IOException if the bloom filter language model cannot be rebuilt from the input file
*/
public BloomFilterLanguageModel(int order, String filename) throws IOException {
super(order);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/lm/bloomfilter_lm/package-info.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/lm/bloomfilter_lm/package-info.java b/src/main/java/org/apache/joshua/decoder/ff/lm/bloomfilter_lm/package-info.java
new file mode 100644
index 0000000..19fa695
--- /dev/null
+++ b/src/main/java/org/apache/joshua/decoder/ff/lm/bloomfilter_lm/package-info.java
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * Provides an implementation of a bloom filter language model, and
+ * an associated implementation of the language model feature function typically used in
+ * hierarchical phrase-based decoding for statistical machine translation.
+ */
+package org.apache.joshua.decoder.ff.lm.bloomfilter_lm;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/lm/bloomfilter_lm/package.html
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/lm/bloomfilter_lm/package.html b/src/main/java/org/apache/joshua/decoder/ff/lm/bloomfilter_lm/package.html
deleted file mode 100644
index 883594a..0000000
--- a/src/main/java/org/apache/joshua/decoder/ff/lm/bloomfilter_lm/package.html
+++ /dev/null
@@ -1,19 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
-<html>
-<head></head>
-<body bgcolor="white">
-
-<!--
-##### THIS IS THE TEMPLATE FOR THE PACKAGE DOC COMMENTS. #####
-##### TYPE YOUR PACKAGE COMMENTS HERE. BEGIN WITH A #####
-##### ONE-SENTENCE SUMMARY STARTING WITH A VERB LIKE: #####
--->
-
-Provides an implementation of a bloom filter language model, and
-an associated implementation of the language model feature function typically used in
-hierarchical phrase-based decoding for statistical machine translation.
-
-<!-- Put @see and @since tags down here. -->
-
-</body>
-</html>
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/lm/buildin_lm/TrieLM.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/lm/buildin_lm/TrieLM.java b/src/main/java/org/apache/joshua/decoder/ff/lm/buildin_lm/TrieLM.java
index 654561c..8a7e786 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/lm/buildin_lm/TrieLM.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/lm/buildin_lm/TrieLM.java
@@ -30,9 +30,7 @@ import java.util.Scanner;
import java.util.logging.Level;
import java.util.logging.Logger;
-import org.apache.joshua.corpus.SymbolTable;
import org.apache.joshua.corpus.Vocabulary;
-import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.decoder.ff.lm.AbstractLM;
import org.apache.joshua.decoder.ff.lm.ArpaFile;
import org.apache.joshua.decoder.ff.lm.ArpaNgram;
@@ -94,8 +92,8 @@ public class TrieLM extends AbstractLM { //DefaultNGramLanguageModel {
/**
* Constructs a language model object from the specified ARPA file.
*
- * @param arpaFile
- * @throws FileNotFoundException
+ * @param arpaFile input ARPA file
+ * @throws FileNotFoundException if the input file cannot be located
*/
public TrieLM(ArpaFile arpaFile) throws FileNotFoundException {
super(arpaFile.getVocab().size(), arpaFile.getOrder());
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/lm/package-info.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/lm/package-info.java b/src/main/java/org/apache/joshua/decoder/ff/lm/package-info.java
new file mode 100644
index 0000000..22da71e
--- /dev/null
+++ b/src/main/java/org/apache/joshua/decoder/ff/lm/package-info.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/**
+ * <p>Provides abstraction and support for the language model
+ * feature function typically used in hierarchical phrase-based
+ * decoding for statistical machine translation.</p>
+ * <p>The classes contained within this directory are
+ * responsible for two tasks: implementing the feature function,
+ * and representing the language model itself. The class
+ * `LanguageModelFF` implements the feature function by extending
+ * the class `DefaultStatefulFF`. One of these is instantiated
+ * for each language model present in the decoder.</p>
+ * <p>The language models themselves are implemented as a
+ * combination of an interface (`NGramLanguageModel`), a default
+ * implementation (`DefaultNGramLanguageModel`), and an abstract
+ * implementation of the default (`AbstractLM`).</p>
+ *
+ * <pre>
+ * DefaultStatefulFF
+ * |- LanguageModelFF
+ *
+ * DefaultNGramLanguageModel implements interface NGramLanguageModel
+ * |- AbstractLM
+ * </pre>
+ */
+package org.apache.joshua.decoder.ff.lm;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/lm/package.html
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/lm/package.html b/src/main/java/org/apache/joshua/decoder/ff/lm/package.html
deleted file mode 100644
index b99a245..0000000
--- a/src/main/java/org/apache/joshua/decoder/ff/lm/package.html
+++ /dev/null
@@ -1,35 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
-<html>
-<head></head>
-<body bgcolor="white">
-
-<!--
-##### THIS IS THE TEMPLATE FOR THE PACKAGE DOC COMMENTS. #####
-##### TYPE YOUR PACKAGE COMMENTS HERE. BEGIN WITH A #####
-##### ONE-SENTENCE SUMMARY STARTING WITH A VERB LIKE: #####
--->
-
-Provides abstraction and support for the language model feature function typically used in
-hierarchical phrase-based decoding for statistical machine translation.
-
-The classes contained within this directory are responsible for two tasks: implementing the feature
-function, and representing the language model itself. The class `LanguageModelFF` implements the
-feature function by exending the class `DefaultStatefulFF`. One of these is instantiated for each
-language model present in the decoder.
-
-The language models themselves are implemented as a combination of an interface
-(`NGramLanguageModel`), a default implementation (`DefaultNgramLangaugeModel`), and an abstract
-implementation of the default (`AbstractLM`).
-
-<pre>
- DefaultStatefulFF
- |- LanguageModelFF
-
- DefaultNgramLanguageModel implements interface NGramLanguageModel
- |- AbstractLM
-</pre>
-
-<!-- Put @see and @since tags down here. -->
-
-</body>
-</html>
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/package-info.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/package-info.java b/src/main/java/org/apache/joshua/decoder/ff/package-info.java
new file mode 100644
index 0000000..b0af73e
--- /dev/null
+++ b/src/main/java/org/apache/joshua/decoder/ff/package-info.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/**
+ * <p>Provides an implementation of the linear feature functions
+ * typically used in hierarchical phrase-based decoding for
+ * statistical machine translation.</p>
+ * <p>The following is a note from Juri describing some of the
+ * functionality of the feature functions interfaces and default
+ * abstract classes.</p>
+ * <pre>
+ * The equality that I intended for is ff.transitionLogP() =
+ * ff.estimateLogP() + ff.reEstimateTransitionLogP(). The re-estimate
+ * fixes the estimate to be the true transition cost that takes into
+ * account the state. Before decoding the cost of applying a rule is
+ * estimated via estimateLogP() and yields the phrasal feature costs plus
+ * an LM estimate of the cost of the lexical portions of the rule.
+ * transitionLogP() takes rule and state and computes everything from
+ * scratch, whereas reEstimateTransitionLogP() adds in the cost of new
+ * n-grams that result from combining the rule with the LM states and
+ * subtracts out the cost of superfluous less-than-n-grams that were
+ * overridden by the updated cost calculation.
+ *
+ * Hope this helps.
+ * </pre>
+ */
+package org.apache.joshua.decoder.ff;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/package.html
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/package.html b/src/main/java/org/apache/joshua/decoder/ff/package.html
deleted file mode 100644
index b0aa63e..0000000
--- a/src/main/java/org/apache/joshua/decoder/ff/package.html
+++ /dev/null
@@ -1,37 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
-<html>
-<head></head>
-<body bgcolor="white">
-
-<!--
-##### THIS IS THE TEMPLATE FOR THE PACKAGE DOC COMMENTS. #####
-##### TYPE YOUR PACKAGE COMMENTS HERE. BEGIN WITH A #####
-##### ONE-SENTENCE SUMMARY STARTING WITH A VERB LIKE: #####
--->
-
-Provides an implementation of the linear feature functions typically used in
-hierarchical phrase-based decoding for statistical machine translation.
-
-The following is a note from Juri describing some of the functionality of the feature functions
-interfaces and default abstract classes.
-
-<pre>
-The equality that I intended for is ff.transitionLogP() =
-ff.estimateLogP() + ff.reEstimateTransitionLogP(). The re-estimate
-fixes the estimate to be the true transition cost that takes into
-account the state. Before decoding the cost of applying a rule is
-estimated via estimateLogP() and yields the phrasal feature costs plus
-an LM estimate of the cost of the lexical portions of the rule.
-transitionLogP() takes rule and state and computes everything from
-scratch, whereas reEstimateTransitionLogP() adds in the cost of new
-n-grams that result from combining the rule with the LM states and
-subtracts out the cost of superfluous less-than-n-grams that were
-overridden by the updated cost calculation.
-
-Hope this helps.
-</pre>
-
-<!-- Put @see and @since tags down here. -->
-
-</body>
-</html>
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/state_maintenance/DPState.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/state_maintenance/DPState.java b/src/main/java/org/apache/joshua/decoder/ff/state_maintenance/DPState.java
index bfc7533..e117fde 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/state_maintenance/DPState.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/state_maintenance/DPState.java
@@ -21,8 +21,8 @@ package org.apache.joshua.decoder.ff.state_maintenance;
/**
* Abstract class enforcing explicit implementation of the standard methods.
*
- * @author Zhifei Li, <zh...@gmail.com>
- * @author Juri Ganitkevitch, <ju...@cs.jhu.edu>
+ * @author Zhifei Li, zhifei.work@gmail.com
+ * @author Juri Ganitkevitch, juri@cs.jhu.edu
*/
public abstract class DPState {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/state_maintenance/KenLMState.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/state_maintenance/KenLMState.java b/src/main/java/org/apache/joshua/decoder/ff/state_maintenance/KenLMState.java
index d352383..4fdc631 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/state_maintenance/KenLMState.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/state_maintenance/KenLMState.java
@@ -21,8 +21,8 @@ package org.apache.joshua.decoder.ff.state_maintenance;
/**
* Maintains a state pointer used by KenLM to implement left-state minimization.
*
- * @author Matt Post <po...@cs.jhu.edu>
- * @author Juri Ganitkevitch <ju...@cs.jhu.edu>
+ * @author Matt Post post@cs.jhu.edu
+ * @author Juri Ganitkevitch juri@cs.jhu.edu
*/
public class KenLMState extends DPState {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/state_maintenance/NgramDPState.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/state_maintenance/NgramDPState.java b/src/main/java/org/apache/joshua/decoder/ff/state_maintenance/NgramDPState.java
index bf6e0a5..b269bd9 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/state_maintenance/NgramDPState.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/state_maintenance/NgramDPState.java
@@ -23,8 +23,8 @@ import java.util.Arrays;
import org.apache.joshua.corpus.Vocabulary;
/**
- * @author Zhifei Li, <zh...@gmail.com>
- * @author Juri Ganitkevitch, <ju...@cs.jhu.edu>
+ * @author Zhifei Li, zhifei.work@gmail.com
+ * @author Juri Ganitkevitch, juri@cs.jhu.edu
*/
public class NgramDPState extends DPState {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/tm/AbstractGrammar.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/AbstractGrammar.java b/src/main/java/org/apache/joshua/decoder/ff/tm/AbstractGrammar.java
index 188c2a9..a4ea2f5 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/AbstractGrammar.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/AbstractGrammar.java
@@ -44,7 +44,7 @@ import cern.colt.Arrays;
*
* @author Zhifei Li
* @author Lane Schwartz
- * @author Matt Post <post@cs.jhu.edu
+ * @author Matt Post post@cs.jhu.edu
*/
public abstract class AbstractGrammar implements Grammar {
@@ -92,6 +92,7 @@ public abstract class AbstractGrammar implements Grammar {
* Constructs an empty, unsorted grammar.
*
* @see Grammar#isSorted()
+ * @param config a {@link org.apache.joshua.decoder.JoshuaConfiguration} object
*/
public AbstractGrammar(JoshuaConfiguration config) {
this.joshuaConfiguration = config;
@@ -110,6 +111,7 @@ public abstract class AbstractGrammar implements Grammar {
* Cube-pruning requires that the grammar be sorted based on the latest feature functions. To
* avoid synchronization, this method should be called before multiple threads are initialized for
* parallel decoding
+ * @param models {@link java.util.List} of {@link org.apache.joshua.decoder.ff.FeatureFunction}'s
*/
public void sortGrammar(List<FeatureFunction> models) {
Trie root = getTrieRoot();
@@ -127,13 +129,13 @@ public abstract class AbstractGrammar implements Grammar {
/**
* Sets the flag indicating whether this grammar is sorted.
* <p>
- * This method is called by {@link #sortGrammar(ArrayList)} to indicate that the grammar has been
- * sorted.
+ * This method is called by {@link org.apache.joshua.decoder.ff.tm.AbstractGrammar#sortGrammar(List)}
+ * to indicate that the grammar has been sorted.</p>
*
- * Its scope is protected so that child classes that override <code>sortGrammar</code> will also
- * be able to call this method to indicate that the grammar has been sorted.
+ * <p>Its scope is protected so that child classes that override <code>sortGrammar</code> will also
+ * be able to call this method to indicate that the grammar has been sorted.</p>
*
- * @param sorted
+ * @param sorted set to true if the grammar is sorted
*/
protected void setSorted(boolean sorted) {
this.sorted = sorted;
@@ -190,8 +192,10 @@ public abstract class AbstractGrammar implements Grammar {
* Adds OOV rules for all words in the input lattice to the current grammar. Uses addOOVRule() so that
* sub-grammars can define different types of OOV rules if needed (as is used in {@link PhraseTable}).
*
+ * @param grammar Grammar in the Trie
* @param inputLattice the lattice representing the input sentence
* @param featureFunctions a list of feature functions used for scoring
+ * @param onlyTrue if true, only add rules for words that are actually OOV
*/
public static void addOOVRules(Grammar grammar, Lattice<Token> inputLattice,
List<FeatureFunction> featureFunctions, boolean onlyTrue) {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ff/tm/BilingualRule.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/BilingualRule.java b/src/main/java/org/apache/joshua/decoder/ff/tm/BilingualRule.java
index 6e35e2d..b2299ba 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/BilingualRule.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/BilingualRule.java
@@ -28,7 +28,7 @@ import org.apache.joshua.corpus.SymbolTable;
* Normally, the feature score in the rule should be *cost* (i.e.,
* -LogP), so that the feature weight should be positive
*
- * @author Zhifei Li, <zh...@gmail.com>
+ * @author Zhifei Li, zhifei.work@gmail.com
* @version $LastChangedDate: 2010-01-20 19:46:54 -0600 (Wed, 20 Jan 2010) $
*/
public class BilingualRule extends MonolingualRule {
@@ -49,9 +49,9 @@ public class BilingualRule extends MonolingualRule {
* @param featureScores Feature value scores for the rule.
* @param arity Number of nonterminals in the source language
* right-hand side.
- * @param owner
- * @param latticeCost
- * @param ruleID
+ * @param owner todo
+ * @param latticeCost todo
+ * @param ruleID todo
*/
public BilingualRule(int lhs, int[] sourceRhs, int[] targetRhs, float[] featureScores, int arity, int owner, float latticeCost, int ruleID) {
super(lhs, sourceRhs, featureScores, arity, owner, latticeCost, ruleID);