Posted to commits@joshua.apache.org by mj...@apache.org on 2016/08/23 22:17:54 UTC

[37/50] [abbrv] incubator-joshua git commit: Merge branch 'master' into 7

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dc756709/joshua-core/src/main/java/org/apache/joshua/decoder/ff/FeatureFunction.java
----------------------------------------------------------------------
diff --cc joshua-core/src/main/java/org/apache/joshua/decoder/ff/FeatureFunction.java
index fb8c789,0000000..802aadd
mode 100644,000000..100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/FeatureFunction.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/FeatureFunction.java
@@@ -1,339 -1,0 +1,339 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.joshua.decoder.ff;
 +
 +import static org.apache.joshua.decoder.ff.FeatureMap.hashFeature;
 +
 +import java.util.HashMap;
 +import java.util.List;
 +import java.util.regex.Matcher;
 +import java.util.regex.Pattern;
 +
 +import org.apache.joshua.decoder.JoshuaConfiguration;
 +import org.apache.joshua.decoder.chart_parser.SourcePath;
 +import org.apache.joshua.decoder.ff.state_maintenance.DPState;
 +import org.apache.joshua.decoder.ff.tm.Rule;
 +import org.apache.joshua.decoder.hypergraph.HGNode;
 +import org.apache.joshua.decoder.segment_file.Sentence;
 +
 +/**
 + * <p>This class defines Joshua's feature function interface, for both sparse and
 + * dense features. It is immediately inherited by StatelessFF and StatefulFF,
 + * which provide functionality common to stateless and stateful features,
 + * respectively. Any feature implementation should extend those classes, and not
 + * this one. The distinction between stateless and stateful features is somewhat
 + * narrow: all features have the opportunity to return an instance of a
 + * {@link DPState} object, and stateless ones just return null.</p>
 + * 
 + * <p>Features in Joshua work like templates. Each feature function defines any
 + * number of actual features, which are associated with weights. The task of the
 + * feature function is to compute the features that are fired in different
 + * circumstances and then return the inner product of those features with the
 + * weight vector. Feature functions can also produce estimates of their future
 + * cost (via {@link org.apache.joshua.decoder.ff.FeatureFunction#estimateCost(Rule, Sentence)}); 
 + * these values are not used in computing the
 + * score, but are only used for sorting rules during cube pruning. The
 + * individual features produced by each template should have globally unique
 + * names; a good convention is to prefix each feature with the name of the
 + * template that produced it.</p>
 + * 
 + * <p>Joshua does not retain individual feature values while decoding, since this
 + * requires keeping a sparse feature vector along every hyperedge, which can be
 + * expensive. Instead, it computes only the weighted cost of each edge. If the
 + * individual feature values are requested, the feature functions are replayed
 + * in post-processing, say during k-best list extraction. This is implemented in
 + * a generic way by passing an {@link Accumulator} object to the compute()
 + * function. During decoding, the accumulator simply sums weighted features in a
 + * scalar. During k-best extraction, when individual feature values are needed,
 + * a {@link FeatureAccumulator} is used to retain the individual values.</p>
 + * 
 + * @author Matt Post post@cs.jhu.edu
 + * @author Juri Ganitkevich juri@cs.jhu.edu
 + */
 +public abstract class FeatureFunction {
 +
 +  /*
 +   * The name of the feature function; this generally matches the weight name in
 +   * the config file. This can also be used as a prefix for feature / weight
 +   * names, for templates that define multiple features.
 +   */
 +  protected String name = null;
 +  
 +  /*
 +   * The hashed feature id corresponding to the name. This must be changed if name is changed
 +   * as well, but provides a good default id for most cases. 
 +   */
 +  protected int featureId;
 +
 +  // The list of arguments passed to the feature, and the hash for the parsed args
 +  protected final String[] args;
 +  protected final HashMap<String, String> parsedArgs; 
 +
 +  /*
 +   * The global weight vector used by the decoder, passed in when the feature is
 +   * instantiated
 +   */
 +  protected final FeatureVector weights;
 +
 +  /* The config */
-   protected JoshuaConfiguration config;
++  protected final JoshuaConfiguration config;
 +
 +  public String getName() {
 +    return name;
 +  }
 +
 +  // Whether the feature has state.
 +  public abstract boolean isStateful();
 +
 +  public FeatureFunction(FeatureVector weights, String name, String[] args, JoshuaConfiguration config) {
 +    this.weights = weights;
 +    this.name = name;
 +    this.featureId = FeatureMap.hashFeature(this.name);
 +    this.args = args;
 +    this.config = config;
 +    this.parsedArgs = FeatureFunction.parseArgs(args);
 +  }
 +
 +  public String logString() {
 +    return String.format("%s (weight %.3f)", name, weights.getOrDefault(hashFeature(name)));
 +  }
 +
 +  /**
 +   * This is the main function for defining feature values. The implementor
 +   * should compute all the features along the hyperedge, calling 
 +   * {@link org.apache.joshua.decoder.ff.FeatureFunction.Accumulator#add(int, float)}
 +   * for each feature. It then returns the newly-computed dynamic
 +   * programming state for this feature (for example, for the
 +   * {@link org.apache.joshua.decoder.ff.lm.LanguageModelFF} feature, this returns the new language model
 +   * context). For stateless features, this value is null.
 +   * 
 +   * Note that the accumulator accumulates *unweighted* feature values. The
 +   * feature vector is multiplied by the weight vector later on.
 +   * 
 +   * @param rule {@link org.apache.joshua.decoder.ff.tm.Rule} to be utilized within computation
 +   * @param tailNodes {@link java.util.List} of {@link org.apache.joshua.decoder.hypergraph.HGNode} tail nodes
 +   * @param i the start index of the span in the source sentence
 +   * @param j the end index of the span in the source sentence
 +   * @param sourcePath information about a path taken through the source {@link org.apache.joshua.lattice.Lattice}
 +   * @param sentence {@link org.apache.joshua.lattice.Lattice} input
 +   * @param acc {@link org.apache.joshua.decoder.ff.FeatureFunction.Accumulator} object permitting generalization of feature computation
 +   * @return the new dynamic programming state (null for stateless features)
 +   */
 +  public abstract DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j,
 +      SourcePath sourcePath, Sentence sentence, Accumulator acc);
 +
 +  /**
 +   * Feature functions must override this. StatefulFF and StatelessFF provide
 +   * reasonable defaults since most features do not fire on the goal node.
 +   * 
 +   * @param tailNode single {@link org.apache.joshua.decoder.hypergraph.HGNode} representing tail node
 +   * @param i the start index of the span in the source sentence
 +   * @param j the end index of the span in the source sentence
 +   * @param sourcePath information about a path taken through the source {@link org.apache.joshua.lattice.Lattice}
 +   * @param sentence {@link org.apache.joshua.lattice.Lattice} input
 +   * @param acc {@link org.apache.joshua.decoder.ff.FeatureFunction.Accumulator} object permitting generalization of feature computation
 +   * @return the DPState (null if none)
 +   */
 +  public abstract DPState computeFinal(HGNode tailNode, int i, int j, SourcePath sourcePath,
 +      Sentence sentence, Accumulator acc);
 +
 +  /**
 +   * This is a convenience function for retrieving the features fired when
 +   * applying a rule, provided for backward compatibility.
 +   * 
 +   * Returns the *unweighted* cost of the feature delta computed at this
 +   * position. Note that this is a feature delta, so existing feature costs of
 +   * the tail nodes should not be incorporated, and it is very important not to
 +   * incorporate the feature weights. This function is used in the kbest
 +   * extraction code but could also be used in computing the cost.
 +   * 
 +   * @param rule {@link org.apache.joshua.decoder.ff.tm.Rule} to be utilized within computation
 +   * @param tailNodes {@link java.util.List} of {@link org.apache.joshua.decoder.hypergraph.HGNode} tail nodes
 +   * @param i the start index of the span in the source sentence
 +   * @param j the end index of the span in the source sentence
 +   * @param sourcePath information about a path taken through the source {@link org.apache.joshua.lattice.Lattice}
 +   * @param sentence {@link org.apache.joshua.lattice.Lattice} input
 +   * @return an *unweighted* feature delta
 +   */
 +  public final FeatureVector computeFeatures(Rule rule, List<HGNode> tailNodes, int i, int j,
 +      SourcePath sourcePath, Sentence sentence) {
 +
 +    FeatureAccumulator features = new FeatureAccumulator();
 +    compute(rule, tailNodes, i, j, sourcePath, sentence, features);
 +    return features.getFeatures();
 +  }
 +
 +  /**
 +   * This function is called for the final transition. For example, the
 +   * LanguageModel feature function treats the last rule specially. It needs to
 +   * return the *weighted* cost of applying the feature. Provided for backward
 +   * compatibility.
 +   * 
 +   * @param tailNode single {@link org.apache.joshua.decoder.hypergraph.HGNode} representing tail node
 +   * @param i the start index of the span in the source sentence
 +   * @param j the end index of the span in the source sentence
 +   * @param sourcePath information about a path taken through the source {@link org.apache.joshua.lattice.Lattice}
 +   * @param sentence {@link org.apache.joshua.lattice.Lattice} input
 +   * @return a *weighted* feature cost
 +   */
 +  public final float computeFinalCost(HGNode tailNode, int i, int j, SourcePath sourcePath,
 +      Sentence sentence) {
 +
 +    ScoreAccumulator score = new ScoreAccumulator();
 +    computeFinal(tailNode, i, j, sourcePath, sentence, score);
 +    return score.getScore();
 +  }
 +
 +  /**
 +   * Returns the *unweighted* feature delta for the final transition (e.g., for
 +   * the language model feature function). Provided for backward compatibility.
 +   * 
 +   * @param tailNode single {@link org.apache.joshua.decoder.hypergraph.HGNode} representing tail node
 +   * @param i the start index of the span in the source sentence
 +   * @param j the end index of the span in the source sentence
 +   * @param sourcePath information about a path taken through the source {@link org.apache.joshua.lattice.Lattice}
 +   * @param sentence {@link org.apache.joshua.lattice.Lattice} input
 +   * @return an *unweighted* feature vector
 +   */
 +  public final FeatureVector computeFinalFeatures(HGNode tailNode, int i, int j,
 +      SourcePath sourcePath, Sentence sentence) {
 +
 +    FeatureAccumulator features = new FeatureAccumulator();
 +    computeFinal(tailNode, i, j, sourcePath, sentence, features);
 +    return features.getFeatures();
 +  }
 +
 +  /**
 +   * This function is called when sorting rules for cube pruning. It must return
 +   * the *weighted* estimated cost of applying a feature. This need not be the
 +   * actual cost of applying the rule in context. Basically, it's the inner
 +   * product of the weight vector and all features found in the grammar rule,
 +   * though some features (like LanguageModelFF) can also compute some of their
 +   * values. This is just an estimate of the cost, which helps do better
 +   * sorting. Later, the real cost of this feature function is computed via
 +   * compute().
 +   * 
 +   * @param rule {@link org.apache.joshua.decoder.ff.tm.Rule} to be utilized within computation
 +   * @param sentence {@link org.apache.joshua.lattice.Lattice} input
 +   * @return the *weighted* cost of applying the feature.
 +   */
 +  public abstract float estimateCost(Rule rule, Sentence sentence);
 +
 +  /**
 +   * This function is called to produce a *weighted* estimate of the future cost
 +   * of applying this feature. This value is not incorporated into the model
 +   * score but is used in pruning decisions. Stateless features return 0.0f by
 +   * default, but Stateful features might want to override this.
 +   * 
 +   * @param rule {@link org.apache.joshua.decoder.ff.tm.Rule} to be utilized within computation
 +   * @param state the {@link org.apache.joshua.decoder.ff.state_maintenance.DPState} from which the future cost is estimated
 +   * @param sentence {@link org.apache.joshua.lattice.Lattice} input
 +   * @return the *weighted* future cost estimate of applying this rule in
 +   *         context.
 +   */
 +  public abstract float estimateFutureCost(Rule rule, DPState state, Sentence sentence);
 +
 +  /**
 +   * Parses the arguments passed to a feature function in the Joshua config file.
 +   * TODO: Replace this with a proper CLI library at some point. Expects key-value
 +   * pairs of the form "-argname value". Any key without a value is added with an
 +   * empty string as its value. Multiple values for the same key are not parsed;
 +   * the first one is used.
 +   * 
 +   * @param args A string with the raw arguments and their names
 +   * @return A hash with the keys and the values of the string
 +   */
 +  public static HashMap<String, String> parseArgs(String[] args) {
-     HashMap<String, String> parsedArgs = new HashMap<String, String>();
++    HashMap<String, String> parsedArgs = new HashMap<>();
 +    boolean lookingForValue = false;
 +    String currentKey = null;
-     for (int i = 0; i < args.length; i++) {
++    // Compile the key pattern once, outside the loop.
++    final Pattern argKeyPattern = Pattern.compile("^-[a-zA-Z]\\S+");
++    for (String arg : args) {
 +
-       Matcher argKey = argKeyPattern.matcher(args[i]);
++      Matcher argKey = argKeyPattern.matcher(arg);
 +      if (argKey.find()) {
 +        // This is a key
 +        // First check to see if there is a key that is waiting to be written
 +        if (lookingForValue) {
 +          // This is a key with no specified value
 +          parsedArgs.put(currentKey, "");
 +        }
 +        // Now store the new key and look for its value
-         currentKey = args[i].substring(1);
++        currentKey = arg.substring(1);
 +        lookingForValue = true;
 +      } else {
 +        // This is a value
 +        if (lookingForValue) {
-           parsedArgs.put(currentKey, args[i]);
++          parsedArgs.put(currentKey, arg);
 +          lookingForValue = false;
 +        }
 +      }
 +    }
 +    
 +    // make sure we add the last key without value
 +    if (lookingForValue && currentKey != null) {
 +      // end of line, no value
 +      parsedArgs.put(currentKey, "");
 +    }
 +    return parsedArgs;
 +  }
 +
 +  /**
 +   * Accumulator objects allow us to generalize feature computation.
 +   * ScoreAccumulator takes (feature,value) pairs and simply stores the weighted
 +   * sum (for decoding). FeatureAccumulator records the named feature values
 +   * (for k-best extraction).
 +   */
 +  public interface Accumulator {
 +    void add(int featureId, float value);
 +  }
 +
 +  public class ScoreAccumulator implements Accumulator {
 +    private float score;
 +
 +    public ScoreAccumulator() {
 +      this.score = 0.0f;
 +    }
 +
 +    @Override
 +    public void add(int featureId, float value) {
 +      score += value * weights.getOrDefault(featureId);
 +    }
 +
 +    public float getScore() {
 +      return score;
 +    }
 +  }
 +
 +  public class FeatureAccumulator implements Accumulator {
-     private FeatureVector features;
++    private final FeatureVector features;
 +
 +    public FeatureAccumulator() {
 +      this.features = new FeatureVector(10);
 +    }
 +
 +    @Override
 +    public void add(int id, float value) {
 +      features.add(id, value);
 +    }
 +
 +    public FeatureVector getFeatures() {
 +      return features;
 +    }
 +  }
 +}
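
A minimal stateless feature built against this interface might look like the
following sketch. The class name WordCountFF and the single feature it fires are
hypothetical; the constructor signature, the Accumulator usage, and the weighted
estimateCost() convention follow the abstract methods documented above.

    package org.apache.joshua.decoder.ff;

    import java.util.List;

    import org.apache.joshua.decoder.JoshuaConfiguration;
    import org.apache.joshua.decoder.chart_parser.SourcePath;
    import org.apache.joshua.decoder.ff.state_maintenance.DPState;
    import org.apache.joshua.decoder.ff.tm.Rule;
    import org.apache.joshua.decoder.hypergraph.HGNode;
    import org.apache.joshua.decoder.segment_file.Sentence;

    /* Hypothetical example: fires one feature counting the target words of each applied rule. */
    public class WordCountFF extends StatelessFF {

      public WordCountFF(FeatureVector weights, String[] args, JoshuaConfiguration config) {
        super(weights, "WordCount", args, config);
      }

      @Override
      public DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j,
          SourcePath sourcePath, Sentence sentence, Accumulator acc) {
        if (rule != null) {
          // accumulate the *unweighted* value; ScoreAccumulator applies the weight
          acc.add(featureId, rule.getTarget().length);
        }
        return null; // stateless features carry no dynamic programming state
      }

      @Override
      public float estimateCost(Rule rule, Sentence sentence) {
        // the *weighted* estimate used only for sorting rules during cube pruning
        return (rule == null) ? 0.0f : weights.getOrDefault(featureId) * rule.getTarget().length;
      }
    }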

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dc756709/joshua-core/src/main/java/org/apache/joshua/decoder/ff/LabelSubstitutionFF.java
----------------------------------------------------------------------
diff --cc joshua-core/src/main/java/org/apache/joshua/decoder/ff/LabelSubstitutionFF.java
index 766ea0b,0000000..9be3f88
mode 100644,000000..100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/LabelSubstitutionFF.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/LabelSubstitutionFF.java
@@@ -1,133 -1,0 +1,133 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.joshua.decoder.ff;
 +
 +import static org.apache.joshua.decoder.ff.FeatureMap.hashFeature;
 +
 +/***
 + * @author Gideon Wenniger
 + */
 +
 +import java.util.List;
 +
 +import org.apache.joshua.decoder.JoshuaConfiguration;
 +import org.apache.joshua.decoder.chart_parser.SourcePath;
 +import org.apache.joshua.decoder.ff.state_maintenance.DPState;
 +import org.apache.joshua.decoder.ff.tm.Rule;
 +import org.apache.joshua.decoder.hypergraph.HGNode;
 +import org.apache.joshua.decoder.segment_file.Sentence;
 +import org.apache.joshua.util.ListUtil;
 +
 +public class LabelSubstitutionFF extends StatelessFF {
 +  private static final String MATCH_SUFFIX = "MATCH";
 +  private static final String NO_MATCH_SUFFIX = "NOMATCH";
 +
 +  public LabelSubstitutionFF(FeatureVector weights, String[] args, JoshuaConfiguration config) {
 +    super(weights, "LabelSubstitution", args, config);
 +  }
 +
 +  public String getLowerCasedFeatureName() {
 +    return name.toLowerCase();
 +  }
 +
 +  public String getMatchFeatureSuffix(String ruleNonterminal, String substitutionNonterminal) {
 +    if (ruleNonterminal.equals(substitutionNonterminal)) {
 +      return MATCH_SUFFIX;
 +    } else {
 +      return NO_MATCH_SUFFIX;
 +    }
 +  }
 +
 +  public static String getSubstitutionSuffix(String ruleNonterminal, String substitutionNonterminal) {
 +    return substitutionNonterminal + "_substitutes_" + ruleNonterminal;
 +  }
 +
-   private final String computeLabelMatchingFeature(String ruleNonterminal,
++  private String computeLabelMatchingFeature(String ruleNonterminal,
 +      String substitutionNonterminal) {
 +    String result = getLowerCasedFeatureName() + "_";
 +    result += getMatchFeatureSuffix(ruleNonterminal, substitutionNonterminal);
 +    return result;
 +  }
 +
-   private final String computeLabelSubstitutionFeature(String ruleNonterminal,
++  private String computeLabelSubstitutionFeature(String ruleNonterminal,
 +      String substitutionNonterminal) {
 +    String result = getLowerCasedFeatureName() + "_";
 +    result += getSubstitutionSuffix(ruleNonterminal, substitutionNonterminal);
 +    return result;
 +  }
 +
-   private static final String getRuleLabelsDescriptorString(Rule rule) {
++  private static String getRuleLabelsDescriptorString(Rule rule) {
 +    String result = "";
 +    String leftHandSide = RulePropertiesQuerying.getLHSAsString(rule);
 +    List<String> ruleSourceNonterminals = RulePropertiesQuerying
 +        .getRuleSourceNonterminalStrings(rule);
 +    boolean isInverting = rule.isInverting();
 +    result += "<LHS>" + leftHandSide + "</LHS>";
 +    result += "_<Nont>";
 +    result += ListUtil.stringListStringWithoutBracketsCommaSeparated(ruleSourceNonterminals);
 +    result += "</Nont>";
 +    if (isInverting) {
 +      result += "_INV";
 +    } else {
 +      result += "_MONO";
 +    }
 +    
 +    return result;
 +  }
 +
-   private static final String getSubstitutionsDescriptorString(List<HGNode> tailNodes) {
++  private static String getSubstitutionsDescriptorString(List<HGNode> tailNodes) {
 +    String result = "_<Subst>";
 +    List<String> substitutionNonterminals = RulePropertiesQuerying
 +        .getSourceNonterminalStrings(tailNodes);
 +    result += ListUtil.stringListStringWithoutBracketsCommaSeparated(substitutionNonterminals);
 +    result += "</Subst>";
 +    return result;
 +  }
 +
 +  public final String getGapLabelsForRuleSubstitutionSuffix(Rule rule, List<HGNode> tailNodes) {
 +    String result = getLowerCasedFeatureName() + "_";
 +    result += getRuleLabelsDescriptorString(rule);
 +    result += getSubstitutionsDescriptorString(tailNodes);
 +    return result;
 +  }
 +
 +  @Override
 +  public DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j, SourcePath sourcePath,
 +      Sentence sentence, Accumulator acc) {
 +    if (rule != null && tailNodes != null) {
 +
 +      List<String> ruleSourceNonterminals = RulePropertiesQuerying
 +          .getRuleSourceNonterminalStrings(rule);
 +      List<String> substitutionNonterminals = RulePropertiesQuerying
 +          .getSourceNonterminalStrings(tailNodes);
 +      // Assert.assertEquals(ruleSourceNonterminals.size(), substitutionNonterminals.size());
 +      for (int nonterminalIndex = 0; nonterminalIndex < ruleSourceNonterminals.size(); nonterminalIndex++) {
 +        String ruleNonterminal = ruleSourceNonterminals.get(nonterminalIndex);
 +        String substitutionNonterminal = substitutionNonterminals.get(nonterminalIndex);
 +        acc.add(hashFeature(computeLabelMatchingFeature(ruleNonterminal, substitutionNonterminal)), 1);
 +        acc.add(hashFeature(computeLabelSubstitutionFeature(ruleNonterminal, substitutionNonterminal)), 1);
 +      }
 +      acc.add(hashFeature(getGapLabelsForRuleSubstitutionSuffix(rule, tailNodes)), 1);
 +    }
 +    return null;
 +  }
 +}
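
To make the naming scheme concrete, here is a hedged sketch of the strings this
template fires for one substitution; the labels "NP" and "DT" are invented for
illustration:

    // A rule nonterminal "NP" filled by a tail node labeled "DT".
    // getLowerCasedFeatureName() is "labelsubstitution" for the default name.
    String prefix = "labelsubstitution_";
    String match  = prefix + "NOMATCH"; // getMatchFeatureSuffix("NP", "DT"): labels differ
    String subst  = prefix + LabelSubstitutionFF.getSubstitutionSuffix("NP", "DT");
    // subst == "labelsubstitution_DT_substitutes_NP"; both names are hashed with
    // hashFeature() and accumulated with value 1.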

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dc756709/joshua-core/src/main/java/org/apache/joshua/decoder/ff/LexicalFeatures.java
----------------------------------------------------------------------
diff --cc joshua-core/src/main/java/org/apache/joshua/decoder/ff/LexicalFeatures.java
index 4eacd26,0000000..63d350e
mode 100644,000000..100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/LexicalFeatures.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/LexicalFeatures.java
@@@ -1,153 -1,0 +1,153 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.joshua.decoder.ff;
 +
 +import static com.google.common.cache.CacheBuilder.newBuilder;
 +import static org.apache.joshua.decoder.ff.FeatureMap.hashFeature;
 +
 +import java.util.ArrayList;
 +import java.util.List;
 +
 +import org.apache.joshua.corpus.Vocabulary;
 +import org.apache.joshua.decoder.JoshuaConfiguration;
 +import org.apache.joshua.decoder.chart_parser.SourcePath;
 +import org.apache.joshua.decoder.ff.state_maintenance.DPState;
 +import org.apache.joshua.decoder.ff.tm.OwnerId;
 +import org.apache.joshua.decoder.ff.tm.OwnerMap;
 +import org.apache.joshua.decoder.ff.tm.Rule;
 +import org.apache.joshua.decoder.hypergraph.HGNode;
 +import org.apache.joshua.decoder.segment_file.Sentence;
 +import org.apache.joshua.util.FormatUtils;
 +
 +import com.google.common.cache.Cache;
 +
 +/**
 + *  Lexical alignment features denoting alignments, deletions, and insertions.
 + */
 +public class LexicalFeatures extends StatelessFF {
 +  
 +  private final boolean useAlignments;
 +  private final boolean useDeletions;
 +  private final boolean useInsertions;
 +  
 +  private static final String NAME = "LexicalFeatures";
 +  // value to fire for features
 +  private static final int VALUE = 1;
 +  //whether this feature is restricted to a certain grammar/owner
 +  private final boolean ownerRestriction;
 +  // the grammar/owner this feature is restricted to fire
 +  private final OwnerId owner;
 +  // Strings separating words
 +  private static final String SEPARATOR = "~";
 +  
 +  private final Cache<Rule, List<Integer>> featureCache;
 +  
 +  public LexicalFeatures(FeatureVector weights, String[] args, JoshuaConfiguration config) {
 +    super(weights, NAME, args, config);
 +    
-     ownerRestriction = (parsedArgs.containsKey("owner")) ? true : false;
++    ownerRestriction = parsedArgs.containsKey("owner");
 +    owner = ownerRestriction ? OwnerMap.register(parsedArgs.get("owner")) : OwnerMap.UNKNOWN_OWNER_ID;
 +    
 +    useAlignments = parsedArgs.containsKey("alignments");
 +    useDeletions = parsedArgs.containsKey("deletions");
 +    useInsertions = parsedArgs.containsKey("insertions");
 +    
 +    // initialize cache
 +    if (parsedArgs.containsKey("cacheSize")) {
 +      featureCache = newBuilder().maximumSize(Integer.parseInt(parsedArgs.get("cacheSize"))).build();
 +    } else {
 +      featureCache = newBuilder().maximumSize(config.cachedRuleSize).build();
 +    }
 +  }
 +
 +  @Override
 +  public DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j, SourcePath sourcePath,
 +      Sentence sentence, Accumulator acc) {
 +    
 +    // Skip rules whose owner does not match the restriction (cf. RuleFF below).
 +    if (ownerRestriction && !rule.getOwner().equals(owner)) {
 +      return null;
 +    }
 +
 +    List<Integer> featureIds = featureCache.getIfPresent(rule);
 +    if (featureIds == null) {
 +      featureIds = getFeatures(rule);
 +      featureCache.put(rule, featureIds);
 +    }
 +    for (int featureId : featureIds) {
 +      acc.add(featureId, VALUE);
 +    }
 +    
 +    return null;
 +  }
 +  
 +  /**
 +   * Obtains the feature ids for the given rule.
 +   * @param rule the rule whose features are extracted
 +   * @return a list of feature ids fired for this rule
 +   */
 +  private List<Integer> getFeatures(final Rule rule) {
 +    final List<Integer> result = new ArrayList<>();
 +    
 +    byte[] alignments = rule.getAlignment();
 +    if (alignments == null) {
 +      return result;
 +    }
 +    int[] sourceWords = rule.getSource();
 +    int[] targetWords = rule.getTarget();
 +    
 +    // sourceAligned & targetAligned indicate whether an index is covered by alignments
 +    boolean[] sourceAligned = new boolean[sourceWords.length];
 +    boolean[] targetAligned = new boolean[targetWords.length];
 +    
 +    // translations: aligned words
 +    for (int i = 0; i < alignments.length; i+=2) {
 +      byte sourceIndex = alignments[i];
 +      byte targetIndex = alignments[i + 1];
 +      sourceAligned[sourceIndex] = true;
 +      targetAligned[targetIndex] = true;
 +      if (useAlignments) {
 +        result.add(hashFeature(
 +            "T:" + 
 +            Vocabulary.word(sourceWords[sourceIndex]) + 
 +            SEPARATOR + 
 +            Vocabulary.word(targetWords[targetIndex])));
 +      }
 +    }
 +    
 +    // deletions: unaligned source words
 +    if (useDeletions) {
 +      for (int i = 0; i < sourceAligned.length; i++) {
 +        if (!sourceAligned[i] && ! FormatUtils.isNonterminal(sourceWords[i])) {
 +          result.add(hashFeature("D:" + Vocabulary.word(sourceWords[i])));
 +        }
 +      }
 +    }
 +    
 +    // insertions: unaligned target words
 +    if (useInsertions) {
 +      for (int i = 0; i < targetAligned.length; i++) {
 +        if (!targetAligned[i] && ! FormatUtils.isNonterminal(targetWords[i])) {
 +          result.add(hashFeature("I:" + Vocabulary.word(targetWords[i])));
 +        }
 +      }
 +    }
 +    
 +    return result;
 +  }
 +}
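
Given the parseArgs() conventions in FeatureFunction above (keys of the form
-argname, each optionally followed by a value), the constructor's parsedArgs map
for a hypothetical invocation would look like this sketch:

    // Hypothetical raw arguments, e.g. from "LexicalFeatures -alignments -owner pt -cacheSize 5000"
    String[] args = { "-alignments", "-owner", "pt", "-cacheSize", "5000" };
    java.util.HashMap<String, String> parsed = FeatureFunction.parseArgs(args);
    // parsed: { "alignments" -> "", "owner" -> "pt", "cacheSize" -> "5000" }
    // so useAlignments is true, the owner restriction is "pt", and the cache holds 5000 rules.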

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dc756709/joshua-core/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java
----------------------------------------------------------------------
diff --cc joshua-core/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java
index 6eb1293,0000000..5e99428
mode 100644,000000..100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java
@@@ -1,98 -1,0 +1,97 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.joshua.decoder.ff;
 +
 +import java.util.HashMap;
 +import java.util.List;
 +
 +import org.apache.joshua.corpus.Vocabulary;
 +import org.apache.joshua.decoder.JoshuaConfiguration;
 +import org.apache.joshua.decoder.JoshuaConfiguration.OOVItem;
 +import org.apache.joshua.decoder.chart_parser.SourcePath;
 +import org.apache.joshua.decoder.ff.state_maintenance.DPState;
 +import org.apache.joshua.decoder.ff.tm.OwnerId;
 +import org.apache.joshua.decoder.ff.tm.OwnerMap;
 +import org.apache.joshua.decoder.ff.tm.Rule;
 +import org.apache.joshua.decoder.hypergraph.HGNode;
 +import org.apache.joshua.decoder.segment_file.Sentence;
 +
 +/**
 + * This feature is fired when an out-of-vocabulary word (with respect to the translation model) is
 + * entered into the chart. OOVs work in the following manner: for each word in the input that is OOV
 + * with respect to the translation model, we create a rule that pushes that word through
 + * untranslated (the suffix "_OOV" can optionally be appended according to the runtime parameter
 + * "mark-oovs") . These rules are all stored in a grammar whose owner is "oov". The OOV feature
 + * function template then fires the "OOVPenalty" feature whenever it is asked to score an OOV rule.
 + * 
 + * @author Matt Post post@cs.jhu.edu
 + */
 +public class OOVPenalty extends StatelessFF {
 +  private final OwnerId ownerID;
-   
-   /* The default value returned for OOVs. Can be overridden with -oov-list */
-   private final float defaultValue = -100f;
++
 +  private final HashMap<Integer,Float> oovWeights;
 +
 +  public OOVPenalty(FeatureVector weights, String[] args, JoshuaConfiguration config) {
 +    super(weights, "OOVPenalty", args, config);
 +    ownerID = OwnerMap.register("oov");
-     oovWeights = new HashMap<Integer,Float>();
++    oovWeights = new HashMap<>();
 +    
 +    if (config.oovList != null) {
 +      for (OOVItem item: config.oovList) { 
 +        oovWeights.put(Vocabulary.id(item.label), item.weight);
 +      }
 +    }
 +  }
 +  
 +  /**
 +   * OOV rules cover exactly one word, and such rules belong to a grammar whose owner is "oov". Each
 +   * OOV fires the OOVPenalty feature with a value of 1, so the cost is simply the weight, which was
 +   * cached when the feature was created.
 +   */
 +  @Override
 +  public DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j, SourcePath sourcePath,
 +      Sentence sentence, Accumulator acc) {
 +    
 +    if (rule != null && this.ownerID.equals(rule.getOwner())) {
 +      acc.add(featureId, getValue(rule.getLHS()));
 +    }
 +
 +    return null;
 +  }
 +  
 +  /**
 +   * It's important for the OOV feature to contribute to the rule's estimated cost, so that OOV
 +   * rules (which are added for all words, not just ones without translation options) get sorted
 +   * to the bottom during cube pruning.
 +   * 
 +   * Important! estimateCost returns the *weighted* feature value.
 +   */
 +  @Override
 +  public float estimateCost(Rule rule, Sentence sentence) {
 +    if (rule != null && this.ownerID.equals(rule.getOwner())) {
 +      return weights.getOrDefault(featureId) * getValue(rule.getLHS());
 +    }
 +    return 0.0f;
 +  }
 +  
 +  private float getValue(int lhs) {
++    // The default value returned for OOVs; can be overridden with -oov-list.
++    final float defaultValue = -100f;
 +    return oovWeights.getOrDefault(lhs, defaultValue);
 +  }
 +}
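
A quick arithmetic sketch of the estimate above (the weight is invented): with a
weight of 1.0 on OOVPenalty and no -oov-list override, every OOV rule is pushed
toward the bottom of the cube-pruning sort.

    float weight = 1.0f;              // weights.getOrDefault(featureId), invented here
    float value = -100f;              // getValue(lhs) with no -oov-list override
    float estimate = weight * value;  // estimateCost(...) == -100.0f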

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dc756709/joshua-core/src/main/java/org/apache/joshua/decoder/ff/PhrasePenalty.java
----------------------------------------------------------------------
diff --cc joshua-core/src/main/java/org/apache/joshua/decoder/ff/PhrasePenalty.java
index acda1d2,0000000..4f6a61c
mode 100644,000000..100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/PhrasePenalty.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/PhrasePenalty.java
@@@ -1,80 -1,0 +1,80 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.joshua.decoder.ff;
 +
 +import java.util.List;
 +
 +import org.apache.joshua.decoder.JoshuaConfiguration;
 +import org.apache.joshua.decoder.chart_parser.SourcePath;
 +import org.apache.joshua.decoder.ff.state_maintenance.DPState;
 +import org.apache.joshua.decoder.ff.tm.OwnerId;
 +import org.apache.joshua.decoder.ff.tm.OwnerMap;
 +import org.apache.joshua.decoder.ff.tm.Rule;
 +import org.apache.joshua.decoder.hypergraph.HGNode;
 +import org.apache.joshua.decoder.phrase.Hypothesis;
 +import org.apache.joshua.decoder.segment_file.Sentence;
 +
 +/**
 + *  This feature just counts rules that are used. You can restrict it with a number of flags:
 + * 
 + *   -owner OWNER
 + *    Only count rules owned by OWNER
 + *   -target|-source
 + *    Only count the target or source side (plus the LHS)
 + *
 + * TODO: add an option to separately provide a list of rule counts, restrict to counts above a threshold. 
 + */
 +public class PhrasePenalty extends StatelessFF {
 +
 +  private final OwnerId owner;
-   private float value = 1.0f;
++  private final float value = 1.0f;
 +  
 +  public PhrasePenalty(FeatureVector weights, String[] args, JoshuaConfiguration config) {
 +    super(weights, "PhrasePenalty", args, config);
 +    if (parsedArgs.containsKey("owner"))
 +      this.owner = OwnerMap.register(parsedArgs.get("owner"));
 +    else // default
 +      this.owner = OwnerMap.register("pt"); 
 +  }
 +
 +  @Override
 +  public DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j, SourcePath sourcePath,
 +      Sentence sentence, Accumulator acc) {
 +
 +    if (rule != null && rule != Hypothesis.BEGIN_RULE && rule != Hypothesis.END_RULE 
 +        && (rule.getOwner().equals(owner))) {
 +      acc.add(featureId, value);
 +    }
 +
 +    return null;
 +  }
 +  
 +  /**
 +   * Returns the *weighted* estimate.
 +   * 
 +   */
 +  @Override
 +  public float estimateCost(Rule rule, Sentence sentence) {
 +    if (rule != null && rule != Hypothesis.BEGIN_RULE && rule != Hypothesis.END_RULE 
 +        && (rule.getOwner().equals(owner))) {
 +      return weights.getOrDefault(featureId) * value;
 +    }
 +    return 0.0f;
 +  }
 +}
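
Since the feature fires with value 1 for every counted rule, its weighted
contribution is the weight itself times the number of phrase pairs applied; a
hedged arithmetic sketch with an invented weight:

    float phrasePenaltyWeight = -0.2f; // weights.getOrDefault(featureId), invented
    int rulesApplied = 7;              // phrase pairs in a derivation, excluding BEGIN/END glue rules
    float contribution = phrasePenaltyWeight * rulesApplied; // about -1.4 added to the score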

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dc756709/joshua-core/src/main/java/org/apache/joshua/decoder/ff/RuleFF.java
----------------------------------------------------------------------
diff --cc joshua-core/src/main/java/org/apache/joshua/decoder/ff/RuleFF.java
index df2b180,0000000..7a08043
mode 100644,000000..100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/RuleFF.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/RuleFF.java
@@@ -1,126 -1,0 +1,126 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.joshua.decoder.ff;
 +
 +import static com.google.common.cache.CacheBuilder.newBuilder;
 +import static org.apache.joshua.decoder.ff.tm.OwnerMap.UNKNOWN_OWNER_ID;
 +
 +import java.util.List;
 +
 +import org.apache.joshua.corpus.Vocabulary;
 +import org.apache.joshua.decoder.JoshuaConfiguration;
 +import org.apache.joshua.decoder.chart_parser.SourcePath;
 +import org.apache.joshua.decoder.ff.state_maintenance.DPState;
 +import org.apache.joshua.decoder.ff.tm.OwnerId;
 +import org.apache.joshua.decoder.ff.tm.OwnerMap;
 +import org.apache.joshua.decoder.ff.tm.Rule;
 +import org.apache.joshua.decoder.hypergraph.HGNode;
 +import org.apache.joshua.decoder.segment_file.Sentence;
 +
 +import com.google.common.cache.Cache;
 +
 +/**
 + *  This feature fires for rule ids.
 + *  Firing can be restricted to rules from a certain owner, and rule ids
 + *  can be generated from source side and/or target side. 
 + */
 +public class RuleFF extends StatelessFF {
 +
-   private enum Sides { SOURCE, TARGET, BOTH };
-   
++  private enum Sides { SOURCE, TARGET, BOTH }
++
 +  private static final String NAME = "RuleFF";
 +  // value to fire for features
 +  private static final int VALUE = 1;
 +  // whether this feature is restricted to a certain grammar/owner
 +  private final boolean ownerRestriction;
 +  // the grammar/owner this feature is restricted to fire
 +  private final OwnerId owner;
 +  // what part of the rule should be extracted
 +  private final Sides sides;
 +  // Strings separating words and rule sides 
 +  private static final String SEPARATOR = "~";
 +  private static final String SIDES_SEPARATOR = "->";
 +  
 +  private final Cache<Rule, Integer> featureCache;
 +  
 +  public RuleFF(FeatureVector weights, String[] args, JoshuaConfiguration config) {
 +    super(weights, NAME, args, config);
 +    
-     ownerRestriction = (parsedArgs.containsKey("owner")) ? true : false;
++    ownerRestriction = parsedArgs.containsKey("owner");
 +    owner = ownerRestriction ? OwnerMap.register(parsedArgs.get("owner")) : UNKNOWN_OWNER_ID;
 +    
 +    if (parsedArgs.containsKey("sides")) {
 +      final String sideValue = parsedArgs.get("sides");
 +      if (sideValue.equalsIgnoreCase("source")) {
 +        sides = Sides.SOURCE;
 +      } else if (sideValue.equalsIgnoreCase("target")) {
 +        sides = Sides.TARGET;
 +      } else if (sideValue.equalsIgnoreCase("both")){
 +        sides = Sides.BOTH;
 +      } else {
 +        throw new RuntimeException("Unknown side value.");
 +      }
 +    } else {
 +      sides = Sides.BOTH;
 +    }
 +    
 +    // initialize cache
 +    if (parsedArgs.containsKey("cacheSize")) {
 +      featureCache = newBuilder().maximumSize(Integer.parseInt(parsedArgs.get("cacheSize"))).build();
 +    } else {
 +      featureCache = newBuilder().maximumSize(config.cachedRuleSize).build();
 +    }
 +  }
 +
 +  @Override
 +  public DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j, SourcePath sourcePath,
 +      Sentence sentence, Accumulator acc) {
 +    
 +    if (ownerRestriction && !rule.getOwner().equals(owner)) {
 +      return null;
 +    }
 +
 +    Integer featureId = featureCache.getIfPresent(rule);
 +    if (featureId == null) {
 +      featureId = hashRuleFeature(rule);
 +      featureCache.put(rule, featureId);
 +    }
 +    acc.add(featureId, VALUE);
 +    
 +    return null;
 +  }
 +  
 +  /**
 +   * Obtains the feature id for the given rule.
 +   * @param rule the rule whose feature id is computed
 +   * @return the hashed feature id for this rule
 +   */
 +  private int hashRuleFeature(final Rule rule) {
 +    final StringBuilder sb = new StringBuilder(Vocabulary.word(rule.getLHS()))
 +      .append(SIDES_SEPARATOR);
 +    if (sides == Sides.SOURCE || sides == Sides.BOTH) {
 +      sb.append(Vocabulary.getWords(rule.getSource(), SEPARATOR));
 +    }
 +    sb.append(SIDES_SEPARATOR);
 +    if (sides == Sides.TARGET || sides == Sides.BOTH) {
 +      sb.append(Vocabulary.getWords(rule.getTarget(), SEPARATOR));
 +    }
 +    return FeatureMap.hashFeature(sb.toString());
 +  }
 +}
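
To illustrate the strings that hashRuleFeature() hands to FeatureMap.hashFeature()
(the rule is invented; "[X]", "the house", and "das Haus" are placeholders):

    String both   = "[X]" + "->" + "the~house" + "->" + "das~Haus"; // sides == BOTH
    String source = "[X]" + "->" + "the~house" + "->";              // sides == SOURCE
    String target = "[X]" + "->" + "->" + "das~Haus";               // sides == TARGET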

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dc756709/joshua-core/src/main/java/org/apache/joshua/decoder/ff/RulePropertiesQuerying.java
----------------------------------------------------------------------
diff --cc joshua-core/src/main/java/org/apache/joshua/decoder/ff/RulePropertiesQuerying.java
index a1867a3,0000000..0ee41be
mode 100644,000000..100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/RulePropertiesQuerying.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/RulePropertiesQuerying.java
@@@ -1,49 -1,0 +1,49 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.joshua.decoder.ff;
 +
 +import java.util.ArrayList;
 +import java.util.List;
 +import org.apache.joshua.corpus.Vocabulary;
 +import org.apache.joshua.decoder.ff.tm.Rule;
 +import org.apache.joshua.decoder.hypergraph.HGNode;
 +
 +public class RulePropertiesQuerying {
 +
-   public static final String getLHSAsString(Rule rule) {
++  public static String getLHSAsString(Rule rule) {
 +    return Vocabulary.word(rule.getLHS());
 +  }
 +
 +  public static List<String> getRuleSourceNonterminalStrings(Rule rule) {
-     List<String> result = new ArrayList<String>();
++    List<String> result = new ArrayList<>();
 +    for (int nonTerminalIndex : rule.getForeignNonTerminals()) {
 +      result.add(Vocabulary.word(nonTerminalIndex));
 +    }
 +    return result;
 +  }
 +
 +  public static List<String> getSourceNonterminalStrings(List<HGNode> tailNodes) {
-     List<String> result = new ArrayList<String>();
++    List<String> result = new ArrayList<>();
 +    for (HGNode tailNode : tailNodes) {
 +      result.add(Vocabulary.word(tailNode.lhs));
 +    }
 +    return result;
 +  }
 +
 +}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dc756709/joshua-core/src/main/java/org/apache/joshua/decoder/ff/RuleShape.java
----------------------------------------------------------------------
diff --cc joshua-core/src/main/java/org/apache/joshua/decoder/ff/RuleShape.java
index b389774,0000000..eb7bd50
mode 100644,000000..100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/RuleShape.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/RuleShape.java
@@@ -1,101 -1,0 +1,101 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.joshua.decoder.ff;
 +
 +import static org.apache.joshua.decoder.ff.FeatureMap.hashFeature;
 +
 +import java.util.List;
 +
 +import org.apache.joshua.decoder.JoshuaConfiguration;
 +import org.apache.joshua.decoder.chart_parser.SourcePath;
 +import org.apache.joshua.decoder.ff.state_maintenance.DPState;
 +import org.apache.joshua.decoder.ff.tm.Rule;
 +import org.apache.joshua.decoder.hypergraph.HGNode;
 +import org.apache.joshua.decoder.segment_file.Sentence;
 +import org.apache.joshua.util.FormatUtils;
 +
 +/*
 + * Implements the RuleShape feature for source, target, and paired source+target sides.
 + */
 +public class RuleShape extends StatelessFF {
 +
 +  public RuleShape(FeatureVector weights, String[] args, JoshuaConfiguration config) {
 +    super(weights, "RuleShape", args, config);
 +  }
 +
 +  private enum WordType {
 +    N("N"), T("x"), P("+");
 +    private final String string;
 +
-     private WordType(final String string) {
++    WordType(final String string) {
 +      this.string = string;
 +    }
 +
 +    @Override
 +    public String toString() {
 +      return this.string;
 +    }
 +  }
 +
 +  private WordType getWordType(int id) {
 +    if (FormatUtils.isNonterminal(id)) {
 +      return WordType.N;
 +    } else {
 +      return WordType.T;
 +    }
 +  }
 +  
 +  /**
 +   * Returns a String describing the rule pattern.
 +   */
 +  private String getRulePattern(int[] ids) {
 +    final StringBuilder pattern = new StringBuilder();
 +    WordType currentType = getWordType(ids[0]);
++    // Track repetition locally: enum constants are singletons, so mutating a
++    // shared "repeats" flag on them would leak state across rules.
++    boolean repeats = false;
 +    for (int i = 1; i < ids.length; i++) {
 +      if (getWordType(ids[i]) != currentType) {
++        pattern.append(currentType).append(repeats ? "+" : "");
 +        currentType = getWordType(ids[i]);
++        repeats = false;
 +      } else {
++        repeats = true;
 +      }
 +    }
++    pattern.append(currentType).append(repeats ? "+" : "");
 +    return pattern.toString();
 +  }
 +  
 +  @Override
 +  public DPState compute(Rule rule, List<HGNode> tailNodes, int i_, int j, SourcePath sourcePath,
 +      Sentence sentence, Accumulator acc) {
 +    final String sourceShape = getRulePattern(rule.getSource());
 +    final String targetShape = getRulePattern(rule.getTarget());
 +    acc.add(hashFeature(name + "_source_" + sourceShape), 1);
 +    acc.add(hashFeature(name + "_target_" + sourceShape), 1);
 +    acc.add(hashFeature(name + "_sourceTarget_" + sourceShape + "_" + targetShape), 1);
 +    return null;
 +  }
 +}
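
A hedged illustration of the patterns getRulePattern() produces ("N" marks a
nonterminal run, "x" a terminal run, and "+" a run of length greater than one;
the ids are invented, with negative values standing in for nonterminals as in
Joshua's vocabulary encoding):

    int[] sourceIds = { 7, 9, -1, 12 }; // terminal, terminal, [X], terminal
    // getRulePattern(sourceIds) collapses the runs "x x", "N", "x" into "x+Nx",
    // so the fired feature names include "RuleShape_source_x+Nx".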

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dc756709/joshua-core/src/main/java/org/apache/joshua/decoder/ff/SourceDependentFF.java
----------------------------------------------------------------------
diff --cc joshua-core/src/main/java/org/apache/joshua/decoder/ff/SourceDependentFF.java
index 841402a,0000000..dec509f
mode 100644,000000..100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/SourceDependentFF.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/SourceDependentFF.java
@@@ -1,29 -1,0 +1,29 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.joshua.decoder.ff;
 +
 +import org.apache.joshua.decoder.segment_file.Sentence;
 +
 +public interface SourceDependentFF extends Cloneable {
 +
-   public void setSource(Sentence sentence);
++  void setSource(Sentence sentence);
 +
-   public FeatureFunction clone();
++  FeatureFunction clone();
 +
 +}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dc756709/joshua-core/src/main/java/org/apache/joshua/decoder/ff/SourcePathFF.java
----------------------------------------------------------------------
diff --cc joshua-core/src/main/java/org/apache/joshua/decoder/ff/SourcePathFF.java
index cb902a0,0000000..1d0e6e7
mode 100644,000000..100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/SourcePathFF.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/SourcePathFF.java
@@@ -1,53 -1,0 +1,53 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.joshua.decoder.ff;
 +
 +import java.util.List;
 +
 +import org.apache.joshua.decoder.JoshuaConfiguration;
 +import org.apache.joshua.decoder.chart_parser.SourcePath;
 +import org.apache.joshua.decoder.ff.state_maintenance.DPState;
 +import org.apache.joshua.decoder.ff.tm.Rule;
 +import org.apache.joshua.decoder.hypergraph.HGNode;
 +import org.apache.joshua.decoder.segment_file.Sentence;
 +
 +/**
 + * This feature returns the scored path through the source lattice, which is recorded in a
 + * SourcePath object.
 + * 
 + * @author Chris Dyer redpony@umd.edu
 + * @author Matt Post post@cs.jhu.edu
 + */
 +public final class SourcePathFF extends StatelessFF {
 +
 +  /*
 +   * This is a single-value feature template, so we cache the weight here.
 +   */
 +  public SourcePathFF(FeatureVector weights, String[] args, JoshuaConfiguration config) {
 +    super(weights, "SourcePath", args, config);
 +  }
-   
++
 +  @Override
 +  public DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j, SourcePath sourcePath,
 +      Sentence sentence, Accumulator acc) {
 +
 +    acc.add(featureId, sourcePath.getPathCost());
 +    return null;
 +  }
 +}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dc756709/joshua-core/src/main/java/org/apache/joshua/decoder/ff/TargetBigram.java
----------------------------------------------------------------------
diff --cc joshua-core/src/main/java/org/apache/joshua/decoder/ff/TargetBigram.java
index 888fa03,0000000..9338b0d
mode 100644,000000..100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/TargetBigram.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/TargetBigram.java
@@@ -1,218 -1,0 +1,215 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.joshua.decoder.ff;
 +
 +import static org.apache.joshua.decoder.ff.FeatureMap.hashFeature;
 +
 +import java.io.IOException;
 +import java.util.HashSet;
 +import java.util.LinkedList;
 +import java.util.List;
 +
 +import org.apache.joshua.corpus.Vocabulary;
 +import org.apache.joshua.decoder.JoshuaConfiguration;
 +import org.apache.joshua.decoder.chart_parser.SourcePath;
 +import org.apache.joshua.decoder.ff.state_maintenance.DPState;
 +import org.apache.joshua.decoder.ff.state_maintenance.NgramDPState;
 +import org.apache.joshua.decoder.ff.tm.Rule;
 +import org.apache.joshua.decoder.hypergraph.HGNode;
 +import org.apache.joshua.decoder.segment_file.Sentence;
 +import org.apache.joshua.util.FormatUtils;
 +import org.apache.joshua.util.io.LineReader;
 +
 +/***
 + * The TargetBigram feature is an indicator feature that counts target word bigrams that are created when
 + * a rule is applied. It accepts three parameters:
 + *
 + * -vocab /path/to/vocab
 + *
 + *  The path to a vocabulary, where each line is of the format ID WORD COUNT.
 + *
 + * -threshold N
 + *
 + *  Mask to UNK all words whose COUNT is less than N.
 + *
 + * -top-n N
 + *
 + *  Only use the top N words.
 + */
 +
 +public class TargetBigram extends StatefulFF {
 +
 +  private HashSet<String> vocab = null;
 +  private int maxTerms = 1000000;
 +  private int threshold = 0;
 +
 +  public TargetBigram(FeatureVector weights, String[] args, JoshuaConfiguration config) {
 +    super(weights, "TargetBigram", args, config);
 +
 +    if (parsedArgs.containsKey("threshold"))
 +      threshold = Integer.parseInt(parsedArgs.get("threshold"));
 +
 +    if (parsedArgs.containsKey("top-n"))
 +      maxTerms = Integer.parseInt(parsedArgs.get("top-n"));
 +
 +    if (parsedArgs.containsKey("vocab")) {
 +      loadVocab(parsedArgs.get("vocab"));
 +    }
 +  }
 +
 +  /**
 +   * Load vocabulary items passing the 'threshold' and 'top-n' filters.
 +   *
 +   * @param filename path to the vocabulary file, with lines of the form "ID WORD COUNT"
 +   */
 +  private void loadVocab(String filename) {
-     this.vocab = new HashSet<String>();
++    this.vocab = new HashSet<>();
 +    this.vocab.add("<s>");
 +    this.vocab.add("</s>");
 +    try {
 +      LineReader lineReader = new LineReader(filename);
 +      for (String line: lineReader) {
 +        if (lineReader.lineno() > maxTerms)
 +          break;
 +
 +        String[] tokens = line.split("\\s+");
 +        String word = tokens[1];
 +        int count = Integer.parseInt(tokens[2]);
 +
 +        if (count >= threshold)
 +          vocab.add(word);
 +      }
 +
 +    } catch (IOException e) {
 +      throw new RuntimeException(String.format(
 +          "* FATAL: couldn't load TargetBigram vocabulary '%s'", filename), e);
 +    }
 +  }
 +
 +  @Override
 +  public DPState compute(Rule rule, List<HGNode> tailNodes, int spanStart, int spanEnd,
 +      SourcePath sourcePath, Sentence sentence, Accumulator acc) {
 +
 +    int[] enWords = rule.getTarget();
 +
 +    int left = -1;
 +    int right = -1;
 +
-     List<String> currentNgram = new LinkedList<String>();
-     for (int c = 0; c < enWords.length; c++) {
-       int curID = enWords[c];
- 
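++    // Walk the target side left to right, keeping a window of at most two words;
++    // each time the window fills, fire one bigram indicator feature and slide it.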
++    List<String> currentNgram = new LinkedList<>();
++    for (int curID : enWords) {
 +      if (FormatUtils.isNonterminal(curID)) {
 +        int index = -(curID + 1);
 +        NgramDPState state = (NgramDPState) tailNodes.get(index).getDPState(stateIndex);
 +        int[] leftContext = state.getLeftLMStateWords();
 +        int[] rightContext = state.getRightLMStateWords();
 +
 +        // Left context.
 +        for (int token : leftContext) {
 +          currentNgram.add(getWord(token));
 +          if (left == -1)
 +            left = token;
 +          right = token;
 +          if (currentNgram.size() == 2) {
 +            String ngram = join(currentNgram);
 +            acc.add(hashFeature(String.format("%s_%s", name, ngram)), 1);
 +            //            System.err.println(String.format("ADDING %s_%s", name, ngram));
 +            currentNgram.remove(0);
 +          }
 +        }
 +        // Replace the right context in the window, and keep 'right' in sync with it.
 +        int tSize = currentNgram.size();
 +        for (int i = 0; i < rightContext.length; i++)
 +          currentNgram.set(tSize - rightContext.length + i, getWord(rightContext[i]));
 +        if (rightContext.length > 0)
 +          right = rightContext[rightContext.length - 1];
 +
 +      } else { // terminal words
 +        currentNgram.add(getWord(curID));
 +        if (left == -1)
 +          left = curID;
 +        right = curID;
 +        if (currentNgram.size() == 2) {
 +          String ngram = join(currentNgram);
 +          acc.add(hashFeature(String.format("%s_%s", name, ngram)), 1);
 +          //          System.err.println(String.format("ADDING %s_%s", name, ngram));
 +          currentNgram.remove(0);
 +        }
 +      }
 +    }
 +
-     NgramDPState state = new NgramDPState(new int[] { left }, new int[] { right });
 +    //    System.err.println(String.format("RULE %s -> state %s", rule.getRuleString(), state));
-     return state;
++    return new NgramDPState(new int[] { left }, new int[] { right });
 +  }
 +
 +  /**
 +   * Returns the word for an ID after checking it against the private vocabulary (if set).
 +   *
 +   * @param curID the vocabulary ID
 +   * @return the word, or "UNK" if it is not in the vocabulary
 +   */
 +  private String getWord(int curID) {
 +    String word = Vocabulary.word(curID);
 +
 +    if (vocab != null && ! vocab.contains(word)) {
 +      return "UNK";
 +    }
 +
 +    return word;
 +  }
 +
 +  /**
 +   * We don't compute a future cost.
 +   */
 +  @Override
 +  public float estimateFutureCost(Rule rule, DPState state, Sentence sentence) {
 +    return 0.0f;
 +  }
 +
 +  /**
 +   * There is nothing to be done here, since &lt;s&gt; and &lt;/s&gt; are included in rules that are part
 +   * of the grammar. We simply return the DP state of the tail node.
 +   */
 +  @Override
 +  public DPState computeFinal(HGNode tailNode, int i, int j, SourcePath sourcePath,
 +      Sentence sentence, Accumulator acc) {
 +
 +    return tailNode.getDPState(stateIndex);
 +  }
 +
 +  /**
 +   * TargetBigram features are only computed across hyperedges, so there is nothing to be done here. 
 +   */
 +  @Override
 +  public float estimateCost(Rule rule, Sentence sentence) {
 +    return 0.0f;
 +  }
 +
 +  /**
 +   * Joins a list with the _ character. (Java 8's String.join could replace this.)
 +   *
 +   * @param list a list of strings
 +   * @return the joined String
 +   */
 +  private String join(List<String> list) {
 +    StringBuilder sb = new StringBuilder();
 +    for (String item : list) {
-       sb.append(item.toString() + "_");
++      sb.append(item).append("_");
 +    }
 +
 +    return sb.substring(0, sb.length() - 1);
 +  }
 +}
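
Two quick illustrations may help here. First, given the three options documented above, a
plausible configuration line (option spellings follow the constructor's parsedArgs lookups;
the path and values are illustrative):

    feature-function = TargetBigram -vocab /path/to/vocab -top-n 10000 -threshold 2

Second, a self-contained sketch of the sliding two-word window that compute() maintains,
over a plain string array (the sentence and feature-name prefix are made up; the real code
works over vocabulary IDs and tail-node LM states):

    import java.util.LinkedList;
    import java.util.List;

    public class BigramWindowSketch {
      public static void main(String[] args) {
        String[] words = { "<s>", "the", "man", "said", "</s>" };
        List<String> window = new LinkedList<>();
        for (String w : words) {
          window.add(w);
          if (window.size() == 2) {
            // Where TargetBigram would call acc.add(hashFeature(...), 1).
            System.out.println("TargetBigram_" + window.get(0) + "_" + window.get(1));
            window.remove(0);
          }
        }
      }
    }

This prints one line per target-side bigram (TargetBigram_<s>_the, TargetBigram_the_man,
and so on), mirroring how one indicator feature fires per bigram.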

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dc756709/joshua-core/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/ConcatenationIterator.java
----------------------------------------------------------------------
diff --cc joshua-core/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/ConcatenationIterator.java
index f75dffa,0000000..1d181e7
mode 100644,000000..100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/ConcatenationIterator.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/ConcatenationIterator.java
@@@ -1,93 -1,0 +1,91 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.joshua.decoder.ff.fragmentlm;
 +
 +import java.util.ArrayList;
 +import java.util.Arrays;
 +import java.util.Collection;
 +import java.util.Collections;
 +import java.util.Iterator;
 +import java.util.List;
 +import java.util.NoSuchElementException;
 +
 +/**
 + * Concatenates an iterator over iterators into one long iterator.
 + *
 + * @author Dan Klein
 + */
 +public class ConcatenationIterator<E> implements Iterator<E> {
 +
-   Iterator<Iterator<E>> sourceIterators;
++  final Iterator<Iterator<E>> sourceIterators;
 +  Iterator<E> currentIterator;
 +  Iterator<E> lastIteratorToReturn;
 +
 +  public boolean hasNext() {
-     if (currentIterator.hasNext())
-       return true;
-     return false;
++    return currentIterator.hasNext();
 +  }
 +
 +  public E next() {
 +    if (currentIterator.hasNext()) {
 +      E e = currentIterator.next();
 +      lastIteratorToReturn = currentIterator;
 +      advance();
 +      return e;
 +    }
 +    throw new NoSuchElementException();
 +  }
 +
 +  private void advance() {
 +    while (! currentIterator.hasNext() && sourceIterators.hasNext()) {
 +      currentIterator = sourceIterators.next();
 +    }
 +  }
 +
 +  public void remove() {
 +    if (lastIteratorToReturn == null)
 +      throw new IllegalStateException();
 +    lastIteratorToReturn.remove();
 +  }
 +
 +  public ConcatenationIterator(Iterator<Iterator<E>> sourceIterators) {
 +    this.sourceIterators = sourceIterators;
 +    this.currentIterator = (new ArrayList<E>()).iterator();
 +    this.lastIteratorToReturn = null;
 +    advance();
 +  }
 +
 +  public ConcatenationIterator(Collection<Iterator<E>> iteratorCollection) {
 +    this(iteratorCollection.iterator());
 +  }
 +
 +  public static void main(String[] args) {
 +    List<String> list0 = Collections.emptyList();
 +    List<String> list1 = Arrays.asList("a b c d".split(" "));
 +    List<String> list2 = Arrays.asList("e f".split(" "));
-     List<Iterator<String>> iterators = new ArrayList<Iterator<String>>();
++    List<Iterator<String>> iterators = new ArrayList<>();
 +    iterators.add(list1.iterator());
 +    iterators.add(list0.iterator());
 +    iterators.add(list2.iterator());
 +    iterators.add(list0.iterator());
-     Iterator<String> iterator = new ConcatenationIterator<String>(iterators);
++    Iterator<String> iterator = new ConcatenationIterator<>(iterators);
 +    while (iterator.hasNext()) {
 +      System.out.println(iterator.next());
 +    }
 +  }
 +}
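
For what it's worth, the main method above doubles as a usage example: with the four lists
it builds (a b c d, an empty list, e f, another empty list), the expected output is

    a
    b
    c
    d
    e
    f

with the empty iterators skipped transparently by advance().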

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dc756709/joshua-core/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/FragmentLMFF.java
----------------------------------------------------------------------
diff --cc joshua-core/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/FragmentLMFF.java
index 5d6780b,0000000..5332135
mode 100644,000000..100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/FragmentLMFF.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/FragmentLMFF.java
@@@ -1,324 -1,0 +1,317 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.joshua.decoder.ff.fragmentlm;
 +
 +import static org.apache.joshua.decoder.ff.FeatureMap.hashFeature;
 +
 +import java.io.IOException;
 +import java.util.ArrayList;
 +import java.util.Collection;
 +import java.util.HashMap;
 +import java.util.List;
 +import java.util.Stack;
 +
 +import org.apache.joshua.decoder.JoshuaConfiguration;
 +import org.apache.joshua.decoder.chart_parser.SourcePath;
 +import org.apache.joshua.decoder.ff.FeatureVector;
 +import org.apache.joshua.decoder.ff.StatefulFF;
 +import org.apache.joshua.decoder.ff.state_maintenance.DPState;
 +import org.apache.joshua.decoder.ff.tm.Rule;
 +import org.apache.joshua.decoder.hypergraph.HGNode;
 +import org.apache.joshua.decoder.segment_file.Sentence;
 +import org.slf4j.Logger;
 +import org.slf4j.LoggerFactory;
 +
 +/**
 + * <p>Feature function that reads in a list of language model fragments and matches them against the
 + * hypergraph. This allows for language model fragment "glue" features, which fire when LM fragments
 + * (supplied as input) are assembled. These LM fragments are presumably useful in ensuring
 + * grammaticality and can be independent of the translation model fragments.</p>
 + * 
 + * <p>Usage: in the Joshua Configuration file, put</p>
 + * 
 + * <code>feature-function = FragmentLM -lm LM_FRAGMENTS_FILE -map RULE_FRAGMENTS_MAP_FILE</code>
 + * 
 + * <p>LM_FRAGMENTS_FILE is the path to a file listing the fragments to match, one fragment per
 + * line in PTB format, e.g.:</p>
 + * 
 + * <code>(S NP (VP (VBD said) SBAR) (. .))</code>
 + * 
 + * <p>RULE_FRAGMENTS_MAP_FILE points to a file that maps fragments to the flattened SCFG rule format
 + * that Joshua uses. This mapping is necessary because Joshua's rules have been flattened, meaning
 + * that their internal structure has been removed, yet this structure is needed for matching LM
 + * fragments. The format of the file is</p>
 + * 
 + * <code>FRAGMENT ||| RULE-TARGET-SIDE</code>
 + * 
 + * <p>for example,</p>
 + * 
 + * <code>(S (NP (DT the) (NN man)) VP .) ||| the man [VP,1] [.,2]<br>
 + * (SBAR (IN that) (S (NP (PRP he)) (VP (VBD was) (VB done)))) ||| that he was done<br>
 + * (VP (VBD said) SBAR) ||| said SBAR</code>
 + * 
 + * @author Matt Post post@cs.jhu.edu
 + */
 +public class FragmentLMFF extends StatefulFF {
 +
 +  private static final Logger LOG = LoggerFactory.getLogger(FragmentLMFF.class);
 +
 +  /*
 +   * When building a fragment from a rule rooted in the hypergraph, this parameter determines how
 +   * deep we'll go. Smaller values mean less hypergraph traversal but may also limit the LM
 +   * fragments that can be fired.
 +   */
 +  private int BUILD_DEPTH = 1;
 +
 +  /*
 +   * The maximum depth of a fragment, defined as the longest path from the fragment root to any of
 +   * its leaves.
 +   */
 +  private int MAX_DEPTH = 0;
 +
 +  /*
 +   * This is the minimum depth for lexicalized LM fragments. This allows you to easily exclude small
 +   * depth-one fragments that may be overfit to the training data. A depth of 1 (the default) does
 +   * not exclude any fragments.
 +   */
 +  private int MIN_LEX_DEPTH = 1;
 +
 +  /*
-    * Set to true to activate meta-features.
-    */
-   private boolean OPTS_DEPTH = false;
- 
-   /*
 +   * This contains a list of the language model fragments, indexed by LHS.
 +   */
 +  private HashMap<String, ArrayList<Tree>> lmFragments = null;
 +
 +  private int numFragments = 0;
 +
 +  /* The location of the file containing the language model fragments */
 +  private String fragmentLMFile = "";
 +
 +  /**
 +   * @param weights a {@link org.apache.joshua.decoder.ff.FeatureVector} with weights
 +   * @param args arguments passed to the feature function
 +   * @param config the {@link org.apache.joshua.decoder.JoshuaConfiguration}
 +   */
 +  public FragmentLMFF(FeatureVector weights, String[] args, JoshuaConfiguration config) {
 +    super(weights, "FragmentLMFF", args, config);
 +
-     lmFragments = new HashMap<String, ArrayList<Tree>>();
++    lmFragments = new HashMap<>();
 +
 +    fragmentLMFile = parsedArgs.get("lm");
 +    if (parsedArgs.containsKey("build-depth"))
 +      BUILD_DEPTH = Integer.parseInt(parsedArgs.get("build-depth"));
 +    if (parsedArgs.containsKey("max-depth"))
 +      MAX_DEPTH = Integer.parseInt(parsedArgs.get("max-depth"));
 +    if (parsedArgs.containsKey("min-lex-depth"))
 +      MIN_LEX_DEPTH = Integer.parseInt(parsedArgs.get("min-lex-depth"));
 +
 +    /* Read in the language model fragments */
 +    try {
 +      Collection<Tree> trees = PennTreebankReader.readTrees(fragmentLMFile);
-       for (Tree fragment : trees) {
-         addLMFragment(fragment);
- 
-         // System.err.println(String.format("Read fragment: %s",
-         // lmFragments.get(lmFragments.size()-1)));
-       }
++      // System.err.println(String.format("Read fragment: %s",
++      // lmFragments.get(lmFragments.size()-1)));
++      trees.forEach(this::addLMFragment);
 +    } catch (IOException e) {
 +      throw new RuntimeException(String.format("* FATAL: couldn't read fragment LM file '%s'",
 +          fragmentLMFile), e);
 +    }
 +    LOG.info("FragmentLMFF: Read {} LM fragments from '{}'", numFragments, fragmentLMFile);
 +  }
 +
 +  /**
 +   * Add the provided fragment to the language model, subject to some filtering.
 +   * 
 +   * @param fragment a {@link org.apache.joshua.decoder.ff.fragmentlm.Tree} fragment
 +   */
 +  public void addLMFragment(Tree fragment) {
 +    if (lmFragments == null)
 +      return;
 +
 +    int fragmentDepth = fragment.getDepth();
 +
 +    if (MAX_DEPTH != 0 && fragmentDepth > MAX_DEPTH) {
 +      LOG.warn("Skipping fragment {} (depth {} > {})", fragment, fragmentDepth, MAX_DEPTH);
 +      return;
 +    }
 +
 +    if (MIN_LEX_DEPTH > 1 && fragment.isLexicalized() && fragmentDepth < MIN_LEX_DEPTH) {
 +      LOG.warn("Skipping fragment {} (lex depth {} < {})", fragment, fragmentDepth, MIN_LEX_DEPTH);
 +      return;
 +    }
 +
 +    if (lmFragments.get(fragment.getRule()) == null) {
-       lmFragments.put(fragment.getRule(), new ArrayList<Tree>());
++      lmFragments.put(fragment.getRule(), new ArrayList<>());
 +    }
 +    lmFragments.get(fragment.getRule()).add(fragment);
 +    numFragments++;
 +  }
 +  
 +  /**
 +   * This function computes the features that fire when the current rule is applied. The features
 +   * that fire are any LM fragments that match the fragment associated with the current rule. LM
 +   * fragments may recurse over the tail nodes, following 1-best backpointers until the fragment
 +   * either matches or fails.
 +   * 
 +   * @param rule {@link org.apache.joshua.decoder.ff.tm.Rule} to be utilized within computation
 +   * @param tailNodes {@link java.util.List} of {@link org.apache.joshua.decoder.hypergraph.HGNode} tail nodes
 +   * @param i todo
 +   * @param j todo
 +   * @param sourcePath information about a path taken through the source {@link org.apache.joshua.lattice.Lattice}
 +   * @param sentence {@link org.apache.joshua.lattice.Lattice} input
 +   * @param acc {@link org.apache.joshua.decoder.ff.FeatureFunction.Accumulator} object permitting generalization of feature computation
 +   * @return the new dynamic programming state (null for stateless features)
 +   */
 +  @Override
 +  public DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j, SourcePath sourcePath, 
 +      Sentence sentence, Accumulator acc) {
 +
 +    /*
 +     * Get the fragment associated with the target side of this rule.
 +     * 
 +     * This could be done more efficiently. For example, just build the tree fragment once and then
 +     * pattern match against it. This would circumvent having to build the tree possibly once every
 +     * time you try to apply a rule.
 +     */
 +    Tree baseTree = Tree.buildTree(rule, tailNodes, BUILD_DEPTH);
 +
-     Stack<Tree> nodeStack = new Stack<Tree>();
++    Stack<Tree> nodeStack = new Stack<>();
 +    nodeStack.add(baseTree);
 +    while (!nodeStack.empty()) {
 +      Tree tree = nodeStack.pop();
 +      if (tree == null)
 +        continue;
 +
 +      if (lmFragments.get(tree.getRule()) != null) {
 +        for (Tree fragment : lmFragments.get(tree.getRule())) {
 +//           System.err.println(String.format("Does\n  %s match\n  %s??\n  -> %s", fragment, tree,
 +//           match(fragment, tree)));
 +
 +          if (fragment.getLabel() == tree.getLabel() && match(fragment, tree)) {
 +//             System.err.println(String.format("  FIRING: matched %s against %s", fragment, tree));
 +            acc.add(hashFeature(fragment.escapedString()), 1);
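++            // Depth meta-features; disabled by default, flip the flag below to emit them.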
++            boolean OPTS_DEPTH = false;
 +            if (OPTS_DEPTH)
 +              if (fragment.isLexicalized())
 +                acc.add(hashFeature(String.format("FragmentFF_lexdepth%d", fragment.getDepth())), 1);
 +              else
 +                acc.add(hashFeature(String.format("FragmentFF_depth%d", fragment.getDepth())), 1);
 +          }
 +        }
 +      }
 +
 +      // We also need to try matching rules against internal nodes of the fragment
 +      // corresponding to this rule.
 +      if (tree.getChildren() != null)
 +        for (Tree childNode : tree.getChildren()) {
 +          if (!childNode.isBoundary())
 +            nodeStack.add(childNode);
 +        }
 +    }
 +
 +    return new FragmentState(baseTree);
 +  }
 +
 +  /**
 +   * Matches the fragment against the (possibly partially-built) tree.
 +   * 
 +   * @param fragment the language model fragment
 +   * @param tree the tree to match against (expanded from the hypergraph)
 +   * @return true if the fragment structurally matches the tree
 +   */
 +  private boolean match(Tree fragment, Tree tree) {
 +    // System.err.println(String.format("MATCH(%s,%s)", fragment, tree));
 +
 +    /* Make sure the root labels match. */
 +    if (fragment.getLabel() != tree.getLabel()) {
 +      return false;
 +    }
 +
 +    /* Same number of kids? */
 +    List<Tree> fkids = fragment.getChildren();
 +    if (fkids.size() > 0) {
 +      List<Tree> tkids = tree.getChildren();
 +      if (fkids.size() != tkids.size()) {
 +        return false;
 +      }
 +
 +      /* Do the kids match on all labels? */
 +      for (int i = 0; i < fkids.size(); i++)
 +        if (fkids.get(i).getLabel() != tkids.get(i).getLabel())
 +          return false;
 +
 +      /* Recursive match. */
 +      for (int i = 0; i < fkids.size(); i++) {
 +        if (!match(fkids.get(i), tkids.get(i)))
 +          return false;
 +      }
 +    }
 +
 +    return true;
 +  }
 +
 +  @Override
 +  public DPState computeFinal(HGNode tailNode, int i, int j, SourcePath sourcePath, Sentence sentence,
 +      Accumulator acc) {
 +    // TODO Auto-generated method stub
 +    return null;
 +  }
 +
 +  @Override
 +  public float estimateFutureCost(Rule rule, DPState state, Sentence sentence) {
 +    // TODO Auto-generated method stub
 +    return 0;
 +  }
 +
 +  @Override
 +  public float estimateCost(Rule rule, Sentence sentence) {
 +    // TODO Auto-generated method stub
 +    return 0;
 +  }
 +
 +  /**
 +   * Maintains the tree fragment built up at this node of the hypergraph, so that fragment
 +   * matching can continue across hyperedges.
 +   * 
 +   * @author Matt Post post@cs.jhu.edu
 +   * @author Juri Ganitkevitch juri@cs.jhu.edu
 +   */
 +  public class FragmentState extends DPState {
 +
 +    private Tree tree = null;
 +
 +    public FragmentState(Tree tree) {
 +      this.tree = tree;
 +    }
 +
 +    /**
 +     * Every tree is unique.
 +     * 
 +     * Some savings could be had here if we grouped together items with the same string.
 +     */
 +    @Override
 +    public int hashCode() {
 +      return tree.hashCode();
 +    }
 +
 +    @Override
 +    public boolean equals(Object other) {
 +      return this == other;
 +    }
 +
 +    @Override
 +    public String toString() {
 +      return String.format("[FragmentState %s]", tree);
 +    }
 +  }
 +}
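
Putting the pieces together, a plausible configuration line for this feature, using the
name from the class javadoc and the options the constructor parses (lm, build-depth,
max-depth, min-lex-depth); the fragment file name is illustrative only:

    feature-function = FragmentLM -lm lm.fragments -build-depth 1 -max-depth 4 -min-lex-depth 2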

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dc756709/joshua-core/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/PennTreebankReader.java
----------------------------------------------------------------------
diff --cc joshua-core/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/PennTreebankReader.java
index 1637b5f,0000000..bb1c29a
mode 100644,000000..100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/PennTreebankReader.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/PennTreebankReader.java
@@@ -1,135 -1,0 +1,134 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.joshua.decoder.ff.fragmentlm;
 +
 +import java.util.*;
 +import java.io.*;
 +import java.nio.charset.Charset;
 +import java.nio.charset.UnsupportedCharsetException;
 +
 +/**
 + * @author Dan Klein
 + */
 +public class PennTreebankReader {
 +
 +  static class TreeCollection extends AbstractCollection<Tree> {
 +
-     List<File> files;
-     Charset charset;
++    final List<File> files;
++    final Charset charset;
 +
 +    static class TreeIteratorIterator implements Iterator<Iterator<Tree>> {
-       Iterator<File> fileIterator;
++      final Iterator<File> fileIterator;
 +      Iterator<Tree> nextTreeIterator;
-       Charset charset;
++      final Charset charset;
 +
 +      public boolean hasNext() {
 +        return nextTreeIterator != null;
 +      }
 +
 +      public Iterator<Tree> next() {
 +        Iterator<Tree> currentTreeIterator = nextTreeIterator;
 +        advance();
 +        return currentTreeIterator;
 +      }
 +
 +      public void remove() {
 +        throw new UnsupportedOperationException();
 +      }
 +
 +      private void advance() {
 +        nextTreeIterator = null;
 +        while (nextTreeIterator == null && fileIterator.hasNext()) {
 +          File file = fileIterator.next();
 +          // System.out.println(file);
 +          try {
 +            nextTreeIterator = new Trees.PennTreeReader(new BufferedReader(new InputStreamReader(
 +                new FileInputStream(file), this.charset)));
 +          } catch (FileNotFoundException e) {
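 +            // Silently skip files that can't be opened; advance() moves on to the next one.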
 +          } catch (UnsupportedCharsetException e) {
 +            throw new Error("Unsupported charset in file " + file.getPath());
 +          }
 +        }
 +      }
 +
 +      TreeIteratorIterator(List<File> files, Charset charset) {
 +        this.fileIterator = files.iterator();
 +        this.charset = charset;
 +        advance();
 +      }
 +    }
 +
 +    public Iterator<Tree> iterator() {
-       return new ConcatenationIterator<Tree>(new TreeIteratorIterator(files, this.charset));
++      return new ConcatenationIterator<>(new TreeIteratorIterator(files, this.charset));
 +    }
 +
 +    public int size() {
 +      int size = 0;
 +      Iterator<Tree> i = iterator();
 +      while (i.hasNext()) {
 +        size++;
 +        i.next();
 +      }
 +      return size;
 +    }
 +
 +    @SuppressWarnings("unused")
 +    private List<File> getFilesUnder(String path, FileFilter fileFilter) {
 +      File root = new File(path);
-       List<File> files = new ArrayList<File>();
++      List<File> files = new ArrayList<>();
 +      addFilesUnder(root, files, fileFilter);
 +      return files;
 +    }
 +
 +    private void addFilesUnder(File root, List<File> files, FileFilter fileFilter) {
 +      if (!fileFilter.accept(root))
 +        return;
 +      if (root.isFile()) {
 +        files.add(root);
 +        return;
 +      }
 +      if (root.isDirectory()) {
 +        File[] children = root.listFiles();
-         for (int i = 0; i < children.length; i++) {
-           File child = children[i];
++        for (File child : children) {
 +          addFilesUnder(child, files, fileFilter);
 +        }
 +      }
 +    }
 +
 +    public TreeCollection(String file) throws IOException {
-       this.files = new ArrayList<File>();
++      this.files = new ArrayList<>();
 +      this.files.add(new File(file));
 +      this.charset = Charset.defaultCharset();
 +    }
 +  }
 +  
 +  public static Collection<Tree> readTrees(String path) throws IOException {
 +    return new TreeCollection(path);
 +  }
 +
 +  public static void main(String[] args) {
 +/*    Collection<Tree> trees = readTrees(args[0], Charset.defaultCharset());
 +    for (Tree tree : trees) {
 +      tree = (new Trees.StandardTreeNormalizer()).transformTree(tree);
 +      System.out.println(Trees.PennTreeRenderer.render(tree));
 +    }
 +  */
 +  }
 +
 +}
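
Finally, a minimal usage sketch for the reader (the path is hypothetical, and it assumes
Tree renders itself in PTB format via toString(), as the commented-out main above suggests):

    import java.io.IOException;
    import java.util.Collection;
    import org.apache.joshua.decoder.ff.fragmentlm.PennTreebankReader;
    import org.apache.joshua.decoder.ff.fragmentlm.Tree;

    public class ReadTreesSketch {
      public static void main(String[] args) throws IOException {
        // readTrees returns a lazy collection: trees are parsed as iteration advances.
        Collection<Tree> trees = PennTreebankReader.readTrees("/path/to/fragments.ptb");
        for (Tree tree : trees)
          System.out.println(tree);
      }
    }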