You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/06/23 18:45:52 UTC
[41/60] [partial] incubator-joshua git commit: maven multi-module layout 1st commit: moving files into joshua-core

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/decoder/ff/FeatureFunction.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/FeatureFunction.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/FeatureFunction.java
new file mode 100644
index 0000000..6f231ae
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/FeatureFunction.java
@@ -0,0 +1,364 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.chart_parser.SourcePath;
+import org.apache.joshua.decoder.ff.state_maintenance.DPState;
+import org.apache.joshua.decoder.ff.tm.Rule;
+import org.apache.joshua.decoder.hypergraph.HGNode;
+import org.apache.joshua.decoder.segment_file.Sentence;
+
+/**
+ * <p>This class defines Joshua's feature function interface, for both sparse and
+ * dense features. It is immediately inherited by StatelessFF and StatefulFF,
+ * which provide functionality common to stateless and stateful features,
+ * respectively. Any feature implementation should extend those classes, and not
+ * this one. The distinction between stateless and stateful features is somewhat
+ * narrow: all features have the opportunity to return an instance of a
+ * {@link DPState} object, and stateless ones just return null.</p>
+ * 
+ * <p>Features in Joshua work like templates. Each feature function defines any
+ * number of actual features, which are associated with weights. The task of the
+ * feature function is to compute the features that are fired in different
+ * circumstances and then return the inner product of those features with the
+ * weight vector. Feature functions can also produce estimates of their future
+ * cost (via {@link org.apache.joshua.decoder.ff.FeatureFunction#estimateCost(Rule, Sentence)}); 
+ * these values are not used in computing the
+ * score, but are only used for sorting rules during cube pruning. The
+ * individual features produced by each template should have globally unique
+ * names; a good convention is to prefix each feature with the name of the
+ * template that produced it.</p>
+ * 
+ * <p>Joshua does not retain individual feature values while decoding, since this
+ * requires keeping a sparse feature vector along every hyperedge, which can be
+ * expensive. Instead, it computes only the weighted cost of each edge. If the
+ * individual feature values are requested, the feature functions are replayed
+ * in post-processing, say during k-best list extraction. This is implemented in
+ * a generic way by passing an {@link Accumulator} object to the compute()
+ * function. During decoding, the accumulator simply sums weighted features in a
+ * scalar. During k-best extraction, when individual feature values are needed,
+ * a {@link FeatureAccumulator} is used to retain the individual values.</p>
+ * 
+ * @author Matt Post post@cs.jhu.edu
+ * @author Juri Ganitkevich juri@cs.jhu.edu
+ */
+public abstract class FeatureFunction {
+
+  /*
+   * Recognizes an argument key: a dash, one letter, then at least one more non-whitespace
+   * character. Tokens such as "-1.5" or "-x" therefore count as values, not keys. Compiled once
+   * here instead of on every iteration of the loop in parseArgs().
+   */
+  private static final Pattern ARG_KEY_PATTERN = Pattern.compile("^-[a-zA-Z]\\S+");
+
+  /*
+   * The name of the feature function; this generally matches the weight name on
+   * the config file. This can also be used as a prefix for feature / weight
+   * names, for templates that define multiple features.
+   */
+  protected String name = null;
+
+  /*
+   * The list of features each function can contribute, along with the dense feature IDs.
+   */
+  protected String[] denseFeatureNames = null;
+  protected int[] denseFeatureIDs = null;
+
+  /*
+   * The first dense feature index
+   */
+  protected int denseFeatureIndex = -1; 
+
+  // The list of arguments passed to the feature, and the hash for the parsed args
+  protected String[] args;
+  protected HashMap<String, String> parsedArgs = null; 
+
+  /*
+   * The global weight vector used by the decoder, passed in when the feature is
+   * instantiated
+   */
+  protected FeatureVector weights;
+
+  /* The config */
+  protected JoshuaConfiguration config;
+
+  /**
+   * @return the name this feature function was constructed with
+   */
+  public String getName() {
+    return name;
+  }
+
+  /**
+   * @return whether the feature has state
+   */
+  public abstract boolean isStateful();
+
+  /**
+   * Stores the constructor arguments and eagerly parses {@code args} into {@link #parsedArgs}
+   * using {@link #parseArgs(String[])}.
+   * 
+   * @param weights the weight vector this feature scores against
+   * @param name the name of the feature function
+   * @param args the raw arguments passed to the feature
+   * @param config the decoder configuration
+   */
+  public FeatureFunction(FeatureVector weights, String name, String[] args, JoshuaConfiguration config) {
+    this.weights = weights;
+    this.name = name;
+    this.args = args;
+    this.config = config;
+
+    this.parsedArgs = FeatureFunction.parseArgs(args);
+  }
+
+  /**
+   * Any feature function can use this to report dense features names to the master code. The 
+   * parameter tells the feature function the index of the first available dense feature ID; the feature
+   * function will then use IDs (id..id+names.size()-1). This default implementation reports no
+   * dense features.
+   * 
+   * @param id the id of the first dense feature id to use
+   * @return a list of dense feature names (empty unless overridden)
+   */
+  public ArrayList<String> reportDenseFeatures(int id) {
+    return new ArrayList<String>();
+  }
+
+  /**
+   * Builds a short description of this feature for logging: the name followed by the sparse
+   * weight stored under that name.
+   * 
+   * @return "name (weight w)", or just the name if the weight lookup throws a
+   *         {@link RuntimeException} (for example because no weight vector was supplied)
+   */
+  public String logString() {
+    try {
+      return String.format("%s (weight %.3f)", name, weights.getSparse(name));
+    } catch (RuntimeException e) {
+      // Deliberate best-effort: logging must not fail just because the weight is unavailable.
+      return name;
+    }
+  }
+
+  /**
+   * This is the main function for defining feature values. The implementor
+   * should compute all the features along the hyperedge, calling 
+   * {@link org.apache.joshua.decoder.ff.FeatureFunction.Accumulator#add(String, float)}
+   * for each feature. It then returns the newly-computed dynamic
+   * programming state for this feature (for example, for the
+   * {@link org.apache.joshua.decoder.ff.lm.LanguageModelFF} feature, this returns the new language model
+   * context). For stateless features, this value is null.
+   * 
+   * Note that implementors pass *unweighted* feature values to the accumulator; any
+   * multiplication with the weight vector is the accumulator's job (see
+   * {@link ScoreAccumulator}), not the feature's.
+   * 
+   * @param rule {@link org.apache.joshua.decoder.ff.tm.Rule} to be utilized within computation
+   * @param tailNodes {@link java.util.List} of {@link org.apache.joshua.decoder.hypergraph.HGNode} tail nodes
+   * @param i presumably the start index of the span covered by this edge (TODO confirm)
+   * @param j presumably the end index of the span covered by this edge (TODO confirm)
+   * @param sourcePath information about a path taken through the source {@link org.apache.joshua.lattice.Lattice}
+   * @param sentence the input {@link Sentence}
+   * @param acc {@link org.apache.joshua.decoder.ff.FeatureFunction.Accumulator} object permitting generalization of feature computation
+   * @return the new dynamic programming state (null for stateless features)
+   */
+  public abstract DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j,
+      SourcePath sourcePath, Sentence sentence, Accumulator acc);
+
+  /**
+   * Feature functions must override this. StatefulFF and StatelessFF provide
+   * reasonable defaults since most features do not fire on the goal node.
+   * 
+   * @param tailNode single {@link org.apache.joshua.decoder.hypergraph.HGNode} representing tail node
+   * @param i presumably the start index of the span covered by this edge (TODO confirm)
+   * @param j presumably the end index of the span covered by this edge (TODO confirm)
+   * @param sourcePath information about a path taken through the source {@link org.apache.joshua.lattice.Lattice}
+   * @param sentence the input {@link Sentence}
+   * @param acc {@link org.apache.joshua.decoder.ff.FeatureFunction.Accumulator} object permitting generalization of feature computation
+   * @return the DPState (null if none)
+   */
+  public abstract DPState computeFinal(HGNode tailNode, int i, int j, SourcePath sourcePath,
+      Sentence sentence, Accumulator acc);
+
+  /**
+   * This is a convenience function for retrieving the features fired when
+   * applying a rule, provided for backward compatibility.
+   * 
+   * Returns the *unweighted* cost of the features delta computed at this
+   * position. Note that this is a feature delta, so existing feature costs of
+   * the tail nodes should not be incorporated, and it is very important not to
+   * incorporate the feature weights. This function is used in the kbest
+   * extraction code but could also be used in computing the cost.
+   * 
+   * @param rule {@link org.apache.joshua.decoder.ff.tm.Rule} to be utilized within computation
+   * @param tailNodes {@link java.util.List} of {@link org.apache.joshua.decoder.hypergraph.HGNode} tail nodes
+   * @param i passed through unchanged to {@link #compute}
+   * @param j passed through unchanged to {@link #compute}
+   * @param sourcePath information about a path taken through the source {@link org.apache.joshua.lattice.Lattice}
+   * @param sentence the input {@link Sentence}
+   * @return an *unweighted* feature delta
+   */
+  public final FeatureVector computeFeatures(Rule rule, List<HGNode> tailNodes, int i, int j,
+      SourcePath sourcePath, Sentence sentence) {
+
+    FeatureAccumulator features = new FeatureAccumulator();
+    compute(rule, tailNodes, i, j, sourcePath, sentence, features);
+    return features.getFeatures();
+  }
+
+  /**
+   * This function is called for the final transition. For example, the
+   * LanguageModel feature function treats the last rule specially. It
+   * returns the *weighted* cost of applying the feature, obtained by running
+   * {@link #computeFinal} with a {@link ScoreAccumulator}. Provided for backward
+   * compatibility.
+   * 
+   * @param tailNode single {@link org.apache.joshua.decoder.hypergraph.HGNode} representing tail node
+   * @param i passed through unchanged to {@link #computeFinal}
+   * @param j passed through unchanged to {@link #computeFinal}
+   * @param sourcePath information about a path taken through the source {@link org.apache.joshua.lattice.Lattice}
+   * @param sentence the input {@link Sentence}
+   * @return a *weighted* feature cost
+   */
+  public final float computeFinalCost(HGNode tailNode, int i, int j, SourcePath sourcePath,
+      Sentence sentence) {
+
+    ScoreAccumulator score = new ScoreAccumulator();
+    computeFinal(tailNode, i, j, sourcePath, sentence, score);
+    return score.getScore();
+  }
+
+  /**
+   * Returns the *unweighted* feature delta for the final transition (e.g., for
+   * the language model feature function). Provided for backward compatibility.
+   * 
+   * @param tailNode single {@link org.apache.joshua.decoder.hypergraph.HGNode} representing tail node
+   * @param i passed through unchanged to {@link #computeFinal}
+   * @param j passed through unchanged to {@link #computeFinal}
+   * @param sourcePath information about a path taken through the source {@link org.apache.joshua.lattice.Lattice}
+   * @param sentence the input {@link Sentence}
+   * @return an *unweighted* feature vector
+   */
+  public final FeatureVector computeFinalFeatures(HGNode tailNode, int i, int j,
+      SourcePath sourcePath, Sentence sentence) {
+
+    FeatureAccumulator features = new FeatureAccumulator();
+    computeFinal(tailNode, i, j, sourcePath, sentence, features);
+    return features.getFeatures();
+  }
+
+  /**
+   * This function is called when sorting rules for cube pruning. It must return
+   * the *weighted* estimated cost of applying a feature. This need not be the
+   * actual cost of applying the rule in context. Basically, it's the inner
+   * product of the weight vector and all features found in the grammar rule,
+   * though some features (like LanguageModelFF) can also compute some of their
+   * values. This is just an estimate of the cost, which helps do better
+   * sorting. Later, the real cost of this feature function is computed via
+   * compute().
+   * 
+   * @param rule {@link org.apache.joshua.decoder.ff.tm.Rule} to be utilized within computation
+   * @param sentence the input {@link Sentence}
+   * @return the *weighted* cost of applying the feature.
+   */
+  public abstract float estimateCost(Rule rule, Sentence sentence);
+
+  /**
+   * This function is called to produce a *weighted estimate* of the future cost
+   * of applying this feature. This value is not incorporated into the model
+   * score but is used in pruning decisions. Stateless features return 0.0f by
+   * default, but Stateful features might want to override this.
+   * 
+   * @param rule {@link org.apache.joshua.decoder.ff.tm.Rule} to be utilized within computation
+   * @param state the dynamic programming state the estimate is based on
+   * @param sentence the input {@link Sentence}
+   * @return the *weighted* future cost estimate of applying this rule in
+   *         context.
+   */
+  public abstract float estimateFutureCost(Rule rule, DPState state, Sentence sentence);
+
+  /**
+   * Parses the arguments passed to a feature function in the Joshua config file.
+   * TODO: Replace this with a proper CLI library at some point.
+   * 
+   * <p>Expects key/value pairs in the form {@code -argname value}. The leading dash is stripped
+   * from the stored key. Any key without a value (because another key or the end of the
+   * arguments follows it) is added with an empty string as value. Only the first token after a
+   * key is taken as its value; further non-key tokens are ignored, as are values that appear
+   * before any key. If a key is repeated, the later occurrence overwrites the earlier one.</p>
+   * 
+   * @param args the raw argument tokens; {@code null} is treated like an empty array
+   * @return A hash with the keys and the values of the string
+   */
+  public static HashMap<String, String> parseArgs(String[] args) {
+    HashMap<String, String> parsedArgs = new HashMap<String, String>();
+    if (args == null)
+      return parsedArgs;
+
+    // The key that is still waiting for its value, or null when there is none.
+    String pendingKey = null;
+    for (String arg : args) {
+      Matcher keyMatcher = ARG_KEY_PATTERN.matcher(arg);
+      if (keyMatcher.find()) {
+        // A new key starts; a key still pending at this point had no value.
+        if (pendingKey != null)
+          parsedArgs.put(pendingKey, "");
+        pendingKey = arg.substring(1);
+      } else if (pendingKey != null) {
+        parsedArgs.put(pendingKey, arg);
+        pendingKey = null;
+      }
+    }
+
+    // A key in final position has no value either; without this it would be silently dropped.
+    if (pendingKey != null)
+      parsedArgs.put(pendingKey, "");
+
+    return parsedArgs;
+  }
+
+  /**
+   * Accumulator objects allow us to generalize feature computation.
+   * ScoreAccumulator takes (feature,value) pairs and simply stores the weighted
+   * sum (for decoding). FeatureAccumulator records the named feature values
+   * (for k-best extraction).
+   */
+  public interface Accumulator {
+    /** Adds {@code value} for the sparse feature called {@code name}. */
+    public void add(String name, float value);
+    /** Adds {@code value} for the dense feature with index {@code id}. */
+    public void add(int id, float value);
+  }
+
+  /**
+   * Sums feature values multiplied by the matching entry of the enclosing feature function's
+   * weight vector. This is an inner class because it reads {@code weights} from its enclosing
+   * instance.
+   */
+  public class ScoreAccumulator implements Accumulator {
+    private float score;
+
+    public ScoreAccumulator() {
+      this.score = 0.0f;
+    }
+
+    @Override
+    public void add(String name, float value) {
+      score += value * weights.getSparse(name);
+    }
+
+    @Override
+    public void add(int id, float value) {
+      score += value * weights.getDense(id);
+    }
+
+    /** @return the weighted sum accumulated so far */
+    public float getScore() {
+      return score;
+    }
+  }
+
+  /**
+   * Records the raw (unweighted) feature values in a fresh {@link FeatureVector}; repeated
+   * additions to the same feature are summed.
+   */
+  public class FeatureAccumulator implements Accumulator {
+    private FeatureVector features;
+
+    public FeatureAccumulator() {
+      this.features = new FeatureVector();
+    }
+
+    @Override
+    public void add(String name, float value) {
+      features.increment(name, value);
+    }
+
+    @Override
+    public void add(int id, float value) {
+      features.increment(id,  value);
+    }
+
+    /** @return the vector holding every value added so far */
+    public FeatureVector getFeatures() {
+      return features;
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/decoder/ff/FeatureVector.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/FeatureVector.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/FeatureVector.java
new file mode 100644
index 0000000..1b39c78
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/FeatureVector.java
@@ -0,0 +1,385 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * An implementation of a sparse feature vector, used to represent both weights and feature
+ * values.
+ *
+ * This class is used to hold both the decoder weights and the feature values accumulated across
+ * each edge. When features are read in upon decoder startup, they all start out as sparse features
+ * and are stored in the hash table. After the feature functions have been loaded, the decoder
+ * queries each of them for their dense features via {@link #registerDenseFeatures}. The features
+ * each feature function reports are then *removed* from the sparse feature hash and placed in the
+ * dense feature array. Therefore, when a feature registers a dense feature, it should take care to
+ * query either {@link org.apache.joshua.decoder.ff.FeatureVector#getDense(int)} or
+ * {@link org.apache.joshua.decoder.ff.FeatureVector#getSparse(String)} when asking for the feature
+ * values later on.
+ *
+ * @author Matt Post post@cs.jhu.edu
+ */
+
+public class FeatureVector {
+  /*
+   * A list of the dense feature names. Increased via calls to registerDenseFeatures()
+   */
+  public static ArrayList<String> DENSE_FEATURE_NAMES = new ArrayList<String>();
+
+  /*
+   * The values of each of the dense features. The list may be shorter than DENSE_FEATURE_NAMES;
+   * missing trailing entries are read as 0 by getDense() and created on demand by the setters.
+   */
+  private ArrayList<Float> denseFeatures = null;
+
+  /*
+   * Value of sparse features.
+   */
+  private HashMap<String, Float> sparseFeatures;
+
+  /**
+   * Creates an empty feature vector. Room for the currently registered dense features is
+   * reserved, but the dense list itself starts out empty and grows lazily.
+   */
+  public FeatureVector() {
+    sparseFeatures = new HashMap<String, Float>();
+    // The argument is only an initial capacity: size() is 0 afterwards, so there is nothing to
+    // zero-fill here. Every accessor below copes with a dense list shorter than the name list.
+    denseFeatures = new ArrayList<Float>(DENSE_FEATURE_NAMES.size());
+  }
+
+  /**
+   * This version of the constructor takes an uninitialized feature with potentially intermingled
+   * labeled and unlabeled feature values, of the format:
+   *
+   * [feature1=]value [feature2=]value
+   *
+   * It produces a Feature Vector where all unlabeled features have been labeled by appending the
+   * unlabeled feature index (starting at 0) to the defaultPrefix value.
+   *
+   * **IMPORTANT** The feature values are inverted, for historical reasons, which leads to a lot
+   * of confusion. They have to be inverted here and when the score is actually computed. They
+   * are inverted here (which is used to build the feature vector representation of a rule's dense
+   * features) and in {@link org.apache.joshua.decoder.ff.tm.Rule#estimateRuleCost(java.util.List)}
+   * , where the rule's precomputable (weighted) score is cached.
+   *
+   * @param featureString the string of labeled and unlabeled features (probably straight from the
+   *          grammar text file); leading and trailing whitespace is ignored
+   * @param prefix the prefix to use for unlabeled features (probably "tm_OWNER_")
+   * @throws NumberFormatException if a value, or the index following the prefix, is not numeric
+   */
+  public FeatureVector(String featureString, String prefix) {
+
+    /*
+     * Read through the features on this rule, adding them to the feature vector. Unlabeled features
+     * are converted to a canonical form.
+     *
+     * Note that it's bad form to mix unlabeled features and the named feature index they are mapped
+     * to, but we are being liberal in what we accept.
+     *
+     * IMPORTANT: Note that, for historical reasons, the sign is reversed on all *dense* scores.
+     * This is the source of *no end* of confusion and should be done away with.
+     */
+    this();
+
+    // Split the trimmed string: splitting the raw one would yield an empty first token (and a
+    // NumberFormatException) whenever the input starts with whitespace.
+    String trimmed = featureString.trim();
+    if (trimmed.isEmpty())
+      return;
+
+    int nextUnlabeledIndex = 0;
+    for (String token : trimmed.split("\\s+")) {
+      int splitPoint = token.indexOf('=');
+      if (splitPoint == -1) {
+        /*
+         * If we encounter an unlabeled feature, it is the next dense feature. Its sign is flipped.
+         */
+        set(nextUnlabeledIndex, -Float.parseFloat(token));
+        nextUnlabeledIndex++;
+      } else if (token.startsWith(prefix)) {
+        /*
+         * A label starting with the prefix is a dense feature in disguise: the text between the
+         * prefix and '=' is its index. NOTE(review): unlike unlabeled values, this value is stored
+         * without a sign flip, exactly as before -- confirm that this asymmetry is intended.
+         */
+        int index = Integer.parseInt(token.substring(prefix.length(), splitPoint));
+        set(index, Float.parseFloat(token.substring(splitPoint + 1)));
+      } else {
+        /* Anything else is a proper sparse feature. */
+        sparseFeatures.put(token.substring(0, splitPoint),
+            Float.parseFloat(token.substring(splitPoint + 1)));
+      }
+    }
+  }
+
+  /**
+   * Register the dense features of all given feature functions with this (global weight) vector.
+   * Each function is told the index of the first free dense slot via
+   * {@link FeatureFunction#reportDenseFeatures(int)}; every name it reports is appended to
+   * {@link #DENSE_FEATURE_NAMES}, and its current sparse value (0 if absent) is moved from the
+   * sparse map into the dense list. This *must* be called for every feature function wishing to
+   * register dense features!
+   *
+   * @param featureFunctions {@link java.util.ArrayList} of {@link org.apache.joshua.decoder.ff.FeatureFunction}'s
+   */
+  public void registerDenseFeatures(ArrayList<FeatureFunction> featureFunctions) {
+    for (FeatureFunction feature: featureFunctions) {
+      ArrayList<String> names = feature.reportDenseFeatures(denseFeatures.size());
+      for (String name: names) {
+        DENSE_FEATURE_NAMES.add(name);
+        denseFeatures.add(getSparse(name));
+        sparseFeatures.remove(name);
+      }
+    }
+  }
+
+  /** @return the live (not copied) list of dense values */
+  public ArrayList<Float> getDenseFeatures() {
+    return denseFeatures;
+  }
+
+  /** @return the live (not copied) map of sparse values */
+  public HashMap<String,Float> getSparseFeatures() {
+    return sparseFeatures;
+  }
+
+  /** @return the names of the sparse features only */
+  public Set<String> keySet() {
+    return sparseFeatures.keySet();
+  }
+
+  /** @return the number of sparse entries plus the number of stored dense entries */
+  public int size() {
+    return sparseFeatures.size() + denseFeatures.size();
+  }
+
+  /**
+   * Copies this vector into a newly constructed one. Sparse entries are copied through
+   * {@link #set(String, float)}, dense entries by index.
+   *
+   * @return an independent copy of this vector
+   */
+  @Override
+  public FeatureVector clone() {
+    FeatureVector newOne = new FeatureVector();
+    for (String key : this.sparseFeatures.keySet())
+      newOne.set(key, this.sparseFeatures.get(key));
+    for (int i = 0; i < denseFeatures.size(); i++)
+      newOne.set(i, getDense(i));
+    return newOne;
+  }
+
+  /**
+   * Subtracts the weights in the other feature vector from this one. Note that this is not set
+   * subtraction; keys (and dense indices) found in the other FeatureVector but not in this one
+   * will be initialized with a value of 0.0f before subtraction.
+   *
+   * @param other another {@link org.apache.joshua.decoder.ff.FeatureVector} from which to subtract its score
+   */
+  public void subtract(FeatureVector other) {
+    // Grow first, as add() does; otherwise dense entries present only in 'other' would be ignored.
+    while (denseFeatures.size() < other.denseFeatures.size())
+      denseFeatures.add(0.0f);
+
+    for (int i = 0; i < denseFeatures.size(); i++)
+      denseFeatures.set(i, getDense(i) - other.getDense(i));
+
+    for (String key : other.keySet())
+      sparseFeatures.put(key, getSparse(key) - other.getSparse(key));
+  }
+
+  /**
+   * Adds the weights in the other feature vector to this one. This is set union, with values shared
+   * between the two being summed.
+   *
+   * @param other another {@link org.apache.joshua.decoder.ff.FeatureVector} from which to add its score
+   */
+  public void add(FeatureVector other) {
+    while (denseFeatures.size() < other.denseFeatures.size())
+      denseFeatures.add(0.0f);
+
+    for (int i = 0; i < other.denseFeatures.size(); i++)
+      increment(i, other.getDense(i));
+
+    for (String key : other.keySet())
+      increment(key, other.getSparse(key));
+  }
+
+  /**
+   * Return the weight of a feature by name, after checking to determine if it is sparse or dense.
+   *
+   * @param feature String name of some feature
+   * @return the feature's weight
+   */
+  public float getWeight(String feature) {
+    for (int i = 0; i < DENSE_FEATURE_NAMES.size(); i++) {
+      if (DENSE_FEATURE_NAMES.get(i).equals(feature)) {
+        return getDense(i);
+      }
+    }
+    return getSparse(feature);
+  }
+
+  /**
+   * Return the weight of a sparse feature, indexed by its name.
+   *
+   * @param feature String name of some feature
+   * @return the sparse feature's weight, or 0 if not found.
+   */
+  public float getSparse(String feature) {
+    if (sparseFeatures.containsKey(feature))
+      return sparseFeatures.get(feature);
+    return 0.0f;
+  }
+
+  /**
+   * @param name String name of some feature
+   * @return whether a *sparse* value is stored under this name (dense features are not consulted)
+   */
+  public boolean hasValue(String name) {
+    return sparseFeatures.containsKey(name);
+  }
+
+  /**
+   * Return the weight of a dense feature, indexed by its feature index, or 0.0f, if the index
+   * lies beyond the stored dense values. In other words, this is a safe way to query the dense
+   * feature vector.
+   *
+   * @param id int index of some dense feature
+   * @return the dense feature's value, or 0 if not found.
+   */
+  public float getDense(int id) {
+    if (id < denseFeatures.size())
+      return denseFeatures.get(id);
+    return 0.0f;
+  }
+
+  /**
+   * Adds {@code value} to the sparse feature {@code feature}, creating it at 0 if absent.
+   *
+   * @param feature String name of some sparse feature
+   * @param value the amount to add
+   */
+  public void increment(String feature, float value) {
+    sparseFeatures.put(feature, getSparse(feature) + value);
+  }
+
+  /**
+   * Adds {@code value} to the dense feature {@code id}, zero-padding the dense list as needed.
+   *
+   * @param id int index of some dense feature
+   * @param value the amount to add
+   */
+  public void increment(int id, float value) {
+    while (id >= denseFeatures.size())
+      denseFeatures.add(0.0f);
+    denseFeatures.set(id, getDense(id) + value);
+  }
+
+  /**
+   * Set the value of a feature. We need to first determine whether the feature is a dense or
+   * sparse one, then set accordingly.
+   *
+   * @param feature String name of some feature
+   * @param value float value to set to the feature with the associated name
+   */
+  public void set(String feature, float value) {
+    for (int i = 0; i < DENSE_FEATURE_NAMES.size(); i++) {
+      if (DENSE_FEATURE_NAMES.get(i).equals(feature)) {
+        // Go through set(int, float) so the dense list is padded first; writing to the list
+        // directly throws IndexOutOfBoundsException when it is shorter than the name list.
+        set(i, value);
+        return;
+      }
+    }
+    // No dense feature was found; assume it's sparse
+    sparseFeatures.put(feature, value);
+  }
+
+  /**
+   * Sets the dense feature {@code id}, zero-padding the dense list as needed.
+   *
+   * @param id int index of some dense feature
+   * @param value the value to store
+   */
+  public void set(int id, float value) {
+    while (id >= denseFeatures.size())
+      denseFeatures.add(0.0f);
+    denseFeatures.set(id, value);
+  }
+
+  /**
+   * @return a new map with every sparse entry plus one entry per registered dense feature name
+   *         (dense values overwrite sparse entries of the same name)
+   */
+  public Map<String, Float> getMap() {
+    Map<String, Float> allFeatures = new HashMap<>(sparseFeatures.size() + denseFeatures.size());
+    allFeatures.putAll(sparseFeatures);
+    for (int i = 0; i < DENSE_FEATURE_NAMES.size(); i++) {
+      allFeatures.put(DENSE_FEATURE_NAMES.get(i), getDense(i));
+    }
+    return allFeatures;
+  }
+
+  /**
+   * Computes the inner product between this feature vector and another one. Dense entries are
+   * paired by index over the registered dense features; sparse entries are paired by name.
+   *
+   * @param other a {@link org.apache.joshua.decoder.ff.FeatureVector} with which to compute the inner product
+   * @return float value representing the computation
+   */
+  public float innerProduct(FeatureVector other) {
+    float cost = 0.0f;
+    for (int i = 0; i < DENSE_FEATURE_NAMES.size(); i++)
+      cost += getDense(i) * other.getDense(i);
+
+    for (String key : sparseFeatures.keySet())
+      cost += sparseFeatures.get(key) * other.getSparse(key);
+
+    return cost;
+  }
+
+  /**
+   * Multiplies every stored value, dense as well as sparse, by {@code value} in place.
+   *
+   * @param value the scaling factor
+   */
+  public void times(float value) {
+    // Dense values used to be skipped here, leaving the vector only partially scaled.
+    for (int i = 0; i < denseFeatures.size(); i++)
+      denseFeatures.set(i, denseFeatures.get(i) * value);
+
+    // Updating through the entry is not a structural change, so it is safe while iterating.
+    for (Map.Entry<String, Float> entry : sparseFeatures.entrySet())
+      entry.setValue(entry.getValue() * value);
+  }
+
+  /***
+   * Moses distinguishes sparse features as those containing an underscore, so we have to fake it
+   * to be compatible with their tuners: underscores in dense names are replaced by dashes, and
+   * the sparse feature "OOVPenalty" is printed as "OOV_Penalty".
+   *
+   * @return trimmed Moses output string
+   */
+  public String mosesString() {
+    StringBuilder outputString = new StringBuilder();
+
+    HashSet<String> printed_keys = new HashSet<String>();
+
+    // First print all the dense feature names in order
+    for (int i = 0; i < DENSE_FEATURE_NAMES.size(); i++) {
+      outputString.append(String.format("%s=%.3f ", DENSE_FEATURE_NAMES.get(i).replaceAll("_", "-"), getDense(i)));
+      printed_keys.add(DENSE_FEATURE_NAMES.get(i));
+    }
+
+    // Now print the sparse features
+    ArrayList<String> keys = new ArrayList<String>(sparseFeatures.keySet());
+    Collections.sort(keys);
+    for (String key: keys) {
+      if (! printed_keys.contains(key)) {
+        float value = sparseFeatures.get(key);
+        if (key.equals("OOVPenalty"))
+          // force moses to see it as sparse
+          key = "OOV_Penalty";
+        outputString.append(String.format("%s=%.3f ", key, value));
+      }
+    }
+    return outputString.toString().trim();
+  }
+
+  /***
+   * Outputs the features as space-separated name=value pairs. All registered dense features are
+   * printed first, in registration order; the remaining sparse features follow, sorted by name.
+   */
+  @Override
+  public String toString() {
+    StringBuilder outputString = new StringBuilder();
+
+    HashSet<String> printed_keys = new HashSet<String>();
+
+    // First print all the dense feature names in order
+    for (int i = 0; i < DENSE_FEATURE_NAMES.size(); i++) {
+      outputString.append(String.format("%s=%.3f ", DENSE_FEATURE_NAMES.get(i), getDense(i)));
+      printed_keys.add(DENSE_FEATURE_NAMES.get(i));
+    }
+
+    // Now print the rest of the features
+    ArrayList<String> keys = new ArrayList<String>(sparseFeatures.keySet());
+    Collections.sort(keys);
+    for (String key: keys)
+      if (! printed_keys.contains(key))
+        outputString.append(String.format("%s=%.3f ", key, sparseFeatures.get(key)));
+
+    return outputString.toString().trim();
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/decoder/ff/LabelCombinationFF.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/LabelCombinationFF.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/LabelCombinationFF.java
new file mode 100644
index 0000000..bfebaa5
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/LabelCombinationFF.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff;
+
+/***
+ * @author Gideon Wenniger
+ */
+
+import java.util.List;	
+
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.chart_parser.SourcePath;
+import org.apache.joshua.decoder.ff.state_maintenance.DPState;
+import org.apache.joshua.decoder.ff.tm.Rule;
+import org.apache.joshua.decoder.hypergraph.HGNode;
+import org.apache.joshua.decoder.segment_file.Sentence;
+
+/**
+ * Stateless feature that fires one indicator per scored rule. The feature name is the lower-cased
+ * feature-function name followed by the rule's left-hand side and each of its source-side
+ * nonterminal strings, all joined with underscores.
+ */
+public class LabelCombinationFF extends StatelessFF {
+
+  public LabelCombinationFF(FeatureVector weights, String[] args, JoshuaConfiguration config) {
+    super(weights, "LabelCombination", args, config);
+  }
+
+  /**
+   * @return this feature function's name, converted to lower case
+   */
+  public String getLowerCasedFeatureName() {
+    return name.toLowerCase();
+  }
+
+  /**
+   * Builds the indicator name for a rule: {@code <name>_<LHS>} followed by {@code _<label>} for
+   * every source nonterminal string reported by {@link RulePropertiesQuerying}.
+   */
+  private final String computeRuleLabelCombinationDescriptor(Rule rule) {
+    StringBuilder descriptor = new StringBuilder();
+    descriptor.append(getLowerCasedFeatureName()).append('_');
+    descriptor.append(RulePropertiesQuerying.getLHSAsString(rule));
+    List<String> sourceLabels = RulePropertiesQuerying.getRuleSourceNonterminalStrings(rule);
+    for (String label : sourceLabels) {
+      descriptor.append('_').append(label);
+    }
+    return descriptor.toString();
+  }
+
+  /**
+   * Adds the label-combination indicator (value 1) for {@code rule}; does nothing when the rule
+   * is null. This feature keeps no state, so the result is always null.
+   */
+  @Override
+  public DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j, SourcePath sourcePath,
+      Sentence sentence, Accumulator acc) {
+    if (rule == null) {
+      return null;
+    }
+    acc.add(computeRuleLabelCombinationDescriptor(rule), 1);
+    return null;
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/decoder/ff/LabelSubstitutionFF.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/LabelSubstitutionFF.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/LabelSubstitutionFF.java
new file mode 100644
index 0000000..8735be6
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/LabelSubstitutionFF.java
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff;
+
+/***
+ * @author Gideon Wenniger
+ */
+
+import java.util.List;
+
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.chart_parser.SourcePath;
+import org.apache.joshua.decoder.ff.state_maintenance.DPState;
+import org.apache.joshua.decoder.ff.tm.Rule;
+import org.apache.joshua.decoder.hypergraph.HGNode;
+import org.apache.joshua.decoder.segment_file.Sentence;
+import org.apache.joshua.util.ListUtil;
+
+public class LabelSubstitutionFF extends StatelessFF {
+  private static final String MATCH_SUFFIX = "MATCH";
+  private static final String NO_MATCH_SUFFIX = "NOMATCH";
+
+  public LabelSubstitutionFF(FeatureVector weights, String[] args, JoshuaConfiguration config) {
+    super(weights, "LabelSubstitution", args, config);
+  }
+
+  public String getLowerCasedFeatureName() {
+    return name.toLowerCase();
+  }
+
+  public String getMatchFeatureSuffix(String ruleNonterminal, String substitutionNonterminal) {
+    if (ruleNonterminal.equals(substitutionNonterminal)) {
+      return MATCH_SUFFIX;
+    } else {
+      return NO_MATCH_SUFFIX;
+    }
+  }
+
+  public static String getSubstitutionSuffix(String ruleNonterminal, String substitutionNonterminal) {
+    return substitutionNonterminal + "_substitutes_" + ruleNonterminal;
+  }
+
+  private final String computeLabelMatchingFeature(String ruleNonterminal,
+      String substitutionNonterminal) {
+    String result = getLowerCasedFeatureName() + "_";
+    result += getMatchFeatureSuffix(ruleNonterminal, substitutionNonterminal);
+    return result;
+  }
+
+  private final String computeLabelSubstitutionFeature(String ruleNonterminal,
+      String substitutionNonterminal) {
+    String result = getLowerCasedFeatureName() + "_";
+    result += getSubstitutionSuffix(ruleNonterminal, substitutionNonterminal);
+    return result;
+  }
+
+  private static final String getRuleLabelsDescriptorString(Rule rule) {
+    String result = "";
+    String leftHandSide = RulePropertiesQuerying.getLHSAsString(rule);
+    List<String> ruleSourceNonterminals = RulePropertiesQuerying
+        .getRuleSourceNonterminalStrings(rule);
+    boolean isInverting = rule.isInverting();
+    result += "<LHS>" + leftHandSide + "</LHS>";
+    result += "_<Nont>";
+    result += ListUtil.stringListStringWithoutBracketsCommaSeparated(ruleSourceNonterminals);
+    result += "</Nont>";
+    if(isInverting)
+    {  
+      result += "_INV";
+    }
+    else
+    {
+      result += "_MONO";
+    }
+    
+    return result;
+  }
+
+  private static final String getSubstitutionsDescriptorString(List<HGNode> tailNodes) {
+    String result = "_<Subst>";
+    List<String> substitutionNonterminals = RulePropertiesQuerying
+        .getSourceNonterminalStrings(tailNodes);
+    result += ListUtil.stringListStringWithoutBracketsCommaSeparated(substitutionNonterminals);
+    result += "</Subst>";
+    return result;
+  }
+
+  public final String getGapLabelsForRuleSubstitutionSuffix(Rule rule, List<HGNode> tailNodes) {
+    String result = getLowerCasedFeatureName() + "_";
+    result += getRuleLabelsDescriptorString(rule);
+    result += getSubstitutionsDescriptorString(tailNodes);
+    return result;
+  }
+
+  @Override
+  public DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j, SourcePath sourcePath,
+      Sentence sentence, Accumulator acc) {
+    if (rule != null && (tailNodes != null)) {
+
+      List<String> ruleSourceNonterminals = RulePropertiesQuerying
+          .getRuleSourceNonterminalStrings(rule);
+      List<String> substitutionNonterminals = RulePropertiesQuerying
+          .getSourceNonterminalStrings(tailNodes);
+      // Assert.assertEquals(ruleSourceNonterminals.size(), substitutionNonterminals.size());
+      for (int nonterinalIndex = 0; nonterinalIndex < ruleSourceNonterminals.size(); nonterinalIndex++) {
+        String ruleNonterminal = ruleSourceNonterminals.get(nonterinalIndex);
+        String substitutionNonterminal = substitutionNonterminals.get(nonterinalIndex);
+        acc.add(computeLabelMatchingFeature(ruleNonterminal, substitutionNonterminal), 1);
+        acc.add(computeLabelSubstitutionFeature(ruleNonterminal, substitutionNonterminal), 1);
+      }
+      acc.add(getGapLabelsForRuleSubstitutionSuffix(rule, tailNodes), 1);
+    }
+    return null;
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/decoder/ff/LexicalFeatures.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/LexicalFeatures.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/LexicalFeatures.java
new file mode 100644
index 0000000..58de5f4
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/LexicalFeatures.java
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff;
+
+import static com.google.common.cache.CacheBuilder.newBuilder;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.joshua.corpus.Vocabulary;
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.chart_parser.SourcePath;
+import org.apache.joshua.decoder.ff.state_maintenance.DPState;
+import org.apache.joshua.decoder.ff.tm.Rule;
+import org.apache.joshua.decoder.hypergraph.HGNode;
+import org.apache.joshua.decoder.segment_file.Sentence;
+import org.apache.joshua.util.FormatUtils;
+
+import com.google.common.cache.Cache;
+
+/**
+ *  Lexical alignment features denoting alignments, deletions, and insertions.
+ */
+public class LexicalFeatures extends StatelessFF {
+  
+  private final boolean useAlignments;
+  private final boolean useDeletions;
+  private final boolean useInsertions;
+  
+  private static final String NAME = "LexicalFeatures";
+  // value to fire for features
+  private static final int VALUE = 1;
+  //whether this feature is restricted to a certain grammar/owner
+  private final boolean ownerRestriction;
+  // the grammar/owner this feature is restricted to fire
+  private final int owner;
+  // Strings separating words
+  private static final String SEPARATOR = "~";
+  
+  private final Cache<Rule, List<String>> featureCache;
+  
+  public LexicalFeatures(FeatureVector weights, String[] args, JoshuaConfiguration config) {
+    super(weights, NAME, args, config);
+    
+    ownerRestriction = (parsedArgs.containsKey("owner")) ? true : false;
+    owner = ownerRestriction ? Vocabulary.id(parsedArgs.get("owner")) : 0;
+    
+    useAlignments = parsedArgs.containsKey("alignments");
+    useDeletions = parsedArgs.containsKey("deletions");
+    useInsertions = parsedArgs.containsKey("insertions");
+    
+    // initialize cache
+    if (parsedArgs.containsKey("cacheSize")) {
+      featureCache = newBuilder().maximumSize(Integer.parseInt(parsedArgs.get("cacheSize"))).build();
+    } else {
+      featureCache = newBuilder().maximumSize(config.cachedRuleSize).build();
+    }
+  }
+
+  @Override
+  public DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j, SourcePath sourcePath,
+      Sentence sentence, Accumulator acc) {
+    
+    if (ownerRestriction && rule.getOwner() != owner) {
+      return null;
+    }
+
+    List<String> featureNames = featureCache.getIfPresent(rule);
+    if (featureNames == null) {
+      featureNames = getFeatures(rule);
+      featureCache.put(rule, featureNames);
+    }
+    for (String feature : featureNames) {
+      acc.add(feature, VALUE);
+    }
+    
+    return null;
+  }
+  
+  /**
+   * Obtains the feature ids for the given rule.
+   * @param rule
+   * @return String representing the feature name.s
+   */
+  private List<String> getFeatures(final Rule rule) {
+    final List<String> result = new ArrayList<>();
+    
+    byte[] alignments = rule.getAlignment();
+    if (alignments == null) {
+      return result;
+    }
+    int[] sourceWords = rule.getFrench();
+    int[] targetWords = rule.getEnglish();
+    
+    // sourceAligned & targetAligned indicate whether an index is covered by alignments
+    boolean[] sourceAligned = new boolean[sourceWords.length];
+    boolean[] targetAligned = new boolean[targetWords.length];
+    
+    // translations: aligned words
+    for (int i = 0; i < alignments.length; i+=2) {
+      byte sourceIndex = alignments[i];
+      byte targetIndex = alignments[i + 1];
+      sourceAligned[sourceIndex] = true;
+      targetAligned[targetIndex] = true;
+      if (useAlignments) {
+        result.add(
+            "T:" + 
+            Vocabulary.word(sourceWords[sourceIndex]) + 
+            SEPARATOR + 
+            Vocabulary.word(targetWords[targetIndex]));
+      }
+    }
+    
+    // deletions: unaligned source words
+    if (useDeletions) {
+      for (int i = 0; i < sourceAligned.length; i++) {
+        if (!sourceAligned[i] && ! FormatUtils.isNonterminal(sourceWords[i])) {
+          result.add("D:" + Vocabulary.word(sourceWords[i]));
+        }
+      }
+    }
+    
+    // insertions: unaligned target words
+    if (useInsertions) {
+      for (int i = 0; i < targetAligned.length; i++) {
+        if (useInsertions && !targetAligned[i] && ! FormatUtils.isNonterminal(targetWords[i])) {
+          result.add("I:" + Vocabulary.word(targetWords[i]));
+        }
+      }
+    }
+    
+    return result;
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java
new file mode 100644
index 0000000..5278172
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.JoshuaConfiguration.OOVItem;
+import org.apache.joshua.decoder.ff.state_maintenance.DPState;
+import org.apache.joshua.decoder.ff.tm.Rule;
+import org.apache.joshua.decoder.hypergraph.HGNode;
+import org.apache.joshua.decoder.segment_file.Sentence;
+import org.apache.joshua.corpus.Vocabulary;
+import org.apache.joshua.decoder.chart_parser.SourcePath;
+
+/**
+ * Penalizes out-of-vocabulary words (with respect to the translation model) that enter the chart.
+ * For each input word that is OOV with respect to the translation model, a rule is created that
+ * passes the word through untranslated (the suffix "_OOV" can optionally be appended according to
+ * the runtime parameter "mark-oovs"). These rules live in a grammar whose owner is "oov", and this
+ * feature function fires the "OOVPenalty" feature whenever it scores a rule with that owner.
+ *
+ * @author Matt Post post@cs.jhu.edu
+ */
+public class OOVPenalty extends StatelessFF {
+  private final int ownerID;
+
+  /* Value fired for an OOV rule whose LHS has no entry in the configured OOV list (-oov-list). */
+  private final float defaultValue = -100f;
+  /* Per-LHS values taken from the configured OOV list, keyed by the label's vocabulary id. */
+  private final HashMap<Integer,Float> oovWeights;
+
+  public OOVPenalty(FeatureVector weights, String[] args, JoshuaConfiguration config) {
+    super(weights, "OOVPenalty", args, config);
+
+    ownerID = Vocabulary.id("oov");
+    oovWeights = new HashMap<Integer,Float>();
+
+    if (config.oovList == null) {
+      return;
+    }
+    for (OOVItem entry : config.oovList) {
+      oovWeights.put(Vocabulary.id(entry.label), entry.weight);
+    }
+  }
+
+  /**
+   * Records {@code index} as this feature's dense index and reports its single dense feature name.
+   */
+  @Override
+  public ArrayList<String> reportDenseFeatures(int index) {
+    denseFeatureIndex = index;
+
+    ArrayList<String> denseNames = new ArrayList<>(1);
+    denseNames.add(name);
+    return denseNames;
+  }
+
+  /**
+   * Fires the dense OOVPenalty feature for rules owned by the "oov" grammar. The value fired is
+   * the one configured for the rule's left-hand side, or the default value when none is
+   * configured. Other rules (and a null rule) fire nothing. The result is always null.
+   */
+  @Override
+  public DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j, SourcePath sourcePath,
+      Sentence sentence, Accumulator acc) {
+
+    if (isOovRule(rule)) {
+      acc.add(denseFeatureIndex, getValue(rule.getLHS()));
+    }
+
+    return null;
+  }
+
+  /**
+   * It's important for the OOV feature to contribute to the rule's estimated cost, so that OOV
+   * rules (which are added for all words, not just ones without translation options) get sorted
+   * to the bottom during cube pruning.
+   *
+   * Important! estimateCost returns the *weighted* feature value.
+   */
+  @Override
+  public float estimateCost(Rule rule, Sentence sentence) {
+    if (!isOovRule(rule)) {
+      return 0.0f;
+    }
+    return weights.getDense(denseFeatureIndex) * getValue(rule.getLHS());
+  }
+
+  /** True when the rule is non-null and its owner is the "oov" grammar. */
+  private boolean isOovRule(Rule rule) {
+    return rule != null && this.ownerID == rule.getOwner();
+  }
+
+  /** Looks up the value configured for a left-hand side id, falling back to the default. */
+  private float getValue(int lhs) {
+    if (oovWeights.containsKey(lhs)) {
+      return oovWeights.get(lhs);
+    }
+    return defaultValue;
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/decoder/ff/PhraseModel.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/PhraseModel.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/PhraseModel.java
new file mode 100644
index 0000000..2324292
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/PhraseModel.java
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.joshua.corpus.Vocabulary;
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.chart_parser.SourcePath;
+import org.apache.joshua.decoder.ff.state_maintenance.DPState;
+import org.apache.joshua.decoder.ff.tm.Grammar;
+import org.apache.joshua.decoder.ff.tm.Rule;
+import org.apache.joshua.decoder.hypergraph.HGNode;
+import org.apache.joshua.decoder.segment_file.Sentence;
+
+/**
+ * This feature handles the dense features that are stored with the rules of a grammar. The names
+ * of these dense features are a function of the phrase model owner ({@code tm_OWNER_INDEX}). When
+ * the feature is loaded, it queries the weights for the set of features that are active for this
+ * grammar, storing them in an array.
+ *
+ * @author Matt Post post@cs.jhu.edu
+ * @author Zhifei Li zhifei.work@gmail.com
+ */
+
+public class PhraseModel extends StatelessFF {
+
+  /* The owner of the grammar. */
+  private final int ownerID;
+  private final String owner;
+
+  /* Weights of the dense features tm_OWNER_0 .. tm_OWNER_(n-1), looked up at construction. */
+  private final float[] phrase_weights;
+
+  /**
+   * @param weights the weight vector the dense feature weights are read from
+   * @param args feature arguments; the {@code owner} argument names the grammar owner
+   * @param config the decoder configuration
+   * @param g an example grammar, queried for its number of dense features
+   */
+  public PhraseModel(FeatureVector weights, String[] args, JoshuaConfiguration config, Grammar g) {
+    super(weights, "tm_", args, config);
+
+    String owner = parsedArgs.get("owner");
+    this.name = String.format("tm_%s", owner);
+
+    /*
+     * Determine the number of features by querying the example grammar that was passed in.
+     */
+    phrase_weights = new float[g.getNumDenseFeatures()];
+    for (int i = 0; i < phrase_weights.length; i++) {
+      phrase_weights[i] = weights.getSparse(String.format("tm_%s_%d", owner, i));
+    }
+
+    // Store the owner.
+    this.owner = owner;
+    this.ownerID = Vocabulary.id(owner);
+  }
+
+  /**
+   * Records {@code index} as the offset of this model's first dense feature and reports one dense
+   * feature name, {@code tm_OWNER_i}, for each cached phrase weight.
+   */
+  @Override
+  public ArrayList<String> reportDenseFeatures(int index) {
+    denseFeatureIndex = index;
+
+    ArrayList<String> names = new ArrayList<String>();
+    for (int i = 0; i < phrase_weights.length; i++) {
+      names.add(String.format("tm_%s_%d", owner, i));
+    }
+    return names;
+  }
+
+  /**
+   * Estimates the cost of applying this rule, which is just the rule's precomputable cost. The
+   * cost is computed first if the rule still reports negative infinity. Rules of other owners
+   * (and a null rule) contribute 0.
+   */
+  @Override
+  public float estimateCost(final Rule rule, Sentence sentence) {
+
+    if (rule != null && rule.getOwner() == ownerID) {
+      if (rule.getPrecomputableCost() <= Float.NEGATIVE_INFINITY) {
+        rule.setPrecomputableCost(phrase_weights, weights);
+      }
+
+      return rule.getPrecomputableCost();
+    }
+
+    return 0.0f;
+  }
+
+  /**
+   * Adds the features of a rule owned by this model's grammar to the accumulator: each dense
+   * feature at index {@code k + denseFeatureIndex}, followed by every entry of the rule's feature
+   * vector. The source path, span and sentence are not used. The result is always null.
+   */
+  @Override
+  public DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j, SourcePath sourcePath,
+      Sentence sentence, Accumulator acc) {
+
+    if (rule != null && rule.getOwner() == ownerID) {
+      // Make sure the rule's precomputable cost has been set before it is scored.
+      if (rule.getPrecomputableCost() <= Float.NEGATIVE_INFINITY) {
+        rule.setPrecomputableCost(phrase_weights, weights);
+      }
+
+      for (int k = 0; k < phrase_weights.length; k++) {
+        acc.add(k + denseFeatureIndex, rule.getDenseFeature(k));
+      }
+
+      // Fetch the rule's feature vector once instead of once per key.
+      FeatureVector ruleFeatures = rule.getFeatureVector();
+      for (String key : ruleFeatures.keySet()) {
+        acc.add(key, ruleFeatures.getSparse(key));
+      }
+    }
+
+    return null;
+  }
+
+  /**
+   * @return this feature's name followed by the owner's vocabulary word
+   */
+  @Override
+  public String toString() {
+    return name + " " + Vocabulary.word(ownerID);
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/decoder/ff/PhrasePenalty.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/PhrasePenalty.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/PhrasePenalty.java
new file mode 100644
index 0000000..3c38e60
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/PhrasePenalty.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff;
+
+import java.util.ArrayList;
+import java.util.List;	
+
+import org.apache.joshua.corpus.Vocabulary;
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.chart_parser.SourcePath;
+import org.apache.joshua.decoder.ff.state_maintenance.DPState;
+import org.apache.joshua.decoder.ff.tm.Rule;
+import org.apache.joshua.decoder.hypergraph.HGNode;
+import org.apache.joshua.decoder.phrase.Hypothesis;
+import org.apache.joshua.decoder.segment_file.Sentence;
+
+/**
+ *  This feature just counts rules that are used. You can restrict it with a number of flags:
+ * 
+ *   -owner OWNER
+ *    Only count rules owned by OWNER
+ *   -target|-source
+ *    Only count the target or source side (plus the LHS)
+ *
+ * TODO: add an option to separately provide a list of rule counts, restrict to counts above a threshold. 
+ */
+public class PhrasePenalty extends StatelessFF {
+
+  private int owner = 0;
+  private float value = 1.0f;
+  
+  public PhrasePenalty(FeatureVector weights, String[] args, JoshuaConfiguration config) {
+    super(weights, "PhrasePenalty", args, config);
+    if (parsedArgs.containsKey("owner"))
+      this.owner = Vocabulary.id(parsedArgs.get("owner"));
+    else // default
+      this.owner = Vocabulary.id("pt"); 
+  }
+
+  @Override
+  public DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j, SourcePath sourcePath,
+      Sentence sentence, Accumulator acc) {
+
+    if (rule != null && rule != Hypothesis.BEGIN_RULE && rule != Hypothesis.END_RULE 
+        && (owner == 0 || rule.getOwner() == owner))
+      acc.add(denseFeatureIndex, value);
+
+    return null;
+  }
+    
+  @Override
+  public ArrayList<String> reportDenseFeatures(int index) {
+    denseFeatureIndex = index;
+    ArrayList<String> names = new ArrayList<String>();
+    names.add(name);
+    return names;
+  }
+  
+  /**
+   * Returns the *weighted* estimate.
+   * 
+   */
+  @Override
+  public float estimateCost(Rule rule, Sentence sentence) {
+    if (rule != null && rule != Hypothesis.BEGIN_RULE && rule != Hypothesis.END_RULE 
+        && (owner == 0 || rule.getOwner() == owner))
+      return weights.getDense(denseFeatureIndex) * value;
+    return 0.0f;
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/decoder/ff/RuleCountBin.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/RuleCountBin.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/RuleCountBin.java
new file mode 100644
index 0000000..5ba0c66
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/RuleCountBin.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff;
+
+import java.util.List;
+
+import org.apache.joshua.corpus.Vocabulary;
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.chart_parser.SourcePath;
+import org.apache.joshua.decoder.ff.state_maintenance.DPState;
+import org.apache.joshua.decoder.ff.tm.Rule;
+import org.apache.joshua.decoder.hypergraph.HGNode;
+import org.apache.joshua.decoder.segment_file.Sentence;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * This feature computes a bin for the rule and activates a feature for it. It requires access to
+ * the index of the RarityPenalty field (the {@code field} argument), from which the rule count
+ * can be computed. Only rules owned by the "pt" grammar are binned.
+ */
+public class RuleCountBin extends StatelessFF {
+
+  private static final Logger LOG = LoggerFactory.getLogger(RuleCountBin.class);
+
+  /* Upper bounds of the count bins, ascending; larger counts fall into the "inf" bin. */
+  private static final int[] BINS = { 1, 2, 4, 8, 16, 32, 64, 128, 1000, 10000 };
+
+  /* Index of the rarity-penalty feature within the "pt" grammar's features. */
+  private final int field;
+
+  public RuleCountBin(FeatureVector weights, String[] args, JoshuaConfiguration config) {
+    super(weights, "RuleCountBin", args, config);
+
+    field = Integer.parseInt(parsedArgs.get("field"));
+  }
+
+  /**
+   * Fires {@code RuleCountBin_K} with value 1, where K is the first bin bound that is not smaller
+   * than the count derived from the rule's {@code tm_pt_FIELD} feature, or
+   * {@code RuleCountBin_inf} when the count exceeds every bound. Nothing is fired for a null rule
+   * or a rule not owned by "pt". The result is always null.
+   */
+  @Override
+  public DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j, SourcePath sourcePath,
+      Sentence sentence, Accumulator acc) {
+
+    // A null rule has no owner and no features to bin.
+    if (rule == null || rule.getOwner() != Vocabulary.id("pt")) {
+      return null;
+    }
+
+    // The stored feature is negated to recover the rarity penalty the count is derived from.
+    float rarityPenalty = -rule.getFeatureVector().getSparse(String.format("tm_pt_%d", field));
+    int count = (int) (1.0 - Math.log(rarityPenalty));
+
+    String feature = "RuleCountBin_inf";
+    for (int k : BINS) {
+      if (count <= k) {
+        feature = String.format("RuleCountBin_%d", k);
+        break;
+      }
+    }
+
+    LOG.debug("RuleCountBin({}) = {} ==> {}", rarityPenalty, count, feature);
+
+    acc.add(feature, 1.0f);
+
+    return null;
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/decoder/ff/RuleFF.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/RuleFF.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/RuleFF.java
new file mode 100644
index 0000000..909e481
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/RuleFF.java
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff;
+
+import static com.google.common.cache.CacheBuilder.newBuilder;
+
+import java.util.List;
+
+import org.apache.joshua.corpus.Vocabulary;
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.chart_parser.SourcePath;
+import org.apache.joshua.decoder.ff.state_maintenance.DPState;
+import org.apache.joshua.decoder.ff.tm.Rule;
+import org.apache.joshua.decoder.hypergraph.HGNode;
+import org.apache.joshua.decoder.segment_file.Sentence;
+
+import com.google.common.cache.Cache;
+
+/**
+ *  This feature fires for rule ids.
+ *  Firing can be restricted to rules from a certain owner, and rule ids
+ *  can be generated from source side and/or target side.
+ */
+public class RuleFF extends StatelessFF {
+
+  private enum Sides { SOURCE, TARGET, BOTH }
+
+  private static final String NAME = "RuleFF";
+  // value to fire for features
+  private static final int VALUE = 1;
+  // whether this feature is restricted to a certain grammar/owner
+  private final boolean ownerRestriction;
+  // the grammar/owner this feature is restricted to fire
+  private final int owner;
+  // what part of the rule should be extracted;
+  private final Sides sides;
+  // Strings separating words and rule sides
+  private static final String SEPARATOR = "~";
+  private static final String SIDES_SEPARATOR = "->";
+
+  // maps a rule to its (already computed) feature name
+  private final Cache<Rule, String> featureCache;
+
+  /**
+   * Configures the feature from its arguments:
+   * "owner" (optional) restricts firing to rules of that owner,
+   * "sides" (optional; source, target or both, case-insensitive, default both) selects which
+   * parts of the rule make up the feature name, and
+   * "cacheSize" (optional) overrides the configured maximum size of the feature name cache.
+   *
+   * @param weights the feature weights, passed through to the superclass
+   * @param args the feature arguments, passed through to the superclass
+   * @param config the decoder configuration; supplies the default cache size
+   * @throws IllegalArgumentException if "sides" has an unsupported value
+   */
+  public RuleFF(FeatureVector weights, String[] args, JoshuaConfiguration config) {
+    super(weights, NAME, args, config);
+
+    ownerRestriction = parsedArgs.containsKey("owner");
+    owner = ownerRestriction ? Vocabulary.id(parsedArgs.get("owner")) : 0;
+
+    if (parsedArgs.containsKey("sides")) {
+      sides = parseSides(parsedArgs.get("sides"));
+    } else {
+      sides = Sides.BOTH;
+    }
+
+    // initialize cache
+    if (parsedArgs.containsKey("cacheSize")) {
+      featureCache = newBuilder().maximumSize(Integer.parseInt(parsedArgs.get("cacheSize"))).build();
+    } else {
+      featureCache = newBuilder().maximumSize(config.cachedRuleSize).build();
+    }
+  }
+
+  /**
+   * Translates the value of the "sides" argument into the corresponding enum constant.
+   *
+   * @param sideValue the raw argument value
+   * @return the matching constant
+   * @throws IllegalArgumentException if the value is none of source, target or both
+   */
+  private static Sides parseSides(final String sideValue) {
+    if (sideValue.equalsIgnoreCase("source")) {
+      return Sides.SOURCE;
+    } else if (sideValue.equalsIgnoreCase("target")) {
+      return Sides.TARGET;
+    } else if (sideValue.equalsIgnoreCase("both")) {
+      return Sides.BOTH;
+    }
+    // Name the offending value so a misconfigured feature line can be tracked down.
+    throw new IllegalArgumentException(
+        "Unknown side value '" + sideValue + "' (expected source, target, or both).");
+  }
+
+  /**
+   * Fires the feature named after the rule, unless an owner restriction is active and the rule
+   * belongs to a different owner. Feature names are looked up in the cache first and computed
+   * (and cached) on a miss.
+   *
+   * @return always null, since no state is contributed
+   */
+  @Override
+  public DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j, SourcePath sourcePath,
+      Sentence sentence, Accumulator acc) {
+
+    if (ownerRestriction && rule.getOwner() != owner) {
+      return null;
+    }
+
+    String featureName = featureCache.getIfPresent(rule);
+    if (featureName == null) {
+      featureName = getRuleString(rule);
+      featureCache.put(rule, featureName);
+    }
+    acc.add(featureName, VALUE);
+
+    return null;
+  }
+
+  /**
+   * Obtains the feature id for the given rule. The id has the form
+   * {@code LHS->source->target}, where the source and/or target part is left empty when the
+   * configured sides exclude it.
+   *
+   * @param rule the rule to build the feature name for
+   * @return String representing the feature name.
+   */
+  private String getRuleString(final Rule rule) {
+    final StringBuilder sb = new StringBuilder(Vocabulary.word(rule.getLHS()))
+      .append(SIDES_SEPARATOR);
+    if (sides == Sides.SOURCE || sides == Sides.BOTH) {
+      sb.append(Vocabulary.getWords(rule.getFrench(), SEPARATOR));
+    }
+    sb.append(SIDES_SEPARATOR);
+    if (sides == Sides.TARGET || sides == Sides.BOTH) {
+      sb.append(Vocabulary.getWords(rule.getEnglish(), SEPARATOR));
+    }
+    return sb.toString();
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/decoder/ff/RuleLength.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/RuleLength.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/RuleLength.java
new file mode 100644
index 0000000..02c520b
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/RuleLength.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff;
+
+import java.util.List;
+
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.chart_parser.SourcePath;
+import org.apache.joshua.decoder.ff.state_maintenance.DPState;
+import org.apache.joshua.decoder.ff.tm.Rule;
+import org.apache.joshua.decoder.hypergraph.HGNode;
+import org.apache.joshua.decoder.segment_file.Sentence;
+
+/**
+ * This feature computes three feature templates: a feature indicating the length of the rule's
+ * source side, its target side, and a feature that pairs them.
+ *
+ * NOTE(review): the class is declared abstract although it implements compute(); confirm
+ * whether that is intentional.
+ */
+public abstract class RuleLength extends StatelessFF {
+
+  /* Value fired for every feature produced by this class. */
+  private static final int VALUE = 1;
+
+  public RuleLength(FeatureVector weights, String[] args, JoshuaConfiguration config) {
+    super(weights, "RuleLength", args, config);
+  }
+
+  /**
+   * Fires three features, each prefixed with the inherited {@code name}: one carrying the number
+   * of source-side symbols, one carrying the number of target-side symbols, and one carrying
+   * both as "source-target".
+   *
+   * @return always null, since no state is contributed
+   */
+  @Override
+  public DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j, SourcePath sourcePath,
+      Sentence sentence, Accumulator acc) {
+    int sourceLength = rule.getFrench().length;
+    int targetLength = rule.getEnglish().length;
+    acc.add(name + "_source" + sourceLength, VALUE);
+    // The target feature must carry the target length (it previously repeated the source length).
+    acc.add(name + "_target" + targetLength, VALUE);
+    acc.add(name + "_sourceTarget" + sourceLength + "-" + targetLength, VALUE);
+    return null;
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/decoder/ff/RulePropertiesQuerying.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/RulePropertiesQuerying.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/RulePropertiesQuerying.java
new file mode 100644
index 0000000..a1867a3
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/RulePropertiesQuerying.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff;
+
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.joshua.corpus.Vocabulary;
+import org.apache.joshua.decoder.ff.tm.Rule;
+import org.apache.joshua.decoder.hypergraph.HGNode;
+
+/**
+ * Static helpers that turn vocabulary ids found on rules and hypergraph nodes into strings.
+ */
+public class RulePropertiesQuerying {
+
+  /**
+   * Looks up the string form of the rule's left-hand side.
+   *
+   * @param rule the rule whose LHS id is resolved
+   * @return the vocabulary word for {@code rule.getLHS()}
+   */
+  public static final String getLHSAsString(Rule rule) {
+    final int lhsId = rule.getLHS();
+    return Vocabulary.word(lhsId);
+  }
+
+  /**
+   * Resolves every id returned by {@code rule.getForeignNonTerminals()} to its vocabulary word,
+   * keeping the order in which the ids are returned.
+   *
+   * @param rule the rule to inspect
+   * @return a new list holding one string per id
+   */
+  public static List<String> getRuleSourceNonterminalStrings(Rule rule) {
+    final List<String> labels = new ArrayList<>();
+    final int[] nonTerminalIds = rule.getForeignNonTerminals();
+    for (int position = 0; position < nonTerminalIds.length; position++) {
+      labels.add(Vocabulary.word(nonTerminalIds[position]));
+    }
+    return labels;
+  }
+
+  /**
+   * Resolves the {@code lhs} id of every given tail node to its vocabulary word, in list order.
+   *
+   * @param tailNodes the nodes to inspect
+   * @return a new list holding one string per node
+   */
+  public static List<String> getSourceNonterminalStrings(List<HGNode> tailNodes) {
+    final List<String> labels = new ArrayList<>();
+    for (final HGNode node : tailNodes) {
+      final String label = Vocabulary.word(node.lhs);
+      labels.add(label);
+    }
+    return labels;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/decoder/ff/RuleShape.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/RuleShape.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/RuleShape.java
new file mode 100644
index 0000000..8483ad6
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/RuleShape.java
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff;
+
+import java.util.List;
+
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.chart_parser.SourcePath;
+import org.apache.joshua.decoder.ff.state_maintenance.DPState;
+import org.apache.joshua.decoder.ff.tm.Rule;
+import org.apache.joshua.decoder.hypergraph.HGNode;
+import org.apache.joshua.decoder.segment_file.Sentence;
+import org.apache.joshua.util.FormatUtils;
+
+/*
+ * Implements the RuleShape feature for source, target, and paired source+target sides.
+ */
+public class RuleShape extends StatelessFF {
+
+  public RuleShape(FeatureVector weights, String[] args, JoshuaConfiguration config) {
+    super(weights, "RuleShape", args, config);
+  }
+
+  private enum WordType {
+    N("N"), T("x"), P("+");
+    private final String string;
+    private boolean repeats;
+
+    private WordType(final String string) {
+      this.string = string;
+      this.repeats = false;
+    }
+    
+    private void setRepeats() {
+      repeats = true;
+    }
+
+    @Override
+    public String toString() {
+      if (repeats) {
+        return this.string + "+";
+      }
+      return this.string;
+    }
+  }
+
+  private WordType getWordType(int id) {
+    if (FormatUtils.isNonterminal(id)) {
+      return WordType.N;
+    } else {
+      return WordType.T;
+    }
+  }
+  
+  /**
+   * Returns a String describing the rule pattern.
+   */
+  private String getRulePattern(int[] ids) {
+    final StringBuilder pattern = new StringBuilder();
+    WordType currentType = getWordType(ids[0]);
+    for (int i = 1; i < ids.length; i++) {
+      if (getWordType(ids[i]) != currentType) {
+        pattern.append(currentType.toString());
+        currentType = getWordType(ids[i]);
+      } else {
+        currentType.setRepeats();
+      }
+    }
+    pattern.append(currentType.toString());
+    return pattern.toString();
+  }
+  
+  @Override
+  public DPState compute(Rule rule, List<HGNode> tailNodes, int i_, int j, SourcePath sourcePath,
+      Sentence sentence, Accumulator acc) {
+    final String sourceShape = getRulePattern(rule.getFrench());
+    final String targetShape = getRulePattern(rule.getEnglish());
+    acc.add(name + "_source_" + sourceShape, 1);
+    acc.add(name + "_target_" + sourceShape, 1);
+    acc.add(name + "_sourceTarget_" + sourceShape + "_" + targetShape, 1);
+    return null;
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/decoder/ff/SourceDependentFF.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/SourceDependentFF.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/SourceDependentFF.java
new file mode 100644
index 0000000..841402a
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/SourceDependentFF.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff;
+
+import org.apache.joshua.decoder.segment_file.Sentence;
+
+/**
+ * Contract for objects that accept a source {@link Sentence} and can be cloned into a
+ * {@link FeatureFunction}.
+ */
+public interface SourceDependentFF extends Cloneable {
+
+  /**
+   * Hands a sentence to the implementation.
+   * NOTE(review): presumably the source sentence about to be decoded; confirm with callers.
+   *
+   * @param sentence the sentence to use
+   */
+  void setSource(Sentence sentence);
+
+  /**
+   * Clones this object; the result is typed as a FeatureFunction.
+   *
+   * @return the clone
+   */
+  FeatureFunction clone();
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/decoder/ff/SourcePathFF.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/SourcePathFF.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/SourcePathFF.java
new file mode 100644
index 0000000..b138426
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/SourcePathFF.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.chart_parser.SourcePath;
+import org.apache.joshua.decoder.ff.state_maintenance.DPState;
+import org.apache.joshua.decoder.ff.tm.Rule;
+import org.apache.joshua.decoder.hypergraph.HGNode;
+import org.apache.joshua.decoder.segment_file.Sentence;
+
+/**
+ * Dense feature whose value is the cost of the path taken through the source lattice, as
+ * reported by the SourcePath object handed to compute().
+ *
+ * @author Chris Dyer redpony@umd.edu
+ * @author Matt Post post@cs.jhu.edu
+ */
+public final class SourcePathFF extends StatelessFF {
+
+  /*
+   * Registers the feature under the name "SourcePath"; it is a single-value feature template.
+   */
+  public SourcePathFF(FeatureVector weights, String[] args, JoshuaConfiguration config) {
+    super(weights, "SourcePath", args, config);
+  }
+
+  /**
+   * Adds the path cost of the given source path under this feature's dense index.
+   *
+   * @return always null, since no state is contributed
+   */
+  @Override
+  public DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j, SourcePath sourcePath,
+      Sentence sentence, Accumulator acc) {
+    acc.add(denseFeatureIndex, sourcePath.getPathCost());
+    return null;
+  }
+
+  /**
+   * Records the given index as this feature's dense index and reports its single feature name.
+   *
+   * @param index the dense index to remember
+   * @return a new list containing only this feature's name
+   */
+  @Override
+  public ArrayList<String> reportDenseFeatures(int index) {
+    denseFeatureIndex = index;
+
+    final ArrayList<String> denseNames = new ArrayList<>();
+    denseNames.add(name);
+    return denseNames;
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/decoder/ff/StatefulFF.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/StatefulFF.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/StatefulFF.java
new file mode 100644
index 0000000..1f5d0ed
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/StatefulFF.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff;
+
+import java.util.List;
+
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.chart_parser.SourcePath;
+import org.apache.joshua.decoder.ff.state_maintenance.DPState;
+import org.apache.joshua.decoder.ff.tm.Rule;
+import org.apache.joshua.decoder.hypergraph.HGNode;
+import org.apache.joshua.decoder.segment_file.Sentence;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Stateful features contribute dynamic programming state. Unlike earlier versions of Joshua, the
+ * stateful feature itself is responsible for computing and returning its updated state. Each
+ * state-computing feature function is assigned a global index, which is used to index the list of
+ * state-contributing objects in each HGNode. State can no longer be shared among different
+ * feature functions.
+ *
+ * @author Matt Post post@cs.jhu.edu
+ * @author Juri Ganitkevich juri@cs.jhu.edu
+ */
+public abstract class StatefulFF extends FeatureFunction {
+
+  private static final Logger LOG = LoggerFactory.getLogger(StatefulFF.class);
+
+  /* Next free state index; each constructed stateful FF takes the current value and bumps it. */
+  static int GLOBAL_STATE_INDEX = 0;
+
+  /* The state index that was handed to this particular instance at construction time. */
+  protected int stateIndex = 0;
+
+  /**
+   * Logs the state index about to be assigned, then claims it for this instance and advances
+   * the global counter.
+   */
+  public StatefulFF(FeatureVector weights, String name, String[] args, JoshuaConfiguration config) {
+    super(weights, name, args, config);
+
+    LOG.info("Stateful object with state index {}",  GLOBAL_STATE_INDEX);
+    stateIndex = GLOBAL_STATE_INDEX++;
+  }
+
+  /** Returns the state index assigned to this instance. */
+  public final int getStateIndex() {
+    return stateIndex;
+  }
+
+  /** Always true for this class hierarchy. */
+  public final boolean isStateful() {
+    return true;
+  }
+
+  /** Sets the global state index counter back to zero. */
+  public static void resetGlobalStateIndex() {
+    GLOBAL_STATE_INDEX = 0;
+  }
+
+  /**
+   * Computes the features this function fires when a rule is applied. Implementations must
+   * return their updated DPState and record every firing feature in the accumulator.
+   */
+  @Override
+  public abstract DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j,
+      SourcePath sourcePath, Sentence sentence, Accumulator acc);
+
+  @Override
+  public abstract DPState computeFinal(HGNode tailNodes, int i, int j, SourcePath sourcePath,
+      Sentence sentence, Accumulator acc);
+
+  /**
+   * Computes an estimated future cost of this rule. Note that this is not computed as part of
+   * the score but is used for pruning.
+   */
+  @Override
+  public abstract float estimateFutureCost(Rule rule, DPState state, Sentence sentence);
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/decoder/ff/StatelessFF.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/StatelessFF.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/StatelessFF.java
new file mode 100644
index 0000000..e473c37
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/StatelessFF.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff;
+
+import java.util.List;
+
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.chart_parser.SourcePath;
+import org.apache.joshua.decoder.ff.state_maintenance.DPState;
+import org.apache.joshua.decoder.ff.tm.Rule;
+import org.apache.joshua.decoder.hypergraph.HGNode;
+import org.apache.joshua.decoder.segment_file.Sentence;
+
+/**
+ * Base class for feature functions that contribute no dynamic programming state. Extending it
+ * is optional for a stateless feature function; it merely supplies default implementations.
+ *
+ * @author Matt Post post@cs.jhu.edu
+ * @author Juri Ganitkevich juri@cs.jhu.edu
+ */
+
+public abstract class StatelessFF extends FeatureFunction {
+
+  public StatelessFF(FeatureVector weights, String name, String[] args, JoshuaConfiguration config) {
+    super(weights, name, args, config);
+  }
+
+  /** Always false for this class hierarchy. */
+  public final boolean isStateful() {
+    return false;
+  }
+
+  /**
+   * Subclasses fire their features here. Since no state is contributed, implementations should
+   * return null.
+   */
+  @Override
+  public abstract DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j,
+      SourcePath sourcePath, Sentence sentence, Accumulator acc);
+
+  /**
+   * Default implementation: does nothing and returns null, since no state is contributed.
+   */
+  @Override
+  public DPState computeFinal(HGNode tailNode, int i, int j, SourcePath sourcePath, Sentence sentence,
+      Accumulator acc) {
+    return null;
+  }
+
+  /**
+   * The estimated cost of applying this feature, given only the rule. It is used in sorting the
+   * rules for cube pruning. This default returns 0.0, which fits most features.
+   */
+  public float estimateCost(Rule rule, Sentence sentence) {
+    return 0.0f;
+  }
+
+  /**
+   * Always 0.0: stateless functions have no access to the state and therefore no estimate of
+   * the future cost.
+   */
+  public final float estimateFutureCost(Rule rule, DPState state, Sentence sentence) {
+    return 0.0f;
+  }
+}