Posted to commits@joshua.apache.org by le...@apache.org on 2016/05/16 06:26:28 UTC

[12/66] [partial] incubator-joshua git commit: JOSHUA-252 Make it possible to use Maven to build Joshua

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/main/java/org/apache/joshua/decoder/hypergraph/FeatureVectorExtractor.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/hypergraph/FeatureVectorExtractor.java b/src/main/java/org/apache/joshua/decoder/hypergraph/FeatureVectorExtractor.java
new file mode 100644
index 0000000..dbe4f4b
--- /dev/null
+++ b/src/main/java/org/apache/joshua/decoder/hypergraph/FeatureVectorExtractor.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package joshua.decoder.hypergraph;
+
+import static joshua.decoder.chart_parser.ComputeNodeResult.computeTransitionFeatures;
+
+import java.util.List;
+
+import joshua.decoder.ff.FeatureFunction;
+import joshua.decoder.ff.FeatureVector;
+import joshua.decoder.hypergraph.KBestExtractor.DerivationState;
+import joshua.decoder.hypergraph.KBestExtractor.DerivationVisitor;
+import joshua.decoder.segment_file.Sentence;
+
+/**
+ * During decoding, individual feature values are not stored; only the model score on each edge.
+ * This saves space. If you want to print the actual feature values, they have to be assembled
+ * from the edges of the derivation, which means replaying the feature functions. This visitor
+ * does just that, using the generic derivation visitor.
+ */
+public class FeatureVectorExtractor implements WalkerFunction, DerivationVisitor {
+  
+  private final FeatureVector features;
+  private final List<FeatureFunction> featureFunctions;
+  private final Sentence sourceSentence;
+  
+  public FeatureVectorExtractor(
+      final List<FeatureFunction> featureFunctions,
+      final Sentence sourceSentence) {
+    this.features = new FeatureVector();
+    this.featureFunctions = featureFunctions;
+    this.sourceSentence = sourceSentence;
+  }
+
+  /** Accumulate edge features from the Viterbi path */
+  @Override
+  public void apply(HGNode node, int nodeIndex) {
+    features.add(
+        computeTransitionFeatures(
+          featureFunctions,
+          node.bestHyperedge,
+          node.i, node.j,
+          sourceSentence));
+  }
+
+  /** Accumulate edge features for the given DerivationState */
+  @Override
+  public void before(DerivationState state, int level, int tailNodeIndex) {
+    features.add(
+        computeTransitionFeatures(
+          featureFunctions,
+          state.edge,
+          state.parentNode.i, state.parentNode.j,
+          sourceSentence));
+  }
+  
+  /** Nothing to do */
+  @Override
+  public void after(DerivationState state, int level, int tailNodeIndex) {}
+  
+  public FeatureVector getFeatures() {
+    return features;
+  }
+}
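
A quick usage sketch (illustrative, not part of this commit): the visitor can recover the
full feature vector of the Viterbi derivation by following each node's best incoming
hyperedge. Here `hg`, `featureFunctions`, and `sourceSentence` are assumed to be in scope,
and `accumulate` is a hypothetical helper:

    FeatureVectorExtractor extractor =
        new FeatureVectorExtractor(featureFunctions, sourceSentence);
    accumulate(hg.goalNode, extractor);
    FeatureVector viterbiFeatures = extractor.getFeatures();

    // Hypothetical helper: walk the Viterbi (best) derivation top-down,
    // replaying the feature functions on each node's best incoming edge.
    static void accumulate(HGNode node, FeatureVectorExtractor extractor) {
      extractor.apply(node, 0);
      if (node.bestHyperedge.getTailNodes() != null)
        for (HGNode tail : node.bestHyperedge.getTailNodes())
          accumulate(tail, extractor);
    }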

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/main/java/org/apache/joshua/decoder/hypergraph/ForestWalker.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/hypergraph/ForestWalker.java b/src/main/java/org/apache/joshua/decoder/hypergraph/ForestWalker.java
new file mode 100644
index 0000000..72b7fc7
--- /dev/null
+++ b/src/main/java/org/apache/joshua/decoder/hypergraph/ForestWalker.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package joshua.decoder.hypergraph;
+
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * This class visits every node in a forest using a depth-first traversal (preorder by default;
+ * postorder is also supported), applying the WalkerFunction to each node. It would be easy to
+ * add other traversals if the demand arose.
+ *
+ * @author Matt Post <po...@cs.jhu.edu>
+ */
+public class ForestWalker {
+
+  public static enum TRAVERSAL {
+    PREORDER, POSTORDER
+  };
+
+  private Set<HGNode> visitedNodes;
+  private TRAVERSAL traversalType = TRAVERSAL.PREORDER;
+
+  public ForestWalker() {
+    visitedNodes = new HashSet<HGNode>();
+  }
+
+  public ForestWalker(TRAVERSAL traversal) {
+    this.traversalType = traversal;
+    visitedNodes = new HashSet<HGNode>();
+  }
+  
+  public void walk(HGNode node, WalkerFunction walker) {
+    walk(node, walker, 0);
+  }
+
+  private void walk(HGNode node, WalkerFunction walker, int nodeIndex) {
+    // short circuit
+    if (visitedNodes.contains(node))
+      return;
+
+    visitedNodes.add(node);
+    
+    if (this.traversalType == TRAVERSAL.PREORDER)
+      walker.apply(node, nodeIndex);
+
+    if (node.getHyperEdges() != null) {
+      for (HyperEdge edge : node.getHyperEdges()) {
+        if (edge.getTailNodes() != null) {
+          int tailNodeIndex = 0;
+          for (HGNode tailNode : edge.getTailNodes()) {
+            walk(tailNode, walker, tailNodeIndex);
+            tailNodeIndex++;
+          }
+        }
+      }
+    }
+    
+    if (this.traversalType == TRAVERSAL.POSTORDER)
+      walker.apply(node, nodeIndex);
+  }
+}
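
A small usage sketch (illustrative): any WalkerFunction can be applied to every node
reachable from the goal node. The NodeCounter class below is hypothetical:

    // Hypothetical WalkerFunction that counts the nodes in a forest.
    class NodeCounter implements WalkerFunction {
      int count = 0;
      @Override
      public void apply(HGNode node, int nodeIndex) {
        count++; // each node is visited exactly once, thanks to visitedNodes
      }
    }

    NodeCounter counter = new NodeCounter();
    new ForestWalker(ForestWalker.TRAVERSAL.POSTORDER).walk(hg.goalNode, counter);
    System.out.println("nodes in forest: " + counter.count);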

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/main/java/org/apache/joshua/decoder/hypergraph/GrammarBuilderWalkerFunction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/hypergraph/GrammarBuilderWalkerFunction.java b/src/main/java/org/apache/joshua/decoder/hypergraph/GrammarBuilderWalkerFunction.java
new file mode 100644
index 0000000..12e79c5
--- /dev/null
+++ b/src/main/java/org/apache/joshua/decoder/hypergraph/GrammarBuilderWalkerFunction.java
@@ -0,0 +1,175 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package joshua.decoder.hypergraph;
+
+import java.io.PrintStream;
+import java.util.HashSet;
+
+import joshua.corpus.Vocabulary;
+import joshua.decoder.JoshuaConfiguration;
+import joshua.decoder.ff.tm.Grammar;
+import joshua.decoder.ff.tm.Rule;
+import joshua.decoder.ff.tm.format.HieroFormatReader;
+import joshua.decoder.ff.tm.hash_based.MemoryBasedBatchGrammar;
+
+/**
+ * This walker function builds up a new context-free grammar by visiting each node in a hypergraph.
+ * For a quick overview, see Chris Dyer's 2010 NAACL paper
+ * "Two monolingual parses are better than one (synchronous parse)".
+ * <p>
+ * From a functional-programming point of view, this walker really wants to calculate a fold over
+ * the entire hypergraph: the initial value is an empty grammar, and as we visit each node, we add
+ * more rules to the grammar. After we have traversed the whole hypergraph, the resulting grammar
+ * will contain all rules needed for synchronous parsing.
+ * <p>
+ * These rules look just like the rules already present in the hypergraph, except that each
+ * non-terminal symbol is annotated with the span of its node.
+ */
+public class GrammarBuilderWalkerFunction implements WalkerFunction {
+  private MemoryBasedBatchGrammar grammar;
+  private static HieroFormatReader reader = new HieroFormatReader();
+  private PrintStream outStream;
+  private int goalSymbol;
+  private HashSet<Rule> rules;
+
+  public GrammarBuilderWalkerFunction(String goal, JoshuaConfiguration joshuaConfiguration) {
+    grammar = new MemoryBasedBatchGrammar(reader, joshuaConfiguration);
+    grammar.setSpanLimit(1000);
+    outStream = null;
+    goalSymbol = Vocabulary.id(goal);
+    rules = new HashSet<Rule>();
+  }
+
+  public GrammarBuilderWalkerFunction(String goal, PrintStream out, JoshuaConfiguration joshuaConfiguration) {
+    this(goal, joshuaConfiguration);
+    outStream = out;
+  }
+
+  public void apply(HGNode node, int index) {
+    // System.err.printf("VISITING NODE: %s\n", getLabelWithSpan(node));
+    for (HyperEdge e : node.hyperedges) {
+      Rule r = getRuleWithSpans(e, node);
+      if (r != null && !rules.contains(r)) {
+        if (outStream != null) outStream.println(r);
+        grammar.addRule(r);
+        rules.add(r);
+      }
+    }
+  }
+
+  private static int getLabelWithSpan(HGNode node) {
+    return Vocabulary.id(getLabelWithSpanAsString(node));
+  }
+
+  private static String getLabelWithSpanAsString(HGNode node) {
+    String label = Vocabulary.word(node.lhs);
+    String cleanLabel = HieroFormatReader.cleanNonTerminal(label);
+    String unBracketedCleanLabel = cleanLabel.substring(1, cleanLabel.length() - 1);
+    return String.format("[%d-%s-%d]", node.i, unBracketedCleanLabel, node.j);
+  }
+
+  private boolean nodeHasGoalSymbol(HGNode node) {
+    return node.lhs == goalSymbol;
+  }
+
+  private Rule getRuleWithSpans(HyperEdge edge, HGNode head) {
+    Rule edgeRule = edge.getRule();
+    int headLabel = getLabelWithSpan(head);
+    // System.err.printf("Head label: %s\n", headLabel);
+    // if (edge.getAntNodes() != null) {
+    // for (HGNode n : edge.getAntNodes())
+    // System.err.printf("> %s\n", getLabelWithSpan(n));
+    // }
+    int[] source = getNewSource(nodeHasGoalSymbol(head), edge);
+    // if this would be unary abstract, getNewSource will be null
+    if (source == null) return null;
+    int[] target = getNewTargetFromSource(source);
+    Rule result =
+        new Rule(headLabel, source, target, edgeRule.getFeatureString(), edgeRule.getArity());
+    // System.err.printf("new rule is %s\n", result);
+    return result;
+  }
+
+  private static int[] getNewSource(boolean isGlue, HyperEdge edge) {
+    Rule rule = edge.getRule();
+    int[] english = rule.getEnglish();
+    // if this is a unary abstract rule, just return null
+    // TODO: except glue rules!
+    if (english.length == 1 && english[0] < 0 && !isGlue) return null;
+    int[] result = new int[english.length];
+    for (int i = 0; i < english.length; i++) {
+      int curr = english[i];
+      if (!Vocabulary.nt(curr)) {
+        // If it's a terminal symbol, we just copy it into the new rule.
+        result[i] = curr;
+      } else {
+        // If it's a nonterminal, its value is -N, where N is the index
+        // of the nonterminal on the source side.
+        //
+        // That is, if we would call a nonterminal "[X,2]", the value of
+        // curr at this point is -2. And the tail node that it points at
+        // is #1 (since getTailNodes() is 0-indexed).
+        int index = -curr - 1;
+        result[i] = getLabelWithSpan(edge.getTailNodes().get(index));
+      }
+    }
+    // System.err.printf("source: %s\n", result);
+    return result;
+  }
+
+  private static int[] getNewTargetFromSource(int[] source) {
+    int[] result = new int[source.length];
+    int currNT = -1; // value to stick into NT slots
+    for (int i = 0; i < source.length; i++) {
+      result[i] = source[i];
+      if (Vocabulary.nt(result[i])) {
+        result[i] = currNT;
+        currNT--;
+      }
+    }
+    // System.err.printf("target: %s\n", result);
+    return result;
+  }
+
+  private static HGNode getGoalSymbolNode(HGNode root) {
+    if (root.hyperedges == null || root.hyperedges.size() == 0) {
+      System.err.println("getGoalSymbolNode: root node has no hyperedges");
+      return null;
+    }
+    return root.hyperedges.get(0).getTailNodes().get(0);
+  }
+
+
+  public static int goalSymbol(HyperGraph hg) {
+    if (hg.goalNode == null) {
+      System.err.println("goalSymbol: goalNode of hypergraph is null");
+      return -1;
+    }
+    HGNode symbolNode = getGoalSymbolNode(hg.goalNode);
+    if (symbolNode == null) return -1;
+    // System.err.printf("goalSymbol: %s\n", result);
+    // System.err.printf("symbol node LHS is %d\n", symbolNode.lhs);
+    // System.err.printf("i = %d, j = %d\n", symbolNode.i, symbolNode.j);
+    return getLabelWithSpan(symbolNode);
+  }
+
+  public Grammar getGrammar() {
+    return grammar;
+  }
+}
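
A usage sketch (illustrative; the hypergraph `hg`, a goal label "GOAL", and a
JoshuaConfiguration `config` are assumed to be in scope): walking a decoded forest with
this function yields a span-annotated grammar that can synchronously re-parse the sentence.

    GrammarBuilderWalkerFunction builder =
        new GrammarBuilderWalkerFunction("GOAL", config);
    new ForestWalker().walk(hg.goalNode, builder);
    Grammar spanGrammar = builder.getGrammar();             // rules with [i-LABEL-j] nonterminals
    int goal = GrammarBuilderWalkerFunction.goalSymbol(hg); // goal symbol of the new grammar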

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/main/java/org/apache/joshua/decoder/hypergraph/HGNode.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/hypergraph/HGNode.java b/src/main/java/org/apache/joshua/decoder/hypergraph/HGNode.java
new file mode 100644
index 0000000..c45f40c
--- /dev/null
+++ b/src/main/java/org/apache/joshua/decoder/hypergraph/HGNode.java
@@ -0,0 +1,328 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package joshua.decoder.hypergraph;
+
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+
+import joshua.corpus.Vocabulary;
+import joshua.decoder.ff.state_maintenance.DPState;
+
+/**
+ * This class implements a hypergraph node (HGNode), also known as an Item in parsing.
+ * 
+ * @author Zhifei Li, <zh...@gmail.com>
+ * @author Juri Ganitkevitch, <ju...@cs.jhu.edu>
+ */
+// TODO: handle the case that the Hypergraph only maintains the one-best tree
+public class HGNode {
+
+  public int i, j;
+
+  // the left-hand-side nonterminal symbol, e.g., NP or VP
+  public int lhs;
+
+  // each hyperedge is an "and" node
+  public List<HyperEdge> hyperedges = null;
+
+  // used in pruning, compute_item, and transit_to_goal
+  public HyperEdge bestHyperedge = null;
+
+  // the dynamic-programming state required by each model, indexed by state id
+  // (for example, edge n-grams for the LM)
+  protected List<DPState> dpStates;
+
+  private Signature signature = null;
+//  private int hash = 0;
+
+  protected float score = 0.0f;
+
+  // ===============================================================
+  // Constructors
+  // ===============================================================
+
+  public HGNode(int i, int j, int lhs, List<DPState> dpStates, HyperEdge hyperEdge,
+      float pruningEstimate) {
+    this.lhs = lhs;
+    this.i = i;
+    this.j = j;
+    this.dpStates = dpStates;
+    this.score = pruningEstimate;
+    addHyperedgeInNode(hyperEdge);
+  }
+
+  // used by disk hg
+  public HGNode(int i, int j, int lhs, List<HyperEdge> hyperedges, HyperEdge bestHyperedge,
+      List<DPState> states) {
+    this.i = i;
+    this.j = j;
+    this.lhs = lhs;
+    this.hyperedges = hyperedges;
+    this.bestHyperedge = bestHyperedge;
+    this.dpStates = states;
+  }
+
+  // ===============================================================
+  // Methods
+  // ===============================================================
+
+  public float getScore() {
+    return this.score;
+  }
+  
+  /**
+   * Adds the hyperedge to the list of incoming hyperedges (i.e., ways to form this node), creating
+   * the list if necessary. We then update the cache of the best incoming hyperedge via a call to
+   * the (obscurely named) semiringPlus().
+   */
+  public void addHyperedgeInNode(HyperEdge hyperEdge) {
+    if (hyperEdge != null) {
+      if (null == hyperedges)
+        hyperedges = new ArrayList<HyperEdge>();
+      hyperedges.add(hyperEdge);
+      // Update the cache of this node's best incoming edge.
+      semiringPlus(hyperEdge);
+    }
+  }
+
+  /**
+   * Convenience function to add a list of hyperedges one at a time.
+   */
+  public void addHyperedgesInNode(List<HyperEdge> hyperedges) {
+    for (HyperEdge hyperEdge : hyperedges)
+      addHyperedgeInNode(hyperEdge);
+  }
+
+  /**
+   * Updates the cache of the best incoming hyperedge.
+   */
+  public void semiringPlus(HyperEdge hyperEdge) {
+    if (null == bestHyperedge || bestHyperedge.getBestDerivationScore() < hyperEdge.getBestDerivationScore()) {
+      bestHyperedge = hyperEdge;
+    }
+  }
+
+  public List<DPState> getDPStates() {
+    return dpStates;
+  }
+
+  public DPState getDPState(int i) {
+    if (null == this.dpStates) {
+      return null;
+    } else {
+      return this.dpStates.get(i);
+    }
+  }
+
+  public Signature signature() {
+    if (signature == null)
+      signature = new Signature();
+    return signature;
+  }
+  
+  /*
+   * Including hashCode() and equals() directly in the class causes problems, because the 
+   * virtual node table (in KBestExtractor) does not combine HGNodes.
+   */
+//  @Override
+//  public int hashCode() {
+//    if (hash == 0) {
+//      hash = 31 * lhs + 2399 * i + 7853 * j;
+//      if (null != dpStates && dpStates.size() > 0)
+//        for (DPState dps : dpStates)
+//          hash = hash * 19 + dps.hashCode();
+//    }
+//    return hash;
+//  }
+//
+//  @Override
+//  public boolean equals(Object other) {
+//    if (other instanceof HGNode) {
+//      HGNode that = (HGNode) other;
+//      if (lhs != that.lhs)
+//        return false;
+//      if (i != that.i || j != that.j)
+//        return false;
+//      if (bestHyperedge == null && that.bestHyperedge != null)
+//        return false;
+//      if (bestHyperedge != null && that.bestHyperedge == null)
+//        return false;
+//      if (score != that.score)
+//        return false;
+//      if (dpStates == null)
+//        return (that.dpStates == null);
+//      if (that.dpStates == null)
+//        return false;
+//      if (dpStates.size() != that.dpStates.size())
+//        return false;
+//      for (int i = 0; i < dpStates.size(); i++) {
+//        if (!dpStates.get(i).equals(that.dpStates.get(i)))
+//          return false;
+//      }
+//      return true;
+//    }
+//    return false;
+//  }
+
+  /***
+   * We have different purposes when hashing HGNodes. For dynamic programming, we want to establish
+   * equivalency based on dynamic programming state, but when doing k-best extraction, we need
+   * to maintain a separate entry for every object. The Signature class provides a way to hash
+   * based on the dynamic programming state.
+   */
+  public class Signature {
+    // Cached hash code.
+    private int hash = 0;
+
+    @Override
+    public int hashCode() {
+      if (hash == 0) {
+        hash = 31 * lhs;
+        if (null != dpStates && dpStates.size() > 0)
+          for (DPState dps : dpStates)
+            hash = hash * 19 + dps.hashCode();
+      }
+      return hash;
+    }
+
+    @Override
+    public boolean equals(Object other) {
+      if (other instanceof Signature) {
+        HGNode that = ((Signature) other).node();
+        if (lhs != that.lhs)
+          return false;
+        if (i != that.i || j != that.j)
+          return false;
+        if (dpStates == null)
+          return (that.dpStates == null);
+        if (that.dpStates == null)
+          return false;
+        if (dpStates.size() != that.dpStates.size())
+          return false;
+        for (int i = 0; i < dpStates.size(); i++) {
+          if (!dpStates.get(i).equals(that.dpStates.get(i)))
+            return false;
+        }
+        return true;
+      }
+      return false;
+    }
+
+    public String toString() {
+      return String.format("%d", hashCode());
+    }
+
+    public HGNode node() {
+      return HGNode.this;
+    }
+  }
+
+  /*
+   * This would be called by the sorting in Cell.ensureSorted(), but nodes are meant to be
+   * sorted with one of the comparators below instead.
+   */
+  // sort by estTotalLogP: for pruning purposes
+  public int compareTo(HGNode anotherItem) {
+    throw new UnsupportedOperationException("HGNode.compareTo() should never be called");
+    /*
+     * if (this.estTotalLogP > anotherItem.estTotalLogP) { return -1; } else if (this.estTotalLogP
+     * == anotherItem.estTotalLogP) { return 0; } else { return 1; }
+     */
+  }
+
+  /**
+   * This sorts nodes by span, useful when dumping the hypergraph.
+   */
+  public static Comparator<HGNode> spanComparator = new Comparator<HGNode>() {
+    public int compare(HGNode item1, HGNode item2) {
+      int span1 = item1.j - item1.i;
+      int span2 = item2.j - item2.i;
+      if (span1 < span2)
+        return -1;
+      else if (span1 > span2)
+        return 1;
+      else if (item1.i < item2.i)
+        return -1;
+      else if (item1.i > item2.i)
+        return 1;
+      return 0;
+    }
+  };
+
+  public static Comparator<HGNode> inverseLogPComparator = new Comparator<HGNode>() {
+    public int compare(HGNode item1, HGNode item2) {
+      float logp1 = item1.score;
+      float logp2 = item2.score;
+      if (logp1 > logp2) {
+        return -1;
+      } else if (logp1 == logp2) {
+        return 0;
+      } else {
+        return 1;
+      }
+    }
+  };
+
+  /**
+   * Natural order.
+   */
+  public static Comparator<HGNode> logPComparator = new Comparator<HGNode>() {
+    public int compare(HGNode item1, HGNode item2) {
+      float logp1 = item1.score;
+      float logp2 = item2.score;
+      if (logp1 > logp2) {
+        return 1;
+      } else if (logp1 == logp2) {
+        return 0;
+      } else {
+        return -1;
+      }
+    }
+  };
+
+  public String toString() {
+    StringBuilder sb = new StringBuilder();
+
+    sb.append(String.format("%s (%d,%d) score=%.5f", Vocabulary.word(lhs), i, j,
+        bestHyperedge.getBestDerivationScore()));
+    if (dpStates != null)
+      for (DPState state : dpStates)
+        sb.append(" <" + state + ">");
+
+    // if (this.hyperedges != null) {
+    // sb.append(" hyperedges: " + hyperedges.size());
+    // for (HyperEdge edge: hyperedges) {
+    // sb.append("\n\t" + edge.getRule() + " ||| pathcost=" + edge.getSourcePath() + " ref="+
+    // Integer.toHexString(edge.hashCode()));
+    // }
+    // }
+
+    // sb.append("\n\ttransition score = " + bestHyperedge.getTransitionLogP(true));
+    return sb.toString();
+  }
+
+  public List<HyperEdge> getHyperEdges() {
+    return this.hyperedges;
+  }
+}
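
A sketch of why Signature exists (illustrative): chart construction wants to merge nodes
that are equivalent under dynamic programming, while k-best extraction must keep every
object distinct. Hashing by signature gives the former; the `chart` map and `newNode`
variable below are hypothetical:

    // Merge chart items that share LHS, span, and DP states.
    HashMap<HGNode.Signature, HGNode> chart = new HashMap<>();
    HGNode existing = chart.get(newNode.signature());
    if (existing != null)
      existing.addHyperedgesInNode(newNode.getHyperEdges()); // merge: add new incoming edges
    else
      chart.put(newNode.signature(), newNode);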

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/main/java/org/apache/joshua/decoder/hypergraph/HyperEdge.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/hypergraph/HyperEdge.java b/src/main/java/org/apache/joshua/decoder/hypergraph/HyperEdge.java
new file mode 100644
index 0000000..114908e
--- /dev/null
+++ b/src/main/java/org/apache/joshua/decoder/hypergraph/HyperEdge.java
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package joshua.decoder.hypergraph;
+
+import java.util.List;
+
+import joshua.decoder.chart_parser.SourcePath;
+import joshua.decoder.ff.tm.Rule;
+
+/**
+ * This class implements a hyperedge.
+ * 
+ * @author Zhifei Li, <zh...@gmail.com>
+ * @author Matt Post <po...@cs.jhu.edu>
+ */
+
+public class HyperEdge {
+
+  /**
+   * The 1-best logP over all derivations through this edge: the best logP of the antecedent
+   * (tail) nodes plus the transition logP.
+   */
+  private float bestDerivationScore = Float.NEGATIVE_INFINITY;
+
+  /**
+   * This caches the stateless + stateful logP associated with the rule application (excluding
+   * the best logP from the antecedent nodes).
+   */
+  private Float transitionScore = null;
+
+  private Rule rule;
+
+  private SourcePath srcPath = null;
+
+  /**
+   * If tailNodes is null, then this edge corresponds to a rule with zero arity. Also, the nodes
+   * appear in the list in the order of the foreign-side (source) non-terminal indices.
+   */
+  private List<HGNode> tailNodes = null;
+
+  public HyperEdge(Rule rule, float bestDerivationScore, float transitionScore,
+      List<HGNode> tailNodes, SourcePath srcPath) {
+    this.bestDerivationScore = bestDerivationScore;
+    this.transitionScore = transitionScore;
+    this.rule = rule;
+    this.tailNodes = tailNodes;
+    this.srcPath = srcPath;
+  }
+
+  public Rule getRule() {
+    return rule;
+  }
+  
+  public float getBestDerivationScore() {
+    return bestDerivationScore;
+  }
+
+  public SourcePath getSourcePath() {
+    return srcPath;
+  }
+
+  public List<HGNode> getTailNodes() {
+    return tailNodes;
+  }
+
+  public float getTransitionLogP(boolean forceCompute) {
+    StringBuilder sb = new StringBuilder();
+    if (forceCompute || transitionScore == null) {
+      float res = bestDerivationScore;
+      sb.append(String.format("Best derivation = %.5f", res));
+      if (tailNodes != null) for (HGNode tailNode : tailNodes) {
+        res -= tailNode.bestHyperedge.bestDerivationScore; // subtract out the tail best scores
+        sb.append(String.format(", tail = %.5f", tailNode.bestHyperedge.bestDerivationScore));
+      }
+      transitionScore = res;
+    }
+    // System.err.println("HYPEREDGE SCORE = " + sb.toString());
+    return transitionScore;
+  }
+
+  public void setTransitionLogP(float transitionLogP) {
+    this.transitionScore = transitionLogP;
+  }
+
+  public String toString() {
+    StringBuilder sb = new StringBuilder();
+    sb.append(this.rule);
+//    if (getTailNodes() != null) for (HGNode tailNode : getTailNodes()) {
+//      sb.append(" tail=" + tailNode);
+//    }
+    return sb.toString();
+  }
+}
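
The score bookkeeping deserves a worked example (numbers and the variables `rule`,
`tailNodes`, and `sourcePath` are illustrative only): bestDerivationScore folds in the tail
nodes' best scores, which is why getTransitionLogP() subtracts them back out.

    // The rule application contributes a transition logP of -1.5f; the two tail
    // nodes have best derivation scores of -2.0f and -3.0f, so the edge's best
    // derivation score is -1.5 + (-2.0) + (-3.0) = -6.5.
    HyperEdge edge = new HyperEdge(rule, -6.5f, -1.5f, tailNodes, sourcePath);
    // Forcing a recompute recovers the transition score:
    //   -6.5 - (-2.0) - (-3.0) = -1.5
    float transition = edge.getTransitionLogP(true);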

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/main/java/org/apache/joshua/decoder/hypergraph/HyperGraph.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/hypergraph/HyperGraph.java b/src/main/java/org/apache/joshua/decoder/hypergraph/HyperGraph.java
new file mode 100644
index 0000000..003c930
--- /dev/null
+++ b/src/main/java/org/apache/joshua/decoder/hypergraph/HyperGraph.java
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package joshua.decoder.hypergraph;
+
+import java.io.IOException;
+import java.io.PrintWriter;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.logging.Logger;
+
+import joshua.corpus.Vocabulary;
+import joshua.decoder.chart_parser.ComputeNodeResult;
+import joshua.decoder.ff.FeatureFunction;
+import joshua.decoder.ff.FeatureVector;
+import joshua.decoder.hypergraph.ForestWalker.TRAVERSAL;
+import joshua.decoder.segment_file.Sentence;
+
+/**
+ * This class implements the hypergraph data structure, tying together nodes (HGNode) and
+ * hyperedges (HyperEdge).
+ * 
+ * Note: to seed the k-best extraction, each hyperedge must have its best cost properly set.
+ * We do not require any list to be sorted.
+ * 
+ * @author Zhifei Li, <zh...@gmail.com>
+ */
+public class HyperGraph {
+
+  // pointer to goal HGNode
+  public HGNode goalNode = null;
+
+  public int numNodes = -1;
+  public int numEdges = -1;
+  public Sentence sentence = null;
+
+  static final Logger logger = Logger.getLogger(HyperGraph.class.getName());
+
+  public HyperGraph(HGNode goalNode, int numNodes, int numEdges, Sentence sentence) {
+    this.goalNode = goalNode;
+    this.numNodes = numNodes;
+    this.numEdges = numEdges;
+    this.sentence = sentence;
+  }
+  
+  public void count() {
+    new ForestWalker().walk(this.goalNode, new HyperGraphCounter(this));
+  }
+  
+  public int sentID() {
+    return sentence.id();
+  }
+  
+  public int sentLen() {
+    return sentence.length();
+  }
+  
+  private class HyperGraphCounter implements WalkerFunction {
+
+    private HyperGraph hg = null;
+    private HashSet<HGNode> nodesVisited = null;
+    
+    public HyperGraphCounter(HyperGraph hg) {
+      this.hg = hg;
+      this.hg.numNodes = 0;
+      this.hg.numEdges = 0;
+      this.nodesVisited = new HashSet<HGNode>();
+    }
+    
+    @Override
+    public void apply(HGNode node, int index) {
+      if (! nodesVisited.contains(node)) {
+        nodesVisited.add(node);
+        if (node.bestHyperedge.getRule() != null) {
+          hg.numNodes++;
+          if (node.hyperedges != null)
+            hg.numEdges += node.hyperedges.size();
+        }
+      }
+    }
+  }
+
+  private class HyperGraphDumper implements WalkerFunction {
+
+    private int node_number = 1;
+    private List<FeatureFunction> model = null;
+    private PrintWriter out = null;
+    
+    private HashMap<HGNode, Integer> nodeMap;
+    
+    public HyperGraphDumper(PrintWriter out, List<FeatureFunction> model) {
+      this.out = out;
+      this.model = model;
+      this.nodeMap = new HashMap<HGNode, Integer>();
+    }
+    
+    @Override
+    public void apply(HGNode node, int index) {
+      if (! nodeMap.containsKey(node)) { // Make sure each node is listed only once
+        nodeMap.put(node,  this.node_number);
+
+        if (node.hyperedges.size() != 0 && node.bestHyperedge.getRule() != null) {
+          out.println(this.node_number);
+          for (HyperEdge e: node.hyperedges) {
+            if (e.getRule() != null) {
+              for (int id: e.getRule().getEnglish()) {
+                if (id < 0) {
+                  out.print(String.format("[%d] ", nodeMap.get(e.getTailNodes().get(-id-1))));
+                } else {
+                  out.print(String.format("%s ", Vocabulary.word(id)));
+                }
+              }
+
+              FeatureVector edgeFeatures = ComputeNodeResult.computeTransitionFeatures(
+                  model, e, node.i, node.j, sentence);
+              out.println(String.format("||| %s", edgeFeatures));
+            }
+          }
+        }
+        
+        this.node_number++;
+      }
+    }
+  }
+  
+  /**
+   * Dump the hypergraph to the specified file.
+   * 
+   * @param fileName the file to write to
+   * @param model the feature functions, needed to replay the feature values on each edge
+   */
+  public void dump(String fileName, List<FeatureFunction> model) {
+    try ( PrintWriter out = new PrintWriter(fileName, "UTF-8") ) {
+      count();
+      out.println("# target ||| features");
+      out.println(String.format("%d %d", numNodes, numEdges));
+      new ForestWalker(TRAVERSAL.POSTORDER).walk(this.goalNode, new HyperGraphDumper(out, model));
+    } catch (IOException e) {
+      System.err.println("* Can't dump hypergraph to file '" + fileName + "'");
+      e.printStackTrace();
+    }
+  }
+
+  public float bestScore() {
+    return this.goalNode.bestHyperedge.getBestDerivationScore();
+  }
+}
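
A sketch of the dumper in action (illustrative; `hg` and the decoder's `featureFunctions`
are assumed to be in scope). Per the code above, the file starts with a header comment and
the node/edge counts, then one numbered block per node:

    hg.dump("/tmp/sentence.hg", featureFunctions);
    // Resulting file layout:
    //   # target ||| features
    //   <numNodes> <numEdges>
    //   <node number>
    //   <target side, with [n] references to tail nodes> ||| <edge features>
    System.out.println("Viterbi score: " + hg.bestScore());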

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/main/java/org/apache/joshua/decoder/hypergraph/HyperGraphPruning.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/hypergraph/HyperGraphPruning.java b/src/main/java/org/apache/joshua/decoder/hypergraph/HyperGraphPruning.java
new file mode 100644
index 0000000..98b97d3
--- /dev/null
+++ b/src/main/java/org/apache/joshua/decoder/hypergraph/HyperGraphPruning.java
@@ -0,0 +1,176 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package joshua.decoder.hypergraph;
+
+import java.util.HashMap;
+
+import joshua.corpus.Vocabulary;
+
+/**
+ * During the pruning process, many items/deductions may not be explored at all, due to the
+ * early stop in pruningEdge().
+ * 
+ * @author Zhifei Li, <zh...@gmail.com>
+ * @version $LastChangedDate$
+ */
+public class HyperGraphPruning extends TrivialInsideOutside {
+
+  HashMap<HGNode, Boolean> processedNodesTbl = new HashMap<HGNode, Boolean>();
+  double bestLogProb;// viterbi unnormalized log prob in the hypergraph
+
+  boolean ViterbiPruning = false;// Viterbi or Posterior pruning
+
+  boolean fixThresholdPruning = true;
+  double THRESHOLD_GENERAL = 10;// if the merit is worse than the best_log_prob by this number, then
+                                // prune
+  double THRESHOLD_GLUE = 10;// if the merit is worse than the best_log_prob by this number, then
+                             // prune
+
+  int numSurvivedEdges = 0;
+  int numSurvivedNodes = 0;
+
+  int glueGrammarOwner = 0;// TODO
+
+
+  public HyperGraphPruning(boolean fixThreshold, double thresholdGeneral, double thresholdGlue) {
+    fixThresholdPruning = fixThreshold;
+    THRESHOLD_GENERAL = thresholdGeneral;
+    THRESHOLD_GLUE = thresholdGlue;
+    glueGrammarOwner = Vocabulary.id("glue");// TODO
+  }
+
+  public void clearState() {
+    processedNodesTbl.clear();
+    super.clearState();
+  }
+
+
+  // ######################### pruning here ##############
+  public void pruningHG(HyperGraph hg) {
+
+    runInsideOutside(hg, 2, 1, 1.0);// viterbi-max, log-semiring
+
+    if (fixThresholdPruning) {
+      pruningHGHelper(hg);
+      super.clearState();
+    } else {
+      throw new RuntimeException("wrong call");
+    }
+  }
+
+  private void pruningHGHelper(HyperGraph hg) {
+
+    this.bestLogProb = getLogNormalizationConstant();// set the best_log_prob
+
+    numSurvivedEdges = 0;
+    numSurvivedNodes = 0;
+    processedNodesTbl.clear();
+    pruningNode(hg.goalNode);
+
+    // clear up
+    processedNodesTbl.clear();
+
+    System.out.println("Item survival ratio: " + numSurvivedNodes * 1.0 / hg.numNodes + " =  "
+        + numSurvivedNodes + "/" + hg.numNodes);
+    System.out.println("Deduction survival ratio: " + numSurvivedEdges * 1.0 / hg.numEdges + " =  "
+        + numSurvivedEdges + "/" + hg.numEdges);
+  }
+
+
+  private void pruningNode(HGNode it) {
+
+    if (processedNodesTbl.containsKey(it)) return;
+
+    processedNodesTbl.put(it, true);
+    boolean shouldSurvive = false;
+
+    // ### recursive call on each deduction
+    for (int i = 0; i < it.hyperedges.size(); i++) {
+      HyperEdge dt = it.hyperedges.get(i);
+      boolean survived = pruningEdge(dt, it); // deduction-specific operation
+      if (survived) {
+        shouldSurvive = true; // at least one deduction survive
+      } else {
+        it.hyperedges.remove(i);
+        i--;
+      }
+    }
+    // TODO: now we simply remove the pruned deductions, but in general, we may want to update the
+    // variables maintained in the item (e.g., best_deduction); this depends on the pruning method used
+
+    /*
+     * By definition, "shouldSurvive == false" should be impossible: if this node got visited, its
+     * upper deduction must have survived, and this node must then survive too, since there must
+     * be at least one path up from it for that upper deduction to survive.
+     */
+    if (!shouldSurvive) {
+      throw new RuntimeException("item explored but does not survive");
+      // TODO: since we always keep the best_deduction, this should never be true
+    } else {
+      numSurvivedNodes++;
+    }
+  }
+
+
+  // if survive, return true
+  // best-deduction is always kept
+  private boolean pruningEdge(HyperEdge dt, HGNode parent) {
+
+    /*
+     * TODO: theoretically, if an item gets visited, its best deduction should always be kept,
+     * even just by the threshold checking. In practice, due to the limited precision of doubles,
+     * the threshold checking may not be perfect.
+     */
+    if (dt != parent.bestHyperedge) { // the best deduction always survives if the item is
+                                      // visited
+      // ### prune?
+      if (shouldPruneHyperedge(dt, parent)) {
+        return false; // early stop
+      }
+    }
+
+    // ### still survives; recursively visit all antecedent (tail) items
+    if (null != dt.getTailNodes()) {
+      for (HGNode ant_it : dt.getTailNodes()) {
+        pruningNode(ant_it); // recursive call on each ant item; note: ant_it will not be pruned,
+                             // as it is needed here
+      }
+    }
+
+    // ### if we get here, the edge survives; remember: if an edge survives, its upper item must survive
+    numSurvivedEdges++;
+    return true; // survive
+  }
+
+  private boolean shouldPruneHyperedge(HyperEdge dt, HGNode parent) {
+
+    // ### get merit
+    double postLogProb = getEdgeUnormalizedPosteriorLogProb(dt, parent);
+
+
+    if (dt.getRule() != null && dt.getRule().getOwner() == glueGrammarOwner
+        && dt.getRule().getArity() == 2) { // special rule: S -> S X
+      // TODO
+      return (postLogProb - this.bestLogProb < THRESHOLD_GLUE);
+    } else {
+      return (postLogProb - this.bestLogProb < THRESHOLD_GENERAL);
+    }
+  }
+
+}
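
A minimal usage sketch (thresholds illustrative; `hg` assumed in scope): fixed-threshold
posterior pruning drops hyperedges whose merit falls too far below the hypergraph's best
log probability.

    // Keep only edges within 10.0 of the best log probability
    // (a separate threshold applies to glue rules).
    HyperGraphPruning pruner = new HyperGraphPruning(true, 10.0, 10.0);
    pruner.pruningHG(hg); // runs inside-outside, then prunes in place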

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/main/java/org/apache/joshua/decoder/hypergraph/KBestExtractor.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/hypergraph/KBestExtractor.java b/src/main/java/org/apache/joshua/decoder/hypergraph/KBestExtractor.java
new file mode 100644
index 0000000..6dd3207
--- /dev/null
+++ b/src/main/java/org/apache/joshua/decoder/hypergraph/KBestExtractor.java
@@ -0,0 +1,1006 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package joshua.decoder.hypergraph;
+
+import static joshua.util.FormatUtils.unescapeSpecialSymbols;
+import static joshua.util.FormatUtils.removeSentenceMarkers;
+
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.PriorityQueue;
+
+import joshua.corpus.Vocabulary;
+import joshua.decoder.BLEU;
+import joshua.decoder.JoshuaConfiguration;
+import joshua.decoder.ff.FeatureFunction;
+import joshua.decoder.ff.FeatureVector;
+import joshua.decoder.ff.fragmentlm.Tree;
+import joshua.decoder.ff.state_maintenance.DPState;
+import joshua.decoder.ff.tm.Rule;
+import joshua.decoder.io.DeNormalize;
+import joshua.decoder.segment_file.Sentence;
+import joshua.decoder.segment_file.Token;
+import joshua.util.FormatUtils;
+
+/**
+ * This class implements lazy k-best extraction on a hyper-graph.
+ * 
+ * K-best extraction over hypergraphs is a little hairy, but is best understood in the following
+ * manner. Imagine a hypergraph, which is composed of nodes connected by hyperedges. A hyperedge has
+ * exactly one parent node and zero or more tail nodes, their number corresponding to the arity
+ * of the rule that gave rise to the hyperedge. Each node has 1 or more incoming hyperedges.
+ * 
+ * K-best extraction works in the following manner. A derivation is a set of nodes and hyperedges
+ * that leads from the root node down and exactly covers the source-side sentence. To define a
+ * derivation, we start at the root node, choose one of its incoming hyperedges, and then recurse to
+ * the tail (or antecedent) nodes of that hyperedge, where we continually make the same decision.
+ * 
+ * Each hypernode has its hyperedges sorted according to their model score. To get the best
+ * (Viterbi) derivation, we simply recursively follow the best hyperedge coming in to each
+ * hypernode.
+ * 
+ * How do we get the second-best derivation? It is defined by changing exactly one of the decisions
+ * about which hyperedge to follow in the recursion. Somewhere, we take the second-best. Similarly,
+ * the third-best derivation makes a single change from the second-best: either making another
+ * (different) second-best choice somewhere along the 1-best derivation, or taking the third-best
+ * choice at the same spot where the second-best derivation took the second-best choice. And so on.
+ * 
+ * This class uses two classes that encode the necessary meta-information. The first is the
+ * DerivationState class. It roughly corresponds to a hyperedge, and records, for each of that
+ * hyperedge's tail nodes, which-best to take. So for a hyperedge with three tail nodes, the 1-best
+ * derivation will be (1,1,1), the second-best will be one of (2,1,1), (1,2,1), or (1,1,2), the
+ * third best will be one of
+ * 
+ * (3,1,1), (2,2,1), (1,1,3)
+ * 
+ * and so on.
+ * 
+ * The configuration parameter `output-format` controls what exactly is extracted from the forest.
+ * See documentation for that below. Note that Joshua does not store individual feature values while 
+ * decoding, but only the cost of each edge (in the form of a float). Therefore, if you request
+ * the feature values (`%f` in `output-format`), the feature functions must be replayed, which
+ * is expensive.
+ * 
+ * The configuration parameter `top-n` controls how many items are returned. If this is set to 0,
+ * k-best extraction should be turned off entirely.
+ * 
+ * @author Zhifei Li, <zh...@gmail.com>
+ * @author Matt Post <po...@cs.jhu.edu>
+ */
+public class KBestExtractor {
+  private final JoshuaConfiguration joshuaConfiguration;
+  private final String outputFormat;
+  private final HashMap<HGNode, VirtualNode> virtualNodesTable = new HashMap<HGNode, VirtualNode>();
+
+  // static final String rootSym = JoshuaConfiguration.goal_symbol;
+  static final String rootSym = "ROOT";
+  static final int rootID = Vocabulary.id(rootSym);
+
+  private enum Side {
+    SOURCE, TARGET
+  };
+
+  /* Whether to extract only unique strings */
+  private final boolean extractUniqueNbest;
+
+  /* Which side to output (source or target) */
+  private final Side defaultSide;
+
+  /* The input sentence */
+  private final Sentence sentence;
+
+  /* The weights being used to score the forest */
+  private final FeatureVector weights;
+
+  /* The feature functions */
+  private final List<FeatureFunction> featureFunctions;
+
+  /* BLEU statistics of the references */
+  private BLEU.References references = null;
+
+  public KBestExtractor(
+      Sentence sentence,
+      List<FeatureFunction> featureFunctions,
+      FeatureVector weights,
+      boolean isMonolingual,
+      JoshuaConfiguration joshuaConfiguration) {
+
+    this.featureFunctions = featureFunctions;
+
+    this.joshuaConfiguration = joshuaConfiguration;
+    this.outputFormat = this.joshuaConfiguration.outputFormat;
+    this.extractUniqueNbest = joshuaConfiguration.use_unique_nbest;
+
+    this.weights = weights;
+    this.defaultSide = (isMonolingual ? Side.SOURCE : Side.TARGET);
+    this.sentence = sentence;
+
+    if (joshuaConfiguration.rescoreForest) {
+      references = new BLEU.References(sentence.references());
+    }
+  }
+
+  /**
+   * Returns the kth derivation.
+   * 
+   * You may need to call resetState() before you call this function for the first time.
+   * 
+   * @param node the node to start at
+   * @param k the kth best derivation (indexed from 1)
+   * @return the derivation object
+   */
+  public DerivationState getKthDerivation(HGNode node, int k) {
+    VirtualNode virtualNode = getVirtualNode(node);
+    return virtualNode.lazyKBestExtractOnNode(this, k);
+  }
+  
+  /**
+   * Compute the string that is output from the decoder, using the "output-format" config file
+   * parameter as a template.
+   * 
+   * You may need to call resetState() before you call this function for the first time.
+   */
+  public String getKthHyp(HGNode node, int k) {
+
+    String outputString = null;
+    
+    // Determine the k-best hypotheses at each HGNode
+    VirtualNode virtualNode = getVirtualNode(node);
+    DerivationState derivationState = virtualNode.lazyKBestExtractOnNode(this, k);
+//    DerivationState derivationState = getKthDerivation(node, k);
+    if (derivationState != null) {
+      // ==== read the kbest from each hgnode and convert to output format
+      String hypothesis = maybeProjectCase(
+                            unescapeSpecialSymbols(
+                              removeSentenceMarkers(
+                                derivationState.getHypothesis())), derivationState);
+      
+      
+      /*
+       * To save space, the decoder only stores the model cost,
+       * not the individual feature values.
+       * If you want to output them, you have to replay them.
+       */
+
+      FeatureVector features = new FeatureVector();
+      if (outputFormat.contains("%f") || outputFormat.contains("%d"))
+        features = derivationState.getFeatures();
+
+      outputString = outputFormat
+          .replace("%k", Integer.toString(k))
+          .replace("%s", hypothesis)
+          .replace("%S", DeNormalize.processSingleLine(hypothesis))
+          // TODO (kellens): Fix the recapitalization here
+          .replace("%i", Integer.toString(sentence.id()))
+          .replace("%f", joshuaConfiguration.moses ? features.mosesString() : features.toString())
+          .replace("%c", String.format("%.3f", derivationState.cost));
+
+      if (outputFormat.contains("%t")) {
+        outputString = outputString.replace("%t", derivationState.getTree());
+      }
+
+      if (outputFormat.contains("%e")) {
+        outputString = outputString.replace("%e", removeSentenceMarkers(derivationState.getHypothesis(Side.SOURCE)));
+      }
+
+      /* %d causes a derivation with rules one per line to be output */
+      if (outputFormat.contains("%d")) {
+        outputString = outputString.replace("%d", derivationState.getDerivation());
+      }
+      
+      /* %a causes output of word level alignments between input and output hypothesis */
+      if (outputFormat.contains("%a")) {
+        outputString = outputString.replace("%a",  derivationState.getWordAlignmentString());
+      }
+      
+    }
+
+    return outputString;
+  }
+
+  // =========================== end kbestHypergraph
+
+  /**
+   * If requested, projects source-side lettercase onto the target side, using the word
+   * alignment between the hypothesis and the source-side sentence.
+   * 
+   * @param hypothesis the target-side hypothesis string
+   * @param state the derivation state, which supplies the word alignment
+   * @return the hypothesis, re-cased if project_case is enabled
+   */
+  private String maybeProjectCase(String hypothesis, DerivationState state) {
+    String output = hypothesis;
+
+    if (joshuaConfiguration.project_case) {
+      String[] tokens = hypothesis.split("\\s+");
+      List<List<Integer>> points = state.getWordAlignment();
+      for (int i = 0; i < points.size(); i++) {
+        List<Integer> target = points.get(i);
+        for (int source: target) {
+          Token token = sentence.getTokens().get(source + 1); // skip <s>
+          String annotation = "";
+          if (token != null && token.getAnnotation("lettercase") != null)
+            annotation = token.getAnnotation("lettercase");
+          if (source != 0 && annotation.equals("upper"))
+            tokens[i] = FormatUtils.capitalize(tokens[i]);
+          else if (annotation.equals("all-upper"))
+            tokens[i] = tokens[i].toUpperCase();
+        }
+      }
+
+      output = String.join(" ",  tokens);
+    }
+
+    return output;
+  }
+
+  /**
+   * Convenience function for k-best extraction that prints to STDOUT.
+   */
+  public void lazyKBestExtractOnHG(HyperGraph hg, int topN) throws IOException {
+    lazyKBestExtractOnHG(hg, topN, new BufferedWriter(new OutputStreamWriter(System.out)));
+  }
+
+  /**
+   * This is the entry point for extracting k-best hypotheses. It computes all of them, writing
+   * the results to the BufferedWriter passed in. If you want intermediate access to the k-best
+   * derivations, you'll want to call getKthHyp() or getKthDerivation() directly.
+   * 
+   * The number of derivations that are looked for is controlled by the `top-n` parameter.
+   * Note that when `top-n` is set to 0, k-best extraction is disabled entirely, and only things 
+   * like the Viterbi string and the model score are available to the decoder. Since k-best
+   * extraction involves the recomputation of features to get the component values, turning off
+   * that extraction saves a lot of time when only the 1-best string is desired.
+   * 
+   * @param hg the hypergraph to extract from
+   * @param topN how many to extract
+   * @param out object to write to
+   * @throws IOException
+   */
+  public void lazyKBestExtractOnHG(HyperGraph hg, int topN, BufferedWriter out) throws IOException {
+
+    resetState();
+
+    if (null == hg.goalNode)
+      return;
+
+    for (int k = 1; k <= topN; k++) {
+      String hypStr = getKthHyp(hg.goalNode, k);
+      if (null == hypStr)
+        break;
+
+      out.write(hypStr);
+      out.write("\n");
+      out.flush();
+    }
+  }
+
+  /**
+   * This clears the virtualNodesTable, which maintains a list of virtual nodes. This should be
+   * called in between forest rescorings.
+   */
+  public void resetState() {
+    virtualNodesTable.clear();
+  }
+
+  /**
+   * Returns the VirtualNode corresponding to an HGNode. If no such VirtualNode exists, it is
+   * created.
+   * 
+   * @param hgnode
+   * @return the corresponding VirtualNode
+   */
+  private VirtualNode getVirtualNode(HGNode hgnode) {
+    VirtualNode virtualNode = virtualNodesTable.get(hgnode);
+    if (null == virtualNode) {
+      virtualNode = new VirtualNode(hgnode);
+      virtualNodesTable.put(hgnode, virtualNode);
+    }
+    return virtualNode;
+  }
+
+  /**
+   * This class is essentially a wrapper around an HGNode, annotating it with information needed to
+   * record which hypotheses have been explored from this point. There is one virtual node for
+   * each HGNode in the underlying hypergraph. This VirtualNode maintains information about the
+   * k-best derivations from that point on, retaining the derivations computed so far and a priority 
+   * queue of candidates.
+   */
+
+  private class VirtualNode {
+
+    // The node being annotated.
+    HGNode node = null;
+
+    // sorted ArrayList of DerivationState; in the paper this is D̂(v)
+    public List<DerivationState> nbests = new ArrayList<DerivationState>();
+
+    // remember frontier states, best-first; in the paper, it is called cand[v]
+    private PriorityQueue<DerivationState> candHeap = null;
+
+    // Remember which DerivationState has been explored (positions in the hypercube). This allows
+    // us to avoid duplicated states that are reached from different places of expansion, e.g.,
+    // position (2,2) can be reached by extending (1,2) or (2,1).
+    private HashSet<DerivationState> derivationTable = null;
+
+    // This records unique *strings* at each item, used for unique-nbest-string extraction.
+    private HashSet<String> uniqueStringsTable = null;
+
+    public VirtualNode(HGNode it) {
+      this.node = it;
+    }
+
+    /**
+     * This returns a DerivationState corresponding to the kth-best derivation rooted at this node.
+     * 
+     * @param kbestExtractor
+     * @param k (indexed from one)
+     * @return the k-th best (1-indexed) hypothesis, or null if there are no more.
+     */
+    // return: the k-th hyp or null; k starts from one
+    private DerivationState lazyKBestExtractOnNode(KBestExtractor kbestExtractor, int k) {
+      if (nbests.size() >= k) { // no need to continue
+        return nbests.get(k - 1);
+      }
+
+      // ### we need to fill in the n-best list in order to get the k-th hyp
+      DerivationState derivationState = null;
+
+      /*
+       * The first time this is called, the heap of candidates (the frontier of the cube) is
+       * uninitialized. This recursive call will seed the candidates at each node.
+       */
+      if (null == candHeap) {
+        getCandidates(kbestExtractor);
+      }
+
+      /*
+       * Now build the kbest list by repeatedly popping the best candidate and then placing all
+       * extensions of that hypothesis back on the candidates list.
+       */
+      int tAdded = 0; // sanity check
+      while (nbests.size() < k) {
+        if (candHeap.size() > 0) {
+          derivationState = candHeap.poll();
+          // derivation_tbl.remove(res.get_signature());//TODO: should remove? note that two state
+          // may be tied because the cost is the same
+          if (extractUniqueNbest) {
+            // We pass false for extract_nbest_tree because we want to check that the hypothesis
+            // *strings* are unique, not the trees.
+            final String res_str = derivationState.getHypothesis();
+            
+            if (!uniqueStringsTable.contains(res_str)) {
+              nbests.add(derivationState);
+              uniqueStringsTable.add(res_str);
+            }
+          } else {
+            nbests.add(derivationState);
+          }
+
+          // Add all extensions of this hypothesis to the candidates list.
+          lazyNext(kbestExtractor, derivationState);
+
+          // debug: sanity check
+          tAdded++;
+          // polling more than once per call can happen only in unique-nbest mode,
+          // where duplicate strings are skipped
+          if (!extractUniqueNbest && tAdded > 1) {
+            throw new RuntimeException("In lazyKBestExtractOnNode, add more than one time, k is "
+                + k);
+          }
+        } else {
+          break;
+        }
+      }
+      if (nbests.size() < k) {
+        derivationState = null; // in case we do not reach depth k
+      }
+      // debug: sanity check
+      // if (l_nbest.size() >= k && l_nbest.get(k-1) != res) {
+      // throw new RuntimeException("In lazy_k_best_extract, ranking is not correct ");
+      // }
+
+      return derivationState;
+    }
+
+    /**
+     * This function extends the current hypothesis, adding each extended item to the list of
+     * candidates (assuming they have not been added before). It does this by, in turn, extending
+     * each of the tail node items.
+     * 
+     * @param kbestExtractor
+     * @param previousState
+     */
+    private void lazyNext(KBestExtractor kbestExtractor, DerivationState previousState) {
+      /* If there are no tail nodes, there is nothing to do. */
+      if (null == previousState.edge.getTailNodes())
+        return;
+
+      /* For each tail node, create a new state candidate by "sliding" that item one position. */
+      for (int i = 0; i < previousState.edge.getTailNodes().size(); i++) {
+        /* Create a new virtual node that is a copy of the current node */
+        HGNode tailNode = (HGNode) previousState.edge.getTailNodes().get(i);
+        VirtualNode virtualTailNode = kbestExtractor.getVirtualNode(tailNode);
+        // Copy over the ranks.
+        int[] newRanks = Arrays.copyOf(previousState.ranks, previousState.ranks.length);
+        // Now increment/slide the current tail node by one
+        newRanks[i] = previousState.ranks[i] + 1;
+
+        // Create a new state so we can see if it's new. The cost will be set below if it is.
+        DerivationState nextState = new DerivationState(previousState.parentNode,
+            previousState.edge, newRanks, 0.0f, previousState.edgePos);
+
+        // Don't add the state to the list of candidates if it's already been added.
+        if (!derivationTable.contains(nextState)) {
+          // Make sure that next candidate exists
+          virtualTailNode.lazyKBestExtractOnNode(kbestExtractor, newRanks[i]);
+          // System.err.println(String.format("  newRanks[%d] = %d and tail size %d", i,
+          // newRanks[i], virtualTailNode.nbests.size()));
+          if (newRanks[i] <= virtualTailNode.nbests.size()) {
+            // System.err.println("NODE: " + this.node);
+            // System.err.println("  tail is " + virtualTailNode.node);
+            float cost = previousState.getModelCost()
+                - virtualTailNode.nbests.get(previousState.ranks[i] - 1).getModelCost()
+                + virtualTailNode.nbests.get(newRanks[i] - 1).getModelCost();
+            nextState.setCost(cost);
+
+            if (joshuaConfiguration.rescoreForest)
+              nextState.bleu = nextState.computeBLEU();
+
+            candHeap.add(nextState);
+            derivationTable.add(nextState);
+
+            // System.err.println(String.format("  LAZYNEXT(%s", nextState));
+          }
+        }
+      }
+    }
+
+    /**
+     * This is the seeding function: it recurses down to the leaves, extracts the 1-best
+     * derivation along each incoming hyperedge, and adds those derivations to the candidate
+     * heap.
+     * 
+     * @param kbestExtractor the extractor whose virtual-node table is used for recursion
+     */
+    private void getCandidates(KBestExtractor kbestExtractor) {
+      /* The list of candidates extending from this (virtual) node. */
+      candHeap = new PriorityQueue<DerivationState>(11, new DerivationStateComparator());
+
+      /*
+       * When exploring the cube frontier, there are multiple paths to each candidate. For example,
+       * going down 1 from grid position (2,1) is the same as going right 1 from grid position
+       * (1,2). To avoid adding states more than once, we keep a list of derivation states we have
+       * already added to the candidates heap.
+       */
+      derivationTable = new HashSet<DerivationState>();
+
+      /*
+       * A Joshua configuration option allows the decoder to output only unique strings. In that
+       * case, we keep a set of the strings of derivations extending from this node.
+       */
+      if (extractUniqueNbest) {
+        uniqueStringsTable = new HashSet<String>();
+      }
+
+      /*
+       * Get the single-best derivation along each of the incoming hyperedges, and add the lot of
+       * them to the priority queue of candidates in the form of DerivationState objects.
+       * 
+       * Note that since the hyperedges are not sorted according to score, the first derivation
+       * computed here may not be the best. But since the loop over all hyperedges seeds the entire
+       * candidates list with the one-best along each of them, when the candidate heap is polled
+       * afterwards, we are guaranteed to have the best one.
+       */
+      int pos = 0;
+      for (HyperEdge edge : node.hyperedges) {
+        DerivationState bestState = getBestDerivation(kbestExtractor, node, edge, pos);
+        // Duplicates (e.g., ranks 1 2 + 1 0 == 2 1 + 0 1) can arise only during expansion;
+        // during seeding, each hyperedge contributes exactly one state, so none should occur.
+        if (!derivationTable.contains(bestState)) {
+          candHeap.add(bestState);
+          derivationTable.add(bestState);
+        } else { // sanity check
+          throw new RuntimeException(
+              "get duplicate derivation in get_candidates, this should not happen"
+                  + "\nsignature is " + bestState + "\nl_hyperedge size is "
+                  + node.hyperedges.size());
+        }
+        pos++;
+      }
+
+      // TODO: if tem.size() is too large, this may cause unnecessary computation; the segment
+      // below was commented out to accommodate unique nbest extraction
+      /*
+       * if(tem.size()>global_n){ heap_cands=new PriorityQueue<DerivationState>(new DerivationStateComparator()); for(int i=1;
+       * i<=global_n; i++) heap_cands.add(tem.poll()); }else heap_cands=tem;
+       */
+    }
+
+    /**
+     * This computes the best derivation along a particular hyperedge. It is only called by
+     * getCandidates() to initialize the candidates priority queue at each (virtual) node.
+     * 
+     * @param kbestExtractor the extractor whose virtual-node table is used for recursion
+     * @param parentNode the node at the head of the hyperedge
+     * @param hyperEdge the hyperedge along which the best derivation is computed
+     * @param edgePos the hyperedge's position in the parent node's list of incoming edges
+     * @return an object representing the best derivation from this node
+     */
+    private DerivationState getBestDerivation(KBestExtractor kbestExtractor, HGNode parentNode,
+        HyperEdge hyperEdge, int edgePos) {
+      int[] ranks;
+      float cost = 0.0f;
+
+      /*
+       * There are two cases: (1) leaf nodes and (2) internal nodes. A leaf node is represented by a
+       * hyperedge with no tail nodes.
+       */
+      if (hyperEdge.getTailNodes() == null) {
+        ranks = null;
+
+      } else {
+        // "ranks" records which derivation to take at each of the tail nodes. Ranks are 1-indexed.
+        ranks = new int[hyperEdge.getTailNodes().size()];
+
+        /* Initialize the one-best at each tail node. */
+        for (int i = 0; i < hyperEdge.getTailNodes().size(); i++) { // children is ready
+          ranks[i] = 1;
+          VirtualNode childVirtualNode = kbestExtractor.getVirtualNode(hyperEdge.getTailNodes()
+              .get(i));
+          // recurse
+          childVirtualNode.lazyKBestExtractOnNode(kbestExtractor, ranks[i]);
+        }
+      }
+      cost = (float) hyperEdge.getBestDerivationScore();
+
+      DerivationState state = new DerivationState(parentNode, hyperEdge, ranks, cost, edgePos);
+      if (joshuaConfiguration.rescoreForest)
+        state.bleu = state.computeBLEU();
+
+      return state;
+    }
+  }
+
+  /**
+   * A DerivationState describes which path to follow through the hypergraph. For example, it
+   * might say to use the 1-best from the first tail node, the 9th-best from the second tail node,
+   * and so on. This information is represented recursively through a chain of DerivationState
+   * objects. The visit() function follows that chain, extracting information according to a
+   * number of parameters and returning the result as a string, optionally also accumulating
+   * the feature values into a passed-in FeatureVector. Each DerivationState roughly
+   * corresponds to a single hypothesis.
+   */
+  public class DerivationState {
+    /* The edge ("e" in the paper) */
+    public HyperEdge edge;
+
+    /* The edge's parent node */
+    public HGNode parentNode;
+
+    /*
+     * This state's position in its parent node's list of incoming hyperedges (used in signature
+     * calculation)
+     */
+    public int edgePos;
+
+    /*
+     * The rank of the derivation to select from each of the incoming tail nodes ("j" in the
+     * paper, an array of size |e|)
+     */
+    public int[] ranks;
+
+    /*
+     * The model cost of the hypothesis; the weighted BLEU score, if any, is stored
+     * separately in bleu and combined in getCost().
+     */
+    private float cost;
+
+    private float bleu = 0.0f;
+
+    /*
+     * The BLEU sufficient statistics associated with the edge's derivation. Note that this is a
+     * function of the complete derivation headed by the edge, i.e., all the particular
+     * subderivations of edges beneath it. That is why it must be contained in DerivationState
+     * instead of in the HyperEdge itself.
+     */
+    BLEU.Stats stats = null;
+
+    public DerivationState(HGNode pa, HyperEdge e, int[] r, float c, int pos) {
+      parentNode = pa;
+      edge = e;
+      ranks = r;
+      cost = c;
+      edgePos = pos;
+      bleu = 0.0f;
+    }
+
+    /**
+     * Computes a scaled approximate BLEU from the accumulated statistics. We know the number of
+     * words; to compute the effective reference length, we take the real reference length statistic
+     * and scale it by the percentage of the input sentence that is consumed, based on the
+     * assumption that the total number of words in the hypothesis scales linearly with the input
+     * sentence span.
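+     * For example (numbers invented): a hypothesis spanning 5 words of a 20-word input
+     * scales the reference length statistic by 0.25 before BLEU is computed.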
+     * 
+     * @return the approximate BLEU score for this derivation
+     */
+    public float computeBLEU() {
+      if (stats == null) {
+        float percentage = 1.0f * (parentNode.j - parentNode.i) / (sentence.length());
+        // System.err.println(String.format("computeBLEU: (%d - %d) / %d = %f", parentNode.j,
+        // parentNode.i, sentence.length(), percentage));
+        stats = BLEU.compute(edge, percentage, references);
+
+        if (edge.getTailNodes() != null) {
+          for (int id = 0; id < edge.getTailNodes().size(); id++) {
+            stats.add(getChildDerivationState(edge, id).stats);
+          }
+        }
+      }
+
+      return BLEU.score(stats);
+    }
+
+    public void setCost(float cost) {
+      this.cost = cost;
+    }
+
+    /**
+     * Returns the pure model cost, without the weighted BLEU score used in forest rescoring.
+     * 
+     * @return the model cost
+     */
+    public float getModelCost() {
+      return this.cost;
+    }
+
+    /**
+     * Returns the model cost combined with the weighted BLEU score, used for ranking candidates.
+     * 
+     * @return the combined cost
+     */
+    public float getCost() {
+      return cost - weights.getSparse("BLEU") * bleu;
+    }
+
+    public String toString() {
+      StringBuilder sb = new StringBuilder(String.format("DS[[ %s (%d,%d)/%d ||| ",
+          Vocabulary.word(parentNode.lhs), parentNode.i, parentNode.j, edgePos));
+      sb.append("ranks=[ ");
+      if (ranks != null)
+        for (int i = 0; i < ranks.length; i++)
+          sb.append(ranks[i] + " ");
+      sb.append("] ||| " + String.format("%.5f ]]", cost));
+      return sb.toString();
+    }
+
+    public boolean equals(Object other) {
+      if (other instanceof DerivationState) {
+        DerivationState that = (DerivationState) other;
+        if (edgePos == that.edgePos) {
+          if (ranks != null && that.ranks != null) {
+            if (ranks.length == that.ranks.length) {
+              for (int i = 0; i < ranks.length; i++)
+                if (ranks[i] != that.ranks[i])
+                  return false;
+              return true;
+            }
+          }
+        }
+      }
+
+      return false;
+    }
+
+    /**
+     * DerivationState objects are unique to each VirtualNode, so the identifying information
+     * need only contain the edge position and the ranks.
+     */
+    public int hashCode() {
+      int hash = edgePos;
+      if (ranks != null) {
+        for (int i = 0; i < ranks.length; i++)
+          hash = hash * 53 + ranks[i];
+      }
+
+      return hash;
+    }
+
+    /**
+     * Visits every state in the derivation in a depth-first order.
+     */
+    private DerivationVisitor visit(DerivationVisitor visitor) {
+      return visit(visitor, 0, 0);
+    }
+
+    private DerivationVisitor visit(DerivationVisitor visitor, int indent, int tailNodeIndex) {
+
+      visitor.before(this, indent, tailNodeIndex);
+
+      final Rule rule = edge.getRule();
+      final List<HGNode> tailNodes = edge.getTailNodes();
+
+      if (rule == null) {
+        getChildDerivationState(edge, 0).visit(visitor, indent + 1, 0);
+      } else {
+        if (tailNodes != null) {
+          for (int index = 0; index < tailNodes.size(); index++) {
+            getChildDerivationState(edge, index).visit(visitor, indent + 1, index);
+          }
+        }
+      }
+
+      visitor.after(this, indent, tailNodeIndex);
+
+      return visitor;
+    }
+
+    private String getWordAlignmentString() {
+      return visit(new WordAlignmentExtractor()).toString();
+    }
+    
+    private List<List<Integer>> getWordAlignment() {
+      WordAlignmentExtractor extractor = new WordAlignmentExtractor();
+      visit(extractor);
+      return extractor.getFinalWordAlignments();
+    }
+
+    private String getTree() {
+      return visit(new TreeExtractor()).toString();
+    }
+    
+    private String getHypothesis() {
+      return getHypothesis(defaultSide);
+    }
+
+    /**
+     * For stack decoding we keep using the old string-based HypothesisExtractor.
+     * For Hiero, we use a faster, int-based hypothesis extraction that is also
+     * correct for Side.SOURCE cases.
+     */
+    private String getHypothesis(final Side side) {
+      return visit(new OutputStringExtractor(side.equals(Side.SOURCE))).toString();
+    }
+
+    private FeatureVector getFeatures() {
+      final FeatureVectorExtractor extractor = new FeatureVectorExtractor(featureFunctions, sentence);
+      visit(extractor);
+      return extractor.getFeatures();
+    }
+
+    private String getDerivation() {
+      return visit(new DerivationExtractor()).toString();
+    }
+
+    /**
+     * Helper function for navigating the hierarchical list of DerivationState objects. This
+     * function looks up the VirtualNode corresponding to the HGNode pointed to by the edge's
+     * {tailNodeIndex}th tail node.
+     * 
+     * @param edge the hyperedge whose tail node should be followed
+     * @param tailNodeIndex the index into the edge's list of tail nodes
+     * @return the DerivationState selected by this state's rank at that tail node
+     */
+    public DerivationState getChildDerivationState(HyperEdge edge, int tailNodeIndex) {
+      HGNode child = edge.getTailNodes().get(tailNodeIndex);
+      VirtualNode virtualChild = getVirtualNode(child);
+      return virtualChild.nbests.get(ranks[tailNodeIndex] - 1);
+    }
+
+  } // end of Class DerivationState
+
+  public static class DerivationStateComparator implements Comparator<DerivationState> {
+    // Sort best-first: a higher cost (score) sorts earlier
+    public int compare(DerivationState one, DerivationState another) {
+      return Float.compare(another.getCost(), one.getCost());
+    }
+  }
+
+  /**
+   * This interface provides a generic way to do things at each stage of a derivation. The
+   * DerivationState.visit() function visits every node in a derivation and calls the
+   * DerivationVisitor functions both before and after it visits each node. This provides a common
+   * way to do different things to the tree (e.g., extract its words, assemble a derivation, and so
+   * on) without having to rewrite the node-visiting code.
+   * 
+   * @author Matt Post <po...@cs.jhu.edu>
+   */
+  public interface DerivationVisitor {
+    /**
+     * Called before each node's children are visited.
+     *
+     * @param state the derivation state
+     * @param level the tree depth
+     * @param tailNodeIndex the tailNodeIndex corresponding to state
+     */
+    void before(DerivationState state, int level, int tailNodeIndex);
+
+    /**
+     * Called after a node's children have been visited.
+     * 
+     * @param state the derivation state
+     * @param level the tree depth
+     * @param tailNodeIndex the tailNodeIndex corresponding to state
+     */
+    void after(DerivationState state, int level, int tailNodeIndex);
+  }
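+
+  /*
+   * A minimal sketch of a custom visitor (hypothetical; not part of this file): it counts
+   * the rules used in a derivation, and is driven by DerivationState.visit():
+   *
+   *   class RuleCounter implements DerivationVisitor {
+   *     int count = 0;
+   *     public void before(DerivationState state, int level, int tailNodeIndex) {
+   *       if (state.edge.getRule() != null)
+   *         count++;
+   *     }
+   *     public void after(DerivationState state, int level, int tailNodeIndex) {}
+   *   }
+   *
+   * Passing an instance to DerivationState.visit() returns the visitor after the walk,
+   * mirroring how TreeExtractor and DerivationExtractor below are used.
+   */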
+  
+  /**
+   * Assembles a Penn treebank format tree for a given derivation.
+   */
+  public class TreeExtractor implements DerivationVisitor {
+
+    /* The tree being built. */
+    private Tree tree;
+
+    public TreeExtractor() {
+      tree = null;
+    }
+
+    /**
+     * Before visiting the children, find the fragment representation for the current rule,
+     * and merge it into the tree we're building.
+     */
+    @Override
+    public void before(DerivationState state, int indent, int tailNodeIndex) {
+      HyperEdge edge = state.edge;
+      Rule rule = edge.getRule();
+
+      // Skip the special top-level rule
+      if (rule == null) {
+        return;
+      }
+
+      String lhs = Vocabulary.word(rule.getLHS());
+      String unbracketedLHS = lhs.substring(1, lhs.length() - 1);
+
+      /* Find the fragment corresponding to this flattened rule in the fragment map; if it's not
+       * there, just pretend it's a depth-one rule.
+       */
+      Tree fragment = Tree.getFragmentFromYield(rule.getEnglishWords());
+      if (fragment == null) {
+        String subtree = String.format("(%s{%d-%d} %s)", unbracketedLHS, 
+            state.parentNode.i, state.parentNode.j, 
+            quoteTerminals(rule.getEnglishWords()));
+        fragment = Tree.fromString(subtree);
+      }
+      
+      merge(fragment);
+    }
+
+    /**
+     * Quotes just the terminals in the yield of a tree, represented as a string. This is to force
+     * compliance with the Tree class, which interprets all non-quoted strings as nonterminals. 
+     * 
+     * @param words a string of words representing a rule's yield
+     * @return the yield string with quotes around each terminal
+     */
+    private String quoteTerminals(String words) {
+      StringBuilder quotedWords = new StringBuilder();
+      for (String word: words.split("\\s+"))
+        if (word.startsWith("[") && word.endsWith("]"))
+          quotedWords.append(String.format("%s ", word));
+        else
+          quotedWords.append(String.format("\"%s\" ", word));
+
+      return quotedWords.substring(0, quotedWords.length() - 1);
+    }
+
+    @Override
+    public void after(DerivationState state, int indent, int tailNodeIndex) {
+      // do nothing
+    }
+
+    public String toString() {
+      return tree.unquotedString();
+    }
+
+    /**
+     * Either set the root of the tree or merge this tree by grafting it onto the first nonterminal
+     * in the yield of the parent tree.
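+     * For example (labels invented): if the tree so far is (S NP VP), grafting the fragment
+     * (NP (DT "the") (NN "dog")) expands its first frontier nonterminal, NP.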
+     * 
+     * @param fragment the tree fragment to graft onto the tree being built
+     */
+    private void merge(Tree fragment) {
+      if (tree == null) {
+        tree = fragment;
+      } else {
+        Tree parent = tree.getNonterminalYield().get(0);
+        parent.setLabel(Vocabulary.word(fragment.getLabel()));
+        parent.setChildren(fragment.getChildren());
+      }
+    }
+  }
+
+  /**
+   * Assembles an informative version of the derivation, printing each rule as it is
+   * encountered. Don't try to parse this output; if you need a machine-readable format,
+   * write something that emits JSON instead.
+   * 
+   * @author Matt Post <post@cs.jhu.edu>
+   */
+  public class DerivationExtractor implements DerivationVisitor {
+
+    StringBuffer sb;
+
+    public DerivationExtractor() {
+      sb = new StringBuffer();
+    }
+
+    @Override
+    public void before(DerivationState state, int indent, int tailNodeIndex) {
+
+      HyperEdge edge = state.edge;
+      Rule rule = edge.getRule();
+
+      if (rule != null) {
+
+        for (int i = 0; i < indent * 2; i++)
+          sb.append(" ");
+
+        final FeatureVectorExtractor extractor = new FeatureVectorExtractor(featureFunctions, sentence);
+        extractor.before(state, indent, tailNodeIndex);
+        final FeatureVector transitionFeatures = extractor.getFeatures();
+
+        // sb.append(rule).append(" ||| " + features + " ||| " +
+        // KBestExtractor.this.weights.innerProduct(features));
+        sb.append(String.format("%d-%d", state.parentNode.i, state.parentNode.j));
+        sb.append(" ||| " + Vocabulary.word(rule.getLHS()) + " -> "
+            + Vocabulary.getWords(rule.getFrench()) + " /// " + rule.getEnglishWords());
+        sb.append(" |||");
+        for (DPState dpState : state.parentNode.getDPStates()) {
+          sb.append(" " + dpState);
+        }
+        sb.append(" ||| " + transitionFeatures);
+        sb.append(" ||| " + weights.innerProduct(transitionFeatures));
+        if (rule.getAlignment() != null)
+          sb.append(" ||| " + Arrays.toString(rule.getAlignment()));
+        sb.append("\n");
+      }
+    }
+
+    public String toString() {
+      return sb.toString();
+    }
+
+    @Override
+    public void after(DerivationState state, int level, int tailNodeIndex) {}
+  }
+  
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/main/java/org/apache/joshua/decoder/hypergraph/OutputStringExtractor.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/hypergraph/OutputStringExtractor.java b/src/main/java/org/apache/joshua/decoder/hypergraph/OutputStringExtractor.java
new file mode 100644
index 0000000..acb2e17
--- /dev/null
+++ b/src/main/java/org/apache/joshua/decoder/hypergraph/OutputStringExtractor.java
@@ -0,0 +1,195 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package joshua.decoder.hypergraph;
+
+import static java.lang.Math.min;
+import static joshua.corpus.Vocabulary.getWords;
+import static joshua.corpus.Vocabulary.nt;
+
+import java.util.Stack;
+
+import joshua.decoder.ff.tm.Rule;
+import joshua.decoder.hypergraph.KBestExtractor.DerivationState;
+import joshua.decoder.hypergraph.KBestExtractor.DerivationVisitor;
+
+public class OutputStringExtractor implements WalkerFunction, DerivationVisitor {
+  
+  public OutputStringExtractor(final boolean extractSource) {
+    this.extractSource = extractSource;
+  }
+  
+  private Stack<OutputString> outputStringStack = new Stack<>();
+  private final boolean extractSource;
+
+  @Override
+  public void apply(HGNode node, int nodeIndex) {
+    apply(node.bestHyperedge.getRule(), nodeIndex);
+  }
+  
+  /**
+   * Visiting a node during k-best extraction is the same as apply() for Viterbi
+   * extraction, but uses the edge from the DerivationState.
+   */
+  @Override
+  public void before(final DerivationState state, int level, int tailNodeIndex) {
+    apply(state.edge.getRule(), tailNodeIndex);
+  }
+  
+  private void apply(Rule rule, int nodeIndex) {
+    if (rule != null) {
+      final int[] words = extractSource ? rule.getFrench() : rule.getEnglish();
+      merge(new OutputString(words, rule.getArity(), nodeIndex));
+    }
+  }
+  
+  /** Nothing to do */
+  @Override
+  public void after(DerivationState state, int level, int tailNodeIndex) {}
+  
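+  /**
+   * Returns the position of the nonTerminalIndex-th (1-indexed) nonterminal among the
+   * source-side word ids, where nonterminals appear in left-to-right source order.
+   */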
+  private static int getSourceNonTerminalPosition(final int[] words, int nonTerminalIndex) {
+    int nonTerminalsSeen = 0;
+    for (int i = 0; i < words.length; i++) {
+      if (nt(words[i])) {
+        nonTerminalsSeen++;
+        if (nonTerminalsSeen == nonTerminalIndex) {
+          return i;
+        }
+      }
+    }
+    throw new RuntimeException(
+        String.format(
+            "Can not find %s-th non terminal in source ids: %s. This should not happen!",
+            nonTerminalIndex,
+            arrayToString(words)));
+  }
+  
+  /**
+   * Returns the position of the nonTerminalIndex-th nonterminal among the target-side
+   * word ids. Nonterminals on the target side of a rule are indexed by their order on
+   * the source side and encoded as '-1', '-2', etc. Thus, for index==0 we return the
+   * position of '-1', and for index==1 the position of '-2'.
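+   *
+   * A hypothetical example: for target ids [-2, 17, -1], index 0 matches '-1' at position 2,
+   * and index 1 matches '-2' at position 0.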
+   */
+  private static int getTargetNonTerminalPosition(int[] words, int nonTerminalIndex) {
+    for (int pos = 0; pos < words.length; pos++) {
+      if (nt(words[pos]) && -(words[pos] + 1) == nonTerminalIndex) {
+        return pos;
+      }
+    }
+    throw new RuntimeException(
+        String.format(
+            "Can not find %s-th non terminal in target ids: %s. This should not happen!",
+            nonTerminalIndex,
+            arrayToString(words)));
+  }
+  
+  private static String arrayToString(int[] ids) {
+    StringBuilder sb = new StringBuilder();
+    for (int i : ids) {
+      sb.append(i + " ");
+    }
+    return sb.toString().trim();
+  }
+  
+  private void substituteNonTerminal(
+      final OutputString parentState,
+      final OutputString childState) {
+    int mergePosition;
+    if (extractSource) {
+      /* The correct nonterminal is given by the childState's tailNodePosition (zero-indexed,
+       * thus +1), capped by the parentState's remaining arity: if the parentState has already
+       * filled one of its two available slots, we must use the remaining one, even if the
+       * childState refers to the second slot.
+       */
+      mergePosition = getSourceNonTerminalPosition(
+          parentState.words, min(childState.tailNodePosition + 1, parentState.arity));
+    } else {
+      mergePosition = getTargetNonTerminalPosition(
+          parentState.words, childState.tailNodePosition);
+    }
+    parentState.substituteNonTerminalAtPosition(childState.words, mergePosition);
+  }
+
+  private void merge(final OutputString state) {
+    if (!outputStringStack.isEmpty()
+        && state.arity == 0) {
+      if (outputStringStack.peek().arity == 0) {
+        throw new IllegalStateException("Parent OutputString has arity of 0. Cannot merge.");
+      }
+      final OutputString parent = outputStringStack.pop();
+      substituteNonTerminal(parent, state);
+      merge(parent);
+    } else {
+      outputStringStack.add(state);
+    }
+  }
+  
+  @Override
+  public String toString() {
+    if (outputStringStack.isEmpty()) {
+      return "";
+    }
+    
+    if (outputStringStack.size() != 1) {
+      throw new IllegalStateException(
+          String.format(
+              "Stack should contain only a single (last) element, but was size %d", outputStringStack.size()));
+    }
+    return getWords(outputStringStack.pop().words);
+  }
+  
+  /** Stores the information needed to build an output string on the source or target side */
+  private class OutputString {
+    
+    private int[] words;
+    private int arity;
+    private final int tailNodePosition;
+    
+    private OutputString(int[] words, int arity, int tailNodePosition) {
+      this.words = words;
+      this.arity = arity;
+      this.tailNodePosition = tailNodePosition;
+    }
+    
+    /**
+     * Merges the child's words into this OutputString at the correct nonterminal
+     * position, determined by the child's tailNodePosition and this OutputString's
+     * remaining arity.
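+     *
+     * Worked sketch (word ids invented): if this.words is [17, -1, 23] with the nonterminal
+     * at position 1, substituting child words [40, 41] yields [17, 40, 41, 23] and
+     * decrements the arity.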
+     */
+    private void substituteNonTerminalAtPosition(final int[] words, final int position) {
+      assert(nt(this.words[position]));
+      final int[] result = new int[words.length + this.words.length - 1];
+      int resultIndex = 0;
+      for (int i = 0; i < position; i++) {
+        result[resultIndex++] = this.words[i];
+      }
+      for (int i = 0; i < words.length; i++) {
+        result[resultIndex++] = words[i];
+      }
+      for (int i = position + 1; i < this.words.length; i++) {
+        result[resultIndex++] = this.words[i];
+      }
+      // update words and reduce arity of this OutputString
+      this.words = result;
+      arity--;
+    }
+  }
+  
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/main/java/org/apache/joshua/decoder/hypergraph/StringToTreeConverter.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/hypergraph/StringToTreeConverter.java b/src/main/java/org/apache/joshua/decoder/hypergraph/StringToTreeConverter.java
new file mode 100644
index 0000000..2c85770
--- /dev/null
+++ b/src/main/java/org/apache/joshua/decoder/hypergraph/StringToTreeConverter.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package joshua.decoder.hypergraph;
+
+import java.util.Stack;
+
+// example: (ROOT ([S] ([X] ([X] scientists completed ([X] for ([X] ([X] chromosome) related to ([X]
+// early ([X] OOV))))) sequencing)))
+
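+/**
+ * Converts a string representation of a tree (see the example above) into a HyperGraph.
+ * NOTE: this converter is unfinished; convert() never assembles any nodes and currently
+ * returns null.
+ */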
+public class StringToTreeConverter {
+
+  private static final String beginSymbol = "(b";
+  private static final String nodeSymbol = "node";
+
+  HyperGraph convert(String inputStr) {
+
+    HyperGraph tree = null;
+
+    Stack<String> stack = new Stack<String>();
+    for (int i = 0; i < inputStr.length(); i++) {
+      char curChar = inputStr.charAt(i);
+
+      if (curChar == ')' && i > 0 && inputStr.charAt(i - 1) != ' ') { // end of a rule
+        StringBuffer ruleString = new StringBuffer();
+
+        while (!stack.empty()) {
+          String cur = stack.pop();
+          if (cur.equals(beginSymbol)) {// stop
+            // setup a node
+            // HGNode(int i, int j, int lhs, HashMap<Integer,DPState> dpStates, HyperEdge
+            // initHyperedge, double estTotalLogP)
+            // public HyperEdge(Rule rule, double bestDerivationLogP, Double transitionLogP,
+            // List<HGNode> antNodes, SourcePath srcPath)
+            // public BilingualRule(int lhs, int[] sourceRhs, int[] targetRhs, float[]
+            // featureScores, int arity, int owner, float latticeCost, int ruleID)
+
+
+            stack.add(nodeSymbol); // TODO: should be LHS+id
+            break;
+          } else if (cur.equals(nodeSymbol)) {
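+            // a child node marker: skip it when assembling the rule string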
+          } else {
+            ruleString.append(cur);
+          }
+        }
+      } else if (curChar == '(' && i + 1 < inputStr.length() && inputStr.charAt(i + 1) != ' ') { // beginning of a rule
+        stack.add(beginSymbol);
+      } else {
+        stack.add("" + curChar);
+      }
+    }
+
+    return tree;
+  }
+
+}