Posted to commits@joshua.apache.org by mj...@apache.org on 2016/06/23 18:46:07 UTC
[56/60] incubator-joshua git commit: Merge branch 'maven-multi-module' of https://github.com/logogin/incubator-joshua into maven-multi-module
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1bb8a203/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/packed/PackedGrammar.java
----------------------------------------------------------------------
diff --cc joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/packed/PackedGrammar.java
index 0000000,b48685d..37bffb7
mode 000000,100644..100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/packed/PackedGrammar.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/packed/PackedGrammar.java
@@@ -1,0 -1,1066 +1,1064 @@@
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+ package org.apache.joshua.decoder.ff.tm.packed;
+
+ /***
- * This package implements Joshua's packed grammar structure, which enables the efficient loading
++ * This package implements Joshua's packed grammar structure, which enables the efficient loading
+ * and accessing of grammars. It is described in the paper:
+ *
+ * @article{ganitkevitch2012joshua,
+ * Author = {Ganitkevitch, J. and Cao, Y. and Weese, J. and Post, M. and Callison-Burch, C.},
+ * Journal = {Proceedings of WMT12},
+ * Title = {Joshua 4.0: Packing, PRO, and paraphrases},
+ * Year = {2012}}
+ *
+ * The packed grammar works by compiling out the grammar tries into a compact format that is loaded
+ * and parsed directly from Java arrays. A fundamental problem is that Java arrays are indexed
+ * by ints and not longs, meaning the maximum size of the packed grammar is about 2 GB. This forces
+ * the use of packed grammar slices, which together constitute the grammar. The figure in the
+ * paper above shows what each slice looks like.
+ *
+ * The division across slices is done in a depth-first manner. Consider the entire grammar organized
+ * into a single source-side trie. The splits across tries are done by grouping the root-level
+ * outgoing trie arcs --- and the entire trie beneath them --- across slices.
+ *
+ * This presents a problem: if the subtree rooted beneath a single top-level arc is too big for a
+ * slice, the grammar can't be packed. This happens with very large Hiero grammars, for example,
+ * where there are a *lot* of rules that start with [X].
+ *
+ * A solution being worked on is to split that symbol and pack them into separate grammars with a
+ * shared vocabulary, and then rely on Joshua's ability to query multiple grammars for rules to
+ * solve this problem. This is not currently implemented but could be done directly in the
+ * Grammar Packer.
+ *
+ * *UPDATE 10/2015*
+ * The introduction of a SliceAggregatingTrie together with sorting the grammar by the full source string
+ * (not just by the first source word) allows distributing rules with the same first source word
+ * across multiple slices.
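+ *
+ * The on-disk layout looks roughly like this (a sketch derived from the loading
+ * code below; one group of files per slice):
+ *
+ *   vocabulary                 shared word-to-id mapping
+ *   config                     packer version, max source phrase length
+ *   encoding                   feature encoder/quantizer configuration
+ *   slice_00000.source         source-side trie (int array, fully loaded)
+ *   slice_00000.target         target sides (memory-mapped)
+ *   slice_00000.target.lookup  table used to recover target-side lengths
+ *   slice_00000.features       encoded feature blocks (memory-mapped)
+ *   slice_00000.alignments     word alignments (memory-mapped, optional)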
+ * @author fhieber
+ */
+
+ import static java.util.Collections.sort;
+
+ import java.io.File;
+ import java.io.FileInputStream;
+ import java.io.IOException;
+ import java.io.InputStream;
+ import java.nio.BufferUnderflowException;
+ import java.nio.ByteBuffer;
+ import java.nio.IntBuffer;
+ import java.nio.MappedByteBuffer;
+ import java.nio.channels.FileChannel;
+ import java.nio.channels.FileChannel.MapMode;
+ import java.nio.file.Files;
+ import java.nio.file.Paths;
+ import java.security.DigestInputStream;
+ import java.security.MessageDigest;
+ import java.security.NoSuchAlgorithmException;
+ import java.util.ArrayList;
+ import java.util.Arrays;
+ import java.util.Comparator;
+ import java.util.HashMap;
+ import java.util.Iterator;
+ import java.util.List;
+ import java.util.Map;
+
+ import org.apache.joshua.corpus.Vocabulary;
+ import org.apache.joshua.decoder.JoshuaConfiguration;
+ import org.apache.joshua.decoder.ff.FeatureFunction;
+ import org.apache.joshua.decoder.ff.FeatureVector;
+ import org.apache.joshua.decoder.ff.tm.AbstractGrammar;
+ import org.apache.joshua.decoder.ff.tm.BasicRuleCollection;
++import org.apache.joshua.decoder.ff.tm.OwnerId;
+ import org.apache.joshua.decoder.ff.tm.Rule;
+ import org.apache.joshua.decoder.ff.tm.RuleCollection;
+ import org.apache.joshua.decoder.ff.tm.Trie;
+ import org.apache.joshua.decoder.ff.tm.hash_based.ExtensionIterator;
+ import org.apache.joshua.util.FormatUtils;
+ import org.apache.joshua.util.encoding.EncoderConfiguration;
+ import org.apache.joshua.util.encoding.FloatEncoder;
+ import org.apache.joshua.util.io.LineReader;
+
+ import com.google.common.base.Supplier;
+ import com.google.common.base.Suppliers;
+ import com.google.common.cache.Cache;
+ import com.google.common.cache.CacheBuilder;
++
+ import org.slf4j.Logger;
+ import org.slf4j.LoggerFactory;
+
+ public class PackedGrammar extends AbstractGrammar {
+
+ private static final Logger LOG = LoggerFactory.getLogger(PackedGrammar.class);
+ public static final String VOCABULARY_FILENAME = "vocabulary";
+
+ private EncoderConfiguration encoding;
+ private PackedRoot root;
+ private ArrayList<PackedSlice> slices;
+
+ private final File vocabFile; // store path to vocabulary file
+
+ // The version number of the earliest supported grammar packer
+ public static final int SUPPORTED_VERSION = 3;
+
+ // A rule cache for commonly used tries to avoid excess object allocations
+ // Testing shows there's up to ~95% hit rate when cache size is 5000 Trie nodes.
+ private final Cache<Trie, List<Rule>> cached_rules;
+
+ private String grammarDir;
+
+ public PackedGrammar(String grammar_dir, int span_limit, String owner, String type,
+ JoshuaConfiguration joshuaConfiguration) throws IOException {
- super(joshuaConfiguration);
++ super(owner, joshuaConfiguration, span_limit);
+
+ this.grammarDir = grammar_dir;
- this.spanLimit = span_limit;
+
+ // Read the vocabulary.
+ vocabFile = new File(grammar_dir + File.separator + VOCABULARY_FILENAME);
+ LOG.info("Reading vocabulary: {}", vocabFile);
+ if (!Vocabulary.read(vocabFile)) {
+ throw new RuntimeException("mismatches or collisions while reading on-disk vocabulary");
+ }
+
+ // Read the config
+ String configFile = grammar_dir + File.separator + "config";
+ if (new File(configFile).exists()) {
+ LOG.info("Reading packed config: {}", configFile);
+ readConfig(configFile);
+ }
+
+ // Read the quantizer setup.
+ LOG.info("Reading encoder configuration: {}{}encoding", grammar_dir, File.separator);
+ encoding = new EncoderConfiguration();
+ encoding.load(grammar_dir + File.separator + "encoding");
+
- // Set phrase owner.
- this.owner = Vocabulary.id(owner);
-
+ final List<String> listing = Arrays.asList(new File(grammar_dir).list());
+ sort(listing); // File.list() has arbitrary sort order
+ slices = new ArrayList<PackedSlice>();
+ for (String prefix : listing) {
+ if (prefix.startsWith("slice_") && prefix.endsWith(".source"))
+ slices.add(new PackedSlice(grammar_dir + File.separator + prefix.substring(0, 11)));
+ }
+
+ long count = 0;
+ for (PackedSlice s : slices)
+ count += s.estimated.length;
+ root = new PackedRoot(slices);
+ cached_rules = CacheBuilder.newBuilder().maximumSize(joshuaConfiguration.cachedRuleSize).build();
+
+ LOG.info("Loaded {} rules", count);
+ }
+
+ @Override
+ public Trie getTrieRoot() {
+ return root;
+ }
+
+ @Override
+ public boolean hasRuleForSpan(int startIndex, int endIndex, int pathLength) {
+ return (spanLimit == -1 || pathLength <= spanLimit);
+ }
+
+ @Override
+ public int getNumRules() {
+ int num_rules = 0;
+ for (PackedSlice ps : slices)
+ num_rules += ps.featureSize;
+ return num_rules;
+ }
+
+ @Override
+ public int getNumDenseFeatures() {
+ return encoding.getNumDenseFeatures();
+ }
+
+ /**
+ * Computes the MD5 checksum of the vocabulary file.
+ * Can be used for comparing vocabularies across multiple packedGrammars.
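+ * For example (sketch, for two loaded PackedGrammar instances gA and gB):
+ *   if (!gA.computeVocabularyChecksum().equals(gB.computeVocabularyChecksum()))
+ *     throw new RuntimeException("packed grammars use different vocabularies");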
+ * @return the computed checksum
+ */
+ public String computeVocabularyChecksum() {
+ MessageDigest md;
+ try {
+ md = MessageDigest.getInstance("MD5");
+ } catch (NoSuchAlgorithmException e) {
+ throw new RuntimeException("Unknown checksum algorithm");
+ }
+ byte[] buffer = new byte[1024];
+ try (final InputStream is = Files.newInputStream(Paths.get(vocabFile.toString()));
+ DigestInputStream dis = new DigestInputStream(is, md)) {
+ while (dis.read(buffer) != -1) {}
+ } catch (IOException e) {
+ throw new RuntimeException("Can not find vocabulary file. This should not happen.");
+ }
+ byte[] digest = md.digest();
+ // convert the byte to hex format
+ StringBuffer sb = new StringBuffer("");
+ for (int i = 0; i < digest.length; i++) {
+ sb.append(Integer.toString((digest[i] & 0xff) + 0x100, 16).substring(1));
+ }
+ return sb.toString();
+ }
+
+ /**
+ * PackedRoot represents the root of the packed grammar trie.
+ * Tries for different source-side firstwords are organized in
+ * packedSlices on disk. A packedSlice can contain multiple trie
+ * roots (i.e. multiple source-side firstwords).
+ * The PackedRoot builds a lookup table, mapping from
+ * source-side firstwords to the addresses in the packedSlices
+ * that represent the subtrie for a particular firstword.
+ * If the GrammarPacker has to distribute rules for a
+ * source-side firstword across multiple slices, a
+ * SliceAggregatingTrie node is created that aggregates those
+ * tries and hides this additional complexity from the grammar
+ * interface. This allows packing grammars in which the rules for
+ * a single source-side firstword would exceed Java's maximum
+ * array size (~2 GB).
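+ *
+ * A minimal lookup sketch (assuming a loaded PackedGrammar; "house" is an
+ * arbitrary example word):
+ *
+ *   Trie t = packedGrammar.getTrieRoot().match(Vocabulary.id("house"));
+ *   // t is the slice-local PackedTrie, or a SliceAggregatingTrie when the
+ *   // rules starting with "house" span several slices.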
+ */
+ public final class PackedRoot implements Trie {
+
+ private final HashMap<Integer, Trie> lookup;
+
+ public PackedRoot(final List<PackedSlice> slices) {
+ final Map<Integer, List<Trie>> childTries = collectChildTries(slices);
+ lookup = buildLookupTable(childTries);
+ }
+
+ /**
+ * Determines whether trie nodes for source first-words are spread over
+ * multiple packedSlices by counting their occurrences.
+ * @param slices the list of packed slices to scan
+ * @return A mapping from first word ids to a list of trie nodes.
+ */
+ private Map<Integer, List<Trie>> collectChildTries(final List<PackedSlice> slices) {
+ final Map<Integer, List<Trie>> childTries = new HashMap<>();
+ for (PackedSlice packedSlice : slices) {
+
+ // number of tries stored in this packedSlice
+ final int num_children = packedSlice.source[0];
+ for (int i = 0; i < num_children; i++) {
+ final int id = packedSlice.source[2 * i + 1];
+
+ /* aggregate tries with same root id
+ * obtain a Trie node, already at the correct address in the packedSlice.
+ * In other words, the lookup index already points to the correct trie node in the packedSlice.
+ * packedRoot.match() thus can directly return the result of lookup.get(id);
+ */
+ if (!childTries.containsKey(id)) {
+ childTries.put(id, new ArrayList<Trie>(1));
+ }
+ final Trie trie = packedSlice.root().match(id);
+ childTries.get(id).add(trie);
+ }
+ }
+ return childTries;
+ }
+
+ /**
+ * Build a lookup table for children tries.
+ * If the list contains only a single child node, a regular trie node
+ * is inserted into the table; otherwise a SliceAggregatingTrie node is
+ * created that hides this partitioning into multiple packedSlices
+ * upstream.
+ */
+ private HashMap<Integer,Trie> buildLookupTable(final Map<Integer, List<Trie>> childTries) {
+ HashMap<Integer,Trie> lookup = new HashMap<>(childTries.size());
+ for (int id : childTries.keySet()) {
+ final List<Trie> tries = childTries.get(id);
+ if (tries.size() == 1) {
+ lookup.put(id, tries.get(0));
+ } else {
+ lookup.put(id, new SliceAggregatingTrie(tries));
+ }
+ }
+ return lookup;
+ }
+
+ @Override
+ public Trie match(int word_id) {
+ return lookup.get(word_id);
+ }
+
+ @Override
+ public boolean hasExtensions() {
+ return !lookup.isEmpty();
+ }
+
+ @Override
+ public HashMap<Integer, ? extends Trie> getChildren() {
+ return lookup;
+ }
+
+ @Override
+ public ArrayList<? extends Trie> getExtensions() {
+ return new ArrayList<>(lookup.values());
+ }
+
+ @Override
+ public boolean hasRules() {
+ return false;
+ }
+
+ @Override
+ public RuleCollection getRuleCollection() {
+ return new BasicRuleCollection(0, new int[0]);
+ }
+
+ @Override
+ public Iterator<Integer> getTerminalExtensionIterator() {
+ return new ExtensionIterator(lookup, true);
+ }
+
+ @Override
+ public Iterator<Integer> getNonterminalExtensionIterator() {
+ return new ExtensionIterator(lookup, false);
+ }
+ }
+
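+ /**
+ * A PackedSlice is one slice of the packed grammar. The slice's source-side
+ * trie is fully loaded into an int array, while the target, feature, and
+ * (optional) alignment files that share the slice's file prefix are
+ * memory-mapped and read on demand.
+ */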
+ public final class PackedSlice {
+ private final String name;
+
+ private final int[] source;
+ private final IntBuffer target;
+ private final ByteBuffer features;
+ private final ByteBuffer alignments;
+
+ private final int[] targetLookup;
+ private int featureSize;
+ private float[] estimated;
+ private float[] precomputable;
+
+ private final static int BUFFER_HEADER_POSITION = 8;
+
+ /**
+ * Provides a cache of packedTrie nodes to be used in getTrie.
+ */
+ private HashMap<Integer, PackedTrie> tries;
+
+ public PackedSlice(String prefix) throws IOException {
+ name = prefix;
+
+ File source_file = new File(prefix + ".source");
+ File target_file = new File(prefix + ".target");
+ File target_lookup_file = new File(prefix + ".target.lookup");
+ File feature_file = new File(prefix + ".features");
+ File alignment_file = new File(prefix + ".alignments");
+
+ source = fullyLoadFileToArray(source_file);
+ // The first int stores the size of this file; skip it and load from index 1 onward
+ targetLookup = fullyLoadFileToArray(target_lookup_file, 1);
+
+ target = associateMemoryMappedFile(target_file).asIntBuffer();
+ features = associateMemoryMappedFile(feature_file);
+ initializeFeatureStructures();
+
+ if (alignment_file.exists()) {
+ alignments = associateMemoryMappedFile(alignment_file);
+ } else {
+ alignments = null;
+ }
+
+ tries = new HashMap<Integer, PackedTrie>();
+ }
+
+ /**
+ * Helper function to help create all the structures which describe features
+ * in the Slice. Only called during object construction.
+ */
+ private void initializeFeatureStructures() {
+ int num_blocks = features.getInt(0);
+ estimated = new float[num_blocks];
+ precomputable = new float[num_blocks];
+ Arrays.fill(estimated, Float.NEGATIVE_INFINITY);
+ Arrays.fill(precomputable, Float.NEGATIVE_INFINITY);
+ featureSize = features.getInt(4);
+ }
+
+ private int getIntFromByteBuffer(int position, ByteBuffer buffer) {
+ return buffer.getInt(BUFFER_HEADER_POSITION + (4 * position));
+ }
+
+ private int[] fullyLoadFileToArray(File file) throws IOException {
+ return fullyLoadFileToArray(file, 0);
+ }
+
+ /**
+ * This function will use a bulk loading method to fully populate a target
+ * array from file.
+ *
+ * @param file
+ * File that will be read from disk.
+ * @param startIndex
+ * an offset into the read file.
+ * @return an int array containing the file's contents, read as 4-byte ints
+ * starting at the given offset.
+ * @throws IOException
+ */
+ private int[] fullyLoadFileToArray(File file, int startIndex) throws IOException {
+ IntBuffer buffer = associateMemoryMappedFile(file).asIntBuffer();
+ int size = (int) (file.length() - (4 * startIndex))/4;
+ int[] result = new int[size];
+ buffer.position(startIndex);
+ buffer.get(result, 0, size);
+ return result;
+ }
+
+ private ByteBuffer associateMemoryMappedFile(File file) throws IOException {
+ try(FileInputStream fileInputStream = new FileInputStream(file)) {
+ FileChannel fileChannel = fileInputStream.getChannel();
+ int size = (int) fileChannel.size();
+ MappedByteBuffer result = fileChannel.map(MapMode.READ_ONLY, 0, size);
+ return result;
+ }
+ }
+
+ private final int[] getTarget(int pointer) {
+ // Figure out level.
+ int tgt_length = 1;
+ while (tgt_length < (targetLookup.length + 1) && targetLookup[tgt_length] <= pointer)
+ tgt_length++;
+ int[] tgt = new int[tgt_length];
+ int index = 0;
+ int parent;
+ do {
+ parent = target.get(pointer);
+ if (parent != -1)
+ tgt[index++] = target.get(pointer + 1);
+ pointer = parent;
+ } while (pointer != -1);
+ return tgt;
+ }
+
+ private synchronized PackedTrie getTrie(final int node_address) {
+ PackedTrie t = tries.get(node_address);
+ if (t == null) {
+ t = new PackedTrie(node_address);
+ tries.put(node_address, t);
+ }
+ return t;
+ }
+
+ private synchronized PackedTrie getTrie(int node_address, int[] parent_src, int parent_arity,
+ int symbol) {
+ PackedTrie t = tries.get(node_address);
+ if (t == null) {
+ t = new PackedTrie(node_address, parent_src, parent_arity, symbol);
+ tries.put(node_address, t);
+ }
+ return t;
+ }
+
+ /**
+ * Returns the FeatureVector associated with a rule (represented as a block ID).
+ * These features are in the form "feature1=value feature2=value...". Labels that
+ * parse as integers are treated as dense-feature indices (their values are negated
+ * on load); all other labels are looked up as named sparse features.
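+ *
+ * Block layout, as read below (sketch):
+ *   [numFeatures : id] ( [featureId : id] [value : encoder-specific] )*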
+ * @param block_id
+ * @return feature vector
+ */
+
+ private final FeatureVector loadFeatureVector(int block_id) {
+ int featurePosition = getIntFromByteBuffer(block_id, features);
+ final int numFeatures = encoding.readId(features, featurePosition);
+
+ featurePosition += EncoderConfiguration.ID_SIZE;
+ final FeatureVector featureVector = new FeatureVector();
+ FloatEncoder encoder;
+ String featureName;
+
+ for (int i = 0; i < numFeatures; i++) {
+ final int innerId = encoding.readId(features, featurePosition);
+ final int outerId = encoding.outerId(innerId);
+ encoder = encoding.encoder(innerId);
+ // TODO (fhieber): why on earth are dense feature ids (ints) encoded in the vocabulary?
+ featureName = Vocabulary.word(outerId);
+ final float value = encoder.read(features, featurePosition);
+ try {
+ int index = Integer.parseInt(featureName);
+ featureVector.increment(index, -value);
+ } catch (NumberFormatException e) {
+ featureVector.increment(featureName, value);
+ }
+ featurePosition += EncoderConfiguration.ID_SIZE + encoder.size();
+ }
+
+ return featureVector;
+ }
+
+ /**
+ * We need to synchronize this method because many PackedRule/PhrasePair objects
+ * share a single PackedSlice. Without synchronization, concurrent first calls to
+ * getAlignments on different PackedRule objects could move the shared buffer's
+ * position between the positioning and the read.
+ */
+ private synchronized final byte[] getAlignmentArray(int block_id) {
+ if (alignments == null)
+ throw new RuntimeException("No alignments available.");
+ int alignment_position = getIntFromByteBuffer(block_id, alignments);
+ int num_points = (int) alignments.get(alignment_position);
+ byte[] alignment = new byte[num_points * 2];
+
+ alignments.position(alignment_position + 1);
+ try {
+ alignments.get(alignment, 0, num_points * 2);
+ } catch (BufferUnderflowException bue) {
+ LOG.warn("Had an exception when accessing alignment mapped byte buffer");
+ LOG.warn("Attempting to access alignments at position: {}", alignment_position + 1);
+ LOG.warn("And to read this many bytes: {}", num_points * 2);
+ LOG.warn("Buffer capacity is : {}", alignments.capacity());
+ LOG.warn("Buffer position is : {}", alignments.position());
+ LOG.warn("Buffer limit is : {}", alignments.limit());
+ throw bue;
+ }
+ return alignment;
+ }
+
+ private final PackedTrie root() {
+ return getTrie(0);
+ }
+
+ public String toString() {
+ return name;
+ }
+
+ /**
+ * A trie node within the grammar slice. Identified by its position within the source array,
+ * and, as a supplement, the source string leading from the trie root to the node.
+ *
+ * @author jg
+ *
+ */
+ public class PackedTrie implements Trie, RuleCollection {
+
+ private final int position;
+
+ private boolean sorted = false;
+
+ private int[] src;
+ private int arity;
+
+ private PackedTrie(int position) {
+ this.position = position;
+ src = new int[0];
+ arity = 0;
+ }
+
+ private PackedTrie(int position, int[] parent_src, int parent_arity, int symbol) {
+ this.position = position;
+ src = new int[parent_src.length + 1];
+ System.arraycopy(parent_src, 0, src, 0, parent_src.length);
+ src[src.length - 1] = symbol;
+ arity = parent_arity;
+ if (FormatUtils.isNonterminal(symbol))
+ arity++;
+ }
+
+ @Override
+ public final Trie match(int token_id) {
+ int num_children = source[position];
+ if (num_children == 0)
+ return null;
+ if (num_children == 1 && token_id == source[position + 1])
+ return getTrie(source[position + 2], src, arity, token_id);
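+ // Binary-search the child list: children are stored as (token, address)
+ // pairs starting at position + 1, with the child count at source[position].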
+ int top = 0;
+ int bottom = num_children - 1;
+ while (true) {
+ int candidate = (top + bottom) / 2;
+ int candidate_position = position + 1 + 2 * candidate;
+ int read_token = source[candidate_position];
+ if (read_token == token_id) {
+ return getTrie(source[candidate_position + 1], src, arity, token_id);
+ } else if (top == bottom) {
+ return null;
+ } else if (read_token > token_id) {
+ top = candidate + 1;
+ } else {
+ bottom = candidate - 1;
+ }
+ if (bottom < top)
+ return null;
+ }
+ }
+
+ @Override
+ public HashMap<Integer, ? extends Trie> getChildren() {
+ HashMap<Integer, Trie> children = new HashMap<Integer, Trie>();
+ int num_children = source[position];
+ for (int i = 0; i < num_children; i++) {
+ int symbol = source[position + 1 + 2 * i];
+ int address = source[position + 2 + 2 * i];
+ children.put(symbol, getTrie(address, src, arity, symbol));
+ }
+ return children;
+ }
+
+ @Override
+ public boolean hasExtensions() {
+ return (source[position] != 0);
+ }
+
+ @Override
+ public ArrayList<? extends Trie> getExtensions() {
+ int num_children = source[position];
+ ArrayList<PackedTrie> tries = new ArrayList<PackedTrie>(num_children);
+
+ for (int i = 0; i < num_children; i++) {
+ int symbol = source[position + 1 + 2 * i];
+ int address = source[position + 2 + 2 * i];
+ tries.add(getTrie(address, src, arity, symbol));
+ }
+
+ return tries;
+ }
+
+ @Override
+ public boolean hasRules() {
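+ // The rule count is stored immediately after the child (token, address) pairs.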
+ int num_children = source[position];
+ return (source[position + 1 + 2 * num_children] != 0);
+ }
+
+ @Override
+ public RuleCollection getRuleCollection() {
+ return this;
+ }
+
+ @Override
+ public List<Rule> getRules() {
+ List<Rule> rules = cached_rules.getIfPresent(this);
+ if (rules != null) {
+ return rules;
+ }
+
+ int num_children = source[position];
+ int rule_position = position + 2 * (num_children + 1);
+ int num_rules = source[rule_position - 1];
+
+ rules = new ArrayList<Rule>(num_rules);
+ for (int i = 0; i < num_rules; i++) {
+ rules.add(new PackedRule(rule_position + 3 * i));
+ }
+
+ cached_rules.put(this, rules);
+ return rules;
+ }
+
+ /**
+ * Returns whether this node's rules have been sorted; the flag is set by
+ * sortRules() once the rule costs have been estimated.
+ */
+ @Override
+ public boolean isSorted() {
+ return sorted;
+ }
+
+ private synchronized void sortRules(List<FeatureFunction> models) {
+ int num_children = source[position];
+ int rule_position = position + 2 * (num_children + 1);
+ int num_rules = source[rule_position - 1];
+ if (num_rules == 0) {
+ this.sorted = true;
+ return;
+ }
+ Integer[] rules = new Integer[num_rules];
+
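+ // Each rule is a record of three consecutive ints in the source array:
+ // (lhs, target_address, feature_block_id). rules[i] stores the address of
+ // the block id slot so the comparator below can look up estimated costs.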
+ int target_address;
+ int block_id;
+ for (int i = 0; i < num_rules; ++i) {
+ target_address = source[rule_position + 1 + 3 * i];
+ rules[i] = rule_position + 2 + 3 * i;
+ block_id = source[rules[i]];
+
+ Rule rule = new Rule(source[rule_position + 3 * i], src,
+ getTarget(target_address), loadFeatureVector(block_id), arity, owner);
+ estimated[block_id] = rule.estimateRuleCost(models);
+ precomputable[block_id] = rule.getPrecomputableCost();
+ }
+
+ Arrays.sort(rules, new Comparator<Integer>() {
+ public int compare(Integer a, Integer b) {
+ float a_cost = estimated[source[a]];
+ float b_cost = estimated[source[b]];
+ if (a_cost == b_cost)
+ return 0;
+ return (a_cost > b_cost ? -1 : 1);
+ }
+ });
+
+ int[] sorted = new int[3 * num_rules];
+ int j = 0;
+ for (int i = 0; i < rules.length; i++) {
+ int address = rules[i];
+ sorted[j++] = source[address - 2];
+ sorted[j++] = source[address - 1];
+ sorted[j++] = source[address];
+ }
+ for (int i = 0; i < sorted.length; i++)
+ source[rule_position + i] = sorted[i];
+
+ // Replace rules in cache with their sorted values on next getRules()
+ cached_rules.invalidate(this);
+ this.sorted = true;
+ }
+
+ @Override
+ public List<Rule> getSortedRules(List<FeatureFunction> featureFunctions) {
+ if (!isSorted())
+ sortRules(featureFunctions);
+ return getRules();
+ }
+
+ @Override
+ public int[] getSourceSide() {
+ return src;
+ }
+
+ @Override
+ public int getArity() {
+ return arity;
+ }
+
+ @Override
+ public Iterator<Integer> getTerminalExtensionIterator() {
+ return new PackedChildIterator(position, true);
+ }
+
+ @Override
+ public Iterator<Integer> getNonterminalExtensionIterator() {
+ return new PackedChildIterator(position, false);
+ }
+
+ public final class PackedChildIterator implements Iterator<Integer> {
+
+ private int current;
+ private boolean terminal;
+ private boolean done;
+ private int last;
+
+ PackedChildIterator(int position, boolean terminal) {
+ this.terminal = terminal;
+ int num_children = source[position];
+ done = (num_children == 0);
+ if (!done) {
+ current = (terminal ? position + 1 : position - 1 + 2 * num_children);
+ last = (terminal ? position - 1 + 2 * num_children : position + 1);
+ }
+ }
+
+ @Override
+ public boolean hasNext() {
+ if (done)
+ return false;
+ int next = (terminal ? current + 2 : current - 2);
+ if (next == last)
+ return false;
+ return (terminal ? source[next] > 0 : source[next] < 0);
+ }
+
+ @Override
+ public Integer next() {
+ if (done)
+ throw new RuntimeException("No more symbols!");
+ int symbol = source[current];
+ if (current == last)
+ done = true;
+ if (!done) {
+ current = (terminal ? current + 2 : current - 2);
+ done = (terminal ? source[current] < 0 : source[current] > 0);
+ }
+ return symbol;
+ }
+
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+ }
+
+ /**
+ * A packed phrase pair represents a phrase pair as a rule. Phrase pairs are packed with the
+ * grammar-packer.pl script, which adds a nonterminal [X] to the left-hand side of every
+ * phrase pair (and converts the Moses features) before packing. At decoding time we then
+ * prepend a nonterminal to the source and target sides to treat the phrase pairs like
+ * left-branching rules, which is how Joshua deals with phrase decoding.
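+ *
+ * For example (a sketch with a made-up phrase pair): "maison ||| house" is exposed
+ * as [X] ||| [X,1] maison ||| [X,1] house, and each alignment point is shifted one
+ * position to account for the prepended nonterminal.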
+ *
+ * @author Matt Post post@cs.jhu.edu
+ *
+ */
+ public final class PackedPhrasePair extends PackedRule {
+
+ private final Supplier<int[]> englishSupplier;
+ private final Supplier<byte[]> alignmentSupplier;
+
+ public PackedPhrasePair(int address) {
+ super(address);
+ englishSupplier = initializeEnglishSupplier();
+ alignmentSupplier = initializeAlignmentSupplier();
+ }
+
+ @Override
+ public int getArity() {
+ return PackedTrie.this.getArity() + 1;
+ }
+
+ /**
+ * Initialize a number of suppliers which get evaluated when their respective getters
+ * are called.
+ * The inner lambdas are guaranteed to run only once, so the underlying
+ * structures are accessed in a thread-safe way.
+ * Guava's implementation ensures at most one volatile read per get() call,
+ * keeping this both thread-safe and fast.
+ */
+
+ private Supplier<int[]> initializeEnglishSupplier(){
+ Supplier<int[]> result = Suppliers.memoize(() ->{
+ int[] phrase = getTarget(source[address + 1]);
+ int[] tgt = new int[phrase.length + 1];
+ tgt[0] = -1;
+ for (int i = 0; i < phrase.length; i++)
+ tgt[i+1] = phrase[i];
+ return tgt;
+ });
+ return result;
+ }
+
+ private Supplier<byte[]> initializeAlignmentSupplier(){
+ Supplier<byte[]> result = Suppliers.memoize(() ->{
+ byte[] raw_alignment = getAlignmentArray(source[address + 2]);
+ byte[] points = new byte[raw_alignment.length + 2];
+ points[0] = points[1] = 0;
+ for (int i = 0; i < raw_alignment.length; i++)
+ points[i + 2] = (byte) (raw_alignment[i] + 1);
+ return points;
+ });
+ return result;
+ }
+
+ /**
+ * Take the English phrase of the underlying rule and prepend an [X].
+ *
+ * @return the augmented phrase
+ */
+ @Override
+ public int[] getEnglish() {
+ return this.englishSupplier.get();
+ }
+
+ /**
+ * Take the French phrase of the underlying rule and prepend an [X].
+ *
+ * @return the augmented French phrase
+ */
+ @Override
+ public int[] getFrench() {
+ int phrase[] = new int[src.length + 1];
+ int ntid = Vocabulary.id(PackedGrammar.this.joshuaConfiguration.default_non_terminal);
+ phrase[0] = ntid;
+ System.arraycopy(src, 0, phrase, 1, src.length);
+ return phrase;
+ }
+
+ /**
+ * Similarly the alignment array needs to be shifted over by one.
+ *
+ * @return the byte[] alignment
+ */
+ @Override
+ public byte[] getAlignment() {
+ // if no alignments in grammar do not fail
+ if (alignments == null) {
+ return null;
+ }
+
+ return this.alignmentSupplier.get();
+ }
+ }
+
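+ /**
+ * A rule backed directly by the packed arrays: the target side, feature
+ * vector, and alignments are materialized lazily through memoized suppliers.
+ */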
+ public class PackedRule extends Rule {
+ protected final int address;
+ private final Supplier<int[]> englishSupplier;
+ private final Supplier<FeatureVector> featureVectorSupplier;
+ private final Supplier<byte[]> alignmentsSupplier;
+
+ public PackedRule(int address) {
+ this.address = address;
+ this.englishSupplier = initializeEnglishSupplier();
+ this.featureVectorSupplier = initializeFeatureVectorSupplier();
+ this.alignmentsSupplier = initializeAlignmentsSupplier();
+ }
+
+ private Supplier<int[]> initializeEnglishSupplier(){
+ Supplier<int[]> result = Suppliers.memoize(() ->{
+ return getTarget(source[address + 1]);
+ });
+ return result;
+ }
+
+ private Supplier<FeatureVector> initializeFeatureVectorSupplier(){
+ Supplier<FeatureVector> result = Suppliers.memoize(() ->{
+ return loadFeatureVector(source[address + 2]);
+ });
+ return result;
+ }
+
+ private Supplier<byte[]> initializeAlignmentsSupplier(){
+ Supplier<byte[]> result = Suppliers.memoize(()->{
+ // if no alignments in grammar do not fail
+ if (alignments == null){
+ return null;
+ }
+ return getAlignmentArray(source[address + 2]);
+ });
+ return result;
+ }
+
+ @Override
+ public void setArity(int arity) {
+ }
+
+ @Override
+ public int getArity() {
+ return PackedTrie.this.getArity();
+ }
+
+ @Override
- public void setOwner(int ow) {
++ public void setOwner(OwnerId owner) {
+ }
+
+ @Override
- public int getOwner() {
++ public OwnerId getOwner() {
+ return owner;
+ }
+
+ @Override
+ public void setLHS(int lhs) {
+ }
+
+ @Override
+ public int getLHS() {
+ return source[address];
+ }
+
+ @Override
+ public void setEnglish(int[] eng) {
+ }
+
+ @Override
+ public int[] getEnglish() {
+ return this.englishSupplier.get();
+ }
+
+ @Override
+ public void setFrench(int[] french) {
+ }
+
+ @Override
+ public int[] getFrench() {
+ return src;
+ }
+
+ @Override
+ public FeatureVector getFeatureVector() {
+ return this.featureVectorSupplier.get();
+ }
+
+ @Override
+ public byte[] getAlignment() {
+ return this.alignmentsSupplier.get();
+ }
+
+ @Override
+ public String getAlignmentString() {
+ throw new RuntimeException("AlignmentString not implemented for PackedRule!");
+ }
+
+ @Override
+ public float getEstimatedCost() {
+ return estimated[source[address + 2]];
+ }
+
+ // @Override
+ // public void setPrecomputableCost(float cost) {
+ // precomputable[source[address + 2]] = cost;
+ // }
+
+ @Override
+ public float getPrecomputableCost() {
+ return precomputable[source[address + 2]];
+ }
+
+ @Override
+ public float estimateRuleCost(List<FeatureFunction> models) {
+ return estimated[source[address + 2]];
+ }
+
+ @Override
+ public String toString() {
+ StringBuffer sb = new StringBuffer();
+ sb.append(Vocabulary.word(this.getLHS()));
+ sb.append(" ||| ");
+ sb.append(getFrenchWords());
+ sb.append(" ||| ");
+ sb.append(getEnglishWords());
+ sb.append(" |||");
+ sb.append(" " + getFeatureVector());
+ sb.append(String.format(" ||| %.3f", getEstimatedCost()));
+ return sb.toString();
+ }
+ }
+ }
+ }
+
+ @Override
+ public void addOOVRules(int word, List<FeatureFunction> featureFunctions) {
+ throw new RuntimeException("PackedGrammar.addOOVRules(): I can't add OOV rules");
+ }
+
+ @Override
+ public void addRule(Rule rule) {
+ throw new RuntimeException("PackedGrammar.addRule(): I can't add rules");
+ }
+
+ /**
+ * Read the config file
+ *
+ * TODO: this should be rewritten using typeconfig.
+ *
+ * @param config path to the packed grammar's config file
+ * @throws IOException
+ */
+ private void readConfig(String config) throws IOException {
+ int version = 0;
+
+ for (String line: new LineReader(config)) {
+ String[] tokens = line.split(" = ");
+ if (tokens[0].equals("max-source-len"))
+ this.maxSourcePhraseLength = Integer.parseInt(tokens[1]);
+ else if (tokens[0].equals("version")) {
+ version = Integer.parseInt(tokens[1]);
+ }
+ }
+
+ if (version != SUPPORTED_VERSION) {
+ String message = String.format("The grammar at %s was packed with packer version %d, but the earliest supported version is %d",
+ this.grammarDir, version, SUPPORTED_VERSION);
+ throw new RuntimeException(message);
+ }
+ }
+ }
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1bb8a203/joshua-core/src/main/java/org/apache/joshua/decoder/hypergraph/GrammarBuilderWalkerFunction.java
----------------------------------------------------------------------
diff --cc joshua-core/src/main/java/org/apache/joshua/decoder/hypergraph/GrammarBuilderWalkerFunction.java
index 0000000,a6edddd..c5d2398
mode 000000,100644..100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/hypergraph/GrammarBuilderWalkerFunction.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/hypergraph/GrammarBuilderWalkerFunction.java
@@@ -1,0 -1,180 +1,179 @@@
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+ package org.apache.joshua.decoder.hypergraph;
+
+ import java.io.PrintStream;
+ import java.util.HashSet;
+
+ import org.apache.joshua.corpus.Vocabulary;
+ import org.apache.joshua.decoder.JoshuaConfiguration;
+ import org.apache.joshua.decoder.ff.tm.Grammar;
+ import org.apache.joshua.decoder.ff.tm.Rule;
+ import org.apache.joshua.decoder.ff.tm.format.HieroFormatReader;
+ import org.apache.joshua.decoder.ff.tm.hash_based.MemoryBasedBatchGrammar;
+ import org.apache.joshua.util.FormatUtils;
+ import org.slf4j.Logger;
+ import org.slf4j.LoggerFactory;
+
+ /**
+ * This walker function builds up a new context-free grammar by visiting each node in a hypergraph.
+ * For a quick overview, see Chris Dyer's 2010 NAACL paper
+ * "Two monlingual parses are better than one (synchronous parse)".
+ * <p>
+ * From a functional-programming point of view, this walker really wants to calculate a fold over
+ * the entire hypergraph: the initial value is an empty grammar, and as we visit each node, we add
+ * more rules to the grammar. After we have traversed the whole hypergraph, the resulting grammar
+ * will contain all rules needed for synchronous parsing.
+ * <p>
+ * These rules look just like the rules already present in the hypergraph, except that each
+ * non-terminal symbol is annotated with the span of its node.
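+ * For example, a node with label [X] spanning source words 2..5 becomes the
+ * annotated nonterminal [2-X-5] (see getLabelWithSpanAsString below).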
+ */
+ public class GrammarBuilderWalkerFunction implements WalkerFunction {
+
+ private static final Logger LOG = LoggerFactory.getLogger(GrammarBuilderWalkerFunction.class);
+
+ private MemoryBasedBatchGrammar grammar;
+ private static HieroFormatReader reader = new HieroFormatReader();
+ private PrintStream outStream;
+ private int goalSymbol;
+ private HashSet<Rule> rules;
+
+ public GrammarBuilderWalkerFunction(String goal,JoshuaConfiguration joshuaConfiguration) {
- grammar = new MemoryBasedBatchGrammar(reader,joshuaConfiguration);
- grammar.setSpanLimit(1000);
++ grammar = new MemoryBasedBatchGrammar(reader, joshuaConfiguration, 1000);
+ outStream = null;
+ goalSymbol = Vocabulary.id(goal);
+ rules = new HashSet<Rule>();
+ }
+
+ public GrammarBuilderWalkerFunction(String goal, PrintStream out,JoshuaConfiguration joshuaConfiguration) {
+ this(goal,joshuaConfiguration);
+ outStream = out;
+ }
+
+ public void apply(HGNode node, int index) {
+ // System.err.printf("VISITING NODE: %s\n", getLabelWithSpan(node));
+ for (HyperEdge e : node.hyperedges) {
+ Rule r = getRuleWithSpans(e, node);
+ if (r != null && !rules.contains(r)) {
+ if (outStream != null) outStream.println(r);
+ grammar.addRule(r);
+ rules.add(r);
+ }
+ }
+ }
+
+ private static int getLabelWithSpan(HGNode node) {
+ return Vocabulary.id(getLabelWithSpanAsString(node));
+ }
+
+ private static String getLabelWithSpanAsString(HGNode node) {
+ String label = Vocabulary.word(node.lhs);
+ String unBracketedCleanLabel = label.substring(1, label.length() - 1);
+ return String.format("[%d-%s-%d]", node.i, unBracketedCleanLabel, node.j);
+ }
+
+ private boolean nodeHasGoalSymbol(HGNode node) {
+ return node.lhs == goalSymbol;
+ }
+
+ private Rule getRuleWithSpans(HyperEdge edge, HGNode head) {
+ Rule edgeRule = edge.getRule();
+ int headLabel = getLabelWithSpan(head);
+ // System.err.printf("Head label: %s\n", headLabel);
+ // if (edge.getAntNodes() != null) {
+ // for (HGNode n : edge.getAntNodes())
+ // System.err.printf("> %s\n", getLabelWithSpan(n));
+ // }
+ int[] source = getNewSource(nodeHasGoalSymbol(head), edge);
+ // if this would be unary abstract, getNewSource will be null
+ if (source == null) return null;
+ int[] target = getNewTargetFromSource(source);
+ Rule result =
+ new Rule(headLabel, source, target, edgeRule.getFeatureString(), edgeRule.getArity());
+ // System.err.printf("new rule is %s\n", result);
+ return result;
+ }
+
+ private static int[] getNewSource(boolean isGlue, HyperEdge edge) {
+ Rule rule = edge.getRule();
+ int[] english = rule.getEnglish();
+ // if this is a unary abstract rule, just return null
+ // TODO: except glue rules!
+ if (english.length == 1 && english[0] < 0 && !isGlue) return null;
+ int[] result = new int[english.length];
+ for (int i = 0; i < english.length; i++) {
+ int curr = english[i];
+ if (! FormatUtils.isNonterminal(curr)) {
+ // If it's a terminal symbol, we just copy it into the new rule.
+ result[i] = curr;
+ } else {
+ // If it's a nonterminal, its value is -N, where N is the index
+ // of the nonterminal on the source side.
+ //
+ // That is, if we would call a nonterminal "[X,2]", the value of
+ // curr at this point is -2. And the tail node that it points at
+ // is #1 (since getTailNodes() is 0-indexed).
+ int index = -curr - 1;
+ result[i] = getLabelWithSpan(edge.getTailNodes().get(index));
+ }
+ }
+ // System.err.printf("source: %s\n", result);
+ return result;
+ }
+
+ private static int[] getNewTargetFromSource(int[] source) {
+ int[] result = new int[source.length];
+ int currNT = -1; // value to stick into NT slots
+ for (int i = 0; i < source.length; i++) {
+ result[i] = source[i];
+ if (FormatUtils.isNonterminal(result[i])) {
+ result[i] = currNT;
+ currNT--;
+ }
+ }
+ // System.err.printf("target: %s\n", result);
+ return result;
+ }
+
+ private static HGNode getGoalSymbolNode(HGNode root) {
+ if (root.hyperedges == null || root.hyperedges.size() == 0) {
+ LOG.error("getGoalSymbolNode: root node has no hyperedges");
+ return null;
+ }
+ return root.hyperedges.get(0).getTailNodes().get(0);
+ }
+
+
+ public static int goalSymbol(HyperGraph hg) {
+ if (hg.goalNode == null) {
+ LOG.error("goalSymbol: goalNode of hypergraph is null");
+ return -1;
+ }
+ HGNode symbolNode = getGoalSymbolNode(hg.goalNode);
+ if (symbolNode == null) return -1;
+ // System.err.printf("goalSymbol: %s\n", result);
+ // System.err.printf("symbol node LHS is %d\n", symbolNode.lhs);
+ // System.err.printf("i = %d, j = %d\n", symbolNode.i, symbolNode.j);
+ return getLabelWithSpan(symbolNode);
+ }
+
+ public Grammar getGrammar() {
+ return grammar;
+ }
+ }
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1bb8a203/joshua-core/src/main/java/org/apache/joshua/decoder/hypergraph/HyperGraphPruning.java
----------------------------------------------------------------------
diff --cc joshua-core/src/main/java/org/apache/joshua/decoder/hypergraph/HyperGraphPruning.java
index 0000000,27f5525..51bd9d6
mode 000000,100644..100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/hypergraph/HyperGraphPruning.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/hypergraph/HyperGraphPruning.java
@@@ -1,0 -1,176 +1,176 @@@
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+ package org.apache.joshua.decoder.hypergraph;
+
+ import java.util.HashMap;
+
+ import org.apache.joshua.corpus.Vocabulary;
+
+ /**
+ * During the pruning process, many items/deductions may never be explored at all,
+ * owing to the early stop in pruningEdge.
+ *
+ * @author Zhifei Li, zhifei.work@gmail.com
+ * @version $LastChangedDate$
+ */
+ public class HyperGraphPruning extends TrivialInsideOutside {
+
+ HashMap<HGNode, Boolean> processedNodesTbl = new HashMap<HGNode, Boolean>();
+ double bestLogProb;// viterbi unnormalized log prob in the hypergraph
+
+ boolean ViterbiPruning = false;// Viterbi or Posterior pruning
+
+ boolean fixThresholdPruning = true;
+ double THRESHOLD_GENERAL = 10;// prune if the merit is worse than best_log_prob by this amount
+ double THRESHOLD_GLUE = 10;// prune if the merit is worse than best_log_prob by this amount
+
+ int numSurvivedEdges = 0;
+ int numSurvivedNodes = 0;
+
+ int glueGrammarOwner = 0;// TODO
+
+
+ public HyperGraphPruning(boolean fixThreshold, double thresholdGeneral, double thresholdGlue) {
+ fixThresholdPruning = fixThreshold;
+ THRESHOLD_GENERAL = thresholdGeneral;
+ THRESHOLD_GLUE = thresholdGlue;
+ glueGrammarOwner = Vocabulary.id("glue");// TODO
+ }
+
+ public void clearState() {
+ processedNodesTbl.clear();
+ super.clearState();
+ }
+
+
+ // ######################### pruning here ##############
+ public void pruningHG(HyperGraph hg) {
+
+ runInsideOutside(hg, 2, 1, 1.0);// viterbi-max, log-semiring
+
+ if (fixThresholdPruning) {
+ pruningHGHelper(hg);
+ super.clearState();
+ } else {
+ throw new RuntimeException("wrong call");
+ }
+ }
+
+ private void pruningHGHelper(HyperGraph hg) {
+
+ this.bestLogProb = getLogNormalizationConstant();// set the best_log_prob
+
+ numSurvivedEdges = 0;
+ numSurvivedNodes = 0;
+ processedNodesTbl.clear();
+ pruningNode(hg.goalNode);
+
+ // clear up
+ processedNodesTbl.clear();
+
+ System.out.println("Item suvived ratio: " + numSurvivedNodes * 1.0 / hg.numNodes + " = "
+ + numSurvivedNodes + "/" + hg.numNodes);
+ System.out.println("Deduct suvived ratio: " + numSurvivedEdges * 1.0 / hg.numEdges + " = "
+ + numSurvivedEdges + "/" + hg.numEdges);
+ }
+
+
+ private void pruningNode(HGNode it) {
+
+ if (processedNodesTbl.containsKey(it)) return;
+
+ processedNodesTbl.put(it, true);
+ boolean shouldSurvive = false;
+
+ // ### recursive call on each deduction
+ for (int i = 0; i < it.hyperedges.size(); i++) {
+ HyperEdge dt = it.hyperedges.get(i);
+ boolean survived = pruningEdge(dt, it);// deduction-specific operation
+ if (survived) {
+ shouldSurvive = true; // at least one deduction survive
+ } else {
+ it.hyperedges.remove(i);
+ i--;
+ }
+ }
+ // TODO: now we simply remove the pruned deductions, but in general, we may want to update the
+ // variables maintained in the item (e.g., best_deduction); this depends on the pruning method used
+
+ /*
+ * By definition, "shouldSurvive == false" should be impossible: if this node was
+ * visited, its parent deduction survived, and that deduction can only survive if
+ * there is a surviving path through this node from below.
+ */
+ if (!shouldSurvive) {
+ throw new RuntimeException("item explored but does not survive");
+ // TODO: since we always keep the best_deduction, this should never be true
+ } else {
+ numSurvivedNodes++;
+ }
+ }
+
+
+ // if survive, return true
+ // best-deduction is always kept
+ private boolean pruningEdge(HyperEdge dt, HGNode parent) {
+
+ /**
+ * TODO: theoretically, if an item gets visited, its best deduction should always
+ * be kept, if only by the threshold check. In practice, due to the limited
+ * precision of double, the threshold check may not be perfect.
+ */
+ if (dt != parent.bestHyperedge) { // the best deduction always survives if the item is visited
+ // ### prune?
+ if (shouldPruneHyperedge(dt, parent)) {
+ return false; // early stop
+ }
+ }
+
+ // ### still survive, recursive call all my ant-items
+ if (null != dt.getTailNodes()) {
+ for (HGNode ant_it : dt.getTailNodes()) {
+ pruningNode(ant_it); // recurse on each antecedent item; note: ant_it will not be
+ // pruned, since this edge needs it
+ }
+ }
+
+ // ### if we get here, the edge survives; and if this edge survives, its parent item survives
+ numSurvivedEdges++;
+ return true; // survive
+ }
+
+ private boolean shouldPruneHyperedge(HyperEdge dt, HGNode parent) {
+
+ // ### get merit
+ double postLogProb = getEdgeUnormalizedPosteriorLogProb(dt, parent);
+
+
- if (dt.getRule() != null && dt.getRule().getOwner() == glueGrammarOwner
++ if (dt.getRule() != null && dt.getRule().getOwner().equals(glueGrammarOwner)
+ && dt.getRule().getArity() == 2) { // special rule: S -> S X
+ // TODO
+ return (postLogProb - this.bestLogProb < THRESHOLD_GLUE);
+ } else {
+ return (postLogProb - this.bestLogProb < THRESHOLD_GENERAL);
+ }
+ }
+
+ }
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1bb8a203/joshua-core/src/main/java/org/apache/joshua/decoder/io/JSONMessage.java
----------------------------------------------------------------------
diff --cc joshua-core/src/main/java/org/apache/joshua/decoder/io/JSONMessage.java
index 0000000,9c3899e..90a550b
mode 000000,100644..100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/io/JSONMessage.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/io/JSONMessage.java
@@@ -1,0 -1,159 +1,159 @@@
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+ package org.apache.joshua.decoder.io;
+
+ import java.util.ArrayList;
+ import java.util.List;
+
+ import com.google.gson.Gson;
+ import com.google.gson.GsonBuilder;
+
+ import org.apache.joshua.decoder.StructuredTranslation;
+ import org.apache.joshua.decoder.Translation;
+
+ /**
+ * Represents a JSON object returned by the server. The object has the format
+ *
+ * { data: {
+ * translations: [
+ * { annotatedSource: "",
+ * translatedText: "",
+ * raw_nbest: [
+ * { hyp: "",
+ * totalScore: 0.0, } ]
+ * tokenization: { ... }
+ * translatedTextRaw: "",
+ * nbest: [
+ * { translatedText: "",
+ * translatedTextRaw: "",
+ * tokenization: { ... } } ] } ] } }
+ *
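+ * For example, a single translation with one hypothesis renders (via Gson,
+ * with illustrative values, showing only the fields defined below) roughly as:
+ *
+ * { "data": { "translations": [ { "translatedText": "hello",
+ *     "raw_nbest": [ { "hyp": "hello", "totalScore": -3.14 } ] } ] },
+ *   "metadata": [] }
+ *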
+ * @author post
+ */
+
+ public class JSONMessage {
+ public Data data = null;
+ public List<String> metadata = null;
+ public JSONMessage() {
+ metadata = new ArrayList<String>();
+ }
+
+ public class Data {
+ public List<TranslationItem> translations;
+
+ public Data() {
+ translations = new ArrayList<TranslationItem>();
+ }
+ }
+ //
+ // public class Metadata {
+ // public String metadata = null;
+ // public List<String> rules = null;
+ //
+ // public Metadata() {
+ // rules = new ArrayList<String>();
+ // }
+ // }
+
+ public void addTranslation(Translation translation) {
+ String viterbi = translation.getStructuredTranslations().get(0).getTranslationString();
+
+ TranslationItem item = addTranslation(viterbi);
+
+ for (StructuredTranslation hyp: translation.getStructuredTranslations()) {
- String text = hyp.getFormattedTranslationString();
++ String text = hyp.getTranslationString();
+ float score = hyp.getTranslationScore();
+
+ item.addHypothesis(text, score);
+ }
+
+ // old string-based k-best output
+ // String[] results = translation.toString().split("\\n");
+ // if (results.length > 0) {
+ // String rawTranslation = results[0].split(" \\|\\|\\| ")[1];
+ // JSONMessage.TranslationItem item = message.addTranslation(rawTranslation);
+ //
+ // for (String result: results) {
+ // String[] tokens = result.split(" \\|\\|\\| ");
+ // String rawResult = tokens[1];
+ // float score = Float.parseFloat(tokens[3]);
+ // item.addHypothesis(rawResult, score);
+ // }
+ // }
+ }
+
+ /**
+ * Adds a new Translation to the JSON object. A Translation represents one or more hypotheses
+ * (or k-best items)
+ *
- * @param text
++ * @param text the translation text
+ * @return the new TranslationItem object
+ */
+ public TranslationItem addTranslation(String text) {
+ if (data == null)
+ data = new Data();
+
+ TranslationItem newItem = new TranslationItem(text);
+ data.translations.add(newItem);
+ return newItem;
+ }
+
+ public void addMetaData(String msg) {
+ this.metadata.add(msg);
+ }
+
+ public class TranslationItem {
+ public String translatedText;
+ public List<NBestItem> raw_nbest;
+
+ public TranslationItem(String value) {
+ this.translatedText = value;
+ this.raw_nbest = new ArrayList<NBestItem>();
+ }
+
+ /**
+ * Adds a new item to the translation's list of k-best items
+ *
+ * @param hyp the hypothesis
+ * @param score its score
+ */
+ public void addHypothesis(String hyp, float score) {
+ this.raw_nbest.add(new NBestItem(hyp, score));
+ }
+ }
+
+ public class NBestItem {
+ public String hyp;
+ public float totalScore;
+
+ public NBestItem(String hyp, float score) {
+ this.hyp = hyp;
+ this.totalScore = score;
+ }
+ }
+
+ public void addRule(String rule) {
+ metadata.add("custom_rule " + rule);
+ }
+
+ public String toString() {
+ Gson gson = new GsonBuilder().setPrettyPrinting().create();
+ return gson.toJson(this) + "\n";
+ }
+ }
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1bb8a203/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java
----------------------------------------------------------------------
diff --cc joshua-core/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java
index 0000000,27f92ac..0e03dba
mode 000000,100644..100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java
@@@ -1,0 -1,181 +1,183 @@@
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+ package org.apache.joshua.decoder.phrase;
+
++import static org.apache.joshua.decoder.ff.tm.OwnerMap.UNKNOWN_OWNER;
++
+ import java.io.File;
+ import java.io.IOException;
+ import java.util.List;
+
+ import org.apache.joshua.corpus.Vocabulary;
+ import org.apache.joshua.decoder.JoshuaConfiguration;
+ import org.apache.joshua.decoder.ff.FeatureFunction;
+ import org.apache.joshua.decoder.ff.tm.Grammar;
++import org.apache.joshua.decoder.ff.tm.OwnerId;
+ import org.apache.joshua.decoder.ff.tm.Rule;
+ import org.apache.joshua.decoder.ff.tm.RuleCollection;
+ import org.apache.joshua.decoder.ff.tm.Trie;
+ import org.apache.joshua.decoder.ff.tm.hash_based.MemoryBasedBatchGrammar;
+ import org.apache.joshua.decoder.ff.tm.packed.PackedGrammar;
+
+ /**
+ * Represents a phrase table, and is implemented as a wrapper around either a {@link PackedGrammar}
+ * or a {@link MemoryBasedBatchGrammar}.
+ *
+ * TODO: this should all be implemented as a two-level trie (source trie and target trie).
+ */
+ public class PhraseTable implements Grammar {
+
+ private JoshuaConfiguration config;
+ private Grammar backend;
+
+ /**
+ * Constructs the backing grammar with a number of defaults. For example, we only use a
+ * single nonterminal, and there is no span limit.
+ *
+ * @param grammarFile file path parent directory
+ * @param owner used to set phrase owners
+ * @param type the grammar specification keyword (e.g., "thrax" or "moses")
+ * @param config a populated {@link org.apache.joshua.decoder.JoshuaConfiguration}
+ * @throws IOException if there is an error reading the grammar file
+ */
+ public PhraseTable(String grammarFile, String owner, String type, JoshuaConfiguration config)
+ throws IOException {
+ this.config = config;
+ int spanLimit = 0;
+
+ if (grammarFile != null && new File(grammarFile).isDirectory()) {
+ this.backend = new PackedGrammar(grammarFile, spanLimit, owner, type, config);
+ if (this.backend.getMaxSourcePhraseLength() == -1) {
+ String msg = "FATAL: Using a packed grammar for a phrase table backend requires that you "
+ + "packed the grammar with Joshua 6.0.2 or greater";
+ throw new RuntimeException(msg);
+ }
+
+ } else {
+ this.backend = new MemoryBasedBatchGrammar(type, grammarFile, owner, "[X]", spanLimit, config);
+ }
+ }
+
+ public PhraseTable(String owner, JoshuaConfiguration config) {
+ this.config = config;
-
- this.backend = new MemoryBasedBatchGrammar(owner, config);
++ this.backend = new MemoryBasedBatchGrammar(owner, config, 20);
+ }
+
+ /**
+ * Returns the longest source phrase read. Because phrases have a dummy nonterminal prepended to
+ * them, we need to subtract 1.
+ *
+ * @return the longest source phrase read.
+ */
+ @Override
+ public int getMaxSourcePhraseLength() {
+ return this.backend.getMaxSourcePhraseLength() - 1;
+ }
+
+ /**
+ * Collect the set of target-side phrases associated with a source phrase.
+ *
+ * @param sourceWords the sequence of source words
+ * @return the rules
+ */
+ public RuleCollection getPhrases(int[] sourceWords) {
+ if (sourceWords.length != 0) {
+ Trie pointer = getTrieRoot();
+ pointer = pointer.match(Vocabulary.id("[X]"));
+ int i = 0;
+ while (pointer != null && i < sourceWords.length)
+ pointer = pointer.match(sourceWords[i++]);
+
+ if (pointer != null && pointer.hasRules()) {
+ return pointer.getRuleCollection();
+ }
+ }
+
+ return null;
+ }
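+
+ // An illustrative lookup sketch (hypothetical words and caller; not part of this class).
+ // The dummy [X] arc is matched internally, so callers pass only the source word ids:
+ //
+ // int[] source = { Vocabulary.id("natural"), Vocabulary.id("language") };
+ // RuleCollection phrases = phraseTable.getPhrases(source);
+ // if (phrases != null)
+ // process(phrases.getRules()); // target sides, sorted if sortGrammar() was called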
+
+ /**
+ * Adds a rule to the grammar. Only supported when the backend is a
+ * {@link MemoryBasedBatchGrammar}; with a packed backend, the cast below fails with a
+ * ClassCastException.
+ *
+ * @param rule the rule to add
+ */
+ public void addRule(Rule rule) {
+ ((MemoryBasedBatchGrammar)backend).addRule(rule);
+ }
+
+ @Override
+ public void addOOVRules(int sourceWord, List<FeatureFunction> featureFunctions) {
+ // TODO: _OOV shouldn't be outright added, since the word might not be OOV for the LM (but now almost
+ // certainly is)
+ int targetWord = config.mark_oovs
+ ? Vocabulary.id(Vocabulary.word(sourceWord) + "_OOV")
+ : sourceWord;
+
+ int nt_i = Vocabulary.id("[X]");
+ Rule oovRule = new Rule(nt_i, new int[] { nt_i, sourceWord },
- new int[] { -1, targetWord }, "", 1, null);
++ new int[] { -1, targetWord }, "", 1, UNKNOWN_OWNER);
+ addRule(oovRule);
+ oovRule.estimateRuleCost(featureFunctions);
+
+ // String ruleString = String.format("[X] ||| [X,1] %s ||| [X,1] %s",
+ // Vocabulary.word(sourceWord), Vocabulary.word(targetWord));
+ // Rule oovRule = new HieroFormatReader().parseLine(ruleString);
+ // oovRule.setOwner(Vocabulary.id("oov"));
+ // addRule(oovRule);
+ // oovRule.estimateRuleCost(featureFunctions);
+ }
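+
+ // For illustration (hypothetical source word): with mark_oovs enabled, the word "mot"
+ // yields the rule [X] ||| [X,1] mot ||| [X,1] mot_OOV with no sparse features; with it
+ // disabled, the target side simply copies the source word.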
+
+ @Override
+ public Trie getTrieRoot() {
+ return backend.getTrieRoot();
+ }
+
+ @Override
+ public void sortGrammar(List<FeatureFunction> models) {
+ backend.sortGrammar(models);
+ }
+
+ @Override
+ public boolean isSorted() {
+ return backend.isSorted();
+ }
+
+ /**
+ * This should never be called. Phrase tables do not constrain rules by span, so we
+ * conservatively report that every span has rules.
+ */
+ @Override
+ public boolean hasRuleForSpan(int startIndex, int endIndex, int pathLength) {
+ return true;
+ }
+
+ @Override
+ public int getNumRules() {
+ return backend.getNumRules();
+ }
+
+ @Override
- public int getOwner() {
++ public OwnerId getOwner() {
+ return backend.getOwner();
+ }
+
+ @Override
+ public int getNumDenseFeatures() {
+ return backend.getNumDenseFeatures();
+ }
+ }
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1bb8a203/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
----------------------------------------------------------------------
diff --cc joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
index 0000000,c688b2c..8c092ec
mode 000000,100644..100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
@@@ -1,0 -1,269 +1,271 @@@
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+ package org.apache.joshua.decoder.phrase;
+
+ /***
+ * Entry point for phrase-based decoding, analogous to {@link Chart} for the CKY algorithm. This
+ * class organizes all the stacks used for decoding, and is responsible for building them. Stack
+ * construction is stack-centric: we loop over the number of covered source words in increasing
+ * order, and at each step we build the current stack by extending hypotheses from smaller
+ * stacks with source-side phrases of the complementary length.
+ *
+ * The end result of decoding is a {@link HyperGraph} with the same format as in hierarchical
+ * decoding. Phrases are treated as left-branching rules, and the span information (i,j) is
+ * overloaded so that i means nothing and j represents the index of the last-translated source
+ * word in each hypothesis. This means that most hypergraph code can work without modification.
+ * The algorithm ensures that the coverage vector is consistent, but the resulting hypergraph
+ * may not be projective, unlike the CKY algorithm, which produces projective derivations.
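+ *
+ * As a concrete illustration of the stack-centric loop (hypothetical sizes): for a four-word
+ * input, the stack covering three source words is built by extending hypotheses from stack 2
+ * with one-word phrases and hypotheses from stack 1 with two-word phrases, up to the maximum
+ * source phrase length.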
+ *
+ * TODO Lattice decoding is not yet supported (March 2015).
+ */
+
++import static org.apache.joshua.decoder.ff.tm.OwnerMap.UNKNOWN_OWNER;
++
+ import java.util.ArrayList;
+ import java.util.List;
+
+ import org.apache.joshua.corpus.Span;
+ import org.apache.joshua.decoder.JoshuaConfiguration;
+ import org.apache.joshua.decoder.chart_parser.ComputeNodeResult;
+ import org.apache.joshua.decoder.ff.FeatureFunction;
+ import org.apache.joshua.decoder.ff.tm.AbstractGrammar;
+ import org.apache.joshua.decoder.ff.tm.Grammar;
+ import org.apache.joshua.decoder.hypergraph.HGNode;
+ import org.apache.joshua.decoder.hypergraph.HyperEdge;
+ import org.apache.joshua.decoder.hypergraph.HyperGraph;
+ import org.apache.joshua.decoder.segment_file.Sentence;
+ import org.slf4j.Logger;
+ import org.slf4j.LoggerFactory;
+
+ public class Stacks {
+
+ private static final Logger LOG = LoggerFactory.getLogger(Stacks.class);
+
+ // The list of stacks, grouped according to number of source words covered
+ private List<Stack> stacks;
+
+ // The end state
+ private Hypothesis end;
+
+ List<FeatureFunction> featureFunctions;
+
+ private Sentence sentence;
+
+ private JoshuaConfiguration config;
+
+ /* Contains all the phrase tables */
+ private PhraseChart chart;
+
+ /**
+ * Entry point. Initializes everything, creating the pass-through (OOV) phrase table and the
+ * glue phrase table (with start-of-sentence and end-of-sentence rules).
+ *
+ * @param sentence the input {@link org.apache.joshua.decoder.segment_file.Sentence} to decode
+ * @param featureFunctions {@link java.util.List} of {@link org.apache.joshua.decoder.ff.FeatureFunction}'s
+ * @param grammars an array of {@link org.apache.joshua.decoder.ff.tm.Grammar}'s
+ * @param config a populated {@link org.apache.joshua.decoder.JoshuaConfiguration}
+ */
+ public Stacks(Sentence sentence, List<FeatureFunction> featureFunctions, Grammar[] grammars,
+ JoshuaConfiguration config) {
+
+ this.sentence = sentence;
+ this.featureFunctions = featureFunctions;
+ this.config = config;
+
+ int num_phrase_tables = 0;
+ for (int i = 0; i < grammars.length; i++)
+ if (grammars[i] instanceof PhraseTable)
+ ++num_phrase_tables;
+
+ PhraseTable[] phraseTables = new PhraseTable[num_phrase_tables + 2];
+ for (int i = 0, j = 0; i < grammars.length; i++)
+ if (grammars[i] instanceof PhraseTable)
+ phraseTables[j++] = (PhraseTable) grammars[i];
+
- phraseTables[phraseTables.length - 2] = new PhraseTable("null", config);
++ phraseTables[phraseTables.length - 2] = new PhraseTable(UNKNOWN_OWNER, config);
+ phraseTables[phraseTables.length - 2].addRule(Hypothesis.END_RULE);
+
+ phraseTables[phraseTables.length - 1] = new PhraseTable("oov", config);
+ AbstractGrammar.addOOVRules(phraseTables[phraseTables.length - 1], sentence.getLattice(), featureFunctions, config.true_oovs_only);
+
+ this.chart = new PhraseChart(phraseTables, featureFunctions, sentence, config.num_translation_options);
+ }
+
+
+ /**
+ * The main algorithm. Returns a hypergraph representing the search space.
+ *
+ * @return a {@link org.apache.joshua.decoder.hypergraph.HyperGraph} representing the search space
+ */
+ public HyperGraph search() {
+
+ long startTime = System.currentTimeMillis();
+
+ Future future = new Future(chart);
+ stacks = new ArrayList<Stack>();
+
+ // <s> counts as the first word. Pushing null lets us count from one.
+ stacks.add(null);
+
+ // Initialize root hypothesis with <s> context and future cost for everything.
+ ComputeNodeResult result = new ComputeNodeResult(this.featureFunctions, Hypothesis.BEGIN_RULE,
+ null, -1, 1, null, this.sentence);
+ Stack firstStack = new Stack(featureFunctions, sentence, config);
+ firstStack.add(new Hypothesis(result.getDPStates(), future.Full()));
+ stacks.add(firstStack);
+
+ // Decode with increasing numbers of source words.
+ for (int source_words = 2; source_words <= sentence.length(); ++source_words) {
+ Stack targetStack = new Stack(featureFunctions, sentence, config);
+ stacks.add(targetStack);
+
+ // Iterate over stacks to continue from.
+ for (int phrase_length = 1; phrase_length <= Math.min(source_words - 1, chart.MaxSourcePhraseLength());
+ phrase_length++) {
+ int from_stack = source_words - phrase_length;
+ Stack tailStack = stacks.get(from_stack);
+
+ LOG.debug("WORDS {} MAX {} (STACK {} phrase_length {})", source_words,
+ chart.MaxSourcePhraseLength(), from_stack, phrase_length);
+
+ // Iterate over antecedents in this stack.
+ for (Coverage coverage: tailStack.getCoverages()) {
+ ArrayList<Hypothesis> hypotheses = tailStack.get(coverage);
+
+ // the index of the starting point of the first possible phrase
+ int begin = coverage.firstZero();
+
+ // the absolute position of the end of the last possible phrase
+ int last_end = Math.min(coverage.firstZero() + config.reordering_limit, chart.SentenceLength());
+ int last_begin = (last_end > phrase_length) ? (last_end - phrase_length) : 0;
+
+ for (begin = coverage.firstZero(); begin <= last_begin; begin++) {
+ if (!coverage.compatible(begin, begin + phrase_length) ||
+ ! permissible(coverage, begin, begin + phrase_length)) {
+ continue;
+ }
+
+ Span span = new Span(begin, begin + phrase_length);
+
+ // Don't append </s> until the end
+ if (begin == sentence.length() - 1 && source_words != sentence.length())
+ continue;
+
+ TargetPhrases phrases = chart.getRange(begin, begin + phrase_length);
+ if (phrases == null)
+ continue;
+
+
+ LOG.debug("Applying {} target phrases over [{}, {}]",
+ phrases.size(), begin, begin + phrase_length);
+
+ // TODO: could also compute some number of features here (e.g., non-LM ones)
+ // float score_delta = context.GetScorer().transition(ant, phrases, begin, begin + phrase_length);
+
+ // Future costs: remove span to be filled.
+ float future_delta = future.Change(coverage, begin, begin + phrase_length);
+
+ /* This associates with each span a set of hypotheses that can be extended by
+ * phrases from that span. The hypotheses are wrapped in HypoState objects, which
+ * augment the hypothesis score with a future cost.
+ */
+ Candidate cand = new Candidate(hypotheses, phrases, span, future_delta);
+ targetStack.addCandidate(cand);
+ }
+ }
+ }
+
+ /* At this point, every vertex contains a list of all existing hypotheses that the target
+ * phrases in that vertex could extend. Now we need to create the search object, which
+ * implements cube pruning. There are up to O(n^2) cubes, where n is the size of the current
+ * stack, one cube over each span of the input. Each "cube" has two dimensions: one representing
+ * the target phrases over the span, and one representing all of these incoming hypotheses.
+ * We seed the chart with the best item in each cube, and then repeatedly pop and extend.
+ */
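+
+ /* A small worked example (hypothetical sizes): if a span has 3 target phrases and 2
+ * compatible hypotheses, its cube has 6 cells. The best (hypothesis, phrase) pair seeds
+ * the priority queue, and each pop pushes its right and lower neighbors, so only a
+ * beam-sized prefix of the 6 cells is ever scored.
+ */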
+
+ // System.err.println(String.format("\nBuilding cube-pruning chart for %d words", source_words));
+
+ targetStack.search();
+ }
+
+ LOG.info("Input {}: Search took {} seconds", sentence.id(),
+ (System.currentTimeMillis() - startTime) / 1000.0f);
+
+ return createGoalNode();
+ }
+
+ /**
+ * Enforces reordering constraints. Our version of Moses' ReorderingConstraint::Check() and
+ * SearchCubePruning::CheckDistortion().
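+ *
+ * Worked example (hypothetical values): with reordering_limit = 5 and firstZero = 2, a
+ * phrase over [6, 9) is rejected because 9 - 2 = 7 > 5; after covering it, the first
+ * uncovered word at index 2 could no longer be reached within the distortion limit.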
+ *
+ * @param coverage the coverage vector of the hypothesis being extended
+ * @param begin start index (inclusive) of the candidate source span
+ * @param end end index (exclusive) of the candidate source span
+ * @return true if the extension respects the reordering limit, false otherwise
+ */
+ private boolean permissible(Coverage coverage, int begin, int end) {
+ int firstZero = coverage.firstZero();
+
+ if (config.reordering_limit < 0)
+ return true;
+
+ /* We can always start with the first zero since it doesn't create a reordering gap
+ */
+ if (begin == firstZero)
+ return true;
+
+ /* If a gap is created by applying this phrase, make sure that you can reach the first
+ * zero later on without violating the distortion constraint.
+ */
+ if (end - firstZero > config.reordering_limit) {
+ return false;
+ }
+
+ return true;
+ }
+
+
+ /**
+ * Searches through the goal stack, calls the final transition function on each node, and
+ * returns the best item. Usually the final transition code doesn't add anything, because all
+ * features have already computed everything they need to. The standard exception is language
+ * models that have not yet computed their prefix probabilities (which is not the case with
+ * KenLM, the default).
+ *
+ * @return a {@link HyperGraph} rooted at the goal node
+ */
+ private HyperGraph createGoalNode() {
+ Stack lastStack = stacks.get(sentence.length());
+
+ for (Hypothesis hyp: lastStack) {
+ float score = hyp.getScore();
+ List<HGNode> tailNodes = new ArrayList<HGNode>();
+ tailNodes.add(hyp);
+
+ float finalTransitionScore = ComputeNodeResult.computeFinalCost(featureFunctions, tailNodes, 0, sentence.length(), null, sentence);
+
+ if (null == this.end)
+ this.end = new Hypothesis(null, score + finalTransitionScore, hyp, sentence.length(), null);
+
+ HyperEdge edge = new HyperEdge(null, score + finalTransitionScore, finalTransitionScore, tailNodes, null);
+ end.addHyperedgeInNode(edge);
+ }
+
+ return new HyperGraph(end, -1, -1, this.sentence);
+ }
+ }