Posted to commits@joshua.apache.org by mj...@apache.org on 2016/06/23 18:45:55 UTC

[44/60] [partial] incubator-joshua git commit: maven multi-module layout 1st commit: moving files into joshua-core

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/corpus/syntax/ArraySyntaxTree.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/corpus/syntax/ArraySyntaxTree.java b/joshua-core/src/main/java/org/apache/joshua/corpus/syntax/ArraySyntaxTree.java
new file mode 100644
index 0000000..f374279
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/corpus/syntax/ArraySyntaxTree.java
@@ -0,0 +1,411 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.corpus.syntax;
+
+import java.io.Externalizable;
+import java.io.IOException;
+import java.io.ObjectInput;
+import java.io.ObjectOutput;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.Stack;
+
+import org.apache.joshua.corpus.Vocabulary;
+import org.apache.joshua.util.io.LineReader;
+
+public class ArraySyntaxTree implements SyntaxTree, Externalizable {
+
+  /**
+   * Note that the index arrays store one offset into the lattice arrays per node position;
+   * the last element of an index corresponds to the terminal node and points to
+   * lattice.size().
+   */
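+  /*
+   * Illustrative layout (a sketch, traced through appendFromPennFormat below): for the
+   * parse "(S (NP w1) (VP w2 w3))" we get
+   *   forwardLattice = [S, 3, NP, 1, VP, 2]  (label/span-length pairs, grouped by start position)
+   *   forwardIndex   = [0, 4, 6, 6]          (offset into forwardLattice for each position)
+   *   terminals      = [w1, w2, w3]
+   * so the constituents starting at position p occupy
+   * forwardLattice[forwardIndex[p] .. forwardIndex[p+1]), longest span first.
+   */
+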
+  private ArrayList<Integer> forwardIndex;
+  private ArrayList<Integer> forwardLattice;
+  private ArrayList<Integer> backwardIndex;
+  private ArrayList<Integer> backwardLattice;
+
+  private ArrayList<Integer> terminals;
+
+  private boolean useBackwardLattice = true;
+
+  private static final int MAX_CONCATENATIONS = 3;
+  private static final int MAX_LABELS = 100;
+
+  public ArraySyntaxTree() {
+    forwardIndex = null;
+    forwardLattice = null;
+    backwardIndex = null;
+    backwardLattice = null;
+
+    terminals = null;
+  }
+
+
+  public ArraySyntaxTree(String parsed_line) {
+    initialize();
+    appendFromPennFormat(parsed_line);
+  }
+
+
+  /**
+   * Returns a collection of single-non-terminal labels that exactly cover the specified span in the
+   * lattice.
+   */
+  public Collection<Integer> getConstituentLabels(int from, int to) {
+    Collection<Integer> labels = new HashSet<Integer>();
+    int span_length = to - from;
+    for (int i = forwardIndex.get(from); i < forwardIndex.get(from + 1); i += 2) {
+      int current_span = forwardLattice.get(i + 1);
+      if (current_span == span_length)
+        labels.add(forwardLattice.get(i));
+      else if (current_span < span_length) break;
+    }
+    return labels;
+  }
+
+
+  public int getOneConstituent(int from, int to) {
+    int spanLength = to - from;
+    Stack<Integer> stack = new Stack<Integer>();
+
+    for (int i = forwardIndex.get(from); i < forwardIndex.get(from + 1); i += 2) {
+      int currentSpan = forwardLattice.get(i + 1);
+      if (currentSpan == spanLength) {
+        return forwardLattice.get(i);
+      } else if (currentSpan < spanLength) break;
+    }
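+    // NOTE: nothing is ever pushed onto "stack", so the label-concatenation fallback
+    // below is dead code; absent an exact-span constituent, this method always returns 0.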
+    if (stack.isEmpty()) return 0;
+    StringBuilder sb = new StringBuilder();
+    while (!stack.isEmpty()) {
+      String w = Vocabulary.word(stack.pop());
+      if (sb.length() != 0) sb.append(":");
+      sb.append(w);
+    }
+    String label = sb.toString();
+    return Vocabulary.id(adjustMarkup(label));
+  }
+
+
+  public int getOneSingleConcatenation(int from, int to) {
+    for (int midpt = from + 1; midpt < to; midpt++) {
+      int x = getOneConstituent(from, midpt);
+      if (x == 0) continue;
+      int y = getOneConstituent(midpt, to);
+      if (y == 0) continue;
+      String label = Vocabulary.word(x) + "+" + Vocabulary.word(y);
+      return Vocabulary.id(adjustMarkup(label));
+    }
+    return 0;
+  }
+
+
+  public int getOneDoubleConcatenation(int from, int to) {
+    for (int a = from + 1; a < to - 1; a++) {
+      for (int b = a + 1; b < to; b++) {
+        int x = getOneConstituent(from, a);
+        if (x == 0) continue;
+        int y = getOneConstituent(a, b);
+        if (y == 0) continue;
+        int z = getOneConstituent(b, to);
+        if (z == 0) continue;
+        String label = Vocabulary.word(x) + "+" + Vocabulary.word(y) + "+" + Vocabulary.word(z);
+        return Vocabulary.id(adjustMarkup(label));
+      }
+    }
+    return 0;
+  }
+
+
+  public int getOneRightSideCCG(int from, int to) {
+    for (int end = to + 1; end <= forwardLattice.size(); end++) {
+      int x = getOneConstituent(from, end);
+      if (x == 0) continue;
+      int y = getOneConstituent(to, end);
+      if (y == 0) continue;
+      String label = Vocabulary.word(x) + "/" + Vocabulary.word(y);
+      return Vocabulary.id(adjustMarkup(label));
+    }
+    return 0;
+  }
+
+
+  public int getOneLeftSideCCG(int from, int to) {
+    for (int start = from - 1; start >= 0; start--) {
+      int x = getOneConstituent(start, to);
+      if (x == 0) continue;
+      int y = getOneConstituent(start, from);
+      if (y == 0) continue;
+      String label = Vocabulary.word(y) + "\\" + Vocabulary.word(x);
+      return Vocabulary.id(adjustMarkup(label));
+    }
+    return 0;
+  }
+
+
+  /**
+   * Returns a collection of concatenated non-terminal labels that exactly cover the specified span
+   * in the lattice. The number of non-terminals concatenated is limited by MAX_CONCATENATIONS and
+   * the total number of labels returned is bounded by MAX_LABELS.
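+   * For example, if NP covers [0, 1) and VP covers [1, 3), the span [0, 3) receives the
+   * concatenated label "[NP+VP]".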
+   */
+  public Collection<Integer> getConcatenatedLabels(int from, int to) {
+    Collection<Integer> labels = new HashSet<Integer>();
+
+    int span_length = to - from;
+    Stack<Integer> nt_stack = new Stack<Integer>();
+    Stack<Integer> pos_stack = new Stack<Integer>();
+    Stack<Integer> depth_stack = new Stack<Integer>();
+
+    // seed the stacks in reverse order so that longer spans are popped first, saving iterations
+    for (int i = forwardIndex.get(from + 1) - 2; i >= forwardIndex.get(from); i -= 2) {
+      int current_span = forwardLattice.get(i + 1);
+      if (current_span < span_length) {
+        nt_stack.push(forwardLattice.get(i));
+        pos_stack.push(from + current_span);
+        depth_stack.push(1);
+      } else if (current_span >= span_length) break;
+    }
+
+    while (!nt_stack.isEmpty() && labels.size() < MAX_LABELS) {
+      int nt = nt_stack.pop();
+      int pos = pos_stack.pop();
+      int depth = depth_stack.pop();
+
+      // maximum depth reached without filling span
+      if (depth == MAX_CONCATENATIONS) continue;
+
+      int remaining_span = to - pos;
+      for (int i = forwardIndex.get(pos + 1) - 2; i >= forwardIndex.get(pos); i -= 2) {
+        int current_span = forwardLattice.get(i + 1);
+        if (current_span > remaining_span) break;
+
+        // create and look up concatenated label
+        int concatenated_nt =
+            Vocabulary.id(adjustMarkup(Vocabulary.word(nt) + "+"
+                + Vocabulary.word(forwardLattice.get(i))));
+        if (current_span < remaining_span) {
+          nt_stack.push(concatenated_nt);
+          pos_stack.push(pos + current_span);
+          depth_stack.push(depth + 1);
+        } else if (current_span == remaining_span) {
+          labels.add(concatenated_nt);
+        }
+      }
+    }
+
+    return labels;
+  }
+
+  // TODO: could pre-compute all of this in a top-down fashion.
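+  // Example: with NP covering [0, 3) and NN covering [2, 3), the incomplete prefix span
+  // [0, 2) receives the CCG-style label "[NP/NN]" (an NP missing an NN to its right);
+  // the backward lattice yields analogous backslash labels for material missing on the left.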
+  public Collection<Integer> getCcgLabels(int from, int to) {
+    Collection<Integer> labels = new HashSet<Integer>();
+
+    int span_length = to - from;
+    // TODO: range checks on the to and from
+
+    boolean is_prefix = (forwardLattice.get(forwardIndex.get(from) + 1) > span_length);
+    if (is_prefix) {
+      Map<Integer, Set<Integer>> main_constituents = new HashMap<Integer, Set<Integer>>();
+      // find missing to the right
+      for (int i = forwardIndex.get(from); i < forwardIndex.get(from + 1); i += 2) {
+        int current_span = forwardLattice.get(i + 1);
+        if (current_span <= span_length)
+          break;
+        else {
+          int end_pos = forwardLattice.get(i + 1) + from;
+          Set<Integer> nts = main_constituents.get(end_pos);
+          if (nts == null) main_constituents.put(end_pos, new HashSet<Integer>());
+          main_constituents.get(end_pos).add(forwardLattice.get(i));
+        }
+      }
+      for (int i = forwardIndex.get(to); i < forwardIndex.get(to + 1); i += 2) {
+        Set<Integer> main_set = main_constituents.get(to + forwardLattice.get(i + 1));
+        if (main_set != null) {
+          for (int main : main_set)
+            labels.add(Vocabulary.id(adjustMarkup(Vocabulary.word(main) + "/"
+                + Vocabulary.word(forwardLattice.get(i)))));
+        }
+      }
+    }
+
+    if (!is_prefix) {
+      if (useBackwardLattice) {
+        // check if there is any possible higher-level constituent overlapping
+        int to_end =
+            (to == backwardIndex.size() - 1) ? backwardLattice.size() : backwardIndex.get(to + 1);
+        // check the longest span ending at position "to"
+        if (backwardLattice.get(to_end - 1) <= span_length) return labels;
+
+        Map<Integer, Set<Integer>> main_constituents = new HashMap<Integer, Set<Integer>>();
+        // find missing to the left
+        for (int i = to_end - 2; i >= backwardIndex.get(to); i -= 2) {
+          int current_span = backwardLattice.get(i + 1);
+          if (current_span <= span_length)
+            break;
+          else {
+            int start_pos = to - backwardLattice.get(i + 1);
+            Set<Integer> nts = main_constituents.get(start_pos);
+            if (nts == null) main_constituents.put(start_pos, new HashSet<Integer>());
+            main_constituents.get(start_pos).add(backwardLattice.get(i));
+          }
+        }
+        for (int i = backwardIndex.get(from); i < backwardIndex.get(from + 1); i += 2) {
+          Set<Integer> main_set = main_constituents.get(from - backwardLattice.get(i + 1));
+          if (main_set != null) {
+            for (int main : main_set)
+              labels.add(Vocabulary.id(adjustMarkup(Vocabulary.word(main) + "\\"
+                  + Vocabulary.word(backwardLattice.get(i)))));
+          }
+        }
+      } else {
+        // TODO: bothersome no-backwards-arrays method.
+      }
+    }
+    return labels;
+  }
+
+  @Override
+  public int[] getTerminals() {
+    return getTerminals(0, terminals.size());
+  }
+
+  @Override
+  public int[] getTerminals(int from, int to) {
+    int[] span = new int[to - from];
+    for (int i = from; i < to; i++)
+      span[i - from] = terminals.get(i);
+    return span;
+  }
+
+  public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
+    // TODO Auto-generated method stub
+  }
+
+  public void writeExternal(ObjectOutput out) throws IOException {
+    // TODO Auto-generated method stub
+  }
+
+  /**
+   * Reads Penn Treebank format file
+   * @param file_name the string path of the Penn Treebank file
+   * @throws IOException if the file does not exist
+   */
+  public void readExternalText(String file_name) throws IOException {
+    LineReader reader = new LineReader(file_name);
+    initialize();
+    for (String line : reader) {
+      if (line.trim().equals("")) continue;
+      appendFromPennFormat(line);
+    }
+  }
+
+  public void writeExternalText(String file_name) throws IOException {
+    // TODO Auto-generated method stub
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder();
+    for (int i = 0; i < forwardIndex.size(); i++)
+      sb.append("FI[" + i + "] =\t" + forwardIndex.get(i) + "\n");
+    sb.append("\n");
+    for (int i = 0; i < forwardLattice.size(); i += 2)
+      sb.append("F[" + i + "] =\t" + Vocabulary.word(forwardLattice.get(i)) + " , "
+          + forwardLattice.get(i + 1) + "\n");
+
+    sb.append("\n");
+    for (int i = 0; i < terminals.size(); i += 1)
+      sb.append("T[" + i + "] =\t" + Vocabulary.word(terminals.get(i)) + " , 1 \n");
+
+    if (this.useBackwardLattice) {
+      sb.append("\n");
+      for (int i = 0; i < backwardIndex.size(); i++)
+        sb.append("BI[" + i + "] =\t" + backwardIndex.get(i) + "\n");
+      sb.append("\n");
+      for (int i = 0; i < backwardLattice.size(); i += 2)
+        sb.append("B[" + i + "] =\t" + Vocabulary.word(backwardLattice.get(i)) + " , "
+            + backwardLattice.get(i + 1) + "\n");
+    }
+    return sb.toString();
+  }
+
+
+  private void initialize() {
+    forwardIndex = new ArrayList<Integer>();
+    forwardIndex.add(0);
+    forwardLattice = new ArrayList<Integer>();
+    if (this.useBackwardLattice) {
+      backwardIndex = new ArrayList<Integer>();
+      backwardIndex.add(0);
+      backwardLattice = new ArrayList<Integer>();
+    }
+
+    terminals = new ArrayList<Integer>();
+  }
+
+
+  // TODO: could make this way more efficient
+  private void appendFromPennFormat(String line) {
+    String[] tokens = line.replaceAll("\\(", " ( ").replaceAll("\\)", " ) ").trim().split("\\s+");
+
+    boolean next_nt = false;
+    int current_id = 0;
+    Stack<Integer> stack = new Stack<Integer>();
+
+    for (String token : tokens) {
+      if ("(".equals(token)) {
+        next_nt = true;
+        continue;
+      }
+      if (")".equals(token)) {
+        int closing_pos = stack.pop();
+        forwardLattice.set(closing_pos, forwardIndex.size() - forwardLattice.get(closing_pos));
+        if (this.useBackwardLattice) {
+          backwardLattice.add(forwardLattice.get(closing_pos - 1));
+          backwardLattice.add(forwardLattice.get(closing_pos));
+        }
+        continue;
+      }
+      if (next_nt) {
+        // get NT id
+        current_id = Vocabulary.id(adjustMarkup(token));
+        // add into lattice
+        forwardLattice.add(current_id);
+        // remember the index of the NT's span field on the stack (the field itself is
+        // added next, so recording forwardLattice.size() now saves a "- 1")
+        stack.push(forwardLattice.size());
+        // add NT span field
+        forwardLattice.add(forwardIndex.size());
+      } else {
+        current_id = Vocabulary.id(token);
+        terminals.add(current_id);
+
+        forwardIndex.add(forwardLattice.size());
+        if (this.useBackwardLattice) backwardIndex.add(backwardLattice.size());
+      }
+      next_nt = false;
+    }
+  }
+
+  private String adjustMarkup(String nt) {
+    return "[" + nt.replaceAll("[\\[\\]]", "") + "]";
+  }
+}
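
A minimal usage sketch for ArraySyntaxTree above (the parse string and span are
illustrative; Vocabulary.word() is assumed to return the bracketed nonterminal form):

    ArraySyntaxTree tree = new ArraySyntaxTree("(S (NP (DT the) (NN dog)) (VP (VB barks)))");
    // single constituents exactly covering terminal span [0, 2) -- here, the NP
    Collection<Integer> labels = tree.getConstituentLabels(0, 2);
    for (int id : labels)
      System.out.println(Vocabulary.word(id));   // prints "[NP]"
    int[] terminals = tree.getTerminals();       // symbol IDs for "the dog barks"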

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/corpus/syntax/SyntaxTree.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/corpus/syntax/SyntaxTree.java b/joshua-core/src/main/java/org/apache/joshua/corpus/syntax/SyntaxTree.java
new file mode 100644
index 0000000..6bb4c0b
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/corpus/syntax/SyntaxTree.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.corpus.syntax;
+
+import java.util.Collection;
+
+public interface SyntaxTree {
+
+  public Collection<Integer> getConstituentLabels(int from, int to);
+
+  public Collection<Integer> getConcatenatedLabels(int from, int to);
+
+  public Collection<Integer> getCcgLabels(int from, int to);
+
+  public int[] getTerminals();
+
+  public int[] getTerminals(int from, int to);
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/decoder/ArgsParser.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ArgsParser.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ArgsParser.java
new file mode 100644
index 0000000..5af6d11
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ArgsParser.java
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder;
+
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+
+import org.apache.joshua.util.io.LineReader;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * @author orluke
+ * 
+ */
+public class ArgsParser {
+
+  private static final Logger LOG = LoggerFactory.getLogger(ArgsParser.class);
+
+  private String configFile = null;
+
+  /**
+   * Parses the arguments passed on the command line when the JoshuaDecoder application was
+   * executed.
+   * 
+   * @param args string array of input arguments
+   * @param config the {@link org.apache.joshua.decoder.JoshuaConfiguration}
+   * @throws IOException if there is an error with the input arguments
+   */
+  public ArgsParser(String[] args, JoshuaConfiguration config) throws IOException {
+
+    /*
+     * Look for a verbose flag, -v.
+     * 
+     * Look for an argument to the "-config" flag to find the config file, if any. 
+     */
+    if (args.length >= 1) {
+      // Search for a verbose flag
+      for (int i = 0; i < args.length; i++) {
+        if (args[i].equals("-v")) {
+          Decoder.VERBOSE = Integer.parseInt(args[i + 1].trim());
+          config.setVerbosity(Decoder.VERBOSE);
+        }
+      
+        if (args[i].equals("-version")) {
+          LineReader reader = new LineReader(String.format("%s/VERSION", System.getenv("JOSHUA")));
+          reader.readLine();
+          String version = reader.readLine().split("\\s+")[2];
+          System.out.println(String.format("The Apache Joshua machine translator, version %s", version));
+          System.out.println("joshua.incubator.apache.org");
+          System.exit(0);
+
+        } else if (args[i].equals("-license")) {
+          try {
+            for (String line: Files.readAllLines(Paths.get(String.format("%s/../LICENSE", 
+                JoshuaConfiguration.class.getProtectionDomain().getCodeSource().getLocation().getPath())), 
+                Charset.defaultCharset())) {
+              System.out.println(line);
+            }
+          } catch (IOException e) {
+            throw new RuntimeException("FATAL: missing license file!", e);
+          }
+          System.exit(0);
+        }
+      }
+
+      // Search for the configuration file from the end (so as to take the last one)
+      for (int i = args.length-1; i >= 0; i--) {
+        if (args[i].equals("-c") || args[i].equals("-config")) {
+
+          setConfigFile(args[i + 1].trim());
+          try {
+            LOG.info("Parameters read from configuration file: {}", getConfigFile());
+            config.readConfigFile(getConfigFile());
+          } catch (IOException e) {
+            throw new RuntimeException(e);
+          }
+          break;
+        }
+      }
+
+      // Now process all the command-line args
+      config.processCommandLineOptions(args);
+    }
+  }
+
+  /**
+   * @return the configFile
+   */
+  public String getConfigFile() {
+    return configFile;
+  }
+
+  /**
+   * @param configFile the configFile to set
+   */
+  public void setConfigFile(String configFile) {
+    this.configFile = configFile;
+  }
+}
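
A sketch of how ArgsParser is typically driven (the argument values, including the
config file name, are illustrative):

    JoshuaConfiguration config = new JoshuaConfiguration();
    String[] args = { "-v", "1", "-c", "joshua.config" };
    ArgsParser cli = new ArgsParser(args, config);  // reads joshua.config, then applies flags
    System.out.println(cli.getConfigFile());        // -> joshua.config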

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/decoder/BLEU.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/BLEU.java b/joshua-core/src/main/java/org/apache/joshua/decoder/BLEU.java
new file mode 100644
index 0000000..8b51403
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/BLEU.java
@@ -0,0 +1,562 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.joshua.corpus.Vocabulary;
+import org.apache.joshua.decoder.ff.state_maintenance.NgramDPState;
+import org.apache.joshua.decoder.ff.tm.Rule;
+import org.apache.joshua.decoder.hypergraph.HyperEdge;
+import org.apache.joshua.util.Ngram;
+import org.apache.joshua.util.Regex;
+
+/**
+ * This class implements sentence-level BLEU with smoothing.
+ * 
+ * @author Zhifei Li, zhifei.work@gmail.com
+ */
+public class BLEU {
+  // do_ngram_clip: consider global n-gram clip
+
+  public static float computeSentenceBleu(String[] refSents, String hypSent) {
+    return computeSentenceBleu(refSents, hypSent, true, 4, false);
+  }
+
+  // ====================multiple references
+  /**
+   * Computes smoothed sentence-level BLEU against multiple references.
+   * 
+   * @param refSents the reference translations
+   * @param hypSent the hypothesis translation
+   * @param doNgramClip Should usually be true
+   * @param bleuOrder Should usually be 4
+   * @param useShortestRef Probably use false
+   * @return the smoothed sentence-level BLEU score
+   */
+  public static float computeSentenceBleu(String[] refSents, String hypSent, boolean doNgramClip,
+      int bleuOrder, boolean useShortestRef) {
+    // === ref tbl
+    HashMap<String, Integer> maxRefCountTbl = constructMaxRefCountTable(refSents, bleuOrder);
+
+    // == ref len
+    int[] refLens = new int[refSents.length];
+    for (int i = 0; i < refSents.length; i++) {
+      String[] refWords = Regex.spaces.split(refSents[i]);
+      refLens[i] = refWords.length;
+    }
+
+    float effectiveRefLen = computeEffectiveLen(refLens, useShortestRef);
+
+    // === hyp tbl
+    String[] hypWrds = Regex.spaces.split(hypSent);
+    HashMap<String, Integer> hypNgramTbl = new HashMap<String, Integer>();
+    Ngram.getNgrams(hypNgramTbl, 1, bleuOrder, hypWrds);
+    return computeSentenceBleu(effectiveRefLen, maxRefCountTbl, hypWrds.length, hypNgramTbl,
+        doNgramClip, bleuOrder);
+  }
+
+  public static float computeEffectiveLen(int[] refLens, boolean useShortestRef) {
+    if (useShortestRef) {
+      int res = Integer.MAX_VALUE;
+      for (int i = 0; i < refLens.length; i++)
+        if (refLens[i] < res)
+          res = refLens[i];
+      return res;
+    } else {// default is average length
+      float res = 0;
+      for (int i = 0; i < refLens.length; i++)
+        res += refLens[i];
+      return res * 1.0f / refLens.length;
+    }
+  }
+
+  /**
+   * Words in the ngrams use integer symbol IDs.
+   * @param refSents the reference translations
+   * @param bleuOrder the maximum ngram order
+   * @return a table mapping each reference ngram to its maximum count across the references
+   */
+  public static HashMap<String, Integer> constructMaxRefCountTable(String[] refSents, int bleuOrder) {
+
+    List<HashMap<String, Integer>> listRefNgramTbl = new ArrayList<HashMap<String, Integer>>();
+    for (int i = 0; i < refSents.length; i++) {
+      // if(refSents[i]==null){System.out.println("null ref sent"); System.exit(1);}
+      // String[] refWords = refSents[i].split("\\s+");
+      String[] refWords = Regex.spaces.split(refSents[i]);
+
+      HashMap<String, Integer> refNgramTbl = new HashMap<String, Integer>();
+      Ngram.getNgrams(refNgramTbl, 1, bleuOrder, refWords);
+      listRefNgramTbl.add(refNgramTbl);
+    }
+
+    return computeMaxRefCountTbl(listRefNgramTbl);
+  }
+
+  /**
+   * Computes max_ref_count for each ngram in the reference sentences.
+   * @param listRefNgramTbl the per-reference ngram count tables
+   * @return a merged table mapping each ngram to its maximum count across the references
+   */
+  public static HashMap<String, Integer> computeMaxRefCountTbl(
+      List<HashMap<String, Integer>> listRefNgramTbl) {
+
+    HashMap<String, Integer> merged = new HashMap<String, Integer>();
+
+    // == get merged key set
+    for (HashMap<String, Integer> tbl : listRefNgramTbl) {
+      for (String ngram : tbl.keySet()) {
+        merged.put(ngram, 0);
+      }
+    }
+
+    // == get max ref count
+    for (String ngram : merged.keySet()) {
+      int max = 0;
+      for (HashMap<String, Integer> tbl : listRefNgramTbl) {
+        Integer val = tbl.get(ngram);
+        if (val != null && val > max)
+          max = val;
+      }
+
+      merged.put(ngram, max);
+    }
+    return merged;
+  }
+
+  public static float computeSentenceBleu(float effectiveRefLen,
+      HashMap<String, Integer> maxRefCountTbl, int hypLen, HashMap<String, Integer> hypNgramTbl,
+      boolean doNgramClip, int bleuOrder) {
+
+    float resBleu = 0.0f;
+
+    int[] numNgramMatch = new int[bleuOrder];
+    for (Map.Entry<String, Integer> entry : hypNgramTbl.entrySet()) {// each ngram in hyp
+      String ngram = entry.getKey();
+      if (maxRefCountTbl.containsKey(ngram)) {
+        int hypNgramCount = entry.getValue();
+
+        int effectiveNumMatch = hypNgramCount;
+
+        if (doNgramClip) {// min{hypNgramCount, maxRefCount}
+          int maxRefCount = maxRefCountTbl.get(ngram);
+          effectiveNumMatch = (int) Support.findMin(hypNgramCount, maxRefCount); // ngram clip;
+        }
+
+        numNgramMatch[Regex.spaces.split(ngram).length - 1] += effectiveNumMatch;
+      }
+    }
+
+    resBleu = computeBleu(hypLen, effectiveRefLen, numNgramMatch, bleuOrder);
+    // System.out.println("hyp_len: " + hyp_sent.length + "; ref_len:" + ref_sent.length +
+    // "; bleu: " + res_bleu +" num_ngram_matches: " + num_ngram_match[0] + " " +num_ngram_match[1]+
+    // " " + num_ngram_match[2] + " " +num_ngram_match[3]);
+    // System.out.println("Blue is " + res_bleu);
+    return resBleu;
+  }
+
+  // ==============================multiple references end
+
+  public static float computeSentenceBleu(String refSent, String hypSent, boolean doNgramClip,
+      int bleuOrder) {
+    String[] refWrds = Regex.spaces.split(refSent);
+    String[] hypWrds = Regex.spaces.split(hypSent);
+    HashMap<String, Integer> refNgramTbl = new HashMap<String, Integer>();
+    Ngram.getNgrams(refNgramTbl, 1, bleuOrder, refWrds);
+    HashMap<String, Integer> hypNgramTbl = new HashMap<String, Integer>();
+    Ngram.getNgrams(hypNgramTbl, 1, bleuOrder, hypWrds);
+    return computeSentenceBleu(refWrds.length, refNgramTbl, hypWrds.length, hypNgramTbl,
+        doNgramClip, bleuOrder);
+  }
+
+  public static float computeSentenceBleu(int refLen, HashMap<String, Integer> refNgramTbl,
+      int hypLen, HashMap<String, Integer> hypNgramTbl, boolean doNgramClip, int bleuOrder) {
+    float resBleu = 0;
+
+    int[] numNgramMatch = new int[bleuOrder];
+    for (Map.Entry<String, Integer> entry : hypNgramTbl.entrySet()) {
+      String ngram = entry.getKey();
+      if (refNgramTbl.containsKey(ngram)) {
+        if (doNgramClip) {
+          numNgramMatch[Regex.spaces.split(ngram).length - 1] += Support.findMin(
+              refNgramTbl.get(ngram), entry.getValue()); // ngram clip
+        } else {
+          numNgramMatch[Regex.spaces.split(ngram).length - 1] += entry.getValue();// without ngram count clipping
+        }
+      }
+    }
+    resBleu = computeBleu(hypLen, refLen, numNgramMatch, bleuOrder);
+    // System.out.println("hyp_len: " + hyp_sent.length + "; ref_len:" + ref_sent.length +
+    // "; bleu: " + res_bleu +" num_ngram_matches: " + num_ngram_match[0] + " " +num_ngram_match[1]+
+    // " " + num_ngram_match[2] + " " +num_ngram_match[3]);
+    // System.out.println("Blue is " + res_bleu);
+    return resBleu;
+  }
+
+  // sentence-level BLEU: BLEU = bp * exp(sum over n of (1/bleuOrder) * log(prec[n]))
+  public static float computeBleu(int hypLen, float refLen, int[] numNgramMatch, int bleuOrder) {
+    if (hypLen <= 0 || refLen <= 0) {
+      throw new RuntimeException("error: ref or hyp is zero len");
+    }
+    float res = 0;
+    float wt = 1.0f / bleuOrder;
+    float prec = 0;
+    float smooth_factor = 1.0f;
+    for (int t = 0; t < bleuOrder && t < hypLen; t++) {
+      if (numNgramMatch[t] > 0) {
+        prec += wt * Math.log(numNgramMatch[t] * 1.0 / (hypLen - t));
+      } else {
+        smooth_factor *= 0.5;// TODO
+        prec += wt * Math.log(smooth_factor / (hypLen - t));
+      }
+    }
+    float bp = (hypLen >= refLen) ? 1.0f : (float) Math.exp(1 - refLen / hypLen);
+    res = bp * (float) Math.exp(prec);
+    // System.out.println("hyp_len: " + hyp_len + "; ref_len:" + ref_len + "prec: " + Math.exp(prec)
+    // + "; bp: " + bp + "; bleu: " + res);
+    return res;
+  }
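+
+  /*
+   * Worked example (a sketch): hypLen = 4, refLen = 4, numNgramMatch = {3, 2, 1, 1} gives
+   *   prec = exp(0.25 * (log(3/4) + log(2/3) + log(1/2) + log(1/1)))
+   *        = (3/4 * 2/3 * 1/2 * 1)^(1/4) = (1/4)^(1/4) ~= 0.707
+   * and bp = 1 since hypLen >= refLen, so BLEU ~= 0.707; the smoothing branch never fires
+   * because every order has at least one match.
+   */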
+
+  public static HashMap<String, Integer> constructNgramTable(String sentence, int bleuOrder) {
+    HashMap<String, Integer> ngramTable = new HashMap<String, Integer>();
+    String[] refWrds = Regex.spaces.split(sentence);
+    Ngram.getNgrams(ngramTable, 1, bleuOrder, refWrds);
+    return ngramTable;
+  }
+
+  // ================================ Google linear corpus gain
+  // ============================================
+  public static float computeLinearCorpusGain(float[] linearCorpusGainThetas, String[] refSents,
+      String hypSent) {
+    int bleuOrder = 4;
+    int hypLength = Regex.spaces.split(hypSent).length;
+    HashMap<String, Integer> referenceNgramTable = BLEU.constructMaxRefCountTable(refSents,
+        bleuOrder);
+    HashMap<String, Integer> hypNgramTable = BLEU.constructNgramTable(hypSent, bleuOrder);
+    return computeLinearCorpusGain(linearCorpusGainThetas, hypLength, hypNgramTable,
+        referenceNgramTable);
+  }
+
+  /**
+   * Speed consideration: assumes hypNgramTable is smaller than referenceNgramTable.
+   * @param linearCorpusGainThetas the linear corpus gain weights, indexed by ngram order
+   * @param hypLength the hypothesis length in words
+   * @param hypNgramTable ngram counts of the hypothesis
+   * @param referenceNgramTable ngram counts of the references
+   * @return the linear corpus gain
+   */
+  public static float computeLinearCorpusGain(float[] linearCorpusGainThetas, int hypLength,
+      Map<String, Integer> hypNgramTable, Map<String, Integer> referenceNgramTable) {
+    float res = 0;
+    res += linearCorpusGainThetas[0] * hypLength;
+    for (Entry<String, Integer> entry : hypNgramTable.entrySet()) {
+      String ngram = entry.getKey();
+      if (referenceNgramTable.containsKey(ngram)) {// delta function
+        int ngramOrder = Regex.spaces.split(ngram).length;
+        res += entry.getValue() * linearCorpusGainThetas[ngramOrder];
+      }
+    }
+    return res;
+  }
+
+  /* Convenience function */
+  public static int[] computeNgramMatches(String[] refSents, String hypSent) {
+    int bleuOrder = 4;
+    int hypLength = Regex.spaces.split(hypSent).length;
+    HashMap<String, Integer> referenceNgramTable = BLEU.constructMaxRefCountTable(refSents,
+        bleuOrder);
+    HashMap<String, Integer> hypNgramTable = BLEU.constructNgramTable(hypSent, bleuOrder);
+    return computeNgramMatches(hypLength, hypNgramTable, referenceNgramTable, bleuOrder);
+  }
+
+  public static int[] computeNgramMatches(int hypLength, Map<String, Integer> hypNgramTable,
+      Map<String, Integer> referenceNgramTable, int highestOrder) {
+    int[] res = new int[highestOrder + 1];
+    res[0] = hypLength;
+    for (Entry<String, Integer> entry : hypNgramTable.entrySet()) {
+      String ngram = entry.getKey();
+      if (referenceNgramTable.containsKey(ngram)) {// delta function
+        int ngramOrder = Regex.spaces.split(ngram).length;
+        res[ngramOrder] += entry.getValue();
+      }
+    }
+
+    /*
+    System.err.print("NGRAMS:");
+    for (String ngram: hypNgramTable.keySet())
+      System.err.print(" | " + ngram);
+    System.err.println();
+    System.err.print("REF:");
+    for (String ngram: referenceNgramTable.keySet())
+      System.err.print(" | " + ngram);
+    System.err.println();
+    System.err.print("COUNTS:");
+    for (int i = 1; i <= 4; i++)
+      System.err.print(" " + res[i]);
+    System.err.println();
+    */
+
+    return res;
+  }
+
+  static public float[] computeLinearCorpusThetas(int numUnigramTokens, float unigramPrecision,
+      float decayRatio) {
+    float[] res = new float[5];
+    res[0] = -1.0f / numUnigramTokens;
+    for (int i = 1; i < 5; i++)
+      res[i] = (1.0f / (4.0f * numUnigramTokens * unigramPrecision * (float) Math.pow(decayRatio,
+          i - 1)));
+
+    float firstWeight = res[0];
+    for (int i = 0; i < 5; i++)
+      res[i] /= Math.abs(firstWeight);// normalize by first one
+
+    System.out.print("Normalized Thetas are: ");
+    for (int i = 0; i < 5; i++)
+      System.out.print(res[i] + " ");
+    System.out.print("\n");
+
+    return res;
+  }
+
+  public static final int maxOrder = 4;
+
+  /**
+   * Computes BLEU statistics incurred by a rule. This is (a) all ngrams (n &lt;= 4) for terminal rules
+   * and (b) all ngrams overlying boundary points between terminals in the rule and ngram state from
+   * tail nodes.
+   * 
+   * There are four cases to handle:
+   * <ul>
+   * <li>only words
+   * <li>a number of words followed by a nonterminal (left context of the tail node)
+   * <li>a nonterminal (right context of tail node) followed by one or more words
+   * <li>two nonterminals (right context of tail node 1, left context of tail node 2)
+   * </ul>
+   * 
+   * Of these, all but the first have a boundary point to consider.
+   * 
+   * @param edge the hyperedge whose rule is being scored
+   * @param spanPct the fraction of the total reference length covered by this span
+   * @param references the references to compute statistics against
+   * @return the accumulated BLEU statistics for the edge
+   */
+  public static Stats compute(HyperEdge edge, float spanPct, References references) {
+    Stats stats = new Stats();
+    // TODO: this should not be the span width, but the real ref scaled to the span percentage
+    stats.reflen = (int) (spanPct * references.reflen);
+
+    Rule rule = edge.getRule();
+    if (rule != null) {
+      int[] symbols = rule.getEnglish();
+
+//      System.err.println(String.format("compute(%s)", rule));
+      
+      ArrayList<Integer> currentNgram = new ArrayList<Integer>();
+      int boundary = -1;
+      int tailIndex = -1;
+      for (int i = 0; i < symbols.length; i++) {
+        if (symbols[i] < 0) {
+          tailIndex++;
+
+          NgramDPState ngramState = null;
+          try {
+            ngramState = (NgramDPState) edge.getTailNodes().get(tailIndex).getDPState(0);
+          } catch (ClassCastException e) {
+            throw new RuntimeException(String.format(
+                "* FATAL: first state needs to be NgramDPState (found %s)", edge.getTailNodes()
+                    .get(tailIndex).getDPState(0).getClass()));
+          }
+          
+          // Compute ngrams overlapping with left context of tail node
+          if (currentNgram.size() > 0) {
+            boundary = currentNgram.size();
+            for (int id : ngramState.getLeftLMStateWords())
+              currentNgram.add(id);
+
+            // Compute the BLEU statistics
+            BLEU.Stats partStats = computeOverDivide(currentNgram, references, boundary);
+            stats.add(partStats);
+            
+//            System.err.println("    " + Vocabulary.getWords(ngramState.getLeftLMStateWords()));
+
+            currentNgram.clear();
+          }
+          
+//          System.err.println("    " + Vocabulary.getWords(ngramState.getRightLMStateWords()));
+
+          // Accumulate ngrams from right context of tail node
+          for (int id : ngramState.getRightLMStateWords())
+            currentNgram.add(id);
+
+          boundary = currentNgram.size();
+
+        } else { // terminal symbol
+          currentNgram.add(symbols[i]);
+          stats.len++;
+
+//          System.err.println("    " + Vocabulary.word(symbols[i]));
+          
+          if (boundary != -1) {
+            BLEU.Stats partStats = computeOverDivide(currentNgram, references, boundary);
+            stats.add(partStats);
+
+            // Shift off the context from the nonterminal's righthand side
+            for (int j = 0; j < boundary; j++)
+              currentNgram.remove(0);
+            boundary = -1;
+          }
+        }
+
+        /*
+         * At the end, we might have (a) nothing, (b) a sequence of words from a nonterminal's
+         * righthand side, (c) a sequence of words from the rule, or (d) a sequence of words from a
+         * nonterminal's righthand context and from the rule
+         */
+        if (currentNgram.size() > 0 && currentNgram.size() != boundary) { // skip cases (a) and (b)
+          BLEU.Stats partStats = computeOverDivide(currentNgram, references, boundary);
+          stats.add(partStats);
+        }
+      }
+    }
+    return stats;
+  }
+
+  /**
+   * When computing BLEU statistics over a rule, we need to avoid adding in ngrams that are
+   * exclusively contained inside tail nodes. This function accumulates all the eligible ngrams from
+   * a string respective of an optional boundary point, and then calls computeNgramMatches().
+   * 
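+   * For example, with ngram = [a, b, c] and boundary = 1, only "a b" and "a b c" straddle the
+   * boundary and are counted; "b c" lies entirely to the right of it and is skipped.
+   * 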
+   * @param ngram the current set of ngrams
+   * @param references contains the set of ngrams to compare against
+   * @param boundary the boundary over which all ngrams must fall (-1 means ignore boundary)
+   * @return the BLEU statistics accumulated over the eligible ngrams
+   */
+  private static Stats computeOverDivide(ArrayList<Integer> ngram, References references,
+      int boundary) {
+    
+//    System.err.print(String.format("      BOUNDARY(%s, %d)", Vocabulary.getWords(ngram), boundary));
+
+    HashMap<String, Integer> boundaryNgrams = new HashMap<String, Integer>();
+    for (int width = 1; width <= Math.min(maxOrder, ngram.size()); width++) {
+      for (int i = 0; i < ngram.size() - width + 1; i++) {
+        int j = i + width;
+
+        final List<Integer> piece = ngram.subList(i, j);
+        if (boundary == -1 || (boundary > i && boundary < j)) {
+          String ngramStr = Vocabulary.getWords(piece);
+          if (!boundaryNgrams.containsKey(ngramStr))
+            boundaryNgrams.put(ngramStr, 1);
+          else
+            // increment the running count for this ngram
+            boundaryNgrams.put(ngramStr, boundaryNgrams.get(ngramStr) + 1);
+        }
+      }
+    }
+    
+    /*
+    System.err.print(" FOUND");
+    for (String phr: boundaryNgrams.keySet())
+      System.err.print(" | " + phr);
+    System.err.println();
+    */
+
+    BLEU.Stats result = new BLEU.Stats();
+    int[] stats = BLEU.computeNgramMatches(0, boundaryNgrams, references.ngramCounts, maxOrder);
+    System.arraycopy(stats, 1, result.counts, 0, maxOrder);
+
+    return result;
+  }
+
+  public static class References {
+    HashMap<String, Integer> ngramCounts;
+    float reflen;
+
+    public References(String reference) {
+      String[] refs = new String[1];
+      refs[0] = reference;
+      fill(refs);
+    }
+
+    public References(String[] references) {
+      fill(references);
+    }
+
+    private void fill(String[] references) {
+      ngramCounts = new HashMap<String, Integer>();
+      reflen = 0.0f;
+      for (int i = 0; i < references.length; i++) {
+        String[] ref = references[i].split(" ");
+        Ngram.getNgrams(ngramCounts, 1, maxOrder, ref);
+        reflen += ref.length;
+      }
+      reflen /= references.length;
+    }
+  }
+
+  public static float score(Stats stats) {
+    float score = 0f;
+    float wt = 1.0f / maxOrder;
+    float prec = 0;
+    float smooth_factor = 1.0f;
+    for (int t = 0; t < maxOrder && t < stats.len; t++) {
+      if (stats.counts[t] > 0) {
+        prec += wt * Math.log(stats.counts[t] * 1.0 / (stats.len - t));
+      } else {
+        smooth_factor *= 0.5;// TODO
+        prec += wt * Math.log(smooth_factor / (stats.len - t));
+      }
+    }
+    float bp = (stats.len >= stats.reflen) ? 1.0f : (float) Math.exp(1 - stats.reflen / stats.len);
+    score = bp * (float) Math.exp(prec);
+    
+//    System.err.println(String.format("BLEU(%d %d %d %d / BP=%f) = %f", stats.counts[0], stats.counts[1], stats.counts[2], stats.counts[3], bp, score));
+    return score;
+  }
+
+  /**
+   * Accumulated sufficient statistics for computing BLEU.
+   */
+  public static class Stats {
+    public int[] counts;
+    public float len;
+    public float reflen;
+
+    public Stats() {
+      counts = new int[4];
+      len = 0.0f;
+      reflen = 0.0f;
+    }
+
+    public Stats(int[] counts, float len, float reflen) {
+      this.counts = counts;
+      this.len = len;
+      this.reflen = reflen;
+    }
+
+    public void add(Stats otherStats) {
+      for (int i = 0; i < counts.length; i++)
+        counts[i] += otherStats.counts[i];
+      
+      len += otherStats.len;
+    }
+  }
+}
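
A quick sentence-level sketch of the BLEU API above (the sentences are illustrative):

    String[] refs = { "the dog barks", "a dog barks" };
    String hyp = "the dog barks";
    // smoothed 4-gram BLEU with clipping, averaged reference length -> 1.0 here
    float bleu = BLEU.computeSentenceBleu(refs, hyp);
    // or gather raw statistics: matches[0] is the hypothesis length, matches[n] the
    // number of order-n hypothesis ngrams found in the references
    int[] matches = BLEU.computeNgramMatches(refs, hyp);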

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/decoder/Decoder.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/Decoder.java b/joshua-core/src/main/java/org/apache/joshua/decoder/Decoder.java
new file mode 100644
index 0000000..097ce59
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/Decoder.java
@@ -0,0 +1,813 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder;
+
+import static org.apache.joshua.decoder.ff.FeatureVector.DENSE_FEATURE_NAMES;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.IOException;
+import java.io.FileNotFoundException;
+import java.lang.reflect.Constructor;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.concurrent.ArrayBlockingQueue;
+import java.util.concurrent.BlockingQueue;
+
+import com.google.common.base.Strings;
+
+import org.apache.joshua.corpus.Vocabulary;
+import org.apache.joshua.decoder.ff.FeatureVector;
+import org.apache.joshua.decoder.ff.FeatureFunction;
+import org.apache.joshua.decoder.ff.PhraseModel;
+import org.apache.joshua.decoder.ff.StatefulFF;
+import org.apache.joshua.decoder.ff.lm.LanguageModelFF;
+import org.apache.joshua.decoder.ff.tm.Grammar;
+import org.apache.joshua.decoder.ff.tm.Rule;
+import org.apache.joshua.decoder.ff.tm.format.HieroFormatReader;
+import org.apache.joshua.decoder.ff.tm.hash_based.MemoryBasedBatchGrammar;
+import org.apache.joshua.decoder.ff.tm.packed.PackedGrammar;
+import org.apache.joshua.decoder.io.TranslationRequestStream;
+import org.apache.joshua.decoder.phrase.PhraseTable;
+import org.apache.joshua.decoder.segment_file.Sentence;
+import org.apache.joshua.util.FileUtility;
+import org.apache.joshua.util.FormatUtils;
+import org.apache.joshua.util.Regex;
+import org.apache.joshua.util.io.LineReader;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * This class handles decoder initialization and the complication introduced by multithreading.
+ *
+ * After initialization, the main entry point to the Decoder object is
+ * decodeAll(TranslationRequest), which returns a set of Translation objects wrapped in an iterable
+ * Translations object. It is important that we support multithreading both (a) across the sentences
+ * within a request and (b) across requests, in a round-robin fashion. This is done by maintaining a
+ * fixed sized concurrent thread pool. When a new request comes in, a RequestParallelizer thread is
+ * launched. This object iterates over the request's sentences, obtaining a thread from the
+ * thread pool, and using that thread to decode the sentence. If a decoding thread is not available,
+ * it will block until one is in a fair (FIFO) manner. RequestParallelizer thereby permits intra-request
+ * parallelization by separating out reading the input stream from processing the translated sentences,
+ * but also ensures that round-robin parallelization occurs, since RequestParallelizer uses the
+ * thread pool before translating each request.
+ *
+ * A decoding thread is handled by DecoderThread and launched from DecoderThreadRunner. The purpose
+ * of the runner is to record where to place the translated sentence when it is done (i.e., which
+ * Translations object). Translations itself is an iterator whose next() call blocks until the next
+ * translation is available.
+ *
+ * @author Matt Post post@cs.jhu.edu
+ * @author Zhifei Li, zhifei.work@gmail.com
+ * @author wren ng thornton wren@users.sourceforge.net
+ * @author Lane Schwartz dowobeha@users.sourceforge.net
+ */
+public class Decoder {
+
+  private static final Logger LOG = LoggerFactory.getLogger(Decoder.class);
+
+  private final JoshuaConfiguration joshuaConfiguration;
+
+  public JoshuaConfiguration getJoshuaConfiguration() {
+    return joshuaConfiguration;
+  }
+
+  /*
+   * Many of these objects themselves are global objects. We pass them in when constructing other
+   * objects, so that they all share pointers to the same object. This is good because it reduces
+   * overhead, but it can be problematic because of unseen dependencies (for example, in the
+   * Vocabulary shared by language model, translation grammar, etc).
+   */
+  private List<Grammar> grammars;
+  private ArrayList<FeatureFunction> featureFunctions;
+  private Grammar customPhraseTable;
+
+  /* The feature weights. */
+  public static FeatureVector weights;
+
+  public static int VERBOSE = 1;
+
+  private BlockingQueue<DecoderThread> threadPool = null;
+
+  // ===============================================================
+  // Constructors
+  // ===============================================================
+
+  /**
+   * Constructor method that creates a new decoder using the specified configuration file.
+   *
+   * @param joshuaConfiguration a populated {@link org.apache.joshua.decoder.JoshuaConfiguration}
+   * @param configFile name of configuration file.
+   */
+  public Decoder(JoshuaConfiguration joshuaConfiguration, String configFile) {
+    this(joshuaConfiguration);
+    this.initialize(configFile);
+  }
+
+  /**
+   * Factory method that creates a new decoder using the specified configuration file.
+   *
+   * @param configFile Name of configuration file.
+   * @return a configured {@link org.apache.joshua.decoder.Decoder}
+   */
+  public static Decoder createDecoder(String configFile) {
+    JoshuaConfiguration joshuaConfiguration = new JoshuaConfiguration();
+    return new Decoder(joshuaConfiguration, configFile);
+  }
+
+  /**
+   * Constructs an uninitialized decoder for use in testing.
+   * <p>
+   * This method is private because it should only ever be called by the
+   * {@link #getUninitalizedDecoder()} method to provide an uninitialized decoder for use in
+   * testing.
+   */
+  private Decoder(JoshuaConfiguration joshuaConfiguration) {
+    this.joshuaConfiguration = joshuaConfiguration;
+    this.grammars = new ArrayList<Grammar>();
+    this.threadPool = new ArrayBlockingQueue<DecoderThread>(
+        this.joshuaConfiguration.num_parallel_decoders, true);
+    this.customPhraseTable = null;
+  }
+
+  /**
+   * Gets an uninitialized decoder for use in testing.
+   * <p>
+   * This method is called by unit tests or any outside packages (e.g., MERT) relying on the
+   * decoder.
+   * @param joshuaConfiguration a {@link org.apache.joshua.decoder.JoshuaConfiguration} object
+   * @return an uninitialized decoder for use in testing
+   */
+  static public Decoder getUninitalizedDecoder(JoshuaConfiguration joshuaConfiguration) {
+    return new Decoder(joshuaConfiguration);
+  }
+
+  // ===============================================================
+  // Public Methods
+  // ===============================================================
+
+  /**
+   * This class is responsible for getting sentences from the TranslationRequest and procuring a
+   * DecoderThreadRunner to translate them. Each call to decodeAll(TranslationRequest) launches a
+   * thread that will read the request's sentences, obtain a DecoderThread to translate them, and
+   * then place the Translation in the appropriate place.
+   *
+   * @author Matt Post <po...@cs.jhu.edu>
+   *
+   */
+  private class RequestParallelizer extends Thread {
+    /* Source of sentences to translate. */
+    private final TranslationRequestStream request;
+
+    /* Where to put translated sentences. */
+    private final Translations response;
+
+    RequestParallelizer(TranslationRequestStream request, Translations response) {
+      this.request = request;
+      this.response = response;
+    }
+
+    @Override
+    public void run() {
+      /*
+       * Repeatedly get an input sentence, wait for a DecoderThread, and then start a new thread to
+       * translate the sentence. We start a new thread (via DecoderThreadRunner) as opposed to
+       * blocking, so that the RequestHandler can go on to the next sentence in this request, which
+       * allows parallelization across the sentences of the request.
+       */
+      for (;;) {
+        Sentence sentence = request.next();
+
+        if (sentence == null) {
+          response.finish();
+          break;
+        }
+
+        // This will block until a DecoderThread becomes available.
+        DecoderThread thread = Decoder.this.getThread();
+        new DecoderThreadRunner(thread, sentence, response).start();
+      }
+    }
+
+    /**
+     * Strips the nonterminals from the lefthand side of the rule.
+     *
+     * @param rule
+     * @return
+     */
+    private String formatRule(Rule rule) {
+      String ruleString = "";
+      boolean first = true;
+      for (int word: rule.getFrench()) {
+        if (!first)
+          ruleString += " " + Vocabulary.word(word);
+        first = false;
+      }
+
+      ruleString += " |||"; // space will get added with first English word
+      first = true;
+      for (int word: rule.getEnglish()) {
+        if (!first)
+          ruleString += " " + Vocabulary.word(word);
+        first = false;
+      }
+
+      // strip off the leading space
+      return ruleString.substring(1);
+    }
+  }
+
+  /**
+   * Retrieve a thread from the thread pool, blocking until one is available. The blocking occurs in
+   * a fair fashion (i.e,. FIFO across requests).
+   *
+   * @return a thread that can be used for decoding.
+   */
+  public DecoderThread getThread() {
+    try {
+      return threadPool.take();
+    } catch (InterruptedException e) {
+      // TODO Auto-generated catch block
+      e.printStackTrace();
+    }
+    return null;
+  }
+
+  /**
+   * This class handles running a DecoderThread (which takes care of the actual translation of an
+   * input Sentence, returning a Translation object when it's done). This is done in a thread so as
+   * not to tie up the RequestHandler that launched it, freeing it to go on to the next sentence in
+   * the TranslationRequest, in turn permitting parallelization across the sentences of a request.
+   *
+   * When the decoder thread is finished, the Translation object is placed in the correct place in
+   * the corresponding Translations object that was returned to the caller of
+   * Decoder.decodeAll(TranslationRequest).
+   *
+   * @author Matt Post <po...@cs.jhu.edu>
+   */
+  private class DecoderThreadRunner extends Thread {
+
+    private final DecoderThread decoderThread;
+    private final Sentence sentence;
+    private final Translations translations;
+
+    DecoderThreadRunner(DecoderThread thread, Sentence sentence, Translations translations) {
+      this.decoderThread = thread;
+      this.sentence = sentence;
+      this.translations = translations;
+    }
+
+    @Override
+    public void run() {
+      /*
+       * Process any found metadata.
+       */
+      
+      /*
+       * Use the thread to translate the sentence. Then record the translation with the
+       * corresponding Translations object, and return the thread to the pool.
+       */
+      try {
+        Translation translation = decoderThread.translate(this.sentence);
+        translations.record(translation);
+
+        /*
+         * This is crucial! It's what makes the thread available for the next sentence to be
+         * translated.
+         */
+        threadPool.put(decoderThread);
+      } catch (Exception e) {
+        throw new RuntimeException(String.format(
+            "Input %d: FATAL UNCAUGHT EXCEPTION: %s", sentence.id(), e.getMessage()), e);
+        //        translations.record(new Translation(sentence, null, featureFunctions, joshuaConfiguration));
+      }
+    }
+  }
+
+  /**
+   * This function is the main entry point into the decoder. It translates all the sentences in a
+   * (possibly boundless) set of input sentences. Each request launches its own thread to read the
+   * sentences of the request.
+   *
+   * @param request the populated {@link org.apache.joshua.decoder.io.TranslationRequestStream}
+   * @throws IOException if there is an error with the input stream or writing the output
+   * @return an iterable, asynchronously-filled list of Translations
+   */
+  public Translations decodeAll(TranslationRequestStream request) throws IOException {
+    Translations translations = new Translations(request);
+
+    /* Start a thread to handle requests on the input stream */
+    new RequestParallelizer(request, translations).start();
+
+    return translations;
+  }
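+
+  /*
+   * Typical driver loop (a sketch; construction of the TranslationRequestStream is elided):
+   *
+   *   Decoder decoder = Decoder.createDecoder("joshua.config");
+   *   Translations translations = decoder.decodeAll(request);
+   *   for (Translation t = translations.next(); t != null; t = translations.next())
+   *     System.out.println(t);
+   *   decoder.cleanUp();
+   */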
+
+
+  /**
+   * We can also just decode a single sentence.
+   *
+   * @param sentence {@link org.apache.joshua.lattice.Lattice} input
+   * @return the sentence {@link org.apache.joshua.decoder.Translation}
+   */
+  public Translation decode(Sentence sentence) {
+    // Get a thread.
+
+    try {
+      DecoderThread thread = threadPool.take();
+      Translation translation = thread.translate(sentence);
+      threadPool.put(thread);
+
+      return translation;
+
+    } catch (InterruptedException e) {
+      // Restore the interrupt flag; the caller sees a null translation.
+      Thread.currentThread().interrupt();
+      LOG.warn("Input {}: interrupted while waiting for a decoder thread", sentence.id(), e);
+    }
+
+    return null;
+  }
+
+  /**
+   * Clean shutdown of Decoder, resetting all
+   * static variables, such that any other instance of Decoder
+   * afterwards gets a fresh start.
+   */
+  public void cleanUp() {
+    // shut down DecoderThreads
+    for (DecoderThread thread : threadPool) {
+      try {
+        thread.join();
+      } catch (InterruptedException e) {
+        e.printStackTrace();
+      }
+    }
+    resetGlobalState();
+  }
+
+  public static void resetGlobalState() {
+    // clear/reset static variables
+    DENSE_FEATURE_NAMES.clear();
+    Vocabulary.clear();
+    Vocabulary.unregisterLanguageModels();
+    LanguageModelFF.resetLmIndex();
+    StatefulFF.resetGlobalStateIndex();
+  }
+
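+  /**
+   * Rewrites a config file, replacing the weight on each model line with the corresponding value
+   * from {@code newWeights}. Comment lines, empty lines, and "key=value" parameter lines are
+   * copied through unchanged; on every other line the last field must be numeric and is treated
+   * as the weight. A sketch of a (hypothetical) model line and its rewrite:
+   *
+   * <pre>
+   *   lm 5 lm.gz 12.3   becomes   lm 5 lm.gz 0.5
+   * </pre>
+   *
+   * @param newWeights the new weight values, one per model line, or null to keep existing weights
+   * @param template path to the template config file
+   * @param outputFile path of the config file to write
+   * @param newDiscriminativeModel if non-null, replaces the file name on a "discriminative" line
+   */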
+  public static void writeConfigFile(double[] newWeights, String template, String outputFile,
+      String newDiscriminativeModel) {
+    try {
+      int columnID = 0;
+
+      BufferedWriter writer = FileUtility.getWriteFileStream(outputFile);
+      LineReader reader = new LineReader(template);
+      try {
+        for (String line : reader) {
+          line = line.trim();
+          if (Regex.commentOrEmptyLine.matches(line) || line.indexOf("=") != -1) {
+            // comment, empty line, or parameter lines: just copy
+            writer.write(line);
+            writer.newLine();
+
+          } else { // models: replace the weight
+            String[] fds = Regex.spaces.split(line);
+            StringBuilder newSent = new StringBuilder();
+            if (!Regex.floatingNumber.matches(fds[fds.length - 1])) {
+              throw new IllegalArgumentException("last field is not a number; the field is: "
+                  + fds[fds.length - 1]);
+            }
+
+            if (newDiscriminativeModel != null && "discriminative".equals(fds[0])) {
+              newSent.append(fds[0]).append(' ');
+              newSent.append(newDiscriminativeModel).append(' ');// change the
+              // file name
+              for (int i = 2; i < fds.length - 1; i++) {
+                newSent.append(fds[i]).append(' ');
+              }
+            } else {// regular
+              for (int i = 0; i < fds.length - 1; i++) {
+                newSent.append(fds[i]).append(' ');
+              }
+            }
+            if (newWeights != null)
+              newSent.append(newWeights[columnID++]);// change the weight
+            else
+              newSent.append(fds[fds.length - 1]);// do not change
+
+            writer.write(newSent.toString());
+            writer.newLine();
+          }
+        }
+      } finally {
+        reader.close();
+        writer.close();
+      }
+
+      if (newWeights != null && columnID != newWeights.length) {
+        throw new IllegalArgumentException("number of models does not match number of weights");
+      }
+
+    } catch (IOException e) {
+      LOG.error("Error writing config file '{}'", outputFile, e);
+    }
+  }
+
+  // ===============================================================
+  // Initialization Methods
+  // ===============================================================
+
+  /**
+   * Moses requires the pattern .*_.* for sparse features and prohibits underscores in dense
+   * features. This method converts feature names to conform to that pattern. We assume that
+   * non-conforming dense features start with tm_ or lm_, and that the only sparse feature needing
+   * conversion is OOVPenalty.
+   *
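+   * For example, (hypothetical) dense feature names would be rewritten as follows:
+   *
+   * <pre>
+   *   tm_pt_0  becomes  tm-pt-0
+   *   lm_0     becomes  lm-0
+   * </pre>
+   *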
+   * @param feature the feature name to convert
+   * @return the feature name in Moses format
+   */
+  private String mosesize(String feature) {
+    if (joshuaConfiguration.moses) {
+      if (feature.startsWith("tm_") || feature.startsWith("lm_"))
+        return feature.replace("_", "-");
+    }
+
+    return feature;
+  }
+
+  /**
+   * Initialize all parts of the JoshuaDecoder.
+   *
+   * @param configFile File containing configuration options
+   * @return An initialized decoder
+   */
+  public Decoder initialize(String configFile) {
+    try {
+
+      long pre_load_time = System.currentTimeMillis();
+
+      /* Weights can be listed in a separate file (denoted by parameter "weights-file") or directly
+       * in the Joshua config file. Config file values take precedence.
+       */
+      this.readWeights(joshuaConfiguration.weights_file);
+      
+      
+      /* Add command-line-passed weights to the weights array for processing below */
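+      /* A (hypothetical) example: weight-overwrite = "lm_0 4.0 tm_pt_0 0.5" */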
+      if (!Strings.isNullOrEmpty(joshuaConfiguration.weight_overwrite)) {
+        String[] tokens = joshuaConfiguration.weight_overwrite.split("\\s+");
+        for (int i = 0; i < tokens.length; i += 2) {
+          String feature = tokens[i];
+          float value = Float.parseFloat(tokens[i+1]);
+
+          if (joshuaConfiguration.moses)
+            feature = demoses(feature);
+
+          joshuaConfiguration.weights.add(String.format("%s %s", feature, tokens[i+1]));
+          LOG.info("COMMAND LINE WEIGHT: {} -> {}", feature, value);
+        }
+      }
+
+      /* Read the weights found in the config file */
+      for (String pairStr: joshuaConfiguration.weights) {
+        String[] pair = pairStr.split("\\s+");
+
+        /* Sanity check for old-style unsupported feature invocations. */
+        if (pair.length != 2) {
+          StringBuilder errMsg = new StringBuilder();
+          errMsg.append("FATAL: Invalid feature weight line found in config file.\n");
+          errMsg.append(String.format("The line was '%s'\n", pairStr));
+          errMsg.append("You might be using an old version of the config file that is no longer supported\n");
+          errMsg.append("Check joshua-decoder.org or email joshua_support@googlegroups.com for help\n");
+          errMsg.append("Code = " + 17);
+          throw new RuntimeException(errMsg.toString());
+        }
+
+        weights.set(pair[0], Float.parseFloat(pair[1]));
+      }
+
+      LOG.info("Read {} weights ({} of them dense)", weights.size(), DENSE_FEATURE_NAMES.size());
+
+      // Do this before loading the grammars and the LM.
+      this.featureFunctions = new ArrayList<FeatureFunction>();
+
+      // Initialize and load grammars. This must happen first, since the vocab gets defined by
+      // the packed grammar (if any)
+      this.initializeTranslationGrammars();
+      LOG.info("Grammar loading took: {} seconds.",
+          (System.currentTimeMillis() - pre_load_time) / 1000);
+
+      // Initialize the features: requires that LM model has been initialized.
+      this.initializeFeatureFunctions();
+
+      // This is mostly for compatibility with the Moses tuning script
+      if (joshuaConfiguration.show_weights_and_quit) {
+        for (int i = 0; i < DENSE_FEATURE_NAMES.size(); i++) {
+          String name = DENSE_FEATURE_NAMES.get(i);
+          if (joshuaConfiguration.moses)
+            System.out.println(String.format("%s= %.5f", mosesize(name), weights.getDense(i)));
+          else
+            System.out.println(String.format("%s %.5f", name, weights.getDense(i)));
+        }
+        System.exit(0);
+      }
+
+      // Sort the TM grammars (needed to do cube pruning)
+      if (joshuaConfiguration.amortized_sorting) {
+        LOG.info("Grammar sorting happening lazily on-demand.");
+      } else {
+        long pre_sort_time = System.currentTimeMillis();
+        for (Grammar grammar : this.grammars) {
+          grammar.sortGrammar(this.featureFunctions);
+        }
+        LOG.info("Grammar sorting took {} seconds.",
+            (System.currentTimeMillis() - pre_sort_time) / 1000);
+      }
+
+      // Create the threads
+      for (int i = 0; i < joshuaConfiguration.num_parallel_decoders; i++) {
+        this.threadPool.put(new DecoderThread(this.grammars, Decoder.weights,
+            this.featureFunctions, joshuaConfiguration));
+      }
+    } catch (IOException | InterruptedException e) {
+      LOG.warn(e.getMessage(), e);
+    }
+
+    return this;
+  }
+
+  /**
+   * Initializes the translation grammars. (Retained for backward compatibility.)
+   *
+   * @throws IOException if a grammar file cannot be read
+   */
+  private void initializeTranslationGrammars() throws IOException {
+
+    if (joshuaConfiguration.tms.size() > 0) {
+
+      // collect packedGrammars to check if they use a shared vocabulary
+      final List<PackedGrammar> packed_grammars = new ArrayList<>();
+
+      // tm lines have the form: TYPE -owner OWNER -maxspan LIMIT -path FILE,
+      // where TYPE is one of {thrax/hiero, packed, samt, moses, phrase}
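+      // e.g. (hypothetical): tm = thrax -owner pt -maxspan 20 -path /path/to/grammar.gz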
+      for (String tmLine : joshuaConfiguration.tms) {
+
+        String type = tmLine.substring(0, tmLine.indexOf(' '));
+        String[] args = tmLine.substring(tmLine.indexOf(' ')).trim().split("\\s+");
+        HashMap<String, String> parsedArgs = FeatureFunction.parseArgs(args);
+
+        String owner = parsedArgs.get("owner");
+        int span_limit = Integer.parseInt(parsedArgs.get("maxspan"));
+        String path = parsedArgs.get("path");
+
+        Grammar grammar = null;
+        if (! type.equals("moses") && ! type.equals("phrase")) {
+          if (new File(path).isDirectory()) {
+            try {
+              PackedGrammar packed_grammar = new PackedGrammar(path, span_limit, owner, type, joshuaConfiguration);
+              packed_grammars.add(packed_grammar);
+              grammar = packed_grammar;
+            } catch (FileNotFoundException e) {
+              String msg = String.format("Couldn't load packed grammar from '%s'. ", path)
+                  + "Perhaps it doesn't exist, or it may be an old packed file format.";
+              throw new RuntimeException(msg, e);
+            }
+          } else {
+            // thrax, hiero, samt
+            grammar = new MemoryBasedBatchGrammar(type, path, owner,
+                joshuaConfiguration.default_non_terminal, span_limit, joshuaConfiguration);
+          }
+
+        } else {
+
+          int maxSourceLen = parsedArgs.containsKey("max-source-len")
+              ? Integer.parseInt(parsedArgs.get("max-source-len"))
+              : -1;
+
+          joshuaConfiguration.search_algorithm = "stack";
+          grammar = new PhraseTable(path, owner, type, joshuaConfiguration);
+        }
+
+        this.grammars.add(grammar);
+      }
+
+      checkSharedVocabularyChecksumsForPackedGrammars(packed_grammars);
+
+    } else {
+      LOG.warn("no grammars supplied!  Supplying dummy glue grammar.");
+      MemoryBasedBatchGrammar glueGrammar = new MemoryBasedBatchGrammar("glue", joshuaConfiguration);
+      glueGrammar.setSpanLimit(-1);
+      glueGrammar.addGlueRules(featureFunctions);
+      this.grammars.add(glueGrammar);
+    }
+    
+    /* Add the grammar for custom entries */
+    if (joshuaConfiguration.search_algorithm.equals("stack"))
+      this.customPhraseTable = new PhraseTable(null, "custom", "phrase", joshuaConfiguration);
+    else
+      this.customPhraseTable = new MemoryBasedBatchGrammar("custom", joshuaConfiguration);
+    this.grammars.add(this.customPhraseTable);
+    
+    /* Create an epsilon-deleting grammar */
+    if (joshuaConfiguration.lattice_decoding) {
+      LOG.info("Creating an epsilon-deleting grammar");
+      MemoryBasedBatchGrammar latticeGrammar = new MemoryBasedBatchGrammar("lattice", joshuaConfiguration);
+      latticeGrammar.setSpanLimit(-1);
+      HieroFormatReader reader = new HieroFormatReader();
+
+      String goalNT = FormatUtils.cleanNonTerminal(joshuaConfiguration.goal_symbol);
+      String defaultNT = FormatUtils.cleanNonTerminal(joshuaConfiguration.default_non_terminal);
+
+      String ruleString = String.format("[%s] ||| [%s,1] <eps> ||| [%s,1] ||| ",
+          goalNT, goalNT, defaultNT);
+
+      Rule rule = reader.parseLine(ruleString);
+      latticeGrammar.addRule(rule);
+      rule.estimateRuleCost(featureFunctions);
+
+      this.grammars.add(latticeGrammar);
+    }
+
+    /* Now create a feature function for each owner */
+    HashSet<String> ownersSeen = new HashSet<String>();
+
+    for (Grammar grammar: this.grammars) {
+      String owner = Vocabulary.word(grammar.getOwner());
+      if (! ownersSeen.contains(owner)) {
+        this.featureFunctions.add(new PhraseModel(weights, new String[] { "tm", "-owner", owner },
+            joshuaConfiguration, grammar));
+        ownersSeen.add(owner);
+      }
+    }
+
+    LOG.info("Memory used {} MB",
+        ((Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory()) / 1000000.0));
+  }
+
+  /**
+   * Checks if multiple packedGrammars have the same vocabulary by comparing their vocabulary file checksums.
+   */
+  private static void checkSharedVocabularyChecksumsForPackedGrammars(final List<PackedGrammar> packed_grammars) {
+    String previous_checksum = "";
+    for (PackedGrammar grammar : packed_grammars) {
+      final String checksum = grammar.computeVocabularyChecksum();
+      if (previous_checksum.isEmpty()) {
+        previous_checksum = checksum;
+      } else {
+        if (!checksum.equals(previous_checksum)) {
+          throw new RuntimeException(
+              "Trying to load multiple packed grammars with different vocabularies! " +
+                  "Have you packed them jointly?");
+        }
+        previous_checksum = checksum;
+      }
+    }
+  }
+
+  /*
+   * This function reads the weights for the model. Feature names and their weights are listed one
+   * per line in the following format:
+   * 
+   * FEATURE_NAME WEIGHT
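+   *
+   * e.g. (a hypothetical line): lm_0 4.7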
+   */
+  private void readWeights(String fileName) {
+    Decoder.weights = new FeatureVector();
+
+    if (fileName.equals(""))
+      return;
+
+    try {
+      LineReader lineReader = new LineReader(fileName);
+
+      for (String line : lineReader) {
+        line = line.replaceAll("\\s+", " ");
+
+        if (line.equals("") || line.startsWith("#") || line.startsWith("//")
+            || line.indexOf(' ') == -1)
+          continue;
+
+        String[] tokens = line.split("\\s+");
+        String feature = tokens[0];
+        float value = Float.parseFloat(tokens[1]);
+
+        // Kludge for compatibility with Moses tuners
+        if (joshuaConfiguration.moses) {
+          feature = demoses(feature);
+        }
+
+        weights.increment(feature, value);
+      }
+    } catch (IOException ioe) {
+      throw new RuntimeException(ioe);
+    }
+    LOG.info("Read {} weights from file '{}'", weights.size(), fileName);
+  }
+
+  private String demoses(String feature) {
+    if (feature.endsWith("="))
+      feature = feature.replace("=", "");
+    if (feature.equals("OOV_Penalty"))
+      feature = "OOVPenalty";
+    else if (feature.startsWith("tm-") || feature.startsWith("lm-"))
+      feature = feature.replace("-", "_");
+    return feature;
+  }
+
+  /**
+   * Feature functions are instantiated with a line of the form
+   *
+   * <pre>
+   *   FEATURE OPTIONS
+   * </pre>
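+   *
+   * For example (hypothetical lines):
+   *
+   * <pre>
+   *   OOVPenalty
+   *   LanguageModel -lm_file /path/to/lm.gz -lm_order 5
+   * </pre>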
+   *
+   * Weights for features are listed separately.
+   *
+   * @throws IOException if a feature function resource cannot be read
+   *
+   */
+  private void initializeFeatureFunctions() throws IOException {
+
+    for (String featureLine : joshuaConfiguration.features) {
+      // line starts with NAME, followed by args
+      // 1. create new class named NAME, pass it config, weights, and the args
+
+      String fields[] = featureLine.split("\\s+");
+      String featureName = fields[0];
+      
+      try {
+        
+        Class<?> clas = getFeatureFunctionClass(featureName);
+        Constructor<?> constructor = clas.getConstructor(FeatureVector.class,
+            String[].class, JoshuaConfiguration.class);
+        FeatureFunction feature = (FeatureFunction) constructor.newInstance(weights, fields, joshuaConfiguration);
+        this.featureFunctions.add(feature);
+        
+      } catch (Exception e) {
+        throw new RuntimeException(String.format("Unable to instantiate feature function '%s'!", featureLine), e); 
+      }
+    }
+
+    for (FeatureFunction feature : featureFunctions) {
+      LOG.info("FEATURE: {}", feature.logString());
+    }
+
+    weights.registerDenseFeatures(featureFunctions);
+  }
+
+  /**
+   * Searches a list of predefined packages for a feature function class, and returns the first
+   * one found. Meant for instantiating feature functions.
+   *
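+   * For example, a (hypothetical) feature name {@code OOVPenalty} would resolve to
+   * {@code org.apache.joshua.decoder.ff.OOVPenalty}; when no exact match is found, the lookup is
+   * retried with an {@code FF} suffix (e.g. {@code LanguageModel} resolves to
+   * {@code LanguageModelFF}).
+   *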
+   * @param featureName the name of the feature function class
+   * @return the class, found in one of the search paths, or null if not found
+   */
+  private Class<?> getFeatureFunctionClass(String featureName) {
+    Class<?> clas = null;
+
+    String[] packages = { "org.apache.joshua.decoder.ff", "org.apache.joshua.decoder.ff.lm", "org.apache.joshua.decoder.ff.phrase" };
+    for (String path : packages) {
+      try {
+        clas = Class.forName(String.format("%s.%s", path, featureName));
+        break;
+      } catch (ClassNotFoundException e) {
+        try {
+          clas = Class.forName(String.format("%s.%sFF", path, featureName));
+          break;
+        } catch (ClassNotFoundException e2) {
+          // do nothing
+        }
+      }
+    }
+    return clas;
+  }
+  
+  /**
+   * Adds a rule to the custom grammar.  
+   * 
+   * @param rule the rule to add
+   */
+  public void addCustomRule(Rule rule) {
+    customPhraseTable.addRule(rule);
+    rule.estimateRuleCost(featureFunctions);
+  }
+
+  public Grammar getCustomPhraseTable() {
+    return customPhraseTable;
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/decoder/DecoderThread.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/DecoderThread.java b/joshua-core/src/main/java/org/apache/joshua/decoder/DecoderThread.java
new file mode 100644
index 0000000..d6f5233
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/DecoderThread.java
@@ -0,0 +1,201 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.joshua.decoder.chart_parser.Chart;
+import org.apache.joshua.decoder.ff.FeatureFunction;
+import org.apache.joshua.decoder.ff.FeatureVector;
+import org.apache.joshua.decoder.ff.SourceDependentFF;
+import org.apache.joshua.decoder.ff.tm.Grammar;
+import org.apache.joshua.decoder.hypergraph.ForestWalker;
+import org.apache.joshua.decoder.hypergraph.GrammarBuilderWalkerFunction;
+import org.apache.joshua.decoder.hypergraph.HyperGraph;
+import org.apache.joshua.decoder.phrase.Stacks;
+import org.apache.joshua.decoder.segment_file.Sentence;
+import org.apache.joshua.corpus.Vocabulary;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * This class handles decoding of individual Sentence objects (which can represent plain sentences
+ * or lattices). A single sentence is decoded by a call to translate(). Instances of this class are
+ * pooled and reused by the Decoder class, which hands each incoming sentence to an available
+ * thread, permitting thread-safe parallel decoding across the sentences of a request.
+ * 
+ * @author Matt Post post@cs.jhu.edu
+ * @author Zhifei Li, zhifei.work@gmail.com
+ */
+public class DecoderThread extends Thread {
+  private static final Logger LOG = LoggerFactory.getLogger(DecoderThread.class);
+
+  private final JoshuaConfiguration joshuaConfiguration;
+  /*
+   * these variables may be the same across all threads (e.g., just copy from DecoderFactory), or
+   * differ from thread to thread
+   */
+  private final List<Grammar> allGrammars;
+  private final List<FeatureFunction> featureFunctions;
+
+
+  // ===============================================================
+  // Constructor
+  // ===============================================================
+  public DecoderThread(List<Grammar> grammars, FeatureVector weights,
+      List<FeatureFunction> featureFunctions, JoshuaConfiguration joshuaConfiguration) throws IOException {
+
+    this.joshuaConfiguration = joshuaConfiguration;
+    this.allGrammars = grammars;
+
+    this.featureFunctions = new ArrayList<FeatureFunction>();
+    for (FeatureFunction ff : featureFunctions) {
+      if (ff instanceof SourceDependentFF) {
+        this.featureFunctions.add(((SourceDependentFF) ff).clone());
+      } else {
+        this.featureFunctions.add(ff);
+      }
+    }
+  }
+
+  // ===============================================================
+  // Methods
+  // ===============================================================
+
+  @Override
+  public void run() {
+    // Nothing to do but wait.
+  }
+
+  /**
+   * Translate a sentence.
+   * 
+   * @param sentence The sentence to be translated.
+   * @return the sentence {@link org.apache.joshua.decoder.Translation}
+   */
+  public Translation translate(Sentence sentence) {
+
+    LOG.info("Input {}: {}", sentence.id(), sentence.fullSource());
+
+    if (sentence.target() != null)
+      LOG.info("Input {}: Constraining to target sentence '{}'",
+          sentence.id(), sentence.target());
+
+    // skip blank sentences
+    if (sentence.isEmpty()) {
+      LOG.info("Translation {}: Translation took 0 seconds", sentence.id());
+      return new Translation(sentence, null, featureFunctions, joshuaConfiguration);
+    }
+
+    long startTime = System.currentTimeMillis();
+
+    Grammar[] grammars = allGrammars.toArray(new Grammar[allGrammars.size()]);
+
+    if (joshuaConfiguration.segment_oovs)
+      sentence.segmentOOVs(grammars);
+
+    /*
+     * Joshua supports (as of September 2014) both phrase-based and hierarchical decoding. Here
+     * we build the appropriate chart. The output of both systems is a hypergraph, which is then
+     * used for further processing (e.g., k-best extraction).
+     */
+    HyperGraph hypergraph = null;
+    try {
+
+      if (joshuaConfiguration.search_algorithm.equals("stack")) {
+        Stacks stacks = new Stacks(sentence, this.featureFunctions, grammars, joshuaConfiguration);
+
+        hypergraph = stacks.search();
+      } else {
+        /* Seeding: the chart only sees the grammars, not the factories */
+        Chart chart = new Chart(sentence, this.featureFunctions, grammars,
+            joshuaConfiguration.goal_symbol, joshuaConfiguration);
+
+        hypergraph = (joshuaConfiguration.use_dot_chart) 
+            ? chart.expand() 
+                : chart.expandSansDotChart();
+      }
+
+    } catch (java.lang.OutOfMemoryError e) {
+      LOG.error("Input {}: out of memory", sentence.id());
+      hypergraph = null;
+    }
+
+    float seconds = (System.currentTimeMillis() - startTime) / 1000.0f;
+    LOG.info("Input {}: Translation took {} seconds", sentence.id(), seconds);
+    LOG.info("Input {}: Memory used is {} MB", sentence.id(), (Runtime
+        .getRuntime().totalMemory() - Runtime.getRuntime().freeMemory()) / 1000000.0);
+
+    /* Return the translation unless we're doing synchronous parsing. */
+    if (!joshuaConfiguration.parse || hypergraph == null) {
+      return new Translation(sentence, hypergraph, featureFunctions, joshuaConfiguration);
+    }
+
+    /*****************************************************************************************/
+
+    /*
+     * Synchronous parsing.
+     * 
+     * Step 1. Traverse the hypergraph to create a grammar for the second-pass parse.
+     */
+    Grammar newGrammar = getGrammarFromHyperGraph(joshuaConfiguration.goal_symbol, hypergraph);
+    newGrammar.sortGrammar(this.featureFunctions);
+    long sortTime = System.currentTimeMillis();
+    LOG.info("Sentence {}: New grammar has {} rules.", sentence.id(),
+        newGrammar.getNumRules());
+
+    /* Step 2. Create a new chart and parse with the instantiated grammar. */
+    Grammar[] newGrammarArray = new Grammar[] { newGrammar };
+    Sentence targetSentence = new Sentence(sentence.target(), sentence.id(), joshuaConfiguration);
+    Chart chart = new Chart(targetSentence, featureFunctions, newGrammarArray, "GOAL",
+        joshuaConfiguration);
+    int goalSymbol = GrammarBuilderWalkerFunction.goalSymbol(hypergraph);
+    String goalSymbolString = Vocabulary.word(goalSymbol);
+    LOG.info("Sentence {}: goal symbol is {} ({}).", sentence.id(),
+        goalSymbolString, goalSymbol);
+    chart.setGoalSymbolID(goalSymbol);
+
+    /* Parsing */
+    HyperGraph englishParse = chart.expand();
+    long secondParseTime = System.currentTimeMillis();
+    LOG.info("Sentence {}: Finished second chart expansion ({} seconds).",
+        sentence.id(), (secondParseTime - sortTime) / 1000);
+    LOG.info("Sentence {} total time: {} seconds.\n", sentence.id(),
+        (secondParseTime - startTime) / 1000);
+    LOG.info("Memory used after sentence {} is {} MB", sentence.id(), (Runtime
+        .getRuntime().totalMemory() - Runtime.getRuntime().freeMemory()) / 1000000.0);
+    return new Translation(sentence, englishParse, featureFunctions, joshuaConfiguration); // or do something else
+  }
+
+  private Grammar getGrammarFromHyperGraph(String goal, HyperGraph hg) {
+    GrammarBuilderWalkerFunction f = new GrammarBuilderWalkerFunction(goal, joshuaConfiguration);
+    ForestWalker walker = new ForestWalker();
+    walker.walk(hg.goalNode, f);
+    return f.getGrammar();
+  }
+}