You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by le...@apache.org on 2016/05/16 06:26:35 UTC

[19/66] [partial] incubator-joshua git commit: JOSHUA-252 Make it possible to use Maven to build Joshua

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/main/java/org/apache/joshua/decoder/JoshuaConfiguration.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/JoshuaConfiguration.java b/src/main/java/org/apache/joshua/decoder/JoshuaConfiguration.java
new file mode 100644
index 0000000..7a3de23
--- /dev/null
+++ b/src/main/java/org/apache/joshua/decoder/JoshuaConfiguration.java
@@ -0,0 +1,710 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package joshua.decoder;
+
+import static joshua.util.FormatUtils.cleanNonTerminal;
+import static joshua.util.FormatUtils.markup;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.io.BufferedReader;
+import java.io.FileReader;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.logging.Logger;
+
+import joshua.decoder.ff.StatefulFF;
+import joshua.decoder.ff.fragmentlm.Tree;
+import joshua.util.FormatUtils;
+import joshua.util.Regex;
+import joshua.util.io.LineReader;
+
+/**
+ * Configuration file for Joshua decoder.
+ * 
+ * When adding new features to Joshua, any new configurable parameters should be added to this
+ * class.
+ * 
+ * @author Zhifei Li, <zh...@gmail.com>
+ * @author Matt Post <po...@cs.jhu.edu>
+ */
+public class JoshuaConfiguration {
+  
+  // whether to construct a StructuredTranslation object for each request instead of 
+  // printing to stdout. Used when the Decoder is used from Java directly.
+  public Boolean use_structured_output = false;
+  
+  // If set to true, Joshua will lowercase the input, creating an annotation that marks the
+  // original case
+  public boolean lowercase = false;
+  
+  // If set to true, Joshua will recapitalize the output by projecting the case from aligned
+  // source-side words
+  public boolean project_case = false;
+
+  // List of grammar files to read
+  public ArrayList<String> tms = new ArrayList<String>();
+
+  // A rule cache for commonly used tries to avoid excess object allocations
+  // Testing shows there's up to ~95% hit rate when cache size is 5000 Trie nodes.
+  public Integer cachedRuleSize = new Integer(5000);
+
+  /*
+   * The file to read the weights from (part of the sparse features implementation). Weights can
+   * also just be listed in the main config file.
+   */
+  public String weights_file = "";
+
+  // Default symbols. The symbol here should be enclosed in square brackets.
+  public String default_non_terminal = FormatUtils.markup("X");
+  public String goal_symbol = FormatUtils.markup("GOAL");
+
+  /*
+   * A list of OOV symbols in the form
+   * 
+   * [X1] weight [X2] weight [X3] weight ...
+   * 
+   * where the [X] symbols are nonterminals and the weights are weights. For each OOV word w in the
+   * input sentence, Joshua will create rules of the form
+   * 
+   * X1 -> w (weight)
+   * 
+   * If this is empty, an unweighted default_non_terminal is used.
+   */
+  
+  public class OOVItem implements Comparable<OOVItem> {
+    public String label;
+    public float weight;
+
+    OOVItem(String l, float w) {
+      label = l;
+      weight = w;
+    }
+    
+    @Override
+    public int compareTo(OOVItem other) {
+      if (weight > other.weight) 
+        return -1;
+      else if (weight < other.weight)
+        return 1;
+      return 0;
+    }
+  }
+  public ArrayList<OOVItem> oovList = null;
+
+  /*
+   * Whether to segment OOVs into a lattice
+   */
+  public boolean segment_oovs = false;
+  
+  /*
+   * Enable lattice decoding.
+   */
+  public boolean lattice_decoding = false;
+  
+  /*
+   * If false, sorting of the complete grammar is done at load time. If true, grammar tries are not
+   * sorted till they are first accessed. Amortized sorting means you get your first translation
+   * much, much quicker (good for debugging), but that per-sentence decoding is a bit slower.
+   */
+  public boolean amortized_sorting = true;
+
+  // syntax-constrained decoding
+  public boolean constrain_parse = false;
+  public boolean use_pos_labels = false;
+
+  // oov-specific
+  public boolean true_oovs_only = false;
+
+  /* Dynamic sentence-level filtering. */
+  public boolean filter_grammar = false;
+
+  /* The cube pruning pop limit. Set to 0 for exhaustive pruning. */
+  public int pop_limit = 100;
+
+  /* Maximum sentence length. Sentences longer than this are truncated. */
+  public int maxlen = 200;
+
+  /*
+   * N-best configuration.
+   */
+  // Make sure output strings in the n-best list are unique.
+  public boolean use_unique_nbest = true;
+
+  /* Include the phrasal alignments in the output (not word-level alignmetns at the moment). */
+  public boolean include_align_index = false;
+
+  /* The number of hypotheses to output by default. */
+  public int topN = 1;
+  
+  /**
+   * This string describes the format of each line of output from the decoder (i.e., the
+   * translations). The string can include arbitrary text and also variables. The following
+   * variables are available:
+   * 
+   * <pre>
+   * - %i the 0-indexed sentence number 
+   * - %e the source string %s the translated sentence 
+   * - %S the translated sentence with some basic capitalization and denormalization 
+   * - %t the synchronous derivation 
+   * - %f the list of feature values (as name=value pairs) 
+   * - %c the model cost
+   * - %w the weight vector 
+   * - %a the alignments between source and target words (currently unimplemented) 
+   * - %d a verbose, many-line version of the derivation
+   * </pre>
+   */
+  public String outputFormat = "%i ||| %s ||| %f ||| %c";
+
+  /* The number of decoding threads to use (-threads). */
+  public int num_parallel_decoders = 1;
+
+  // disk hg
+  public String hypergraphFilePattern = "";
+
+  /*
+   * When true, _OOV is appended to all words that are passed through (useful for something like
+   * transliteration on the target side
+   */
+  public boolean mark_oovs = false;
+
+  /* Enables synchronous parsing. */
+  public boolean parse = false; // perform synchronous parsing
+
+  private final Logger logger = Logger.getLogger(JoshuaConfiguration.class.getName());
+
+  /* A list of the feature functions. */
+  public ArrayList<String> features = new ArrayList<String>();
+
+  /* A list of weights found in the main config file (instead of in a separate weights file) */
+  public ArrayList<String> weights = new ArrayList<String>();
+
+  /* Determines whether to expect JSON input or plain lines */
+  public enum INPUT_TYPE { plain, json };
+  public INPUT_TYPE input_type = INPUT_TYPE.plain;
+
+  /* Type of server. Not sure we need to keep the regular TCP one around. */
+  public enum SERVER_TYPE { none, TCP, HTTP };
+  public SERVER_TYPE server_type = SERVER_TYPE.TCP;
+  
+  /* If set, Joshua will start a (multi-threaded, per "threads") TCP/IP server on this port. */
+  public int server_port = 0;
+
+  /*
+   * Whether to do forest rescoring. If set to true, the references are expected on STDIN along with
+   * the input sentences in the following format:
+   * 
+   * input sentence ||| ||| reference1 ||| reference2 ...
+   * 
+   * (The second field is reserved for the output sentence for alignment and forced decoding).
+   */
+
+  public boolean rescoreForest = false;
+  public float rescoreForestWeight = 10.0f;
+
+  /*
+   * Location of fragment mapping file, which maps flattened SCFG rules to their internal
+   * representation.
+   */
+  public String fragmentMapFile = null;
+
+  /*
+   * Whether to use soft syntactic constraint decoding /fuzzy matching, which allows that any
+   * nonterminal may be substituted for any other nonterminal (except for OOV and GOAL)
+   */
+  public boolean fuzzy_matching = false;
+
+  public static final String SOFT_SYNTACTIC_CONSTRAINT_DECODING_PROPERTY_NAME = "fuzzy_matching";
+
+  /***
+   * Phrase-based decoding parameters.
+   */
+  
+  /* The search algorithm: currently either "cky" or "stack" */
+  public String search_algorithm = "cky";
+  
+  /* The distortion limit */
+  public int reordering_limit = 8;
+  
+  /* The number of target sides considered for each source side (after sorting by model weight) */
+  public int num_translation_options = 20;
+
+  /* If true, decode using a dot chart (standard CKY+); if false, use the much more efficient
+   * version of Sennrich (SSST 2014)
+   */
+  public boolean use_dot_chart = true;
+  
+  /* Moses compatibility */
+  public boolean moses = false;
+  
+  /* If true, just print out the weights found in the config file, and exit. */
+  public boolean show_weights_and_quit = false;
+  
+  /* Read input from a file (Moses compatible flag) */
+  public String input_file = null;
+  
+  /* Write n-best output to this file */
+  public String n_best_file = null;
+
+  /* Whether to look at source side for special annotations */
+  public boolean source_annotations = false;
+
+  /* Weights overridden from the command line */
+  public String weight_overwrite = "";
+  
+  /**
+   * This method resets the state of JoshuaConfiguration back to the state after initialization.
+   * This is useful when for example making different calls to the decoder within the same java
+   * program, which otherwise leads to potential errors due to inconsistent state as a result of
+   * loading the configuration multiple times without resetting etc.
+   * 
+   * This leads to the insight that in fact it may be an even better idea to refactor the code and
+   * make JoshuaConfiguration an object that is is created and passed as an argument, rather than a
+   * shared static object. This is just a suggestion for the next step.
+   * 
+   */
+  public void reset() {
+    logger.info("Resetting the JoshuaConfiguration to its defaults ...");
+    logger.info("\n\tResetting the StatefullFF global state index ...");
+    logger.info("\n\t...done");
+    StatefulFF.resetGlobalStateIndex();
+    tms = new ArrayList<String>();
+    weights_file = "";
+    default_non_terminal = "[X]";
+    oovList = new ArrayList<OOVItem>(); 
+    oovList.add(new OOVItem(default_non_terminal, 1.0f));
+    goal_symbol = "[GOAL]";
+    amortized_sorting = true;
+    constrain_parse = false;
+    use_pos_labels = false;
+    true_oovs_only = false;
+    filter_grammar = false;
+    pop_limit = 100;
+    maxlen = 200;
+    use_unique_nbest = false;
+    include_align_index = false;
+    topN = 1;
+    outputFormat = "%i ||| %s ||| %f ||| %c";
+    num_parallel_decoders = 1;
+    hypergraphFilePattern = "";
+    mark_oovs = false;
+    // oracleFile = null;
+    parse = false; // perform synchronous parsing
+    features = new ArrayList<String>();
+    weights = new ArrayList<String>();
+    server_port = 0;
+    
+    reordering_limit = 8;
+    num_translation_options = 20;
+    logger.info("...done");
+  }
+
+  // ===============================================================
+  // Methods
+  // ===============================================================
+
+  /**
+   * To process command-line options, we write them to a file that looks like the config file, and
+   * then call readConfigFile() on it. It would be more general to define a class that sits on a
+   * stream and knows how to chop it up, but this was quicker to implement.
+   */
+  public void processCommandLineOptions(String[] options) {
+    try {
+      File tmpFile = File.createTempFile("options", null, null);
+      PrintWriter out = new PrintWriter(new FileWriter(tmpFile));
+
+      for (int i = 0; i < options.length; i++) {
+        String key = options[i].substring(1);
+        if (i + 1 == options.length || options[i + 1].startsWith("-")) {
+          // if this is the last item, or if the next item
+          // is another flag, then this is a boolean flag
+          out.println(key + " = true");
+
+        } else {
+          out.print(key + " =");
+          while (i + 1 < options.length && ! options[i + 1].startsWith("-")) {
+            out.print(String.format(" %s", options[i + 1]));
+            i++;
+          }
+          out.println();
+        }
+      }
+      out.close();
+      this.readConfigFile(tmpFile.getCanonicalPath());
+
+      tmpFile.delete();
+
+    } catch (IOException e) {
+      e.printStackTrace();
+      System.exit(1);
+    }
+  }
+
+  public void readConfigFile(String configFile) throws IOException {
+
+    LineReader configReader = new LineReader(configFile, false);
+    try {
+      for (String line : configReader) {
+        line = line.trim(); // .toLowerCase();
+        
+        if (Regex.commentOrEmptyLine.matches(line))
+          continue;
+
+        /*
+         * There are two kinds of substantive (non-comment, non-blank) lines: parameters and feature
+         * values. Parameters match the pattern "key = value"; all other substantive lines are
+         * interpreted as features.
+         */
+
+        if (line.indexOf("=") != -1) { // parameters; (not feature function)
+          String[] fds = Regex.equalsWithSpaces.split(line, 2);
+          if (fds.length < 2) {
+            Decoder.LOG(1, String.format("* WARNING: skipping config file line '%s'", line));
+            continue;
+          }
+
+          String parameter = normalize_key(fds[0]);
+
+          if (parameter.equals(normalize_key("lm"))) {
+            /* This is deprecated. This support old LM lines of the form
+             * 
+             *   lm = berkeleylm 5 false false 100 lm.gz
+             * 
+             * LMs are now loaded as general feature functions, so we transform that to either
+             * 
+             *   feature-function = LanguageModel -lm_order 5 -lm_type berkeleylm -lm_file lm.gz
+             * 
+             * If the line were state minimizing:
+             * 
+             *   lm = kenlm 5 true false 100 lm.gz
+             *              
+             * feature-function = StateMinimizingLanguageModel -lm_order 5 -lm_file lm.gz
+             */
+            
+            String[] tokens = fds[1].split("\\s+");
+            if (tokens[2].equals("true"))
+              features.add(String.format("feature_function = StateMinimizingLanguageModel -lm_type kenlm -lm_order %s -lm_file %s",
+                  tokens[1], tokens[5]));
+            else
+              features.add(String.format("feature_function = LanguageModel -lm_type %s -lm_order %s -lm_file %s",
+                  tokens[0], tokens[1], tokens[5]));
+
+          } else if (parameter.equals(normalize_key("tm"))) {
+            /* If found, convert old format:
+             *   tm = TYPE OWNER MAXSPAN PATH
+             * to new format
+             *   tm = TYPE -owner OWNER -maxspan MAXSPAN -path PATH    
+             */
+            String tmLine = fds[1];
+            
+            String[] tokens = fds[1].split("\\s+");
+            if (! tokens[1].startsWith("-")) { // old format
+              tmLine = String.format("%s -owner %s -maxspan %s -path %s", tokens[0], tokens[1], tokens[2], tokens[3]);
+              Decoder.LOG(1, String.format("WARNING: Converting deprecated TM line from '%s' -> '%s'", fds[1], tmLine));
+            }
+            tms.add(tmLine);
+            
+          } else if (parameter.equals("v")) {
+            Decoder.VERBOSE = Integer.parseInt(fds[1]);
+
+          } else if (parameter.equals(normalize_key("parse"))) {
+            parse = Boolean.parseBoolean(fds[1]);
+            logger.finest(String.format("parse: %s", parse));
+
+          } else if (parameter.equals(normalize_key("dump-hypergraph"))) {
+            hypergraphFilePattern = fds[1].trim();
+            logger
+                .finest(String.format("  hypergraph dump file format: %s", hypergraphFilePattern));
+
+          } else if (parameter.equals(normalize_key("oov-list"))) {
+            if (new File(fds[1]).exists()) {
+              oovList = new ArrayList<OOVItem>();
+              try {
+                File file = new File(fds[1]);
+                BufferedReader br = new BufferedReader(new FileReader(file));
+                try {
+                  String str = br.readLine();
+                  while (str != null) {
+                    String[] tokens = str.trim().split("\\s+");
+
+                    oovList.add(new OOVItem(FormatUtils.markup(tokens[0]),
+                            (float) Math.log(Float.parseFloat(tokens[1]))));
+
+                    str = br.readLine();
+                  }
+                  br.close();
+                } catch(IOException e){
+                  System.out.println(e);
+                }
+              } catch(IOException e){
+                System.out.println(e);
+              }
+              Collections.sort(oovList);
+
+            } else {
+              String[] tokens = fds[1].trim().split("\\s+");
+              if (tokens.length % 2 != 0) {
+                  System.err.println(String.format("* FATAL: invalid format for '%s'", fds[0]));
+                  System.exit(1);
+                }
+
+              oovList = new ArrayList<OOVItem>();
+
+              for (int i = 0; i < tokens.length; i += 2)
+                oovList.add(new OOVItem(FormatUtils.markup(tokens[i]),
+                    (float) Math.log(Float.parseFloat(tokens[i + 1]))));
+
+              Collections.sort(oovList);
+            }
+
+          } else if (parameter.equals(normalize_key("lattice-decoding"))) {
+            lattice_decoding = true;
+            
+          } else if (parameter.equals(normalize_key("segment-oovs"))) {
+            segment_oovs = true;
+            lattice_decoding = true;
+
+          } else if (parameter.equals(normalize_key("default-non-terminal"))) {
+            default_non_terminal = markup(cleanNonTerminal(fds[1].trim()));
+            logger.finest(String.format("default_non_terminal: %s", default_non_terminal));
+
+          } else if (parameter.equals(normalize_key("goal-symbol"))) {
+            goal_symbol = markup(cleanNonTerminal(fds[1].trim()));
+            logger.finest("goalSymbol: " + goal_symbol);
+
+          } else if (parameter.equals(normalize_key("weights-file"))) {
+            weights_file = fds[1];
+
+          } else if (parameter.equals(normalize_key("constrain_parse"))) {
+            constrain_parse = Boolean.parseBoolean(fds[1]);
+
+          } else if (parameter.equals(normalize_key("true_oovs_only"))) {
+            true_oovs_only = Boolean.parseBoolean(fds[1]);
+
+          } else if (parameter.equals(normalize_key("filter-grammar"))) {
+            filter_grammar = Boolean.parseBoolean(fds[1]);
+
+          } else if (parameter.equals(normalize_key("amortize"))) {
+            amortized_sorting = Boolean.parseBoolean(fds[1]);
+
+          } else if (parameter.equals(normalize_key("use_pos_labels"))) {
+            use_pos_labels = Boolean.parseBoolean(fds[1]);
+
+          } else if (parameter.equals(normalize_key("use_unique_nbest"))) {
+            use_unique_nbest = Boolean.valueOf(fds[1]);
+            logger.finest(String.format("use_unique_nbest: %s", use_unique_nbest));
+
+          } else if (parameter.equals(normalize_key("output-format"))) {
+            outputFormat = fds[1];
+            logger.finest(String.format("output-format: %s", outputFormat));
+
+          } else if (parameter.equals(normalize_key("include_align_index"))) {
+            include_align_index = Boolean.valueOf(fds[1]);
+            logger.finest(String.format("include_align_index: %s", include_align_index));
+
+          } else if (parameter.equals(normalize_key("top_n"))) {
+            topN = Integer.parseInt(fds[1]);
+            logger.finest(String.format("topN: %s", topN));
+
+          } else if (parameter.equals(normalize_key("num_parallel_decoders"))
+              || parameter.equals(normalize_key("threads"))) {
+            num_parallel_decoders = Integer.parseInt(fds[1]);
+            if (num_parallel_decoders <= 0) {
+              throw new IllegalArgumentException(
+                  "Must specify a positive number for num_parallel_decoders");
+            }
+            logger.finest(String.format("num_parallel_decoders: %s", num_parallel_decoders));
+
+          } else if (parameter.equals(normalize_key("mark_oovs"))) {
+            mark_oovs = Boolean.valueOf(fds[1]);
+            logger.finest(String.format("mark_oovs: %s", mark_oovs));
+
+          } else if (parameter.equals(normalize_key("pop-limit"))) {
+            pop_limit = Integer.parseInt(fds[1]);
+            logger.finest(String.format("pop-limit: %s", pop_limit));
+
+          } else if (parameter.equals(normalize_key("input-type"))) {
+            if (fds[1].equals("json"))
+              input_type = INPUT_TYPE.json;
+            else if (fds[1].equals("plain"))
+              input_type = INPUT_TYPE.plain;
+            else {
+              System.err.println(String.format("* FATAL: invalid server type '%s'", fds[1]));
+              System.exit(1);
+            }
+            logger.info(String.format("    input-type: %s", input_type));
+
+          } else if (parameter.equals(normalize_key("server-type"))) {
+            if (fds[1].toLowerCase().equals("tcp"))
+              server_type = SERVER_TYPE.TCP;
+            else if (fds[1].toLowerCase().equals("http"))
+              server_type = SERVER_TYPE.HTTP;
+
+            logger.info(String.format("    server-type: %s", server_type));
+            
+          } else if (parameter.equals(normalize_key("server-port"))) {
+            server_port = Integer.parseInt(fds[1]);
+            logger.info(String.format("    server-port: %d", server_port));
+
+          } else if (parameter.equals(normalize_key("rescore-forest"))) {
+            rescoreForest = true;
+            logger.info(String.format("    rescore-forest: %s", rescoreForest));
+
+          } else if (parameter.equals(normalize_key("rescore-forest-weight"))) {
+            rescoreForestWeight = Float.parseFloat(fds[1]);
+            logger.info(String.format("    rescore-forest-weight: %f", rescoreForestWeight));
+
+          } else if (parameter.equals(normalize_key("maxlen"))) {
+            // reset the maximum length
+            maxlen = Integer.parseInt(fds[1]);
+
+          } else if (parameter.equals("c") || parameter.equals("config")) {
+            // this was used to send in the config file, just ignore it
+            ;
+
+          } else if (parameter.equals(normalize_key("feature-function"))) {
+            // add the feature to the list of features for later processing
+            features.add("feature_function = " + fds[1]);
+
+          } else if (parameter.equals(normalize_key("maxlen"))) {
+            // add the feature to the list of features for later processing
+            maxlen = Integer.parseInt(fds[1]);
+
+          } else if (parameter
+              .equals(normalize_key(SOFT_SYNTACTIC_CONSTRAINT_DECODING_PROPERTY_NAME))) {
+            fuzzy_matching = Boolean.parseBoolean(fds[1]);
+            logger.finest(String.format(fuzzy_matching + ": %s", fuzzy_matching));
+
+          } else if (parameter.equals(normalize_key("fragment-map"))) {
+            fragmentMapFile = fds[1];
+            Tree.readMapping(fragmentMapFile);
+
+          /** PHRASE-BASED PARAMETERS **/
+          } else if (parameter.equals(normalize_key("search"))) {
+            search_algorithm = fds[1];
+            
+            if (!search_algorithm.equals("cky") && !search_algorithm.equals("stack")) {
+              throw new RuntimeException(
+                  "-search must be one of 'stack' (for phrase-based decoding) " +
+                  "or 'cky' (for hierarchical / syntactic decoding)");
+            }
+            
+            if (search_algorithm.equals("cky") && include_align_index) {
+              throw new RuntimeException(
+                  "include_align_index is currently not supported with cky search");
+            }
+
+          } else if (parameter.equals(normalize_key("reordering-limit"))) {
+            reordering_limit = Integer.parseInt(fds[1]);
+
+          } else if (parameter.equals(normalize_key("num-translation-options"))) {
+            num_translation_options = Integer.parseInt(fds[1]);
+            
+          } else if (parameter.equals(normalize_key("no-dot-chart"))) {
+            use_dot_chart = false;
+            
+          } else if (parameter.equals(normalize_key("moses"))) {
+            moses = true; // triggers some Moses-specific compatibility options
+            
+          } else if (parameter.equals(normalize_key("show-weights"))) {
+            show_weights_and_quit = true;
+
+          } else if (parameter.equals(normalize_key("n-best-list"))) {
+            // for Moses compatibility
+            String[] tokens = fds[1].split("\\s+");
+            n_best_file = tokens[0];
+            if (tokens.length > 1)
+              topN = Integer.parseInt(tokens[1]);
+
+          } else if (parameter.equals(normalize_key("input-file"))) {
+            // for Moses compatibility
+            input_file = fds[1];
+            
+          } else if (parameter.equals(normalize_key("weight-file"))) {
+            // for Moses, ignore
+
+          } else if (parameter.equals(normalize_key("weight-overwrite"))) {
+            weight_overwrite = fds[1];
+            
+          } else if (parameter.equals(normalize_key("source-annotations"))) {
+            // Check source sentence
+            source_annotations = true;
+
+          } else if (parameter.equals(normalize_key("cached-rules-size"))) {
+              // Check source sentence
+              cachedRuleSize = Integer.parseInt(fds[1]);
+          } else if (parameter.equals(normalize_key("lowercase"))) {
+            lowercase = true;
+            
+          } else if (parameter.equals(normalize_key("project-case"))) {
+            project_case = true;
+
+          } else {
+
+            if (parameter.equals(normalize_key("use-sent-specific-tm"))
+                || parameter.equals(normalize_key("add-combined-cost"))
+                || parameter.equals(normalize_key("use-tree-nbest"))
+                || parameter.equals(normalize_key("use-kenlm"))
+                || parameter.equals(normalize_key("useCubePrune"))
+                || parameter.equals(normalize_key("useBeamAndThresholdPrune"))
+                || parameter.equals(normalize_key("regexp-grammar"))) {
+              logger.warning(String.format("WARNING: ignoring deprecated parameter '%s'", fds[0]));
+
+            } else {
+              logger.warning("FATAL: unknown configuration parameter '" + fds[0] + "'");
+              System.exit(1);
+            }
+          }
+
+          Decoder.LOG(1, String.format("    %s = '%s'", normalize_key(fds[0]), fds[1]));
+
+        } else {
+          /*
+           * Lines that don't have an equals sign and are not blank lines, empty lines, or comments,
+           * are feature values, which can be present in this file
+           */
+
+          weights.add(line);
+        }
+      }
+    } finally {
+      configReader.close();
+    }
+  }
+
+  /**
+   * Checks for invalid variable configurations
+   */
+  public void sanityCheck() {
+  }
+
+  /**
+   * Normalizes parameter names by removing underscores and hyphens and lowercasing. This defines
+   * equivalence classes on external use of parameter names, permitting arbitrary_under_scores and
+   * camelCasing in paramter names without forcing the user to memorize them all. Here are some
+   * examples of equivalent ways to refer to parameter names:
+   * 
+   * {pop-limit, poplimit, PopLimit, popLimit, pop_lim_it} {lmfile, lm-file, LM-FILE, lm_file}
+   */
+  public static String normalize_key(String text) {
+    return text.replaceAll("[-_]", "").toLowerCase();
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/main/java/org/apache/joshua/decoder/JoshuaDecoder.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/JoshuaDecoder.java b/src/main/java/org/apache/joshua/decoder/JoshuaDecoder.java
new file mode 100644
index 0000000..841f517
--- /dev/null
+++ b/src/main/java/org/apache/joshua/decoder/JoshuaDecoder.java
@@ -0,0 +1,124 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package joshua.decoder;
+
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.PrintStream;
+import java.net.InetSocketAddress;
+import java.util.logging.Logger;
+
+import com.sun.net.httpserver.HttpServer;
+
+import joshua.decoder.JoshuaConfiguration.SERVER_TYPE;
+import joshua.decoder.io.TranslationRequestStream;
+import joshua.server.TcpServer;
+import joshua.server.ServerThread;
+
+/**
+ * Implements decoder initialization, including interaction with <code>JoshuaConfiguration</code>
+ * and <code>DecoderThread</code>.
+ * 
+ * @author Zhifei Li, <zh...@gmail.com>
+ * @author wren ng thornton <wr...@users.sourceforge.net>
+ * @author Lane Schwartz <do...@users.sourceforge.net>
+ */
public class JoshuaDecoder {

  private static final Logger logger = Logger.getLogger(JoshuaDecoder.class.getName());

  // ===============================================================
  // Main
  // ===============================================================

  /**
   * Decoder entry point. Parses the command line, configures logging, loads the model, and then
   * either (a) starts a TCP or HTTP server if a server port was configured, or (b) decodes all
   * input from a file or STDIN and prints translations to STDOUT.
   *
   * @param args command-line options, handed to {@code ArgsParser} together with a fresh
   *          {@code JoshuaConfiguration}
   * @throws IOException if the input file or server socket cannot be opened
   */
  public static void main(String[] args) throws IOException {

    JoshuaConfiguration joshuaConfiguration = new JoshuaConfiguration();
    ArgsParser userArgs = new ArgsParser(args,joshuaConfiguration);

    // Logging is configured from $JOSHUA/logging.properties; if JOSHUA is unset this
    // becomes "null/logging.properties" and we fall through with a warning.
    String logFile = System.getenv().get("JOSHUA") + "/logging.properties";
    try {
      java.util.logging.LogManager.getLogManager().readConfiguration(new FileInputStream(logFile));
    } catch (IOException e) {
      logger.warning("Couldn't initialize logging properties from '" + logFile + "'");
    }

    long startTime = System.currentTimeMillis();

    /* Step-0: some sanity checking */
    joshuaConfiguration.sanityCheck();

    /* Step-1: initialize the decoder, test-set independent */
    Decoder decoder = new Decoder(joshuaConfiguration, userArgs.getConfigFile());

    Decoder.LOG(1, String.format("Model loading took %d seconds",
        (System.currentTimeMillis() - startTime) / 1000));
    Decoder.LOG(1, String.format("Memory used %.1f MB", ((Runtime.getRuntime().totalMemory() - Runtime
        .getRuntime().freeMemory()) / 1000000.0)));  

    /* Step-2: Decoding */
    // create a server if requested, which will create TranslationRequest objects
    // (server mode returns immediately after starting the server; no batch decoding is done)
    if (joshuaConfiguration.server_port > 0) {
      int port = joshuaConfiguration.server_port;
      if (joshuaConfiguration.server_type == SERVER_TYPE.TCP) {
        new TcpServer(decoder, port, joshuaConfiguration).start();

      } else if (joshuaConfiguration.server_type == SERVER_TYPE.HTTP) {
        HttpServer server = HttpServer.create(new InetSocketAddress(port), 0);
        Decoder.LOG(1, String.format("** HTTP Server running and listening on port %d.", port));  
        server.createContext("/", new ServerThread(null, decoder, joshuaConfiguration));
        server.setExecutor(null); // creates a default executor
        server.start();
      } else {
        System.err.println("* FATAL: unknown server type");
        System.exit(1);
      }
      return;
    }
    
    // Create the n-best output stream
    // NOTE(review): 'out' is opened and closed here but nothing in this method writes to
    // it — presumably the n-best file is handled elsewhere; confirm against decodeAll().
    FileWriter out = null;
    if (joshuaConfiguration.n_best_file != null)
      out = new FileWriter(joshuaConfiguration.n_best_file);
    
    // Create a TranslationRequest object, reading from a file if requested, or from STDIN
    InputStream input = (joshuaConfiguration.input_file != null) 
      ? new FileInputStream(joshuaConfiguration.input_file)
      : System.in;

    BufferedReader reader = new BufferedReader(new InputStreamReader(input));
    TranslationRequestStream fileRequest = new TranslationRequestStream(reader, joshuaConfiguration);
    decoder.decodeAll(fileRequest, new PrintStream(System.out));
    
    if (joshuaConfiguration.n_best_file != null)
      out.close();

    Decoder.LOG(1, "Decoding completed.");
    Decoder.LOG(1, String.format("Memory used %.1f MB", ((Runtime.getRuntime().totalMemory() - Runtime
        .getRuntime().freeMemory()) / 1000000.0)));

    /* Step-3: clean up */
    decoder.cleanUp();
    Decoder.LOG(1, String.format("Total running time: %d seconds",
      (System.currentTimeMillis() - startTime) / 1000));
  }
}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/main/java/org/apache/joshua/decoder/MetaDataException.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/MetaDataException.java b/src/main/java/org/apache/joshua/decoder/MetaDataException.java
new file mode 100644
index 0000000..932059c
--- /dev/null
+++ b/src/main/java/org/apache/joshua/decoder/MetaDataException.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package joshua.decoder;
+
/*
 * This class is used to capture metadata commands to Joshua on input and pass them to the
 * decoder.
 */
public class MetaDataException extends Exception {

  private static final long serialVersionUID = 1L;

  /* The metadata type: the first token of the message with its one-character marker removed. */
  private String type = null;

  /* Everything after the type token (may be the empty string). */
  private String tokenString = null;

  /**
   * Parses a metadata message of the form "@type token token ...". The leading marker
   * character is stripped; the first token becomes the type and the remainder the token
   * string. The raw message is preserved via {@link Exception#getMessage()}.
   *
   * @param message the raw metadata line, including its one-character marker prefix
   */
  public MetaDataException(String message) {
    super(message);
    int firstSpace = message.indexOf(' ');
    // firstSpace > 0 (not != -1): a message whose second character is the space would
    // otherwise trigger substring(1, 0) and throw StringIndexOutOfBoundsException.
    if (firstSpace > 0) {
      this.type = message.substring(1, firstSpace);
      this.tokenString = message.substring(firstSpace + 1);
    } else if (message.length() > 0) {
      this.type = message.substring(1);
      this.tokenString = "";
    }
  }

  /** @return the metadata type, or null if the message was empty */
  public String type() {
    return this.type;
  }

  /** @return the raw argument string following the type token, or null if the message was empty */
  public String tokenString() {
    return this.tokenString;
  }

  /**
   * Splits the argument string on the given regular expression.
   *
   * @param regex the delimiter pattern
   * @return the argument tokens; an empty array if the message carried no arguments
   */
  public String[] tokens(String regex) {
    return (this.tokenString == null) ? new String[0] : this.tokenString.split(regex);
  }

  /** @return the argument tokens, split on whitespace */
  public String[] tokens() {
    return this.tokens("\\s+");
  }
}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/main/java/org/apache/joshua/decoder/NbestMinRiskReranker.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/NbestMinRiskReranker.java b/src/main/java/org/apache/joshua/decoder/NbestMinRiskReranker.java
new file mode 100644
index 0000000..9596ae0
--- /dev/null
+++ b/src/main/java/org/apache/joshua/decoder/NbestMinRiskReranker.java
@@ -0,0 +1,441 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package joshua.decoder;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map.Entry;
+import java.util.Scanner;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.PriorityBlockingQueue;
+import java.util.concurrent.TimeUnit;
+
+import joshua.util.Ngram;
+import joshua.util.Regex;
+
+
/**
 * This class implements: (1) nbest min risk (MBR) reranking using BLEU as a gain function.
 * <p>
 * This assumes that each string in the nbest list is unique. In Hiero, due to spurious
 * ambiguity, a string may correspond to many possible derivations, and ideally the probability of
 * a string should be the sum of all the derivations leading to that string. But, in practice, one
 * normally uses a Viterbi approximation: the probability of a string is its best derivation
 * probability. So, if one wants to deal with spurious ambiguity, he/she should do that before
 * calling this class.
 * 
 * @author Zhifei Li, <zh...@gmail.com>
 */
public class NbestMinRiskReranker {

  // TODO: this functionality is not implemented yet; default is to produce 1best without any
  // feature scores;
  boolean produceRerankedNbest = false;

  // Scale applied to the log-probabilities before normalization (the MBR "temperature").
  double scalingFactor = 1.0;

  // Maximum n-gram order used in sentence-level BLEU.
  static int bleuOrder = 4;
  // Whether to clip n-gram matches against the reference counts.
  static boolean doNgramClip = true;

  // When true, uses the (unimplemented) Google linear corpus gain instead of expected BLEU.
  static boolean useGoogleLinearCorpusGain = false;

  // Results from worker threads; RankerResult orders by sentence id so output can be drained
  // in input order after the pool shuts down.
  final PriorityBlockingQueue<RankerResult> resultsQueue =
      new PriorityBlockingQueue<RankerResult>();

  public NbestMinRiskReranker(boolean produceRerankedNbest, double scalingFactor) {
    this.produceRerankedNbest = produceRerankedNbest;
    this.scalingFactor = scalingFactor;
  }


  /**
   * Selects the minimum-risk (maximum expected gain) hypothesis from the nbest list of one
   * sentence.
   *
   * @param nbest the list of unique hypothesis lines for this sentence
   * @param sentID the sentence id that every line in nbest must carry
   * @return the hypothesis string with the highest expected gain, or "" for an empty sentence
   */
  public String processOneSent(List<String> nbest, int sentID) {
    System.err.println("Now process sentence " + sentID);

    // step-0: preprocess
    // assumption: each hyp has a format:
    // "sent_id ||| hyp_itself ||| feature scores ||| linear-combination-of-feature-scores(this should be logP)"

    /* Quit if you find an empty hypothesis. */
    if (nbest.size() == 1) {
      String[] fields = Regex.threeBarsWithSpace.split(nbest.get(0));
      if (fields[1].equals("") || Regex.spaces.matches(fields[1])) {
        System.err.println(String.format("-> sentence is empty"));
        return "";
      }
    } 

    List<String> hypsItself = new ArrayList<String>();
    // ArrayList<String> l_feat_scores = new ArrayList<String>();
    List<Double> baselineScores = new ArrayList<Double>(); // linear combination of all baseline
                                                           // features
    List<HashMap<String, Integer>> ngramTbls = new ArrayList<HashMap<String, Integer>>();
    List<Integer> sentLens = new ArrayList<Integer>();

    // Parse each line into hypothesis string, token length, n-gram counts, and model score.
    for (String hyp : nbest) {
      String[] fds = Regex.threeBarsWithSpace.split(hyp);
      int tSentID = Integer.parseInt(fds[0]);
      if (sentID != tSentID) {
        throw new RuntimeException("sentence_id does not match");
      }
      String hypothesis = (fds.length >= 4) ? fds[1] : "";
      hypsItself.add(hypothesis);

      String[] words = Regex.spaces.split(hypothesis);
      sentLens.add(words.length);

      HashMap<String, Integer> ngramTbl = new HashMap<String, Integer>();
      Ngram.getNgrams(ngramTbl, 1, bleuOrder, words);
      ngramTbls.add(ngramTbl);

      // l_feat_scores.add(fds[2]);

      // The value of finalIndex is expected to be 3,
      // unless the hyp_itself is empty,
      // in which case finalIndex will be 2.
      int finalIndex = fds.length - 1;
      baselineScores.add(Double.parseDouble(fds[finalIndex]));

    }

    // step-1: get normalized distribution

    /**
     * value in baselineScores will be changed to normalized probability
     * */
    computeNormalizedProbs(baselineScores, scalingFactor);

    // Alias: after the call above, baselineScores holds probabilities, not log scores.
    List<Double> normalizedProbs = baselineScores;

    // === required by google linear corpus gain
    HashMap<String, Double> posteriorCountsTbl = null;
    if (useGoogleLinearCorpusGain) {
      posteriorCountsTbl = new HashMap<String, Double>();
      getGooglePosteriorCounts(ngramTbls, normalizedProbs, posteriorCountsTbl);
    }


    // step-2: rerank the nbest
    /**
     * TODO: zhifei: now the re-ranking takes O(n^2) where n is the size of the nbest. But, we can
     * significantly speed up this (leading to O(n)) by first estimating a model on nbest, and then
     * rerank the nbest using the estimated model.
     * */
    double bestGain = -1000000000;// set as worst gain
    String bestHyp = null;
    List<Double> gains = new ArrayList<Double>();
    for (int i = 0; i < hypsItself.size(); i++) {
      String curHyp = hypsItself.get(i);
      int curHypLen = sentLens.get(i);
      HashMap<String, Integer> curHypNgramTbl = ngramTbls.get(i);
      // double cur_gain = computeGain(cur_hyp, l_hyp_itself, l_normalized_probs);
      double curGain = 0;
      if (useGoogleLinearCorpusGain) {
        curGain = computeExpectedLinearCorpusGain(curHypLen, curHypNgramTbl, posteriorCountsTbl);
      } else {
        curGain =
            computeExpectedGain(curHypLen, curHypNgramTbl, ngramTbls, sentLens, normalizedProbs);
      }

      gains.add(curGain);
      if (i == 0 || curGain > bestGain) { // maximize
        bestGain = curGain;
        bestHyp = curHyp;
      }
    }

    // step-3: output the 1best or nbest
    if (this.produceRerankedNbest) {
      // TODO: sort the list and write the reranked nbest; Use Collections.sort(List list,
      // Comparator c)
    } else {
      /*
       * this.out.write(best_hyp); this.out.write("\n"); out.flush();
       */
    }

    System.err.println("best gain: " + bestGain);
    if (null == bestHyp) {
      throw new RuntimeException("mbr reranked one best is null, must be wrong");
    }
    return bestHyp;
  }


  /**
   * based on a list of log-probabilities in nbestLogProbs, obtain a normalized distribution, and
   * put the normalized probability (real value in [0,1]) into nbestLogProbs
   * */
  // get a normalized distribution and put it back to nbestLogProbs
  static public void computeNormalizedProbs(List<Double> nbestLogProbs, double scalingFactor) {

    // === get normalization constant, remember features, remember the combined linear score
    double normalizationConstant = Double.NEGATIVE_INFINITY;// log-semiring
    // Accumulate log-sum-exp of the scaled scores: log sum_i exp(logp_i * scalingFactor).

    for (double logp : nbestLogProbs) {
      normalizationConstant = addInLogSemiring(normalizationConstant, logp * scalingFactor, 0);
    }
    // System.out.println("normalization_constant (logP) is " + normalization_constant);

    // === get normalized prob for each hyp
    double tSum = 0;
    for (int i = 0; i < nbestLogProbs.size(); i++) {

      double normalizedProb =
          Math.exp(nbestLogProbs.get(i) * scalingFactor - normalizationConstant);
      tSum += normalizedProb;
      nbestLogProbs.set(i, normalizedProb);

      if (Double.isNaN(normalizedProb)) {
        throw new RuntimeException("prob is NaN, must be wrong\nnbest_logps.get(i): "
            + nbestLogProbs.get(i) + "; scaling_factor: " + scalingFactor
            + "; normalization_constant:" + normalizationConstant);
      }
      // logger.info("probability: " + normalized_prob);
    }

    // sanity check: the normalized values must form a probability distribution
    if (Math.abs(tSum - 1.0) > 1e-4) {
      throw new RuntimeException("probabilities not sum to one, must be wrong");
    }

  }


  // Gain(e) = negative risk = \sum_{e'} G(e, e')P(e')
  // curHyp: e
  // trueHyp: e'
  /**
   * Expected sentence-level BLEU of the candidate against every hypothesis in the nbest,
   * weighted by each hypothesis's posterior probability.
   */
  public double computeExpectedGain(int curHypLen, HashMap<String, Integer> curHypNgramTbl,
      List<HashMap<String, Integer>> ngramTbls, List<Integer> sentLens, List<Double> nbestProbs) {

    // ### get normalization constant, remember features, remember the combined linear score
    double gain = 0;

    for (int i = 0; i < nbestProbs.size(); i++) {
      HashMap<String, Integer> trueHypNgramTbl = ngramTbls.get(i);
      double trueProb = nbestProbs.get(i);
      int trueLen = sentLens.get(i);
      gain +=
          trueProb
              * BLEU.computeSentenceBleu(trueLen, trueHypNgramTbl, curHypLen, curHypNgramTbl,
                  doNgramClip, bleuOrder);
    }
    // System.out.println("Gain is " + gain);
    return gain;
  }

  // Gain(e) = negative risk = \sum_{e'} G(e, e')P(e')
  // curHyp: e
  // trueHyp: e'
  /** String-based variant of the expected-gain computation (recomputes n-grams internally). */
  static public double computeExpectedGain(String curHyp, List<String> nbestHyps,
      List<Double> nbestProbs) {
    // ### get normalization constant, remember features, remember the combined linear score
    double gain = 0;

    for (int i = 0; i < nbestHyps.size(); i++) {
      String trueHyp = nbestHyps.get(i);
      double trueProb = nbestProbs.get(i);
      gain += trueProb * BLEU.computeSentenceBleu(trueHyp, curHyp, doNgramClip, bleuOrder);
    }
    // System.out.println("Gain is " + gain);
    return gain;
  }

  // Not yet implemented: would fill posteriorCountsTbl with expected n-gram counts.
  void getGooglePosteriorCounts(List<HashMap<String, Integer>> ngramTbls,
      List<Double> normalizedProbs, HashMap<String, Double> posteriorCountsTbl) {
    // TODO
  }

  // Linear corpus gain: theta[0] * length + sum over n-grams of count * posterior * theta[order].
  // NOTE(review): posteriorCountsTbl is never populated (see getGooglePosteriorCounts), so
  // enabling useGoogleLinearCorpusGain would NPE at the get(key) below — confirm before use.
  double computeExpectedLinearCorpusGain(int curHypLen, HashMap<String, Integer> curHypNgramTbl,
      HashMap<String, Double> posteriorCountsTbl) {
    // TODO
    double[] thetas = {-1, 1, 1, 1, 1};

    double res = 0;
    res += thetas[0] * curHypLen;
    for (Entry<String, Integer> entry : curHypNgramTbl.entrySet()) {
      String key = entry.getKey();
      String[] tem = Regex.spaces.split(key);

      double post_prob = posteriorCountsTbl.get(key);
      res += entry.getValue() * post_prob * thetas[tem.length];
    }
    return res;
  }

  // OR: return Math.log(Math.exp(x) + Math.exp(y));
  // Numerically stable log-semiring addition; addMode selects sum (0), min (1), or max (2).
  static private double addInLogSemiring(double x, double y, int addMode) {// prevent over-flow
    if (addMode == 0) { // sum
      if (x == Double.NEGATIVE_INFINITY) {// if y is also n-infinity, then return n-infinity
        return y;
      }
      if (y == Double.NEGATIVE_INFINITY) {
        return x;
      }

      // Factor out the larger exponent so Math.exp never overflows.
      if (y <= x) {
        return x + Math.log(1 + Math.exp(y - x));
      } else {
        return y + Math.log(1 + Math.exp(x - y));
      }
    } else if (addMode == 1) { // viter-min
      return (x <= y) ? x : y;
    } else if (addMode == 2) { // viter-max
      return (x >= y) ? x : y;
    } else {
      throw new RuntimeException("invalid add mode");
    }
  }



  /**
   * Reads an nbest list from stdin (grouped by sentence id), MBR-reranks each group, and
   * prints the selected hypothesis per sentence to stdout.
   */
  public static void main(String[] args) throws IOException {

    // If you don't know what to use for scaling factor, try using 1

    if (args.length < 2) {
      System.err
          .println("usage: java NbestMinRiskReranker <produce_reranked_nbest> <scaling_factor> [numThreads]");
      return;
    }
    long startTime = System.currentTimeMillis();
    boolean produceRerankedNbest = Boolean.valueOf(args[0].trim());
    double scalingFactor = Double.parseDouble(args[1].trim());
    int numThreads = (args.length > 2) ? Integer.parseInt(args[2].trim()) : 1;


    NbestMinRiskReranker mbrReranker =
        new NbestMinRiskReranker(produceRerankedNbest, scalingFactor);

    System.err.println("##############running mbr reranking");

    int oldSentID = -1;
    List<String> nbest = new ArrayList<String>();

    Scanner scanner = new Scanner(System.in, "UTF-8");

    if (numThreads == 1) {

      // Sequential path: rerank each sentence's group as soon as its id changes.
      while (scanner.hasNextLine()) {
        String line = scanner.nextLine();
        String[] fds = Regex.threeBarsWithSpace.split(line);
        int newSentID = Integer.parseInt(fds[0]);
        if (oldSentID != -1 && oldSentID != newSentID) {
          if (nbest.size() > 0) {
            String best_hyp = mbrReranker.processOneSent(nbest, oldSentID);// nbest: list of unique
                                                                           // strings
            System.out.println(best_hyp);
          } else {
            System.out.println();
          }
          nbest.clear();
        }
        oldSentID = newSentID;
        // Blank hypotheses are dropped here so they never reach processOneSent.
        if (!fds[1].matches("^\\s*$")) nbest.add(line);
      }

      // last nbest
      if (oldSentID >= 0) {
        String bestHyp = mbrReranker.processOneSent(nbest, oldSentID);
        System.out.println(bestHyp);
        nbest.clear();
      }

    } else {

      // Parallel path: each sentence group becomes a RankerTask; results are collected in
      // the priority queue and drained in sentence order after the pool finishes.
      ExecutorService threadPool = Executors.newFixedThreadPool(numThreads);

      while (scanner.hasNextLine()) {
        String line = scanner.nextLine();
        String[] fds = Regex.threeBarsWithSpace.split(line);
        int newSentID = Integer.parseInt(fds[0]);
        if (oldSentID != -1 && oldSentID != newSentID) {

          threadPool.execute(mbrReranker.new RankerTask(nbest, oldSentID));

          nbest.clear();
        }
        oldSentID = newSentID;
        nbest.add(line);
      }

      // last nbest
      // NOTE(review): unlike the sequential path, this submits a task even when no input was
      // read at all (oldSentID == -1, empty nbest), which would make processOneSent throw —
      // confirm whether empty stdin is a supported case here.
      threadPool.execute(mbrReranker.new RankerTask(nbest, oldSentID));
      nbest.clear();

      threadPool.shutdown();

      try {
        threadPool.awaitTermination(Integer.MAX_VALUE, TimeUnit.SECONDS);

        while (!mbrReranker.resultsQueue.isEmpty()) {
          RankerResult result = mbrReranker.resultsQueue.remove();
          String best_hyp = result.toString();
          System.out.println(best_hyp);
        }


      } catch (InterruptedException e) {
        e.printStackTrace();
      }

    }
    
    scanner.close();

    System.err.println("Total running time (seconds) is "
        + (System.currentTimeMillis() - startTime) / 1000.0);
  }

  /** Reranks one sentence's nbest group on a pool thread and queues the result. */
  private class RankerTask implements Runnable {

    final List<String> nbest;
    final int sentID;

    RankerTask(final List<String> nbest, final int sentID) {
      // Defensive copy: the caller clears and reuses its nbest list after submitting.
      this.nbest = new ArrayList<String>(nbest);
      this.sentID = sentID;
    }

    public void run() {
      String result = processOneSent(nbest, sentID);
      resultsQueue.add(new RankerResult(result, sentID));
    }

  }

  /** A reranked hypothesis tagged with its sentence number; sorts by sentence number. */
  private static class RankerResult implements Comparable<RankerResult> {
    final String result;
    final Integer sentenceNumber;

    RankerResult(String result, int sentenceNumber) {
      this.result = result;
      this.sentenceNumber = sentenceNumber;
    }

    public int compareTo(RankerResult o) {
      return sentenceNumber.compareTo(o.sentenceNumber);
    }

    public String toString() {
      return result;
    }
  }
}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java b/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java
new file mode 100644
index 0000000..7b2185f
--- /dev/null
+++ b/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java
@@ -0,0 +1,125 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package joshua.decoder;
+
+import static java.util.Arrays.asList;
+import static java.util.Collections.emptyList;
+import static joshua.decoder.hypergraph.ViterbiExtractor.getViterbiFeatures;
+import static joshua.decoder.hypergraph.ViterbiExtractor.getViterbiString;
+import static joshua.decoder.hypergraph.ViterbiExtractor.getViterbiWordAlignmentList;
+import static joshua.util.FormatUtils.removeSentenceMarkers;
+
+import java.util.List;
+import java.util.Map;
+
+import joshua.decoder.ff.FeatureFunction;
+import joshua.decoder.hypergraph.HyperGraph;
+import joshua.decoder.segment_file.Sentence;
+
+/**
+ * structuredTranslation provides a more structured access to translation
+ * results than the Translation class.
+ * Members of instances of this class can be used upstream.
+ * <br/>
+ * TODO:
+ * Enable K-Best extraction.
+ * 
+ * @author fhieber
+ */
+public class StructuredTranslation {
+  
+  private final Sentence sourceSentence;
+  private final String translationString;
+  private final List<String> translationTokens;
+  private final float translationScore;
+  private final List<List<Integer>> translationWordAlignments;
+  private final Map<String,Float> translationFeatures;
+  private final float extractionTime;
+  
+  public StructuredTranslation(final Sentence sourceSentence,
+      final HyperGraph hypergraph,
+      final List<FeatureFunction> featureFunctions) {
+    
+      final long startTime = System.currentTimeMillis();
+      
+      this.sourceSentence = sourceSentence;
+      this.translationString = removeSentenceMarkers(getViterbiString(hypergraph));
+      this.translationTokens = extractTranslationTokens();
+      this.translationScore = extractTranslationScore(hypergraph);
+      this.translationFeatures = getViterbiFeatures(hypergraph, featureFunctions, sourceSentence).getMap();
+      this.translationWordAlignments = getViterbiWordAlignmentList(hypergraph);
+      this.extractionTime = (System.currentTimeMillis() - startTime) / 1000.0f;
+  }
+  
+  private float extractTranslationScore(final HyperGraph hypergraph) {
+    if (hypergraph == null) {
+      return 0;
+    } else {
+      return hypergraph.goalNode.getScore();
+    }
+  }
+  
+  private List<String> extractTranslationTokens() {
+    if (translationString.isEmpty()) {
+      return emptyList();
+    } else {
+      return asList(translationString.split("\\s+"));
+    }
+  }
+  
+  // Getters to use upstream
+  
+  public Sentence getSourceSentence() {
+    return sourceSentence;
+  }
+
+  public int getSentenceId() {
+    return sourceSentence.id();
+  }
+
+  public String getTranslationString() {
+    return translationString;
+  }
+
+  public List<String> getTranslationTokens() {
+    return translationTokens;
+  }
+
+  public float getTranslationScore() {
+    return translationScore;
+  }
+
+  /**
+   * Returns a list of target to source alignments.
+   */
+  public List<List<Integer>> getTranslationWordAlignments() {
+    return translationWordAlignments;
+  }
+  
+  public Map<String,Float> getTranslationFeatures() {
+    return translationFeatures;
+  }
+  
+  /**
+   * Time taken to build output information from the hypergraph.
+   */
+  public Float getExtractionTime() {
+    return extractionTime;
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/main/java/org/apache/joshua/decoder/Support.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/Support.java b/src/main/java/org/apache/joshua/decoder/Support.java
new file mode 100644
index 0000000..af33ec5
--- /dev/null
+++ b/src/main/java/org/apache/joshua/decoder/Support.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package joshua.decoder;
+
+import java.util.List;
+
/**
 * Small numeric and collection helpers shared across the decoder.
 *
 * @author Zhifei Li, <zh...@gmail.com>
 */
public class Support {

  /**
   * Returns the smaller of two doubles. Delegates to {@link Math#min}, which (unlike the
   * previous hand-rolled ternary) propagates NaN and treats -0.0 as smaller than 0.0.
   */
  public static double findMin(double a, double b) {
    return Math.min(a, b);
  }

  /** Returns the larger of two doubles; see {@link Math#max} for NaN/-0.0 semantics. */
  public static double findMax(double a, double b) {
    return Math.max(a, b);
  }

  /** Copies an entire Integer list into a primitive int array. */
  public static int[] toArray(List<Integer> in) {
    return subIntArray(in, 0, in.size());
  }

  /**
   * Copies the sublist [start, end) of an Integer list into a primitive int array.
   *
   * @param start inclusive
   * @param end exclusive
   */
  public static int[] subIntArray(List<Integer> in, int start, int end) {
    int[] res = new int[end - start];
    for (int i = start; i < end; i++) {
      res[i - start] = in.get(i);
    }
    return res;
  }

  /** Timing is currently disabled; always returns 0 (see the commented-out alternatives). */
  public static long current_time() {
    return 0;
    // return System.currentTimeMillis();
    // return System.nanoTime();
  }

  /**
   * Estimates the JVM's used heap in kilobytes, forcing garbage collection first so the
   * figure reflects live objects. Note: this sleeps roughly 400ms in total.
   */
  // Only used in LMGrammarJAVA
  public static long getMemoryUse() {
    putOutTheGarbage();
    long totalMemory = Runtime.getRuntime().totalMemory();// all the memory I get from the system
    putOutTheGarbage();
    long freeMemory = Runtime.getRuntime().freeMemory();
    return (totalMemory - freeMemory) / 1024;// in terms of kb
  }

  /* GC twice: a single System.gc() is only a hint and often leaves garbage uncollected. */
  private static void putOutTheGarbage() {
    collectGarbage();
    collectGarbage();
  }

  private static void collectGarbage() {
    final long SLEEP_INTERVAL = 100;
    try {
      System.gc();
      Thread.sleep(SLEEP_INTERVAL);
      System.runFinalization();
      Thread.sleep(SLEEP_INTERVAL);
    } catch (InterruptedException ex) {
      // Restore the interrupt flag instead of swallowing it, so callers can observe
      // the interruption (the old code only printed a stack trace).
      Thread.currentThread().interrupt();
    }
  }
}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/main/java/org/apache/joshua/decoder/Translation.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/Translation.java b/src/main/java/org/apache/joshua/decoder/Translation.java
new file mode 100644
index 0000000..8004d9f
--- /dev/null
+++ b/src/main/java/org/apache/joshua/decoder/Translation.java
@@ -0,0 +1,202 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package joshua.decoder;
+
+import static joshua.decoder.hypergraph.ViterbiExtractor.getViterbiFeatures;
+import static joshua.decoder.hypergraph.ViterbiExtractor.getViterbiString;
+import static joshua.decoder.hypergraph.ViterbiExtractor.getViterbiWordAlignments;
+import static joshua.util.FormatUtils.removeSentenceMarkers;
+
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.io.StringWriter;
+import java.util.List;
+
+import joshua.decoder.ff.FeatureFunction;
+import joshua.decoder.ff.FeatureVector;
+import joshua.decoder.ff.lm.StateMinimizingLanguageModel;
+import joshua.decoder.hypergraph.HyperGraph;
+import joshua.decoder.hypergraph.KBestExtractor;
+import joshua.decoder.io.DeNormalize;
+import joshua.decoder.segment_file.Sentence;
+
/**
 * This class represents translated input objects (sentences or lattices). It is aware of the source
 * sentence and id and contains the decoded hypergraph. Translation objects are returned by
 * DecoderThread instances to the InputHandler, where they are assembled in order for output.
 * 
 * @author Matt Post <po...@cs.jhu.edu>
 */

public class Translation {
  /* The source input (sentence or lattice) this translation was produced from. */
  private Sentence source;

  /**
   * This stores the output of the translation so we don't have to hold onto the hypergraph while we
   * wait for the outputs to be assembled.
   */
  private String output = null;

  /* Populated only when use_structured_output is enabled in the configuration. */
  private StructuredTranslation structuredTranslation = null;
  
  /**
   * Renders the decoded hypergraph into its final output form. Depending on the
   * configuration this either builds a StructuredTranslation or formats a string
   * (Viterbi 1-best or k-best list) according to outputFormat, then releases any
   * per-sentence KenLM state.
   *
   * @param source the input sentence
   * @param hypergraph the decoded hypergraph, or null if decoding failed
   * @param featureFunctions the decoder's feature functions (needed for feature output)
   * @param joshuaConfiguration the active decoder configuration
   */
  public Translation(Sentence source, HyperGraph hypergraph, 
      List<FeatureFunction> featureFunctions, JoshuaConfiguration joshuaConfiguration) {
    this.source = source;
    
    if (joshuaConfiguration.use_structured_output) {
      
      structuredTranslation = new StructuredTranslation(
          source, hypergraph, featureFunctions);
      this.output = structuredTranslation.getTranslationString();
      
    } else {

      // All formatted output is accumulated in this in-memory writer.
      StringWriter sw = new StringWriter();
      BufferedWriter out = new BufferedWriter(sw);

      try {
        if (hypergraph != null) {
          // Optionally dump the raw hypergraph to a per-sentence file.
          if (!joshuaConfiguration.hypergraphFilePattern.equals("")) {
            hypergraph.dump(String.format(joshuaConfiguration.hypergraphFilePattern, source.id()), featureFunctions);
          }

          long startTime = System.currentTimeMillis();

          // We must put this weight as zero, otherwise we get an error when we try to retrieve it
          // without checking
          Decoder.weights.increment("BLEU", 0);
          
          if (joshuaConfiguration.topN == 0) {
            
            /* construct Viterbi output */
            final String best = getViterbiString(hypergraph);
            
            Decoder.LOG(1, String.format("Translation %d: %.3f %s", source.id(), hypergraph.goalNode.getScore(),
                best));
            
            /*
             * Setting topN to 0 turns off k-best extraction, in which case we need to parse through
             * the output-string, with the understanding that we can only substitute variables for the
             * output string, sentence number, and model score.
             */
            String translation = joshuaConfiguration.outputFormat
                .replace("%s", removeSentenceMarkers(best))
                .replace("%S", DeNormalize.processSingleLine(best))
                .replace("%c", String.format("%.3f", hypergraph.goalNode.getScore()))
                .replace("%i", String.format("%d", source.id()));
            
            // %a (alignments) and %f (features) are only computed when requested,
            // since both require extra hypergraph traversals.
            if (joshuaConfiguration.outputFormat.contains("%a")) {
              translation = translation.replace("%a", getViterbiWordAlignments(hypergraph));
            }
            
            if (joshuaConfiguration.outputFormat.contains("%f")) {
              final FeatureVector features = getViterbiFeatures(hypergraph, featureFunctions, source);
              translation = translation.replace("%f", joshuaConfiguration.moses ? features.mosesString() : features.toString());
            }
            
            out.write(translation);
            out.newLine();
            
          } else {
            
            // k-best extraction path (topN > 0).
            final KBestExtractor kBestExtractor = new KBestExtractor(
                source, featureFunctions, Decoder.weights, false, joshuaConfiguration);
            kBestExtractor.lazyKBestExtractOnHG(hypergraph, joshuaConfiguration.topN, out);

            // Forest rescoring: re-extract with the BLEU weight raised, then lowered,
            // restoring it to zero afterwards. Each pass appends its own k-best list.
            if (joshuaConfiguration.rescoreForest) {
              Decoder.weights.increment("BLEU", joshuaConfiguration.rescoreForestWeight);
              kBestExtractor.lazyKBestExtractOnHG(hypergraph, joshuaConfiguration.topN, out);

              Decoder.weights.increment("BLEU", -joshuaConfiguration.rescoreForestWeight);
              kBestExtractor.lazyKBestExtractOnHG(hypergraph, joshuaConfiguration.topN, out);
            }
          }

          float seconds = (float) (System.currentTimeMillis() - startTime) / 1000.0f;
          Decoder.LOG(1, String.format("Input %d: %d-best extraction took %.3f seconds", id(),
              joshuaConfiguration.topN, seconds));

      } else {
        
        // Failed translations and blank lines get empty formatted outputs
        // @formatter:off
        String outputString = joshuaConfiguration.outputFormat
            .replace("%s", source.source())
            .replace("%e", "")
            .replace("%S", "")
            .replace("%t", "()")
            .replace("%i", Integer.toString(source.id()))
            .replace("%f", "")
            .replace("%c", "0.000");
        // @formatter:on

        out.write(outputString);
        out.newLine();
      }

        out.flush();
      } catch (IOException e) {
        // NOTE(review): killing the whole JVM on an output IOException is drastic for a
        // per-sentence failure — consider propagating instead; confirm intended behavior.
        e.printStackTrace();
        System.exit(1);
      }
      
      this.output = sw.toString();
      
    }

    /*
     * KenLM hack. If using KenLMFF, we need to tell KenLM to delete the pool used to create chart
     * objects for this sentence.
     */
    for (FeatureFunction feature : featureFunctions) {
      if (feature instanceof StateMinimizingLanguageModel) {
        ((StateMinimizingLanguageModel) feature).destroyPool(getSourceSentence().id());
        break;
      }
    }
    
  }

  /** @return the source input this translation corresponds to */
  public Sentence getSourceSentence() {
    return this.source;
  }

  /** @return the id of the source sentence */
  public int id() {
    return source.id();
  }

  /** @return the fully formatted output produced at construction time */
  @Override
  public String toString() {
    return output;
  }
  
  /**
   * Returns the StructuredTranslation object
   * if JoshuaConfiguration.construct_structured_output == True.
   * @throws RuntimeException if StructuredTranslation object not set.
   * @return
   */
  public StructuredTranslation getStructuredTranslation() {
    if (structuredTranslation == null) {
      throw new RuntimeException("No StructuredTranslation object created. You should set JoshuaConfigration.construct_structured_output = true");
    }
    return structuredTranslation;
  }
  
}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/main/java/org/apache/joshua/decoder/Translations.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/Translations.java b/src/main/java/org/apache/joshua/decoder/Translations.java
new file mode 100644
index 0000000..e6ba9e6
--- /dev/null
+++ b/src/main/java/org/apache/joshua/decoder/Translations.java
@@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package joshua.decoder;
+
+import java.util.LinkedList;
+import joshua.decoder.io.TranslationRequestStream;
+
+/**
+ * This class represents a streaming sequence of translations. It is returned by the main entry
+ * point to the Decoder object, the call to decodeAll. The translations here are parallel to the
+ * input sentences in the corresponding TranslationRequest object. Because of parallelization, the
+ * translated sentences might be computed out of order. Each Translation is sent to this
+ * Translations object by a DecoderThreadRunner via the record() function, which places the
+ * Translation in the right place. When the next translation in a sequence is available, next() is
+ * notified.
+ * 
+ * @author Matt Post <po...@cs.jhu.edu>
+ */
+public class Translations {
+
+  /* The source sentences to be translated. */
+  private TranslationRequestStream request = null;
+
+  /*
+   * This records the index of the sentence at the head of the underlying list. The iterator's
+   * next() blocks when the value at this position in the translations LinkedList is null.
+   */
+  private int currentID = 0;
+
+  /* The set of translated sentences, indexed relative to currentID. */
+  private LinkedList<Translation> translations = null;
+
+  /* True once the input stream is exhausted: no further sentences will arrive. */
+  private boolean spent = false;
+
+  public Translations(TranslationRequestStream request) {
+    this.request = request;
+    this.translations = new LinkedList<Translation>();
+  }
+
+  /**
+   * This is called when null is received from the TranslationRequest, indicating that there are no
+   * more input sentences to be translated. That in turn means that the request size will no longer
+   * grow. We then notify any waiting thread if the last ID we've processed is the last one, period.
+   */
+  public void finish() {
+    synchronized (this) {
+      spent = true;
+      if (currentID == request.size()) {
+        this.notifyAll();
+      }
+    }
+  }
+
+  /**
+   * This is called whenever a translation is completed by one of the decoder threads. There may be
+   * a current output thread waiting for the current translation, which is determined by checking if
+   * the ID of the translation is the same as the one being waited for (currentID). If so, the
+   * thread waiting for it is notified.
+   * 
+   * @param translation the completed translation, slotted by its id
+   */
+  public void record(Translation translation) {
+    synchronized (this) {
+
+      /* Pad the set of translations with nulls to accommodate the new translation. */
+      int offset = translation.id() - currentID;
+      while (offset >= translations.size())
+        translations.add(null);
+      translations.set(offset, translation);
+
+      /*
+       * If the id of the current translation is at the head of the list (first element), then we
+       * have the next Translation to be returned, and we should notify anyone waiting on next(),
+       * which will then remove the item and increment the currentID. notifyAll() (rather than
+       * notify()) ensures no waiter is missed if several threads block on next().
+       */
+      if (translation.id() == currentID) {
+        this.notifyAll();
+      }
+    }
+  }
+
+  /**
+   * Returns the next Translation, blocking if necessary until it's available, since the next
+   * Translation might not have been produced yet. Returns null once every input sentence has
+   * been translated and handed out, or if the waiting thread is interrupted.
+   */
+  public Translation next() {
+    synchronized (this) {
+
+      /*
+       * Wait until either the next in-order translation has been recorded, or we know no further
+       * translations will ever arrive (the input is spent and everything has been distributed).
+       * The loop -- rather than a single 'if' -- guards against spurious wakeups and against
+       * notifications issued for a different condition, which previously could cause poll() to
+       * return null or pop a slot that had not yet been recorded.
+       */
+      while (!(spent && currentID == request.size())
+          && (translations.isEmpty() || translations.peek() == null)) {
+        try {
+          this.wait();
+        } catch (InterruptedException e) {
+          /* Preserve the interrupt status for callers instead of swallowing it. */
+          Thread.currentThread().interrupt();
+          return null;
+        }
+      }
+
+      /* If there are no more input sentences and we've distributed the last one, we're done. */
+      if (spent && currentID == request.size())
+        return null;
+
+      /* We now have the sentence and can return it. */
+      currentID++;
+      return translations.poll();
+    }
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/main/java/org/apache/joshua/decoder/chart_parser/Cell.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/chart_parser/Cell.java b/src/main/java/org/apache/joshua/decoder/chart_parser/Cell.java
new file mode 100644
index 0000000..d8d16d8
--- /dev/null
+++ b/src/main/java/org/apache/joshua/decoder/chart_parser/Cell.java
@@ -0,0 +1,291 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package joshua.decoder.chart_parser;
+
+import static com.google.common.base.Preconditions.checkNotNull;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.Map.Entry;
+import java.util.logging.Logger;
+
+import joshua.decoder.ff.FeatureFunction;
+import joshua.decoder.ff.state_maintenance.DPState;
+import joshua.decoder.ff.tm.Rule;
+import joshua.decoder.hypergraph.HGNode;
+import joshua.decoder.hypergraph.HyperEdge;
+
+/**
+ * This class implements the cells of the dynamic-programming chart. Its functions are to
+ * (1) combine smaller items into larger ones using rules, creating the nodes and hyperedges
+ * that make up a hypergraph, (2) evaluate model scores for items, and (3) support
+ * cube pruning. Note: the Cell creates HGNodes, but not all of them will end up being used
+ * in the hypergraph.
+ * 
+ * @author Matt Post <po...@cs.jhu.edu>
+ * @author Zhifei Li, <zh...@gmail.com>
+ */
+class Cell {
+
+  // The chart this cell belongs to
+  private Chart chart = null;
+
+  // The top-level (goal) symbol
+  private int goalSymbol;
+
+  // Maps a node's signature to the node, to maintain uniqueness of nodes.
+  private HashMap<HGNode.Signature, HGNode> nodesSigTbl = new LinkedHashMap<HGNode.Signature, HGNode>();
+
+  // Groups nodes into SuperNodes, keyed by left-hand-side symbol.
+  private Map<Integer, SuperNode> superNodesTbl = new HashMap<Integer, SuperNode>();
+
+  /**
+   * The values of nodesSigTbl sorted in decreasing-score order. Rebuilt lazily by
+   * ensureSorted(); a null value means the cache has been invalidated (see addNewNode()).
+   */
+  private List<HGNode> sortedNodes = null;
+
+  // ===============================================================
+  // Static fields
+  // ===============================================================
+  private static final Logger logger = Logger.getLogger(Cell.class.getName());
+
+  // ===============================================================
+  // Constructor
+  // ===============================================================
+
+  public Cell(Chart chart, int goalSymID) {
+    this.chart = chart;
+    this.goalSymbol = goalSymID;
+  }
+
+  // NOTE(review): constraint_symbol_id is currently unused; this overload just delegates.
+  public Cell(Chart chart, int goal_sym_id, int constraint_symbol_id) {
+    this(chart, goal_sym_id);
+  }
+
+  // ===============================================================
+  // Package-protected methods
+  // ===============================================================
+  
+  public Set<Integer> getKeySet() {
+    return superNodesTbl.keySet();
+  }
+  
+  public SuperNode getSuperNode(int lhs) {
+    return superNodesTbl.get(lhs);
+  }
+
+  /**
+   * This function loops over all items in the top-level bin (covering the input sentence from
+   * <s> ... </s>), looking for items with the goal LHS. Each such item is attached, via a
+   * final-transition hyperedge, to a single goal node held by this cell. The goal bin is
+   * expected to end up with exactly one item; only the "goal bin" should call this function.
+   *
+   * @return true if exactly one goal item was created, false otherwise
+   */
+  // note that the input bin is bin[0][n], not the goal bin
+  boolean transitToGoal(Cell bin, List<FeatureFunction> featureFunctions, int sentenceLength) {
+    this.sortedNodes = new ArrayList<HGNode>();
+    HGNode goalItem = null;
+
+    for (HGNode antNode : bin.getSortedNodes()) {
+      if (antNode.lhs == this.goalSymbol) {
+        float logP = antNode.bestHyperedge.getBestDerivationScore();
+        List<HGNode> antNodes = new ArrayList<HGNode>();
+        antNodes.add(antNode);
+
+        // Cost of the final transition over the whole span [0, sentenceLength].
+        float finalTransitionLogP = ComputeNodeResult.computeFinalCost(featureFunctions, antNodes,
+            0, sentenceLength, null, this.chart.getSentence());
+
+        List<HGNode> previousItems = new ArrayList<HGNode>();
+        previousItems.add(antNode);
+
+        HyperEdge dt = new HyperEdge(null, logP + finalTransitionLogP, finalTransitionLogP,
+            previousItems, null);
+
+        // The first matching antecedent creates the goal node; later ones only add edges to it.
+        if (null == goalItem) {
+          goalItem = new HGNode(0, sentenceLength + 1, this.goalSymbol, null, dt, logP
+              + finalTransitionLogP);
+          this.sortedNodes.add(goalItem);
+        } else {
+          goalItem.addHyperedgeInNode(dt);
+        }
+      } // End if item.lhs == this.goalSymID
+    } // End foreach Item in bin.get_sorted_items()
+
+    int itemsInGoalBin = getSortedNodes().size();
+    if (1 != itemsInGoalBin) {
+      logger.severe("the goal_bin does not have exactly one item");
+      return false;
+    }
+
+    return true;
+  }
+
+  /**
+   * A note about pruning: when a hyperedge gets created, it first needs to pass through the
+   * shouldPruneEdge filter. Then, if it does not trigger a new node (i.e. it is merged into an
+   * old node), it does not trigger pruningNodes. If it does trigger a new node (either because
+   * its signature is new or because its logP is better than the old node's logP), then it
+   * triggers pruningNodes, which may cause *other* nodes to be pruned as well.
+   * */
+
+  /**
+   * Creates a new hyperedge and adds it to the chart, subject to pruning. The logic of this
+   * function is as follows: if the pruner permits the edge to be added, we build the new edge,
+   * which ends in an HGNode. If this is the first time we've built an HGNode for this point in the
+   * graph, it gets added automatically. Otherwise, we add the hyperedge to the existing HGNode,
+   * possibly updating the HGNode's cache of the best incoming hyperedge.
+   * 
+   * @return the new hypernode, or null if the cell was pruned.
+   */
+  HGNode addHyperEdgeInCell(ComputeNodeResult result, Rule rule, int i, int j, List<HGNode> ants,
+      SourcePath srcPath, boolean noPrune) {
+
+//    System.err.println(String.format("ADD_EDGE(%d-%d): %s", i, j, rule.getRuleString()));
+//    if (ants != null) {
+//      for (int xi = 0; xi < ants.size(); xi++) {
+//        System.err.println(String.format("  -> TAIL %s", ants.get(xi)));
+//      }
+//    }
+
+    List<DPState> dpStates = result.getDPStates();
+    float pruningEstimate = result.getPruningEstimate();
+    float transitionLogP = result.getTransitionCost();
+    float finalizedTotalLogP = result.getViterbiCost();
+
+    /**
+     * Here, the edge has passed pre-pruning. The edge will be added to the chart in one of two
+     * ways:
+     * 
+     * 1. If there is no existing node, a new one gets created and the edge is its only incoming
+     * hyperedge.
+     * 
+     * 2. If there is an existing node, the edge will be added to its list of incoming hyperedges,
+     * possibly taking place as the best incoming hyperedge for that node.
+     */
+
+    HyperEdge hyperEdge = new HyperEdge(rule, finalizedTotalLogP, transitionLogP, ants, srcPath);
+    HGNode newNode = new HGNode(i, j, rule.getLHS(), dpStates, hyperEdge, pruningEstimate);
+
+    /**
+     * Each node has a list of hyperedges; check whether a node with this signature already
+     * exists. If yes, just merge in the hyperedges, which may change the best logP of the node.
+     * */
+    HGNode oldNode = this.nodesSigTbl.get(newNode.signature());
+    if (null != oldNode) { // have an item with same states, combine items
+      this.chart.nMerged++;
+
+      /**
+       * The position of oldItem in this.heapItems may change; strictly we should remove the
+       * oldItem and re-insert it, but that would be linear time, which is too expensive.
+       **/
+      if (newNode.getScore() > oldNode.getScore()) { // merge old to new: semiring plus
+
+        newNode.addHyperedgesInNode(oldNode.hyperedges);
+        // This will update the HashMap, so that the oldNode is destroyed.
+        addNewNode(newNode);
+      } else {// merge new to old, does not trigger pruningItems
+        oldNode.addHyperedgesInNode(newNode.hyperedges);
+      }
+
+    } else { // first time item
+      this.chart.nAdded++; // however, this item may not be used in the future due to pruning in
+      // the hyper-graph
+      addNewNode(newNode);
+    }
+
+    return newNode;
+  }
+
+  // Returns the cell's nodes in decreasing-score order, (re)sorting lazily if needed.
+  List<HGNode> getSortedNodes() {
+    ensureSorted();
+    return this.sortedNodes;
+  }
+  
+  // Returns the lhs -> SuperNode map, with each SuperNode's node list freshly sorted.
+  Map<Integer, SuperNode> getSortedSuperItems() {
+    ensureSorted();
+    return this.superNodesTbl;
+  }
+  
+  // ===============================================================
+  // Private Methods
+  // ===============================================================
+
+  /**
+   * Two cases cause this function to be called: (1) a new hyperedge leads to a non-existing node
+   * signature, or (2) a new hyperedge's signature matches an old node's signature, but the
+   * best logP of the old node is worse than the new hyperedge's logP.
+   * */
+  private void addNewNode(HGNode node) {
+    this.nodesSigTbl.put(node.signature(), node); // add/replace the item
+    this.sortedNodes = null; // invalidate the sorted cache; ensureSorted() will rebuild it
+    
+//    System.err.println(String.format("** NEW NODE %s %d %d", Vocabulary.word(node.lhs), node.i, node.j));
+
+    // Since this.sortedNodes == null, adding the node to the SuperNode here is not strictly
+    // necessary (ensureSorted() reconstructs all SuperNode member lists), but we still ensure
+    // a SuperNode exists for this lhs.
+    SuperNode si = this.superNodesTbl.get(node.lhs);
+    if (null == si) {
+      si = new SuperNode(node.lhs);
+      this.superNodesTbl.put(node.lhs, si);
+    }
+    si.nodes.add(node);// TODO what about the dead items?
+  }
+
+  /**
+   * Gets a sorted list of the nodes in this cell, and also makes sure the node list in every
+   * SuperNode is sorted. This is called lazily (the list is not always sorted), and is mainly
+   * needed for the goal bin and for cube pruning.
+   */
+  private void ensureSorted() {
+    if (null == this.sortedNodes) {
+      
+      // get sortedNodes.
+      this.sortedNodes = new ArrayList<>(this.nodesSigTbl.size());
+      for (HGNode node : this.nodesSigTbl.values()) {
+        this.sortedNodes.add(node);
+      }
+
+      // Sort the nodes in decreasing-logP order.
+      this.sortedNodes.sort(HGNode.inverseLogPComparator);
+
+      // TODO: we cannot create new SuperItems here because the DotItems link to them.
+      // Thus, we clear nodes from the existing SuperNodes and repopulate them below.
+      for (SuperNode superNode : this.superNodesTbl.values()) {
+        superNode.nodes.clear();
+      }
+
+      for (HGNode node : this.sortedNodes) {
+        SuperNode superNode = this.superNodesTbl.get(node.lhs);
+        checkNotNull(superNode, "Does not have super Item, have to exist");
+        superNode.nodes.add(node);
+      }
+
+      // Remove SuperNodes that no longer contain any nodes (e.g. due to pruning).
+      for (Iterator<Entry<Integer, SuperNode>> it = this.superNodesTbl.entrySet().iterator(); it.hasNext(); ) {
+        Entry<Integer, SuperNode> entry = it.next();
+        if (entry.getValue().nodes.isEmpty()) {
+          it.remove();
+        }
+      }
+    }
+  }
+}