Posted to commits@joshua.apache.org by mj...@apache.org on 2016/09/17 12:27:54 UTC

[05/14] incubator-joshua git commit: Joshua 7 configuration system

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/FragmentLMFF.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/FragmentLMFF.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/FragmentLMFF.java
index 5332135..412cf60 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/FragmentLMFF.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/FragmentLMFF.java
@@ -27,8 +27,8 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Stack;
 
-import org.apache.joshua.decoder.JoshuaConfiguration;
 import org.apache.joshua.decoder.chart_parser.SourcePath;
+import org.apache.joshua.decoder.ff.Accumulator;
 import org.apache.joshua.decoder.ff.FeatureVector;
 import org.apache.joshua.decoder.ff.StatefulFF;
 import org.apache.joshua.decoder.ff.state_maintenance.DPState;
@@ -38,6 +38,8 @@ import org.apache.joshua.decoder.segment_file.Sentence;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import com.typesafe.config.Config;
+
 /**
  * <p>Feature function that reads in a list of language model fragments and matches them against the
  * hypergraph. This allows for language model fragment "glue" features, which fire when LM fragments
@@ -106,15 +108,15 @@ public class FragmentLMFF extends StatefulFF {
    * @param args arguments passed to the feature function
    * @param config the {@link org.apache.joshua.decoder.JoshuaConfiguration}
    */
-  public FragmentLMFF(FeatureVector weights, String[] args, JoshuaConfiguration config) {
-    super(weights, "FragmentLMFF", args, config);
+  public FragmentLMFF(Config featureConfig, FeatureVector weights) {
+    super("FragmentLMFF", featureConfig, weights);
 
     lmFragments = new HashMap<>();
 
-    fragmentLMFile = parsedArgs.get("lm");
-    BUILD_DEPTH = Integer.parseInt(parsedArgs.get("build-depth"));
-    MAX_DEPTH = Integer.parseInt(parsedArgs.get("max-depth"));
-    MIN_LEX_DEPTH = Integer.parseInt(parsedArgs.get("min-lex-depth"));
+    fragmentLMFile = featureConfig.getString("lm");
+    BUILD_DEPTH = featureConfig.getInt("build-depth");
+    MAX_DEPTH = featureConfig.getInt("max-depth");
+    MIN_LEX_DEPTH = featureConfig.getInt("min-lex-depth");
 
     /* Read in the language model fragments */
     try {
@@ -169,7 +171,7 @@ public class FragmentLMFF extends StatefulFF {
    * @param j todo
    * @param sourcePath information about a path taken through the source {@link org.apache.joshua.lattice.Lattice}
    * @param sentence {@link org.apache.joshua.lattice.Lattice} input
-   * @param acc {@link org.apache.joshua.decoder.ff.FeatureFunction.Accumulator} object permitting generalization of feature computation
+   * @param acc {@link org.apache.joshua.decoder.ff.Accumulator} object permitting generalization of feature computation
    * @return the new dynamic programming state (null for stateless features)
    */
   @Override
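
A minimal sketch of driving the new constructor from Java, assuming the statements sit inside a
test or main method with com.typesafe.config.ConfigFactory and the Joshua classes imported; only
the key names come from the hunk above, while the fragment-LM path and depth values are placeholders.

    // Keys mirror the featureConfig.getString()/getInt() calls in the constructor.
    Config fragmentLmConfig = ConfigFactory.parseString(
        "lm = /path/to/fragment.lm\n"   // hypothetical path to the LM fragment file
      + "build-depth = 1\n"
      + "max-depth = 7\n"
      + "min-lex-depth = 4");
    FragmentLMFF fragmentLm = new FragmentLMFF(fragmentLmConfig, new FeatureVector(0));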

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/ff/lm/LanguageModelFF.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/lm/LanguageModelFF.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/lm/LanguageModelFF.java
index 0b522cb..e30242e 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/lm/LanguageModelFF.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/lm/LanguageModelFF.java
@@ -27,9 +27,10 @@ import java.util.LinkedList;
 import java.util.List;
 
 import org.apache.joshua.corpus.Vocabulary;
-import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.Decoder;
 import org.apache.joshua.decoder.Support;
 import org.apache.joshua.decoder.chart_parser.SourcePath;
+import org.apache.joshua.decoder.ff.Accumulator;
 import org.apache.joshua.decoder.ff.FeatureMap;
 import org.apache.joshua.decoder.ff.FeatureVector;
 import org.apache.joshua.decoder.ff.StatefulFF;
@@ -45,6 +46,7 @@ import org.slf4j.LoggerFactory;
 
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.primitives.Ints;
+import com.typesafe.config.Config;
 
 /**
  * This class performs the following:
@@ -95,8 +97,9 @@ public class LanguageModelFF extends StatefulFF {
   /**
    * We cache the weight of the feature since there is only one.
    */
-  protected String type;
+  protected final String type;
   protected final String path;
+  protected final boolean useSourceAnnotations;
 
   /** Whether this is a class-based LM */
   protected boolean isClassLM;
@@ -105,21 +108,23 @@ public class LanguageModelFF extends StatefulFF {
   /** Whether this feature function fires LM oov indicators */ 
   protected boolean withOovFeature;
 
-  public LanguageModelFF(FeatureVector weights, String[] args, JoshuaConfiguration config) {
-    super(weights, NAME_PREFIX + LM_INDEX, args, config);
+  public LanguageModelFF(Config featureConfig, FeatureVector weights) {
+    super(NAME_PREFIX + LM_INDEX, featureConfig, weights);
     this.oovFeatureId = FeatureMap.hashFeature(NAME_PREFIX + LM_INDEX + OOV_SUFFIX);
     LM_INDEX++;
 
-    this.type = parsedArgs.get("lm_type");
-    this.ngramOrder = Integer.parseInt(parsedArgs.get("lm_order"));
-    this.path = parsedArgs.get("lm_file");
+    this.type = featureConfig.getString("lm_type");
+    this.ngramOrder = featureConfig.getInt("lm_order");
+    this.path = featureConfig.getString("lm_file");
+    this.useSourceAnnotations = featureConfig.hasPath("source_annotations") ? 
+        featureConfig.getBoolean("source_annotations") : false;
 
-    if (parsedArgs.containsKey("class_map")) {
+    if (featureConfig.hasPath("class_map")) {
       this.isClassLM = true;
-      this.classMap = new ClassMap(parsedArgs.get("class_map"));
+      this.classMap = new ClassMap(featureConfig.getString("class_map"));
     }
     
-    if (parsedArgs.containsKey("oov_feature")) {
+    if (featureConfig.hasPath("oov_feature")) {
       this.withOovFeature = true;
     }
 
@@ -146,7 +151,9 @@ public class LanguageModelFF extends StatefulFF {
     }
 
     Vocabulary.registerLanguageModel(this.languageModel);
-    Vocabulary.id(config.default_non_terminal);
+    // TODO(fhieber): this should not be here really, but it works like this.
+    final String defaultNonTerminal = Decoder.getDefaultFlags().getString("default_non_terminal");
+    Vocabulary.id(defaultNonTerminal);
 
     startSymbolId = Vocabulary.id(Vocabulary.START_SYM);
   }
@@ -177,7 +184,7 @@ public class LanguageModelFF extends StatefulFF {
     }
 
     int[] words;
-    if (config.source_annotations) {
+    if (useSourceAnnotations) {
       // get source side annotations and project them to the target side
       words = getTags(rule, i, j, sentence);
     } else {
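
LanguageModelFF follows the same pattern: lm_type, lm_order and lm_file are read unconditionally,
while source_annotations, class_map and oov_feature are optional (guarded by hasPath). A sketch
with placeholder values, again assuming it runs inside a method with the relevant imports in place:

    // 'berkeleylm' and the file path are placeholders; only the key names are prescribed above.
    Config lmConfig = ConfigFactory.parseString(
        "lm_type = berkeleylm\n"
      + "lm_order = 5\n"
      + "lm_file = /path/to/lm.gz\n"
        // Optional: source_annotations (boolean), class_map (file), oov_feature (flag).
      + "source_annotations = false");
    LanguageModelFF lm = new LanguageModelFF(lmConfig, new FeatureVector(0));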

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
index 8e54a2d..2be8b0f 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
@@ -24,9 +24,10 @@ import java.util.List;
 import java.util.UUID;
 
 import org.apache.joshua.corpus.Vocabulary;
-import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.Decoder;
 import org.apache.joshua.decoder.KenLMPool;
 import org.apache.joshua.decoder.chart_parser.SourcePath;
+import org.apache.joshua.decoder.ff.Accumulator;
 import org.apache.joshua.decoder.ff.FeatureVector;
 import org.apache.joshua.decoder.ff.lm.KenLM.StateProbPair;
 import org.apache.joshua.decoder.ff.state_maintenance.DPState;
@@ -35,6 +36,8 @@ import org.apache.joshua.decoder.ff.tm.Rule;
 import org.apache.joshua.decoder.hypergraph.HGNode;
 import org.apache.joshua.decoder.segment_file.Sentence;
 
+import com.typesafe.config.Config;
+
 /**
  * Wrapper for KenLM LMs with left-state minimization. We inherit from the regular
  *
@@ -43,13 +46,10 @@ import org.apache.joshua.decoder.segment_file.Sentence;
  */
 public class StateMinimizingLanguageModel extends LanguageModelFF {
 
-  public StateMinimizingLanguageModel(FeatureVector weights, String[] args, JoshuaConfiguration config) {
-    super(weights, args, config);
-    this.type = "kenlm";
-    if (parsedArgs.containsKey("lm_type") && ! parsedArgs.get("lm_type").equals("kenlm")) {
-      String msg = "* FATAL: StateMinimizingLanguageModel only supports 'kenlm' lm_type backend"
-          + "*        Remove lm_type from line or set to 'kenlm'";
-      throw new RuntimeException(msg);
+  public StateMinimizingLanguageModel(Config featureConfig, FeatureVector weights) {
+    super(featureConfig, weights);
+    if (!featureConfig.getString("lm_type").equals("kenlm")) {
+      throw new RuntimeException("StateMinimizingLanguageModel only supports 'lm_type = kenlm'");
     }
   }
 
@@ -63,7 +63,9 @@ public class StateMinimizingLanguageModel extends LanguageModelFF {
     this.languageModel = new KenLM(ngramOrder, path);
 
     Vocabulary.registerLanguageModel(this.languageModel);
-    Vocabulary.id(config.default_non_terminal);
+    // TODO(fhieber): this should not be here really, but it works like this.
+    final String defaultNonTerminal = Decoder.getDefaultFlags().getString("default_non_terminal");
+    Vocabulary.id(defaultNonTerminal);
 
   }
 
@@ -100,7 +102,7 @@ public class StateMinimizingLanguageModel extends LanguageModelFF {
     }
 
     int[] ruleWords;
-    if (config.source_annotations) {
+    if (useSourceAnnotations) {
       // get source side annotations and project them to the target side
       ruleWords = getTags(rule, i, j, sentence);
     } else {
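
StateMinimizingLanguageModel accepts the same keys but now insists on lm_type = kenlm at
construction time instead of silently overriding it. A sketch with a placeholder model path:

    Config kenLmConfig = ConfigFactory.parseString(
        "lm_type = kenlm\n"             // any other value now raises a RuntimeException
      + "lm_order = 5\n"
      + "lm_file = /path/to/lm.kenlm");
    StateMinimizingLanguageModel kenLm =
        new StateMinimizingLanguageModel(kenLmConfig, new FeatureVector(0));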

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/ff/phrase/Distortion.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/phrase/Distortion.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/phrase/Distortion.java
index 4309820..a2b5209 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/phrase/Distortion.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/phrase/Distortion.java
@@ -20,8 +20,8 @@ package org.apache.joshua.decoder.ff.phrase;
 
 import java.util.List;
 
-import org.apache.joshua.decoder.JoshuaConfiguration;
 import org.apache.joshua.decoder.chart_parser.SourcePath;
+import org.apache.joshua.decoder.ff.Accumulator;
 import org.apache.joshua.decoder.ff.FeatureVector;
 import org.apache.joshua.decoder.ff.StatelessFF;
 import org.apache.joshua.decoder.ff.state_maintenance.DPState;
@@ -30,16 +30,12 @@ import org.apache.joshua.decoder.hypergraph.HGNode;
 import org.apache.joshua.decoder.phrase.Hypothesis;
 import org.apache.joshua.decoder.segment_file.Sentence;
 
+import com.typesafe.config.Config;
+
 public class Distortion extends StatelessFF {
 
-  public Distortion(FeatureVector weights, String[] args, JoshuaConfiguration config) {
-    super(weights, "Distortion", args, config);
-    
-    if (! config.search_algorithm.equals("stack")) {
-      String msg = "* FATAL: Distortion feature only application for phrase-based decoding. "
-          + "Use -search phrase or remove this feature";
-      throw new RuntimeException(msg);
-    }
+  public Distortion(Config featureConfig, FeatureVector weights) {
+    super("Distortion", featureConfig, weights);
   }
 
   @Override
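
Since the stack-search check has moved out of the constructor, Distortion itself needs no
parameters; a sketch assuming the StatelessFF base class reads no required keys, so an empty
Config is enough:

    // ConfigFactory.empty() returns a Config with no keys.
    Distortion distortion = new Distortion(ConfigFactory.empty(), new FeatureVector(0));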

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/AbstractGrammar.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/AbstractGrammar.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/AbstractGrammar.java
index 91cf00f..f6f48cf 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/AbstractGrammar.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/AbstractGrammar.java
@@ -19,21 +19,14 @@
 package org.apache.joshua.decoder.ff.tm;
 
 import java.util.Arrays;
-import java.util.HashSet;
 import java.util.List;
 
-import org.apache.joshua.corpus.Vocabulary;
-import org.apache.joshua.decoder.JoshuaConfiguration;
 import org.apache.joshua.decoder.ff.FeatureFunction;
-import org.apache.joshua.decoder.phrase.PhraseTable;
-import org.apache.joshua.decoder.segment_file.Token;
-import org.apache.joshua.lattice.Arc;
-import org.apache.joshua.lattice.Lattice;
-import org.apache.joshua.lattice.Node;
-
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import com.typesafe.config.Config;
+
 /**
  * Partial implementation of the <code>Grammar</code> interface that provides logic for sorting a
  * grammar.
@@ -89,21 +82,22 @@ public abstract class AbstractGrammar implements Grammar {
   /* The maximum span of the input this grammar rules can be applied to. */
   protected final int spanLimit;
 
-  protected final JoshuaConfiguration joshuaConfiguration;
+  protected final Config config;
 
   /**
-   * Creates an empty, unsorted grammar with given owner and spanlimit
+   * Creates an empty, unsorted grammar with 
+   * owner and spanLimit configured. The Grammar is initially not sorted.
    * 
    * @see Grammar#isSorted()
    * @param owner the associated decoder-wide {@link org.apache.joshua.decoder.ff.tm.OwnerMap}
    * @param config a {@link org.apache.joshua.decoder.JoshuaConfiguration} object
    * @param spanLimit the maximum span of the input grammar rule(s) can be applied to.
    */
-  public AbstractGrammar(final String owner, final JoshuaConfiguration config, final int spanLimit) {
+  public AbstractGrammar(final Config config) {
+    this.config = config;
+    this.owner = OwnerMap.register(config.getString("owner"));
+    this.spanLimit = config.getInt("span_limit");
     this.sorted = false;
-    this.owner = OwnerMap.register(owner);
-    this.joshuaConfiguration = config;
-    this.spanLimit = spanLimit;
   }
 
   public static final int OOV_RULE_ID = 0;
@@ -183,48 +177,4 @@ public abstract class AbstractGrammar implements Grammar {
       }
     }
   }
-
-  // write grammar to disk
-  public void writeGrammarOnDisk(String file) {
-  }
-  
-  /**
-   * Adds OOV rules for all words in the input lattice to the current grammar. Uses addOOVRule() so that
-   * sub-grammars can define different types of OOV rules if needed (as is used in {@link PhraseTable}).
-   * 
-   * @param grammar Grammar in the Trie
-   * @param inputLattice the lattice representing the input sentence
-   * @param featureFunctions a list of feature functions used for scoring
-   * @param onlyTrue determine if word is actual OOV.
-   */
-  public static void addOOVRules(Grammar grammar, Lattice<Token> inputLattice, 
-      List<FeatureFunction> featureFunctions, boolean onlyTrue) {
-    /*
-     * Add OOV rules; This should be called after the manual constraints have
-     * been set up.
-     */
-    HashSet<Integer> words = new HashSet<>();
-    for (Node<Token> node : inputLattice) {
-      for (Arc<Token> arc : node.getOutgoingArcs()) {
-        // create a rule, but do not add into the grammar trie
-        // TODO: which grammar should we use to create an OOV rule?
-        int sourceWord = arc.getLabel().getWord();
-        if (sourceWord == Vocabulary.id(Vocabulary.START_SYM)
-            || sourceWord == Vocabulary.id(Vocabulary.STOP_SYM))
-          continue;
-
-        // Determine if word is actual OOV.
-        if (onlyTrue && ! Vocabulary.hasId(sourceWord))
-          continue;
-
-        words.add(sourceWord);
-      }
-    }
-
-    for (int sourceWord: words) 
-      grammar.addOOVRules(sourceWord, featureFunctions);
-
-    // Sort all the rules (not much to actually do, this just marks it as sorted)
-    grammar.sortGrammar(featureFunctions);
-  }
 }
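
Subclasses of AbstractGrammar now receive their owner and span_limit through the Config handed to
this constructor. The sketch below mirrors the ConfigFactory.parseMap pattern used by
TextGrammarFactory later in this commit (Guava's ImmutableMap imported); the owner label is
illustrative only.

    // "thrax" is an illustrative owner; span_limit is read back with getInt().
    Config grammarConfig = ConfigFactory.parseMap(
        ImmutableMap.of("owner", "thrax", "span_limit", "20"), "example grammar config");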

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/CreateGlueGrammar.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/CreateGlueGrammar.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/CreateGlueGrammar.java
index e8242f6..46e4bb9 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/CreateGlueGrammar.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/CreateGlueGrammar.java
@@ -28,7 +28,6 @@ import java.util.HashSet;
 import java.util.Set;
 
 import org.apache.joshua.corpus.Vocabulary;
-import org.apache.joshua.decoder.JoshuaConfiguration;
 import org.apache.joshua.util.io.LineReader;
 import org.kohsuke.args4j.CmdLineException;
 import org.kohsuke.args4j.CmdLineParser;
@@ -47,7 +46,7 @@ public class CreateGlueGrammar {
   private String grammarPath;
 
   @Option(name = "--goal", aliases = {"-goal"}, required = false, usage = "specify custom GOAL symbol. Default: 'GOAL'")
-  private final String goalSymbol = cleanNonTerminal(new JoshuaConfiguration().goal_symbol);
+  private final String goalSymbol = "GOAL";
 
   /* Rule templates */
   // [GOAL] ||| <s> ||| <s> ||| 0

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/Grammar.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/Grammar.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/Grammar.java
index 8497c17..67271de 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/Grammar.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/Grammar.java
@@ -20,6 +20,7 @@ package org.apache.joshua.decoder.ff.tm;
 
 import java.util.List;
 
+import org.apache.joshua.decoder.DecoderConfig;
 import org.apache.joshua.decoder.ff.FeatureFunction;
 
 /**
@@ -98,11 +99,8 @@ public interface Grammar {
   
   /**
    * Add an OOV rule for the requested word for the grammar.
-   * 
-   * @param word input word to add rules to
-   * @param featureFunctions a {@link java.util.List} of {@link org.apache.joshua.decoder.ff.FeatureFunction}'s
    */
-  void addOOVRules(int word, List<FeatureFunction> featureFunctions);
+  void addOOVRules(int word, DecoderConfig config);
   
   /**
    * Add a rule to the grammar.

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/GrammarReader.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/GrammarReader.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/GrammarReader.java
index 70e786c..4d6f483 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/GrammarReader.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/GrammarReader.java
@@ -21,7 +21,6 @@ package org.apache.joshua.decoder.ff.tm;
 import java.io.IOException;
 import java.util.Iterator;
 
-import org.apache.joshua.decoder.Decoder;
 import org.apache.joshua.decoder.ff.tm.format.HieroFormatReader;
 import org.apache.joshua.decoder.ff.tm.format.MosesFormatReader;
 import org.apache.joshua.util.io.LineReader;
@@ -136,7 +135,7 @@ public abstract class GrammarReader<R extends Rule> implements Iterable<R>, Iter
     advanceReader();
 
 
-    if (Decoder.VERBOSE >= 1) {
+    if (true) {
       int newProgress = (reader != null) ? reader.progress() : 100;
 
       //TODO: review this code. It is better to print progress based on time gap (like for every 1s or 2sec) than %!

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/SentenceFilteredGrammar.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/SentenceFilteredGrammar.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/SentenceFilteredGrammar.java
deleted file mode 100644
index 4f545b7..0000000
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/SentenceFilteredGrammar.java
+++ /dev/null
@@ -1,366 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.joshua.decoder.ff.tm;
-
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.Map.Entry;
-
-import org.apache.joshua.decoder.ff.tm.hash_based.ExtensionIterator;
-import org.apache.joshua.decoder.ff.tm.hash_based.MemoryBasedBatchGrammar;
-import org.apache.joshua.decoder.segment_file.Sentence;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * This class implements dynamic sentence-level filtering. This is accomplished with a parallel
- * trie, a subset of the original trie, that only contains trie paths that are reachable from
- * traversals of the current sentence.
- * 
- * @author Matt Post post@cs.jhu.edu
- */
-public class SentenceFilteredGrammar extends MemoryBasedBatchGrammar {
-
-  private static final Logger LOG = LoggerFactory.getLogger(SentenceFilteredGrammar.class);
-
-  private final AbstractGrammar baseGrammar;
-  private final SentenceFilteredTrie filteredTrie;
-  private final int[] tokens;
-  private final Sentence sentence;
-
-  /**
-   * Construct a new sentence-filtered grammar. The main work is done in the enclosed trie (obtained
-   * from the base grammar, which contains the complete grammar).
-   * 
-   * @param baseGrammar a new {@link org.apache.joshua.decoder.ff.tm.AbstractGrammar} to populate
-   * @param sentence {@link org.apache.joshua.lattice.Lattice} input
-   */
-  SentenceFilteredGrammar(AbstractGrammar baseGrammar, Sentence sentence) {
-    super(OwnerMap.getOwner(baseGrammar.getOwner()), baseGrammar.joshuaConfiguration, baseGrammar.getSpanLimit());
-    this.baseGrammar = baseGrammar;
-    this.sentence = sentence;
-    this.tokens = sentence.getWordIDs();
-
-    int origCount = getNumRules(baseGrammar.getTrieRoot());
-    long startTime = System.currentTimeMillis();
-
-    /* Filter the rules; returns non-null object */
-    this.filteredTrie = filter(baseGrammar.getTrieRoot());
-    int filteredCount = getNumRules();
-
-    float seconds = (System.currentTimeMillis() - startTime) / 1000.0f;
-
-    LOG.debug("Sentence-level filtering of sentence {} ({} -> {} rules) in {} seconds",
-        sentence.id(), origCount, filteredCount, seconds);
-  }
-
-  @Override
-  public Trie getTrieRoot() {
-    return filteredTrie;
-  }
-
-  /**
-   * This function is poorly named: it doesn't mean whether a rule exists in the grammar for the
-   * current span, but whether the grammar is permitted to apply rules to the current span (a
-   * grammar-level parameter). As such we can just chain to the underlying grammar.
-   */
-  @Override
-  public boolean hasRuleForSpan(int startIndex, int endIndex, int pathLength) {
-    return baseGrammar.hasRuleForSpan(startIndex, endIndex, pathLength);
-  }
-
-  @Override
-  public int getNumRules() {
-    return getNumRules(getTrieRoot());
-  }
-
-  /**
-   * A convenience function that counts the number of rules in a grammar's trie.
-   * 
-   * @param node the {@link org.apache.joshua.decoder.ff.tm.Trie} implementation for which to count rules
-   * @return the number of rules
-   */
-  public int getNumRules(Trie node) {
-    int numRules = 0;
-    if (node != null) {
-      if (node.getRuleCollection() != null)
-        numRules += node.getRuleCollection().getRules().size();
-
-      if (node.getExtensions() != null)
-        for (Trie child : node.getExtensions())
-          numRules += getNumRules(child);
-    }
-
-    return numRules;
-  }
-
-  /**
-   * What is the algorithm?
-   * 
-   * Take the first word of the sentence, and start at the root of the trie. There are two things to
-   * consider: (a) word matches and (b) nonterminal matches.
-   * 
-   * For a word match, simply follow that arc along the trie. We create a parallel arc in our
-   * filtered grammar to represent it. Each arc in the filtered trie knows about its
-   * corresponding/underlying node in the unfiltered grammar trie.
-   * 
-   * A nonterminal is always permitted to match. The question then is how much of the input sentence
-   * we imagine it consumed. The answer is that it could have been any amount. So the recursive call
-   * has to be a set of calls, one each to the next trie node with different lengths of the sentence
-   * remaining.
-   * 
-   * A problem occurs when we have multiple sequential nonterminals. For scope-3 grammars, there can
-   * be four sequential nonterminals (in the case when they are grounded by terminals on both ends
-   * of the nonterminal chain). We'd like to avoid looking at all possible ways to split up the
-   * subsequence, because with respect to filtering rules, they are all the same.
-   * 
-   * We accomplish this with the following restriction: for purposes of grammar filtering, only the
-   * first in a sequence of nonterminal traversals can consume more than one word. Each of the
-   * subsequent ones would have to consume just one word. We then just have to record in the
-   * recursive call whether the last traversal was a nonterminal or not.
-   * 
-   * @param unfilteredTrieRoot todo
-   * @return the root of the filtered trie
-   */
-  private SentenceFilteredTrie filter(Trie unfilteredTrieRoot) {
-    SentenceFilteredTrie filteredTrieRoot = new SentenceFilteredTrie(unfilteredTrieRoot);
-
-    // System.err.println(String.format("FILTERING TO SENTENCE\n  %s\n",
-    // Vocabulary.getWords(tokens)));
-
-    /*
-     * The root of the trie is where rule applications start, so we simply try all possible
-     * positions in the sentence.
-     */
-    for (int i = 0; i < tokens.length; i++) {
-      filter(i, filteredTrieRoot, false);
-    }
-
-    return filteredTrieRoot;
-  }
-
-  /**
-   * Matches rules against the sentence. Intelligently handles chains of sequential nonterminals.
-   * Marks arcs that are traversable for this sentence.
-   * 
-   * @param i the position in the sentence to start matching
-   * @param trie the trie node to match against
-   * @param lastWasNT true if the match that brought us here was against a nonterminal
-   */
-  private void filter(int i, SentenceFilteredTrie trieNode, boolean lastWasNT) {
-    if (i >= tokens.length)
-      return;
-
-    /* Make sure the underlying unfiltered node has children. */
-    Trie unfilteredTrieNode = trieNode.unfilteredTrieNode;
-    if (unfilteredTrieNode.getChildren() == null) {
-      // trieNode.path.retreat();
-      return;
-    }
-
-    /* Match a word */
-    Trie trie = unfilteredTrieNode.match(tokens[i]);
-    if (trie != null) {
-      /*
-       * The current filtered node might already have an arc for this label. If so, retrieve it
-       * (since we still need to follow it); if not, create it.
-       */
-      SentenceFilteredTrie nextFilteredTrie = trieNode.match(tokens[i]);
-      if (nextFilteredTrie == null) {
-        nextFilteredTrie = new SentenceFilteredTrie(trie);
-        trieNode.children.put(tokens[i], nextFilteredTrie);
-      }
-
-      /*
-       * Now continue, trying to match the child node against the next position in the sentence. The
-       * third argument records that this match was not against a nonterminal.
-       */
-      filter(i + 1, nextFilteredTrie, false);
-    }
-
-    /*
-     * Now we attempt to match nonterminals. Any nonterminal is permitted to match any region of the
-     * sentence, up to the maximum span for that grammar. So we enumerate all children of the
-     * current (unfiltered) trie grammar node, looking for nonterminals (items whose label value is
-     * less than 0), then recurse.
-     * 
-     * There is one subtlely. Adjacent nonterminals in a grammar rule can match a span (i, j) in (j
-     * - i - 1) ways, but for purposes of determining whether a rule fits, this is all wasted
-     * effort. To handle this, we allow the first nonterminal in a sequence to record 1, 2, 3, ...
-     * terminals (up to the grammar's span limit, or the rest of the sentence, whichever is
-     * shorter). Subsequent adjacent nonterminals are permitted to consume only a single terminal.
-     */
-    HashMap<Integer, ? extends Trie> children = unfilteredTrieNode.getChildren();
-    if (children != null) {
-      for (int label : children.keySet()) {
-        if (label < 0) {
-          SentenceFilteredTrie nextFilteredTrie = trieNode.match(label);
-          if (nextFilteredTrie == null) {
-            nextFilteredTrie = new SentenceFilteredTrie(unfilteredTrieNode.match(label));
-            trieNode.children.put(label, nextFilteredTrie);
-          }
-
-          /*
-           * Recurse. If the last match was a nonterminal, we can only consume one more token.
-           * 
-           * TODO: This goes too far by looking at the whole sentence; each grammar has a maximum
-           * span limit which should be consulted. What we should be doing is passing the point
-           * where we started matching the current sentence, so we can apply this span limit, which
-           * is easily accessible (baseGrammar.spanLimit).
-           */
-          int maxJ = lastWasNT ? (i + 1) : tokens.length;
-          for (int j = i + 1; j <= maxJ; j++) {
-            filter(j, nextFilteredTrie, true);
-          }
-        }
-      }
-    }
-  }
-
-  /**
-   * Alternate filter that uses regular expressions, walking the grammar trie and matching the
-   * source side of each rule collection against the input sentence. Failed matches are discarded,
-   * and trie nodes extending from that position need not be explored.
-   * 
-   * @param unfilteredTrie todo
-   * @return the root of the filtered trie if any rules were retained, otherwise null
-   */
-  @SuppressWarnings("unused")
-  private SentenceFilteredTrie filter_regexp(Trie unfilteredTrie) {
-    SentenceFilteredTrie trie = null;
-
-    /* Case 1: keep the trie node if it has a rule collection that matches the sentence */
-    if (unfilteredTrie.hasRules())
-      if (matchesSentence(unfilteredTrie))
-        trie = new SentenceFilteredTrie(unfilteredTrie);
-      else
-        return null;
-
-    /* Case 2: keep the trie node if it has children who have valid rule collections */
-    if (unfilteredTrie.hasExtensions())
-      for (Entry<Integer, ? extends Trie> arc : unfilteredTrie.getChildren().entrySet()) {
-        Trie unfilteredChildTrie = arc.getValue();
-        SentenceFilteredTrie nextTrie = filter_regexp(unfilteredChildTrie);
-        if (nextTrie != null) {
-          if (trie == null)
-            trie = new SentenceFilteredTrie(unfilteredTrie);
-          trie.children.put(arc.getKey(), nextTrie);
-        }
-      }
-
-    return trie;
-  }
-
-  private boolean matchesSentence(Trie childTrie) {
-    Rule rule = childTrie.getRuleCollection().getRules().get(0);
-    return rule.matches(sentence);
-  }
-
-  /**
-   * Implements a filtered trie, by sitting on top of a base trie and annotating nodes that match
-   * the given input sentence.
-   * 
-   * @author Matt Post post@cs.jhu.edu
-   * 
-   */
-  public class SentenceFilteredTrie implements Trie {
-
-    /* The underlying unfiltered trie node. */
-    private final Trie unfilteredTrieNode;
-
-    /* The child nodes in the filtered trie. */
-    private HashMap<Integer, SentenceFilteredTrie> children = null;
-
-    /**
-     * Constructor.
-     * 
-     * @param unfilteredTrieNode todo
-     */
-    public SentenceFilteredTrie(Trie unfilteredTrieNode) {
-      this.unfilteredTrieNode = unfilteredTrieNode;
-      this.children = new HashMap<>();
-    }
-
-    @Override
-    public SentenceFilteredTrie match(int wordID) {
-      if (children != null)
-        return children.get(wordID);
-      return null;
-    }
-
-    @Override
-    public boolean hasExtensions() {
-      return children != null;
-    }
-
-    @Override
-    public Collection<SentenceFilteredTrie> getExtensions() {
-      if (children != null)
-        return children.values();
-
-      return null;
-    }
-
-    @Override
-    public HashMap<Integer, SentenceFilteredTrie> getChildren() {
-      return children;
-    }
-
-    @Override
-    public boolean hasRules() {
-      // Chain to the underlying unfiltered node.
-      return unfilteredTrieNode.hasRules();
-    }
-
-    @Override
-    public RuleCollection getRuleCollection() {
-      // Chain to the underlying unfiltered node, since the rule collection just varies by target
-      // side.
-      return unfilteredTrieNode.getRuleCollection();
-    }
-
-    /**
-     * Counts the number of rules.
-     * 
-     * @return the number of rules rooted at this node.
-     */
-    public int getNumRules() {
-      int numRules = 0;
-      if (getTrieRoot() != null)
-        if (getTrieRoot().getRuleCollection() != null)
-          numRules += getTrieRoot().getRuleCollection().getRules().size();
-
-      for (SentenceFilteredTrie node : getExtensions())
-        numRules += node.getNumRules();
-
-      return numRules;
-    }
-
-    @Override
-    public Iterator<Integer> getTerminalExtensionIterator() {
-      return new ExtensionIterator(children, true);
-    }
-
-    @Override
-    public Iterator<Integer> getNonterminalExtensionIterator() {
-      return new ExtensionIterator(children, false);
-    }
-  }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/format/MosesFormatReader.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/format/MosesFormatReader.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/format/MosesFormatReader.java
index 4b549fb..9c6f386 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/format/MosesFormatReader.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/format/MosesFormatReader.java
@@ -25,7 +25,6 @@ import org.apache.joshua.decoder.ff.tm.OwnerId;
 import org.apache.joshua.decoder.ff.tm.OwnerMap;
 import org.apache.joshua.decoder.ff.tm.Rule;
 import org.apache.joshua.util.Constants;
-import org.apache.joshua.util.FormatUtils;
 import org.apache.joshua.util.io.LineReader;
 
 /***

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/MemoryBasedBatchGrammar.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/MemoryBasedBatchGrammar.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/MemoryBasedBatchGrammar.java
deleted file mode 100644
index 92566da..0000000
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/MemoryBasedBatchGrammar.java
+++ /dev/null
@@ -1,248 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.joshua.decoder.ff.tm.hash_based;
-
-import static org.apache.joshua.decoder.ff.tm.GrammarReader.createReader;
-
-import java.io.IOException;
-import java.io.Reader;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-
-import org.apache.joshua.corpus.Vocabulary;
-import org.apache.joshua.decoder.JoshuaConfiguration;
-import org.apache.joshua.decoder.JoshuaConfiguration.OOVItem;
-import org.apache.joshua.decoder.ff.FeatureFunction;
-import org.apache.joshua.decoder.ff.FeatureVector;
-import org.apache.joshua.decoder.ff.tm.AbstractGrammar;
-import org.apache.joshua.decoder.ff.tm.GrammarReader;
-import org.apache.joshua.decoder.ff.tm.Rule;
-import org.apache.joshua.decoder.ff.tm.Trie;
-import org.apache.joshua.decoder.ff.tm.format.HieroFormatReader;
-import org.apache.joshua.util.FormatUtils;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * This class implements a memory-based bilingual BatchGrammar.
- * <p>
- * The rules are stored in a trie. Each trie node has: (1) RuleBin: a list of rules matching the
- * french sides so far (2) A HashMap of next-layer trie nodes, the next french word used as the key
- * in HashMap
- * 
- * @author Zhifei Li zhifei.work@gmail.com
- * @author Matt Post post@cs.jhu.edu
- */
-public class MemoryBasedBatchGrammar extends AbstractGrammar {
-
-  private static final Logger LOG = LoggerFactory.getLogger(MemoryBasedBatchGrammar.class);
-
-  /* The number of rules read. */
-  private int qtyRulesRead = 0;
-
-  /* The number of distinct source sides. */
-  private int qtyRuleBins = 0;
-
-  /* The trie root. */
-  private final MemoryBasedTrie root = new MemoryBasedTrie();
-
-  /* The file containing the grammar. */
-  private String grammarFile;
-
-  /**
-   * Constructor used by Decoder mostly. Default spanLimit of 20
-   * @param owner the associated decoder-wide {@link org.apache.joshua.decoder.ff.tm.OwnerMap}
-   * @param config a {@link org.apache.joshua.decoder.JoshuaConfiguration} object
-   * @param spanLimit the maximum span of the input grammar rule(s) can be applied to.
-   */
-  public MemoryBasedBatchGrammar(String owner, JoshuaConfiguration config, int spanLimit) {
-    super(owner, config, spanLimit);
-  }
-
-  public MemoryBasedBatchGrammar(String formatKeyword, String grammarFile, String owner,
-      String defaultLHSSymbol, int spanLimit, JoshuaConfiguration joshuaConfiguration)
-      throws IOException {
-
-    super(owner, joshuaConfiguration, spanLimit);
-    Vocabulary.id(defaultLHSSymbol);
-    this.grammarFile = grammarFile;
-
-    // ==== loading grammar
-    try(GrammarReader<Rule> reader = createReader(formatKeyword, grammarFile, getOwner());) { 
-      for (Rule rule : reader) {
-        if (rule != null) {
-          addRule(rule);
-        }
-      }
-    }
-
-    this.printGrammar();
-  }
-
-  // ===============================================================
-  // Methods
-  // ===============================================================
-
-  @Override
-  public int getNumRules() {
-    return this.qtyRulesRead;
-  }
-
-  /**
-   * if the span covered by the chart bin is greater than the limit, then return false
-   */
-  public boolean hasRuleForSpan(int i, int j, int pathLength) {
-    if (this.spanLimit == -1) { // mono-glue grammar
-      return (i == 0);
-    } else {
-      // System.err.println(String.format("%s HASRULEFORSPAN(%d,%d,%d)/%d = %s",
-      // Vocabulary.word(this.owner), i, j, pathLength, spanLimit, pathLength <= this.spanLimit));
-      return (pathLength <= this.spanLimit);
-    }
-  }
-
-  public Trie getTrieRoot() {
-    return this.root;
-  }
-
-  /**
-   * Adds a rule to the grammar.
-   */
-  public void addRule(Rule rule) {
-
-    this.qtyRulesRead++;
-
-    // === identify the position, and insert the trie nodes as necessary
-    MemoryBasedTrie pos = root;
-    int[] french = rule.getSource();
-
-    maxSourcePhraseLength = Math.max(maxSourcePhraseLength, french.length);
-
-    for (int curSymID : french) {
-      /*
-       * Note that the nonTerminal symbol in the french is not cleaned (i.e., will be sth like
-       * [X,1]), but the symbol in the Trie has to be cleaned, so that the match does not care about
-       * the markup (i.e., [X,1] or [X,2] means the same thing, that is X) if
-       * (Vocabulary.nt(french[k])) { curSymID = modelReader.cleanNonTerminal(french[k]); if
-       * (logger.isLoggable(Level.FINEST)) logger.finest("Amended to: " + curSymID); }
-       */
-
-      MemoryBasedTrie nextLayer = (MemoryBasedTrie) pos.match(curSymID);
-      if (null == nextLayer) {
-        nextLayer = new MemoryBasedTrie();
-        if (pos.hasExtensions() == false) {
-          pos.childrenTbl = new HashMap<>();
-        }
-        pos.childrenTbl.put(curSymID, nextLayer);
-      }
-      pos = nextLayer;
-    }
-
-    // === add the rule into the trie node
-    if (!pos.hasRules()) {
-      pos.ruleBin = new MemoryBasedRuleBin(rule.getArity(), rule.getSource());
-      this.qtyRuleBins++;
-    }
-    pos.ruleBin.addRule(rule);
-  }
-
-  protected void printGrammar() {
-    LOG.info("MemoryBasedBatchGrammar: Read {} rules with {} distinct source sides from '{}'",
-        this.qtyRulesRead, this.qtyRuleBins, grammarFile);
-  }
-
-  /***
-   * Takes an input word and creates an OOV rule in the current grammar for that word.
-   * 
-   * @param sourceWord integer representation of word
-   * @param featureFunctions {@link java.util.List} of {@link org.apache.joshua.decoder.ff.FeatureFunction}'s
-   */
-  @Override
-  public void addOOVRules(int sourceWord, List<FeatureFunction> featureFunctions) {
-
-    // TODO: _OOV shouldn't be outright added, since the word might not be OOV for the LM (but now
-    // almost
-    // certainly is)
-    final int targetWord = this.joshuaConfiguration.mark_oovs ? Vocabulary.id(Vocabulary
-        .word(sourceWord) + "_OOV") : sourceWord;
-
-    final int[] sourceWords = { sourceWord };
-    final int[] targetWords = { targetWord };
-    final byte[] alignment = { 0, 0 };
-    final FeatureVector features = new FeatureVector(0);
-
-    if (this.joshuaConfiguration.oovList != null && this.joshuaConfiguration.oovList.size() != 0) {
-      
-      for (OOVItem item : this.joshuaConfiguration.oovList) {
-        final Rule oovRule = new Rule(
-            Vocabulary.id(item.label),
-            sourceWords,
-            targetWords,
-            0,
-            features,
-            alignment,
-            getOwner());
-        addRule(oovRule);
-        oovRule.estimateRuleCost(featureFunctions);
-      }
-      
-    } else {
-      
-      final Rule oovRule = new Rule(
-          Vocabulary.id(this.joshuaConfiguration.default_non_terminal),
-          sourceWords,
-          targetWords,
-          0,
-          features,
-          alignment,
-          getOwner());
-      addRule(oovRule);
-      oovRule.estimateRuleCost(featureFunctions);
-      
-    }
-  }
-
-  /**
-   * Adds a default set of glue rules.
-   * 
-   * @param featureFunctions an {@link java.util.ArrayList} of {@link org.apache.joshua.decoder.ff.FeatureFunction}'s
-   */
-  public void addGlueRules(ArrayList<FeatureFunction> featureFunctions) {
-    final HieroFormatReader reader = new HieroFormatReader(getOwner());
-
-    String goalNT = FormatUtils.cleanNonTerminal(joshuaConfiguration.goal_symbol);
-    String defaultNT = FormatUtils.cleanNonTerminal(joshuaConfiguration.default_non_terminal);
-
-    String[] ruleStrings = new String[] {
-        String.format("[%s] ||| %s ||| %s ||| 0", goalNT, Vocabulary.START_SYM,
-            Vocabulary.START_SYM),
-        String.format("[%s] ||| [%s,1] [%s,2] ||| [%s,1] [%s,2] ||| -1", goalNT, goalNT, defaultNT,
-            goalNT, defaultNT),
-        String.format("[%s] ||| [%s,1] %s ||| [%s,1] %s ||| 0", goalNT, goalNT,
-            Vocabulary.STOP_SYM, goalNT, Vocabulary.STOP_SYM) };
-
-    for (String ruleString : ruleStrings) {
-      Rule rule = reader.parseLine(ruleString);
-      addRule(rule);
-      rule.estimateRuleCost(featureFunctions);
-    }
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/TextGrammar.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/TextGrammar.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/TextGrammar.java
new file mode 100644
index 0000000..5923965
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/TextGrammar.java
@@ -0,0 +1,216 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff.tm.hash_based;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Optional;
+
+import org.apache.joshua.corpus.Vocabulary;
+import org.apache.joshua.decoder.DecoderConfig;
+import org.apache.joshua.decoder.ff.FeatureFunction;
+import org.apache.joshua.decoder.ff.FeatureVector;
+import org.apache.joshua.decoder.ff.tm.AbstractGrammar;
+import org.apache.joshua.decoder.ff.tm.Rule;
+import org.apache.joshua.decoder.ff.tm.Trie;
+import org.apache.joshua.decoder.ff.tm.format.HieroFormatReader;
+import org.apache.joshua.util.FormatUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Throwables;
+import com.typesafe.config.Config;
+
+/**
+ * This class implements a memory-based bilingual BatchGrammar.
+ * <p>
+ * The rules are stored in a trie. Each trie node has: (1) RuleBin: a list of rules matching the
+ * french sides so far (2) A HashMap of next-layer trie nodes, the next french word used as the key
+ * in HashMap
+ * 
+ * @author Zhifei Li zhifei.work@gmail.com
+ * @author Matt Post post@cs.jhu.edu
+ */
+public class TextGrammar extends AbstractGrammar {
+
+  private static final Logger LOG = LoggerFactory.getLogger(TextGrammar.class);
+
+  /* The number of rules read. */
+  private int qtyRulesRead = 0;
+
+  /* The number of distinct source sides. */
+  private int qtyRuleBins = 0;
+
+  /* The trie root. */
+  private final MemoryBasedTrie root = new MemoryBasedTrie();
+
+  /* The path containing the grammar. */
+  private final Optional<String> path;
+
+  public TextGrammar(final Config config) {
+    super(config);
+    this.path = config.hasPath("path") ? Optional.of(config.getString("path")) : Optional.empty();
+    
+    // if path is configured, actually load the grammar
+    if (this.path.isPresent()) {
+      this.loadGrammar(this.path.get());
+      this.printGrammar();
+    }
+  }
+  
+  private void loadGrammar(final String path) {
+    try(final HieroFormatReader reader = new HieroFormatReader(path, getOwner());) {
+      for (Rule rule : reader) {
+        if (rule != null) {
+          addRule(rule);
+        }
+      }
+    } catch (IOException e) {
+      Throwables.propagate(e);
+    }
+  }
+
+  @Override
+  public int getNumRules() {
+    return this.qtyRulesRead;
+  }
+
+  /**
+   * if the span covered by the chart bin is greater than the limit, then return false
+   */
+  public boolean hasRuleForSpan(int i, int j, int pathLength) {
+    if (this.spanLimit == -1) { // mono-glue grammar
+      return (i == 0);
+    } else {
+      // System.err.println(String.format("%s HASRULEFORSPAN(%d,%d,%d)/%d = %s",
+      // Vocabulary.word(this.owner), i, j, pathLength, spanLimit, pathLength <= this.spanLimit));
+      return (pathLength <= this.spanLimit);
+    }
+  }
+
+  public Trie getTrieRoot() {
+    return this.root;
+  }
+
+  /**
+   * Adds a rule to the grammar.
+   */
+  public void addRule(Rule rule) {
+
+    this.qtyRulesRead++;
+
+    // === identify the position, and insert the trie nodes as necessary
+    MemoryBasedTrie pos = root;
+    int[] french = rule.getSource();
+
+    maxSourcePhraseLength = Math.max(maxSourcePhraseLength, french.length);
+
+    for (int curSymID : french) {
+      /*
+       * Note that the nonTerminal symbol in the french is not cleaned (i.e., will be sth like
+       * [X,1]), but the symbol in the Trie has to be cleaned, so that the match does not care about
+       * the markup (i.e., [X,1] or [X,2] means the same thing, that is X) if
+       * (Vocabulary.nt(french[k])) { curSymID = modelReader.cleanNonTerminal(french[k]); if
+       * (logger.isLoggable(Level.FINEST)) logger.finest("Amended to: " + curSymID); }
+       */
+
+      MemoryBasedTrie nextLayer = (MemoryBasedTrie) pos.match(curSymID);
+      if (null == nextLayer) {
+        nextLayer = new MemoryBasedTrie();
+        if (pos.hasExtensions() == false) {
+          pos.childrenTbl = new HashMap<>();
+        }
+        pos.childrenTbl.put(curSymID, nextLayer);
+      }
+      pos = nextLayer;
+    }
+
+    // === add the rule into the trie node
+    if (!pos.hasRules()) {
+      pos.ruleBin = new MemoryBasedRuleBin(rule.getArity(), rule.getSource());
+      this.qtyRuleBins++;
+    }
+    pos.ruleBin.addRule(rule);
+  }
+
+  protected void printGrammar() {
+    LOG.info("{}: Read {} rules with {} distinct source sides from '{}'",
+        this.getClass().getName(), this.qtyRulesRead, this.qtyRuleBins, path);
+  }
+
+  /***
+   * Takes an input word and creates an OOV rule in the current grammar for that word.
+   * 
+   * @param sourceWord integer representation of word
+   * @param featureFunctions {@link java.util.List} of {@link org.apache.joshua.decoder.ff.FeatureFunction}'s
+   */
+  @Override
+  public void addOOVRules(int sourceWord, DecoderConfig config) {
+
+    // TODO: _OOV shouldn't be outright added, since the word might not be OOV for the LM (but now
+    // almost
+    // certainly is)
+    final int targetWord = config.getFlags().getBoolean("mark_oovs") ? Vocabulary.id(Vocabulary
+        .word(sourceWord) + "_OOV") : sourceWord;
+
+    final int[] sourceWords = { sourceWord };
+    final int[] targetWords = { targetWord };
+    final byte[] alignment = { 0, 0 };
+    final FeatureVector features = new FeatureVector(0);
+
+    final Rule oovRule = new Rule(
+          Vocabulary.id(config.getFlags().getString("default_non_terminal")),
+          sourceWords,
+          targetWords,
+          0,
+          features,
+          alignment,
+          getOwner());
+    addRule(oovRule);
+    oovRule.estimateRuleCost(config.getFeatureFunctions());
+  }
+
+  /**
+   * Adds a default set of glue rules.
+   * 
+   * @param featureFunctions an {@link java.util.ArrayList} of {@link org.apache.joshua.decoder.ff.FeatureFunction}'s
+   */
+  public void addGlueRules(List<FeatureFunction> featureFunctions, Config config) {
+    String goalNT = FormatUtils.cleanNonTerminal(config.getString("goal_symbol"));
+    String defaultNT = FormatUtils.cleanNonTerminal(config.getString("default_non_terminal"));
+
+    String[] ruleStrings = new String[] {
+        String.format("[%s] ||| %s ||| %s ||| 0", goalNT, Vocabulary.START_SYM,
+            Vocabulary.START_SYM),
+        String.format("[%s] ||| [%s,1] [%s,2] ||| [%s,1] [%s,2] ||| -1", goalNT, goalNT, defaultNT,
+            goalNT, defaultNT),
+        String.format("[%s] ||| [%s,1] %s ||| [%s,1] %s ||| 0", goalNT, goalNT,
+            Vocabulary.STOP_SYM, goalNT, Vocabulary.STOP_SYM) };
+
+    try(final HieroFormatReader reader = new HieroFormatReader(getOwner());) {
+      for (String ruleString : ruleStrings) {
+        Rule rule = reader.parseLine(ruleString);
+        addRule(rule);
+        rule.estimateRuleCost(featureFunctions);
+      }
+    }
+  }
+
+}
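
Putting it together for TextGrammar, which takes over from the deleted MemoryBasedBatchGrammar: a
sketch of building one straight from a Config. The owner, span_limit and optional path keys are
the ones read above; the grammar file location is a placeholder.

    Config textGrammarConfig = ConfigFactory.parseString(
        "owner = pt\n"                  // illustrative owner label
      + "span_limit = 20\n"
      + "path = /path/to/grammar.gz");  // optional; omit it to start with an empty grammar
    TextGrammar grammar = new TextGrammar(textGrammarConfig);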

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/TextGrammarFactory.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/TextGrammarFactory.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/TextGrammarFactory.java
new file mode 100644
index 0000000..e72f703
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/TextGrammarFactory.java
@@ -0,0 +1,148 @@
+package org.apache.joshua.decoder.ff.tm.hash_based;
+
+import static java.util.Collections.emptyList;
+import static org.apache.joshua.decoder.ff.tm.OwnerMap.UNKNOWN_OWNER;
+import static org.apache.joshua.util.Constants.CUSTOM_OWNER;
+import static org.apache.joshua.util.Constants.GLUE_OWNER;
+import static org.apache.joshua.util.Constants.OOV_OWNER;
+
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.joshua.corpus.Vocabulary;
+import org.apache.joshua.decoder.DecoderConfig;
+import org.apache.joshua.decoder.SearchAlgorithm;
+import org.apache.joshua.decoder.ff.tm.Grammar;
+import org.apache.joshua.decoder.ff.tm.Rule;
+import org.apache.joshua.decoder.ff.tm.format.HieroFormatReader;
+import org.apache.joshua.decoder.phrase.Hypothesis;
+import org.apache.joshua.decoder.phrase.PhraseTable;
+import org.apache.joshua.decoder.segment_file.Sentence;
+import org.apache.joshua.decoder.segment_file.Token;
+import org.apache.joshua.lattice.Arc;
+import org.apache.joshua.lattice.Node;
+import org.apache.joshua.util.FormatUtils;
+
+import com.google.common.collect.ImmutableMap;
+import com.typesafe.config.Config;
+import com.typesafe.config.ConfigFactory;
+
+/**
+ * Provides some static functions to create default/backoff/oov/glue TextGrammars
+ * that are dynamically created during decoding.
+ */
+public class TextGrammarFactory {
+  
+  public static TextGrammar createGlueTextGrammar(String goalSymbol, String defaultNonTerminal) {
+    final Config config = ConfigFactory.parseMap(
+        ImmutableMap.of("owner", GLUE_OWNER, "span_limit", "-1"), "Glue Grammar Config");
+    final TextGrammar glueGrammar = new TextGrammar(config);
+    final HieroFormatReader reader = new HieroFormatReader(glueGrammar.getOwner());
+    final String goalNT = FormatUtils.cleanNonTerminal(goalSymbol);
+    final String defaultNT = FormatUtils.cleanNonTerminal(defaultNonTerminal);
+    
+    final String[] ruleStrings = new String[] {
+        String.format("[%s] ||| %s ||| %s ||| 0", goalNT, Vocabulary.START_SYM,
+            Vocabulary.START_SYM),
+        String.format("[%s] ||| [%s,1] [%s,2] ||| [%s,1] [%s,2] ||| -1", goalNT, goalNT, defaultNT,
+            goalNT, defaultNT),
+        String.format("[%s] ||| [%s,1] %s ||| [%s,1] %s ||| 0", goalNT, goalNT,
+            Vocabulary.STOP_SYM, goalNT, Vocabulary.STOP_SYM) };
+    
+    for (String ruleString : ruleStrings) {
+      Rule rule = reader.parseLine(ruleString);
+      glueGrammar.addRule(rule);
+      // glue rules do not have any features
+      rule.estimateRuleCost(emptyList());
+    }
+    return glueGrammar;
+  }
+  
+  public static Grammar createCustomGrammar(SearchAlgorithm searchAlgorithm) {
+    final Config config = ConfigFactory.parseMap(
+        ImmutableMap.of("owner", CUSTOM_OWNER, "span_limit", "20"), "Custom Grammar Config");
+    switch (searchAlgorithm) {
+    case stack:
+      return new PhraseTable(config);
+    case cky:
+      return new TextGrammar(config);
+    default:
+      return null;
+    }
+  }
+  
+  public static Grammar addEpsilonDeletingGrammar(String goalSymbol, String defaultNonTerminal) {
+    final Config config = ConfigFactory.parseMap(
+        ImmutableMap.of("owner", "lattice", "span_limit", "-1"), "Epsilon Grammar Config");
+    final TextGrammar latticeGrammar = new TextGrammar(config);
+    final HieroFormatReader reader = new HieroFormatReader(latticeGrammar.getOwner());
+    final String goalNT = FormatUtils.cleanNonTerminal(goalSymbol);
+    final String defaultNT = FormatUtils.cleanNonTerminal(defaultNonTerminal);
+
+    //FIXME: arguments changed to match string format on best effort basis.  Author please review.
+    final String ruleString = String.format("[%s] ||| [%s,1] <eps> ||| [%s,1] ||| ", goalNT, defaultNT, defaultNT);
+    
+    final Rule rule = reader.parseLine(ruleString);
+    latticeGrammar.addRule(rule);
+    rule.estimateRuleCost(emptyList());
+    return latticeGrammar;
+  }
+  
+  public static Grammar createOovGrammarForSentence(final Sentence sentence, DecoderConfig config) {
+    final Config grammarConfig = ConfigFactory.parseMap(
+        ImmutableMap.of("owner", OOV_OWNER, "span_limit", "20"), "OOV grammar config");
+    final TextGrammar oovGrammar = new TextGrammar(grammarConfig);
+    final Set<Integer> words = getOovCandidateWords(sentence, config.getFlags().getBoolean("true_oovs_only"));
+    for (int sourceWord: words) {
+      oovGrammar.addOOVRules(sourceWord, config);
+    }
+    // Sort all the rules (not much to actually do, this just marks it as sorted)
+    oovGrammar.sortGrammar(config.getFeatureFunctions());
+    return oovGrammar;
+  }
+  
+  public static PhraseTable createOovPhraseTable(Sentence sentence, DecoderConfig config) {
+    final Config grammarConfig = ConfigFactory.parseMap(
+        ImmutableMap.of("owner", OOV_OWNER, "span_limit", "0"), "OOV phrase table config");
+    final PhraseTable oovPhraseTable = new PhraseTable(grammarConfig);
+    final Set<Integer> words = getOovCandidateWords(sentence, config.getFlags().getBoolean("true_oovs_only"));
+    for (int sourceWord: words) {
+      oovPhraseTable.addOOVRules(sourceWord, config);
+    }
+    // Sort all the rules (not much to actually do, this just marks it as sorted)
+    oovPhraseTable.sortGrammar(config.getFeatureFunctions());
+    return oovPhraseTable;
+  }
+  
+  /**
+   * Returns a set of integer ids for which OOV rules will be created.
+   * The set is determined by the flag trueOovsOnly.
+   */
+  private static Set<Integer> getOovCandidateWords(final Sentence sentence, boolean trueOovsOnly) {
+    final Set<Integer> words = new HashSet<>();
+    for (Node<Token> node : sentence.getLattice()) {
+      for (Arc<Token> arc : node.getOutgoingArcs()) {
+        int sourceWord = arc.getLabel().getWord();
+        if (sourceWord == Vocabulary.id(Vocabulary.START_SYM)
+            || sourceWord == Vocabulary.id(Vocabulary.STOP_SYM))
+          continue;
+
+        // Determine if word is actual OOV.
+        if (trueOovsOnly && ! Vocabulary.hasId(sourceWord))
+          continue;
+
+        words.add(sourceWord);
+      }
+    }
+    return words;
+  }
+  
+  public static PhraseTable createEndRulePhraseTable(Sentence sentence, DecoderConfig config) {
+    final Config grammarConfig = ConfigFactory.parseMap(
+        ImmutableMap.of("owner", UNKNOWN_OWNER, "span_limit", "0"), "End Rule Phrase Table Config");
+    final PhraseTable endRulePhraseTable = new PhraseTable(grammarConfig);
+    endRulePhraseTable.addRule(Hypothesis.END_RULE);
+    return endRulePhraseTable;
+  }
+
+}
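
As a minimal sketch of the configuration objects these factory methods build
(the key names "owner" and "span_limit" are taken from the code above; the
HOCON values and the class name are illustrative only), the ImmutableMap-based
configs are equivalent to parsing a small HOCON snippet:

    import com.typesafe.config.Config;
    import com.typesafe.config.ConfigFactory;

    public class GrammarConfigSketch {
      public static void main(String[] args) {
        // Equivalent to ConfigFactory.parseMap(ImmutableMap.of("owner", ..., "span_limit", "-1"))
        Config glueConfig = ConfigFactory.parseString(
            "owner = glue\n"        // illustrative owner string
          + "span_limit = -1\n");   // -1 here means "no span limit"
        System.out.println(glueConfig.getString("owner"));    // glue
        System.out.println(glueConfig.getInt("span_limit"));  // -1
      }
    }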

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/packed/PackedGrammar.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/packed/PackedGrammar.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/packed/PackedGrammar.java
index d98d76f..de809f6 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/packed/PackedGrammar.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/packed/PackedGrammar.java
@@ -80,7 +80,7 @@ import java.util.List;
 import java.util.Map;
 
 import org.apache.joshua.corpus.Vocabulary;
-import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.DecoderConfig;
 import org.apache.joshua.decoder.ff.FeatureFunction;
 import org.apache.joshua.decoder.ff.FeatureVector;
 import org.apache.joshua.decoder.ff.tm.AbstractGrammar;
@@ -99,8 +99,10 @@ import org.slf4j.LoggerFactory;
 
 import com.google.common.base.Supplier;
 import com.google.common.base.Suppliers;
+import com.google.common.base.Throwables;
 import com.google.common.cache.Cache;
 import com.google.common.cache.CacheBuilder;
+import com.typesafe.config.Config;
 
 public class PackedGrammar extends AbstractGrammar {
 
@@ -117,49 +119,50 @@ public class PackedGrammar extends AbstractGrammar {
   // Testing shows there's up to ~95% hit rate when cache size is 5000 Trie nodes.
   private final Cache<Trie, List<Rule>> cached_rules;
 
-  private final String grammarDir;
-  
-  private JoshuaConfiguration config;
-
-  public PackedGrammar(String grammar_dir, int span_limit, String owner, String type,
-      JoshuaConfiguration joshuaConfiguration) throws IOException {
-    super(owner, joshuaConfiguration, span_limit);
+  private final String path;
 
-    this.grammarDir = grammar_dir;
-    this.config = joshuaConfiguration;
+  public PackedGrammar(Config config) {
+    super(config);
 
-    // Read the vocabulary.
-    vocabFile = new File(grammar_dir + File.separator + VOCABULARY_FILENAME);
-    LOG.info("Reading vocabulary: {}", vocabFile);
-    if (!Vocabulary.read(vocabFile)) {
-      throw new RuntimeException("mismatches or collisions while reading on-disk vocabulary");
-    }
-
-    // Read the config
-    String configFile = grammar_dir + File.separator + "config";
-    if (new File(configFile).exists()) {
-      LOG.info("Reading packed config: {}", configFile);
-      readConfig(configFile);
-    }
+    this.path = config.getString("path");
+    vocabFile = new File(path + File.separator + VOCABULARY_FILENAME);
 
-    // Read the quantizer setup.
-    LOG.info("Reading encoder configuration: {}{}encoding", grammar_dir, File.separator);
-    encoding = new EncoderConfiguration();
-    encoding.load(grammar_dir + File.separator + "encoding");
-
-    final List<String> listing = Arrays.asList(new File(grammar_dir).list());
-    sort(listing); // File.list() has arbitrary sort order
-    slices = new ArrayList<>();
-    for (String prefix : listing) {
-      if (prefix.startsWith("slice_") && prefix.endsWith(".source"))
-        slices.add(new PackedSlice(grammar_dir + File.separator + prefix.substring(0, 11)));
+    try {
+      // Read the vocabulary.
+      LOG.info("Reading vocabulary: {}", vocabFile);
+      if (!Vocabulary.read(vocabFile)) {
+        throw new RuntimeException("mismatches or collisions while reading on-disk vocabulary");
+      }
+  
+      // Read the config
+      String configFile = path + File.separator + "config";
+      if (new File(configFile).exists()) {
+        LOG.info("Reading packed config: {}", configFile);
+        readConfig(configFile);
+      }
+  
+      // Read the quantizer setup.
+      LOG.info("Reading encoder configuration: {}{}encoding", path, File.separator);
+      encoding = new EncoderConfiguration();
+      encoding.load(path + File.separator + "encoding");
+  
+      final List<String> listing = Arrays.asList(new File(path).list());
+      sort(listing); // File.list() has arbitrary sort order
+      slices = new ArrayList<>();
+      for (String prefix : listing) {
+        if (prefix.startsWith("slice_") && prefix.endsWith(".source"))
+          slices.add(new PackedSlice(path + File.separator + prefix.substring(0, 11)));
+      }
+    } catch (IOException e) {
+      Throwables.propagate(e);
     }
 
     long count = 0;
     for (PackedSlice s : slices)
       count += s.estimated.length;
     root = new PackedRoot(slices);
-    cached_rules = CacheBuilder.newBuilder().maximumSize(joshuaConfiguration.cachedRuleSize).build();
+    int cacheSize = config.getInt("rule_cache_size");
+    cached_rules = CacheBuilder.newBuilder().maximumSize(cacheSize).build();
 
     LOG.info("Loaded {} rules", count);
   }
@@ -854,7 +857,7 @@ public class PackedGrammar extends AbstractGrammar {
         @Override
         public int[] getSource() {
           int phrase[] = new int[src.length + 1];
-          int ntid = Vocabulary.id(PackedGrammar.this.joshuaConfiguration.default_non_terminal);
+          int ntid = Vocabulary.id(PackedGrammar.this.config.getString("default_non_terminal"));
           phrase[0] = ntid;
           System.arraycopy(src,  0, phrase, 1, src.length);
           return phrase;
@@ -956,9 +959,9 @@ public class PackedGrammar extends AbstractGrammar {
       }
     }
   }
-
+  
   @Override
-  public void addOOVRules(int word, List<FeatureFunction> featureFunctions) {
+  public void addOOVRules(int word, DecoderConfig config) {
     throw new RuntimeException("PackedGrammar.addOOVRules(): I can't add OOV rules");
   }
 
@@ -989,17 +992,15 @@ public class PackedGrammar extends AbstractGrammar {
 
     if (! isSupportedVersion(version)) {
       String message = String.format("The grammar at %s was packed with packer version %d, which is incompatible with the current config",
-          this.grammarDir, version);
+          this.path, version);
       throw new RuntimeException(message);
     }
   }
   
-  /*
-   * Determines whether the current grammar is a supported version. For hierarchical decoding,
-   * no changes have occurred, so any version past 2 (the default) is supported. For phrase-
-   * based decoding, version 4 is required.
-   */
+  /**
+   * With Joshua 7 we require newly packed grammars for everything.
+   */
   private boolean isSupportedVersion(int version) {
-    return (config.search_algorithm.equals("cky") && version >= 2) || (version >= 4);
+    return version >= 3; // TODO(fhieber): fix this once we ship Joshua
   }
 }
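
A hedged wiring sketch for the new Config-driven constructor: the keys "path",
"rule_cache_size" and "default_non_terminal" are the ones read in this diff,
while "owner" and "span_limit" are assumed to be consumed by AbstractGrammar;
the path and all values below are placeholders, not project defaults.

    import com.google.common.collect.ImmutableMap;
    import com.typesafe.config.Config;
    import com.typesafe.config.ConfigFactory;
    import org.apache.joshua.decoder.ff.tm.packed.PackedGrammar;

    public class PackedGrammarSketch {
      public static void main(String[] args) {
        Config packedConfig = ConfigFactory.parseMap(ImmutableMap.of(
            "path", "/path/to/grammar.packed",  // placeholder; must point to a packed grammar dir
            "owner", "pt",                      // assumed AbstractGrammar key
            "span_limit", "20",                 // assumed AbstractGrammar key
            "rule_cache_size", "5000",
            "default_non_terminal", "X"),
            "packed grammar sketch");
        // The constructor now traps IOException internally (Throwables.propagate),
        // so no checked exception is declared at this call site.
        PackedGrammar grammar = new PackedGrammar(packedConfig);
      }
    }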

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/hypergraph/GrammarBuilderWalkerFunction.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/hypergraph/GrammarBuilderWalkerFunction.java b/joshua-core/src/main/java/org/apache/joshua/decoder/hypergraph/GrammarBuilderWalkerFunction.java
index a1132e8..085b239 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/hypergraph/GrammarBuilderWalkerFunction.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/hypergraph/GrammarBuilderWalkerFunction.java
@@ -22,16 +22,17 @@ import java.io.PrintStream;
 import java.util.HashSet;
 
 import org.apache.joshua.corpus.Vocabulary;
-import org.apache.joshua.decoder.JoshuaConfiguration;
 import org.apache.joshua.decoder.ff.FeatureVector;
 import org.apache.joshua.decoder.ff.tm.Grammar;
 import org.apache.joshua.decoder.ff.tm.OwnerMap;
 import org.apache.joshua.decoder.ff.tm.Rule;
-import org.apache.joshua.decoder.ff.tm.hash_based.MemoryBasedBatchGrammar;
+import org.apache.joshua.decoder.ff.tm.hash_based.TextGrammar;
 import org.apache.joshua.util.FormatUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import com.typesafe.config.Config;
+
 /**
  * This walker function builds up a new context-free grammar by visiting each node in a hypergraph.
  * For a quick overview, see Chris Dyer's 2010 NAACL paper
@@ -49,13 +50,13 @@ public class GrammarBuilderWalkerFunction implements WalkerFunction {
 
   private static final Logger LOG = LoggerFactory.getLogger(GrammarBuilderWalkerFunction.class);
 
-  private final MemoryBasedBatchGrammar grammar;
+  private final TextGrammar grammar;
   private final PrintStream outStream;
   private final int goalSymbol;
   private final HashSet<Rule> rules;
 
-  public GrammarBuilderWalkerFunction(String goal, JoshuaConfiguration joshuaConfiguration, String owner) {
-    grammar = new MemoryBasedBatchGrammar(owner, joshuaConfiguration, 1000);
+  public GrammarBuilderWalkerFunction(String goal, Config config) {
+    grammar = new TextGrammar(config);
     outStream = null;
     goalSymbol = Vocabulary.id(goal);
     rules = new HashSet<>();

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/hypergraph/KBestExtractor.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/hypergraph/KBestExtractor.java b/joshua-core/src/main/java/org/apache/joshua/decoder/hypergraph/KBestExtractor.java
index cb79bf9..6494143 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/hypergraph/KBestExtractor.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/hypergraph/KBestExtractor.java
@@ -36,14 +36,12 @@ import java.util.PriorityQueue;
 
 import org.apache.joshua.corpus.Vocabulary;
 import org.apache.joshua.decoder.BLEU;
-import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.DecoderConfig;
 import org.apache.joshua.decoder.StructuredTranslation;
 import org.apache.joshua.decoder.StructuredTranslationFactory;
-import org.apache.joshua.decoder.ff.FeatureFunction;
 import org.apache.joshua.decoder.ff.FeatureVector;
 import org.apache.joshua.decoder.ff.fragmentlm.Tree;
 import org.apache.joshua.decoder.ff.state_maintenance.DPState;
-import org.apache.joshua.decoder.ff.tm.OwnerMap;
 import org.apache.joshua.decoder.ff.tm.Rule;
 import org.apache.joshua.decoder.io.DeNormalize;
 import org.apache.joshua.decoder.segment_file.Sentence;
@@ -96,7 +94,6 @@ import org.apache.joshua.util.FormatUtils;
  * @author Matt Post post@cs.jhu.edu
  */
 public class KBestExtractor {
-  private final JoshuaConfiguration joshuaConfiguration;
   private final String outputFormat;
   private final HashMap<HGNode, VirtualNode> virtualNodesTable = new HashMap<>();
 
@@ -117,33 +114,25 @@ public class KBestExtractor {
   /* The input sentence */
   private final Sentence sentence;
 
-  /* The weights being used to score the forest */
-  private final FeatureVector weights;
-
-  /* The feature functions */
-  private final List<FeatureFunction> featureFunctions;
+  /* The decoderConfig */
+  private final DecoderConfig config;
 
   /* BLEU statistics of the references */
   private BLEU.References references = null;
 
   public KBestExtractor(
-      Sentence sentence,
-      List<FeatureFunction> featureFunctions,
-      FeatureVector weights,
-      boolean isMonolingual,
-      JoshuaConfiguration joshuaConfiguration) {
-
-    this.featureFunctions = featureFunctions;
+      final Sentence sentence,
+      final DecoderConfig config,
+      boolean isMonolingual) {
 
-    this.joshuaConfiguration = joshuaConfiguration;
-    this.outputFormat = this.joshuaConfiguration.outputFormat;
-    this.extractUniqueNbest = joshuaConfiguration.use_unique_nbest;
+    this.config = config;
+    this.outputFormat = config.getFlags().getString("output_format");
+    this.extractUniqueNbest = config.getFlags().getBoolean("use_unique_nbest");
 
-    this.weights = weights;
     this.defaultSide = (isMonolingual ? Side.SOURCE : Side.TARGET);
     this.sentence = sentence;
 
-    if (joshuaConfiguration.rescoreForest) {
+    if (config.getFlags().getBoolean("rescore_forest")) {
       references = new BLEU.References(sentence.references());
     }
   }
@@ -281,7 +270,7 @@ public class KBestExtractor {
   private String maybeProjectCase(String hypothesis, DerivationState state) {
     String output = hypothesis;
 
-    if (joshuaConfiguration.project_case) {
+    if (config.getFlags().getBoolean("project_case")) {
       String[] tokens = hypothesis.split("\\s+");
       List<List<Integer>> points = state.getWordAlignmentList();
       for (int i = 0; i < points.size(); i++) {
@@ -518,7 +507,7 @@ public class KBestExtractor {
                 + virtualTailNode.nbests.get(newRanks[i] - 1).getModelCost();
             nextState.setCost(cost);
 
-            if (joshuaConfiguration.rescoreForest)
+            if (config.getFlags().getBoolean("rescore_forest"))
               nextState.bleu = nextState.computeBLEU();
 
             candHeap.add(nextState);
@@ -632,7 +621,7 @@ public class KBestExtractor {
       cost = hyperEdge.getBestDerivationScore();
 
       DerivationState state = new DerivationState(parentNode, hyperEdge, ranks, cost, edgePos);
-      if (joshuaConfiguration.rescoreForest)
+      if (config.getFlags().getBoolean("rescore_forest"))
         state.bleu = state.computeBLEU();
 
       return state;
@@ -738,7 +727,7 @@ public class KBestExtractor {
      * @return float representing model cost plus the BLEU score
      */
     public float getCost() {
-      return cost - weights.getOrDefault(hashFeature("BLEU")) * bleu;
+      return cost - config.getWeights().getOrDefault(hashFeature("BLEU")) * bleu;
     }
 
     public String toString() {
@@ -839,7 +828,7 @@ public class KBestExtractor {
     }
 
     public FeatureVector getFeatures() {
-      final FeatureVectorExtractor extractor = new FeatureVectorExtractor(featureFunctions, sentence);
+      final FeatureVectorExtractor extractor = new FeatureVectorExtractor(config.getFeatureFunctions(), sentence);
       visit(extractor);
       return extractor.getFeatures();
     }
@@ -1019,7 +1008,7 @@ public class KBestExtractor {
         for (int i = 0; i < indent * 2; i++)
           sb.append(" ");
 
-        final FeatureVectorExtractor extractor = new FeatureVectorExtractor(featureFunctions, sentence);
+        final FeatureVectorExtractor extractor = new FeatureVectorExtractor(config.getFeatureFunctions(), sentence);
         extractor.before(state, indent, tailNodeIndex);
         final FeatureVector transitionFeatures = extractor.getFeatures();
 
@@ -1033,7 +1022,7 @@ public class KBestExtractor {
           sb.append(" ").append(dpState);
         }
         sb.append(" ||| ").append(transitionFeatures);
-        sb.append(" ||| ").append(weights.innerProduct(transitionFeatures));
+        sb.append(" ||| ").append(config.getWeights().innerProduct(transitionFeatures));
         if (rule.getAlignment() != null)
           sb.append(" ||| ").append(Arrays.toString(rule.getAlignment()));
         sb.append("\n");
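
The decoder flags KBestExtractor now reads through DecoderConfig#getFlags() are
scattered across the hunks above; collected in one sketch (key names come from
the getters in this diff, the values are purely illustrative, and treating the
flags object as a plain Typesafe Config follows from how it is queried here):

    import com.typesafe.config.Config;
    import com.typesafe.config.ConfigFactory;

    public class KBestFlagsSketch {
      public static void main(String[] args) {
        Config flags = ConfigFactory.parseString(
            "output_format = \"%i ||| %s ||| %f ||| %c\"\n"  // illustrative template
          + "use_unique_nbest = true\n"
          + "rescore_forest = false\n"
          + "project_case = false\n");
        System.out.println(flags.getString("output_format"));
        System.out.println(flags.getBoolean("rescore_forest"));
      }
    }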

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/io/TranslationRequestStream.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/io/TranslationRequestStream.java b/joshua-core/src/main/java/org/apache/joshua/decoder/io/TranslationRequestStream.java
index afb63ab..dcf50ad 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/io/TranslationRequestStream.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/io/TranslationRequestStream.java
@@ -22,12 +22,12 @@ import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.Reader;
 
-import com.google.gson.stream.JsonReader;
-
-import org.apache.joshua.decoder.JoshuaConfiguration;
-import org.apache.joshua.decoder.JoshuaConfiguration.INPUT_TYPE;
+import org.apache.joshua.decoder.InputType;
 import org.apache.joshua.decoder.segment_file.Sentence;
 
+import com.google.gson.stream.JsonReader;
+import com.typesafe.config.Config;
+
 /**
  * This class iterates over an input stream, looking for inputs to translate. By default, it
  * expects plain-text input, which can be plain sentences or PLF-encoded lattices. If
@@ -48,7 +48,7 @@ import org.apache.joshua.decoder.segment_file.Sentence;
  * @author orluke
  */
 public class TranslationRequestStream {
-  private final JoshuaConfiguration joshuaConfiguration;
+  private final Config config;
   private int sentenceNo = -1;
 
   /* Plain text or JSON input */
@@ -57,10 +57,10 @@ public class TranslationRequestStream {
   /* Whether the request has been killed by a broken client connection. */
   private volatile boolean isShutDown = false;
 
-  public TranslationRequestStream(BufferedReader reader, JoshuaConfiguration joshuaConfiguration) {
-    this.joshuaConfiguration = joshuaConfiguration;
+  public TranslationRequestStream(BufferedReader reader, Config config) {
+    this.config = config;
     
-    if (joshuaConfiguration.input_type == INPUT_TYPE.json) {
+    if (InputType.valueOf(config.getString("serverSettings.input_type")) == InputType.json) {
       this.requestHandler = new JSONStreamHandler(reader);
     } else {
       this.requestHandler = new PlaintextStreamHandler(reader);
@@ -103,7 +103,7 @@ public class TranslationRequestStream {
       if (line == null)
         return null;
 
-      return new Sentence(line, -1, joshuaConfiguration);
+      return new Sentence(line, -1, config);
     }
   }
   
@@ -121,7 +121,7 @@ public class TranslationRequestStream {
       String line = reader.readLine();
 
       if (line != null) {
-        return new Sentence(line, sentenceNo, joshuaConfiguration);
+        return new Sentence(line, sentenceNo, config);
       }
       
       return null;

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java b/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
index 9f0dec1..a1745f2 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
@@ -18,6 +18,8 @@
  */
 package org.apache.joshua.decoder.phrase;
 
+import static org.apache.joshua.decoder.chart_parser.ComputeNodeResult.computeNodeResult;
+
 /*** 
  * A candidate represents a translation hypothesis that may possibly be added to the translation
  * hypergraph. It groups together (a) a set of translation hypotheses all having the same coverage
@@ -37,19 +39,16 @@ import java.util.Arrays;
 import java.util.List;
 
 import org.apache.joshua.corpus.Span;
-import org.apache.joshua.decoder.chart_parser.ComputeNodeResult;
+import org.apache.joshua.decoder.DecoderConfig;
 import org.apache.joshua.decoder.chart_parser.NodeResult;
-import org.apache.joshua.decoder.ff.FeatureFunction;
 import org.apache.joshua.decoder.ff.state_maintenance.DPState;
 import org.apache.joshua.decoder.ff.tm.Rule;
 import org.apache.joshua.decoder.hypergraph.HGNode;
 import org.apache.joshua.decoder.segment_file.Sentence;
 
-import static org.apache.joshua.decoder.chart_parser.ComputeNodeResult.computeNodeResult;
-
 public class Candidate implements Comparable<Candidate> {
   
-  private List<FeatureFunction> featureFunctions;
+  private final DecoderConfig config;
   private Sentence sentence;
   
   // the set of hypotheses that can be paired with phrases from this span 
@@ -115,9 +114,9 @@ public class Candidate implements Comparable<Candidate> {
         getHypothesis(), getPhraseNode().bestHyperedge.getRule().getTargetWords(), getSpan());
   }
 
-  public Candidate(List<FeatureFunction> featureFunctions, Sentence sentence, 
+  public Candidate(DecoderConfig config, Sentence sentence, 
       List<Hypothesis> hypotheses, PhraseNodes phrases, float delta, int[] ranks) {
-    this.featureFunctions = featureFunctions;
+    this.config = config;
     this.sentence = sentence;
     this.hypotheses = hypotheses;
     this.phrases = phrases;
@@ -161,7 +160,7 @@ public class Candidate implements Comparable<Candidate> {
    */
   public Candidate extendHypothesis() {
     if (ranks[0] < hypotheses.size() - 1) {
-      return new Candidate(featureFunctions, sentence, hypotheses, phrases, future_delta, new int[] { ranks[0] + 1, ranks[1] });
+      return new Candidate(config, sentence, hypotheses, phrases, future_delta, new int[] { ranks[0] + 1, ranks[1] });
     }
     return null;
   }
@@ -173,7 +172,7 @@ public class Candidate implements Comparable<Candidate> {
    */
   public Candidate extendPhrase() {
     if (ranks[1] < phrases.size() - 1) {
-      return new Candidate(featureFunctions, sentence, hypotheses, phrases, future_delta, new int[] { ranks[0], ranks[1] + 1 });
+      return new Candidate(config, sentence, hypotheses, phrases, future_delta, new int[] { ranks[0], ranks[1] + 1 });
     }
     
     return null;
@@ -232,7 +231,7 @@ public class Candidate implements Comparable<Candidate> {
     if (computedResult == null) {
       // add the rule
       // TODO: sourcepath
-      computedResult = computeNodeResult(featureFunctions, getRule(), getTailNodes(), getLastCovered(), getPhraseEnd(), null, sentence);
+      computedResult = computeNodeResult(config, getRule(), getTailNodes(), getLastCovered(), getPhraseEnd(), null, sentence);
     }
     
     return computedResult;

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/PhraseChart.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/PhraseChart.java b/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/PhraseChart.java
index 8355eb5..6564e7b 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/PhraseChart.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/PhraseChart.java
@@ -18,13 +18,14 @@
  */
 package org.apache.joshua.decoder.phrase;
 
-import java.util.ArrayList;	
+import static org.apache.joshua.decoder.chart_parser.ComputeNodeResult.computeNodeResult;
+
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 
-import org.apache.joshua.decoder.chart_parser.ComputeNodeResult;
+import org.apache.joshua.decoder.DecoderConfig;
 import org.apache.joshua.decoder.chart_parser.NodeResult;
-import org.apache.joshua.decoder.ff.FeatureFunction;
 import org.apache.joshua.decoder.ff.tm.Rule;
 import org.apache.joshua.decoder.ff.tm.RuleCollection;
 import org.apache.joshua.decoder.hypergraph.HGNode;
@@ -33,7 +34,7 @@ import org.apache.joshua.decoder.segment_file.Sentence;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import static org.apache.joshua.decoder.chart_parser.ComputeNodeResult.computeNodeResult;
+import com.google.common.collect.ImmutableList;
 
 /**
  * This class represents a bundle of phrase tables that have been read in,
@@ -50,13 +51,13 @@ public class PhraseChart {
   private final List<PhraseNodes> entries;
 
   // number of translation options
-  private int numOptions = 20;
+  private final int numOptions;
   
   // The feature functions
-  private final List<FeatureFunction> features;
+  private final DecoderConfig config;
   
   // The input sentence
-  private Sentence sentence;
+  private final Sentence sentence;
 
   /**
    * Create a new PhraseChart object, which represents all phrases that are
@@ -68,13 +69,13 @@ public class PhraseChart {
    * @param source input to {@link org.apache.joshua.lattice.Lattice}
    * @param num_options number of translation options (typically set to 20)
    */
-  public PhraseChart(PhraseTable[] tables, List<FeatureFunction> features, Sentence source,
+  public PhraseChart(ImmutableList<PhraseTable> tables, DecoderConfig config, Sentence source,
       int num_options) {
 
     float startTime = System.currentTimeMillis();
 
     this.numOptions = num_options;
-    this.features = features;
+    this.config = config;
     this.sentence = source;
 
     max_source_phrase_length = 0;
@@ -193,7 +194,7 @@ public class PhraseChart {
        * performance gains --- the more common the word, the more translations options it is
        * likely to have (often into the tens of thousands).
        */
-      List<Rule> rules = to.getSortedRules(features);
+      List<Rule> rules = to.getSortedRules(config.getFeatureFunctions());
       
       // TODO: I think this is a race condition
       if (numOptions > 0 && rules.size() > numOptions)
@@ -208,7 +209,7 @@ public class PhraseChart {
 
         // Turn each rule into an HGNode, add them one by one 
         for (Rule rule: rules) {
-          NodeResult result = computeNodeResult(features, rule, null, i, j, null, sentence);
+          NodeResult result = computeNodeResult(config, rule, null, i, j, null, sentence);
           HyperEdge edge = new HyperEdge(rule, result.getViterbiCost(), result.getTransitionCost(), null, null);
           HGNode phraseNode = new HGNode(i, j, rule.getLHS(), result.getDPStates(), edge, result.getPruningEstimate());
           nodes.add(phraseNode);