You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by le...@apache.org on 2016/05/16 06:26:58 UTC

[42/66] [partial] incubator-joshua git commit: JOSHUA-252 Make it possible to use Maven to build Joshua

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/ff/tm/CreateGlueGrammar.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/tm/CreateGlueGrammar.java b/src/joshua/decoder/ff/tm/CreateGlueGrammar.java
deleted file mode 100644
index 51e9fc3..0000000
--- a/src/joshua/decoder/ff/tm/CreateGlueGrammar.java
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.ff.tm;
-
-import static joshua.decoder.ff.tm.packed.PackedGrammar.VOCABULARY_FILENAME;
-import static joshua.util.FormatUtils.cleanNonTerminal;
-import static joshua.util.FormatUtils.isNonterminal;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
-import java.util.logging.Logger;
-
-import joshua.corpus.Vocabulary;
-import joshua.decoder.JoshuaConfiguration;
-import joshua.util.io.LineReader;
-
-import org.kohsuke.args4j.CmdLineException;
-import org.kohsuke.args4j.CmdLineParser;
-import org.kohsuke.args4j.Option;
-
-
-public class CreateGlueGrammar {
-  
-  
-  private final Set<String> nonTerminalSymbols = new HashSet<>();
-  private static final Logger log = Logger.getLogger(CreateGlueGrammar.class.getName());
-  
-  @Option(name = "--grammar", aliases = {"-g"}, required = true, usage = "provide grammar to determine list of NonTerminal symbols.")
-  private String grammarPath;
-  
-  @Option(name = "--goal", aliases = {"-goal"}, required = false, usage = "specify custom GOAL symbol. Default: 'GOAL'")
-  private String goalSymbol = cleanNonTerminal(new JoshuaConfiguration().goal_symbol);
-
-  /* Rule templates */
-  // [GOAL] ||| <s> ||| <s> ||| 0
-  private static final String R_START = "[%1$s] ||| <s> ||| <s> ||| 0";
-  // [GOAL] ||| [GOAL,1] [X,2] ||| [GOAL,1] [X,2] ||| -1
-  private static final String R_TWO = "[%1$s] ||| [%1$s,1] [%2$s,2] ||| [%1$s,1] [%2$s,2] ||| -1";
-  // [GOAL] ||| [GOAL,1] </s> ||| [GOAL,1] </s> ||| 0
-  private static final String R_END = "[%1$s] ||| [%1$s,1] </s> ||| [%1$s,1] </s> ||| 0";
-  // [GOAL] ||| <s> [X,1] </s> ||| <s> [X,1] </s> ||| 0
-  private static final String R_TOP = "[%1$s] ||| <s> [%2$s,1] </s> ||| <s> [%2$s,1] </s> ||| 0";
-  
-  private void run() throws IOException {
-    
-    File grammar_file = new File(grammarPath);
-    if (!grammar_file.exists()) {
-      throw new IOException("Grammar file doesn't exist: " + grammarPath);
-    }
-
-    // in case of a packedGrammar, we read the serialized vocabulary,
-    // collecting all cleaned nonTerminal symbols.
-    if (grammar_file.isDirectory()) {
-      Vocabulary.read(new File(grammarPath + File.separator + VOCABULARY_FILENAME));
-      for (int i = 0; i < Vocabulary.size(); ++i) {
-        final String token = Vocabulary.word(i);
-        if (isNonterminal(token)) {
-          nonTerminalSymbols.add(cleanNonTerminal(token));
-        }
-      }
-    // otherwise we collect cleaned left-hand sides from the rules in the text grammar.
-    } else { 
-      final LineReader reader = new LineReader(grammarPath);
-      while (reader.hasNext()) {
-        final String line = reader.next();
-        int lhsStart = line.indexOf("[") + 1;
-        int lhsEnd = line.indexOf("]");
-        if (lhsStart < 1 || lhsEnd < 0) {
-          log.info(String.format("malformed rule: %s\n", line));
-          continue;
-        }
-        final String lhs = line.substring(lhsStart, lhsEnd);
-        nonTerminalSymbols.add(lhs);
-      }
-    }
-    
-    log.info(
-        String.format("%d nonTerminal symbols read: %s",
-        nonTerminalSymbols.size(),
-        nonTerminalSymbols.toString()));
-
-    // write glue rules to stdout
-    
-    System.out.println(String.format(R_START, goalSymbol));
-    
-    for (String nt : nonTerminalSymbols)
-      System.out.println(String.format(R_TWO, goalSymbol, nt));
-    
-    System.out.println(String.format(R_END, goalSymbol));
-    
-    for (String nt : nonTerminalSymbols)
-      System.out.println(String.format(R_TOP, goalSymbol, nt));
-
-  }
-  
-  public static void main(String[] args) throws IOException {
-    final CreateGlueGrammar glueCreator = new CreateGlueGrammar();
-    final CmdLineParser parser = new CmdLineParser(glueCreator);
-
-    try {
-      parser.parseArgument(args);
-      glueCreator.run();
-    } catch (CmdLineException e) {
-      log.info(e.toString());
-      parser.printUsage(System.err);
-      System.exit(1);
-    }
-   }
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/ff/tm/Grammar.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/tm/Grammar.java b/src/joshua/decoder/ff/tm/Grammar.java
deleted file mode 100644
index a834442..0000000
--- a/src/joshua/decoder/ff/tm/Grammar.java
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.ff.tm;
-
-import java.util.List;
-
-import joshua.decoder.ff.FeatureFunction;
-
-/**
- * Grammar is an interface for wrapping a trie of TrieGrammar in order to store holistic metadata.
- * 
- * @author wren ng thornton <wr...@users.sourceforge.net>
- * @author Zhifei Li, <zh...@gmail.com>
- */
-public interface Grammar {
-
-  /**
-   * Gets the root of the <code>Trie</code> backing this grammar.
-   * <p>
-   * <em>Note</em>: This method should run as a small constant-time function.
-   * 
-   * @return the root of the <code>Trie</code> backing this grammar
-   */
-  Trie getTrieRoot();
-
-  /**
-   * After calling this method, the rules in this grammar are guaranteed to be sorted based on the
-   * latest feature function values.
-   * <p>
-   * Cube-pruning requires that the grammar be sorted based on the latest feature functions.
-   * 
-   * @param models The feature functions whose latest values determine the sort order.
-   */
-  void sortGrammar(List<FeatureFunction> models);
-
-  /**
-   * Determines whether the rules in this grammar have been sorted based on the latest feature
-   * function values.
-   * <p>
-   * This method is needed for the cube-pruning algorithm.
-   * 
-   * @return <code>true</code> if the rules in this grammar have been sorted based on the latest
-   *         feature function values, <code>false</code> otherwise
-   */
-  boolean isSorted();
-
-  /**
-   * Returns whether this grammar has any valid rules for covering a particular span of a sentence.
-   * Hiero's "glue" grammar will only say True if the span is longer than our span limit, and is
-   * anchored at startIndex==0. Hiero's "regular" grammar will only say True if the span is less
-   * than the span limit. Other grammars, e.g. for rule-based systems, may have different behaviors.
-   * 
-   * @param startIndex Indicates the starting index of a phrase in a source input phrase, or a
-   *          starting node identifier in a source input lattice
-   * @param endIndex Indicates the ending index of a phrase in a source input phrase, or an ending
-   *          node identifier in a source input lattice
-   * @param pathLength Length of the input path in a source input lattice. If a source input phrase
-   *          is used instead of a lattice, this value will likely be ignored by the underlying
-   *          implementation, but would normally be defined as <code>endIndex-startIndex</code>
-   */
-  boolean hasRuleForSpan(int startIndex, int endIndex, int pathLength);
-
-  /**
-   * Gets the number of rules stored in the grammar.
-   * 
-   * @return the number of rules stored in the grammar
-   */
-  int getNumRules();
-  
-  /**
-   * Returns the number of dense features.
-   * 
-   * @return the number of dense features
-   */
-  int getNumDenseFeatures();
-
-  /**
-   * This is used to construct a manual rule supported from outside the grammar, but the owner
-   * should be the same as the grammar. Rule ID will be the same as OOVRuleId, with no lattice cost.
-   */
-  @Deprecated
-  Rule constructManualRule(int lhs, int[] sourceWords, int[] targetWords, float[] scores, int arity);
-
-  /**
-   * Dump the grammar to disk.
-   * 
-   * @param file
-   */
-  @Deprecated
-  void writeGrammarOnDisk(String file);
-
-  /**
-   * This returns true if the grammar contains rules that are regular expressions, possibly matching
-   * many different inputs.
-   * 
-   * @return true if the grammar's rules may contain regular expressions.
-   */
-  boolean isRegexpGrammar();
-
-  /**
-   * Return the grammar's owner.
-   */
-  int getOwner();
-
-  /**
-   * Return the maximum source phrase length (terminals + nonterminals).
-   */
-  int getMaxSourcePhraseLength();
-  
-  /**
-   * Add an OOV rule for the requested word for the grammar.
-   * 
-   * @param word
-   * @param featureFunctions
-   */
-  void addOOVRules(int word, List<FeatureFunction> featureFunctions);
-  
-  /**
-   * Add a rule to the grammar.
-   *
-   * @param rule the rule to add
-   */
-  void addRule(Rule rule);
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/ff/tm/GrammarReader.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/tm/GrammarReader.java b/src/joshua/decoder/ff/tm/GrammarReader.java
deleted file mode 100644
index f94a472..0000000
--- a/src/joshua/decoder/ff/tm/GrammarReader.java
+++ /dev/null
@@ -1,207 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.ff.tm;
-
-import java.io.IOException;
-import java.util.Iterator;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import joshua.corpus.Vocabulary;
-import joshua.decoder.Decoder;
-import joshua.util.io.LineReader;
-
-/**
- * This is a base class for simple, ASCII line-based grammars that are stored on disk.
- * 
- * @author Juri Ganitkevitch
- * 
- */
-public abstract class GrammarReader<R extends Rule> implements Iterable<R>, Iterator<R> {
-
-  protected static String fieldDelimiter;
-  protected static String nonTerminalRegEx;
-  protected static String nonTerminalCleanRegEx;
-
-  protected static String description;
-
-  protected String fileName;
-  protected LineReader reader;
-  protected String lookAhead;
-  protected int numRulesRead;
-
-  private static final Logger logger = Logger.getLogger(GrammarReader.class.getName());
-
-  // dummy constructor; leaves fileName unset (null)
-  public GrammarReader() {
-    this.fileName = null;
-  }
-
-  public GrammarReader(String fileName) {
-    this.fileName = fileName;
-  }
-
-  public void initialize() {
-    try {
-      this.reader = new LineReader(fileName);
-    } catch (IOException e) {
-      throw new RuntimeException("Error opening translation model file: " + fileName + "\n"
-          + (null != e.getMessage() ? e.getMessage() : "No details available. Sorry."), e);
-    }
-
-    Decoder.LOG(1, String.format("Reading grammar from file %s...", fileName));
-    numRulesRead = 0;
-    advanceReader();
-  }
-
-  // the reader is the iterator itself
-  public Iterator<R> iterator() {
-    return this;
-  }
-
-  /** Unsupported Iterator method. */
-  public void remove() throws UnsupportedOperationException {
-    throw new UnsupportedOperationException();
-  }
-
-  public void close() {
-    if (null != this.reader) {
-      try {
-        this.reader.close();
-      } catch (IOException e) {
-        // FIXME: is this the right logging level?
-        if (logger.isLoggable(Level.WARNING))
-          logger.info("Error closing grammar file stream: " + this.fileName);
-      }
-      this.reader = null;
-    }
-  }
-
-  /**
-   * For correct behavior <code>close</code> must be called on every GrammarReader, however this
-   * code attempts to avoid resource leaks.
-   * 
-   * @see joshua.util.io.LineReader
-   */
-  @Override
-  protected void finalize() throws Throwable {
-    if (this.reader != null) {
-      logger.severe("Grammar file stream was not closed, this indicates a coding error: "
-          + this.fileName);
-    }
-
-    this.close();
-    super.finalize();
-  }
-
-  @Override
-  public boolean hasNext() {
-    return lookAhead != null;
-  }
-
-  private void advanceReader() {
-    try {
-      lookAhead = reader.readLine();
-      numRulesRead++;
-    } catch (IOException e) {
-      logger.severe("Error reading grammar from file: " + fileName);
-    }
-    if (lookAhead == null && reader != null) {
-      this.close();
-    }
-  }
-
-  /**
-   * Read the next line, and print reader progress.
-   */
-  @Override
-  public R next() {
-    String line = lookAhead;
-
-    int oldProgress = reader.progress();
-    advanceReader();
-    
-    if (Decoder.VERBOSE >= 1) {
-      int newProgress = (reader != null) ? reader.progress() : 100;
-
-      if (newProgress > oldProgress) {
-        for (int i = oldProgress + 1; i <= newProgress; i++)
-          if (i == 97) {
-            System.err.print("1");
-          } else if (i == 98) {
-            System.err.print("0");
-          } else if (i == 99) {
-            System.err.print("0");
-          } else if (i == 100) {
-            System.err.println("%");
-          } else if (i % 10 == 0) {
-            System.err.print(String.format("%d", i));
-            System.err.flush();
-          } else if ((i - 1) % 10 == 0)
-            ; // skip at 11 since 10, 20, etc take two digits
-          else {
-            System.err.print(".");
-            System.err.flush();
-          }
-      }
-    }
-    return parseLine(line);
-  }
-
-  protected abstract R parseLine(String line);
-
-  // TODO: keep these around or not?
-  public abstract String toWords(R rule);
-
-  public abstract String toWordsWithoutFeatureScores(R rule);
-
-  /**
-   * Removes square brackets (and index, if present) from nonterminal id 
-   * @param tokenID
-   * @return cleaned ID
-   */
-  public static int cleanNonTerminal(int tokenID) {
-    // cleans NT of any markup, e.g., [X,1] may become [X], depending on nonTerminalCleanRegEx
-    return Vocabulary.id(cleanNonTerminal(Vocabulary.word(tokenID)));
-  }
-
-  /**
-   * Removes square brackets (and index, if present) from nonterminal id 
-   * @param token
-   * @return cleaned token
-   */
-  public static String cleanNonTerminal(String token) {
-    // cleans NT of any markup, e.g., [X,1] may become [X], depending on nonTerminalCleanRegEx
-    return token.replaceAll(nonTerminalCleanRegEx, "");
-  }
-
-  public static boolean isNonTerminal(final String word) {
-    // checks if word matches NT regex
-    return word.matches(nonTerminalRegEx);
-  }
-
-  public String getNonTerminalRegEx() {
-    return nonTerminalRegEx;
-  }
-
-  public String getNonTerminalCleanRegEx() {
-    return nonTerminalCleanRegEx;
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/ff/tm/PhraseRule.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/tm/PhraseRule.java b/src/joshua/decoder/ff/tm/PhraseRule.java
deleted file mode 100644
index 8f5d249..0000000
--- a/src/joshua/decoder/ff/tm/PhraseRule.java
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.ff.tm;
-
-import com.google.common.base.Supplier;
-import com.google.common.base.Suppliers;
-
-/***
- * A class for reading in rules from a Moses phrase table. Most of the conversion work is done
- * in {@link joshua.decoder.ff.tm.format.PhraseFormatReader}. This includes prepending every
- * rule with a nonterminal, so that the phrase-based decoder can assume the same hypergraph
- * format as the hierarchical decoder (by pretending to be a strictly left-branching grammar and
- * dispensing with the notion of coverage spans). However, prepending the nonterminals means all
- * the alignments are off by 1. We do not want to fix those when reading in due to the expense,
- * so instead we use this rule which adjusts the alignments on the fly.
- * 
- * Also, we only convert the Moses dense features on the fly, via this class.
- * 
- * TODO: this class should also be responsible for prepending the nonterminals.
- * 
- * @author Matt Post
- *
- */
-public class PhraseRule extends Rule {
-
-
-  private final String mosesFeatureString;
-  private final Supplier<byte[]> alignmentSupplier;
-  private final Supplier<String> sparseFeaturesStringSupplier;
-  
-  public PhraseRule(int lhs, int[] french, int[] english, String sparse_features, int arity,
-      String alignment) {
-    super(lhs, french, english, null, arity, alignment);
-    this.mosesFeatureString = sparse_features;
-    this.alignmentSupplier = initializeAlignmentSupplier();
-    this.sparseFeaturesStringSupplier = initializeSparseFeaturesStringSupplier();
-  }
-  
-  /** 
-   * Moses features are probabilities; we need to convert them here by taking the negative log prob.
-   * We do this lazily, only when the rule is actually used, to amortize the conversion cost.
-   */
-  private Supplier<String> initializeSparseFeaturesStringSupplier() {
-    return Suppliers.memoize(() ->{
-      StringBuffer values = new StringBuffer();
-      for (String value: mosesFeatureString.split(" ")) {
-        float f = Float.parseFloat(value);
-        values.append(String.format("%f ", f <= 0.0 ? -100 : -Math.log(f)));
-      }
-      return values.toString().trim();
-    });
-  }
-
-  /**
-   * This is the exact same as the parent implementation, but we need to add 1 to each alignment
-   * point to account for the nonterminal [X] that was prepended to each rule. 
-   */
-  private Supplier<byte[]> initializeAlignmentSupplier(){
-    return Suppliers.memoize(() ->{
-      String[] tokens = getAlignmentString().split("[-\\s]+");
-      byte[] alignmentArray = new byte[tokens.length + 2];
-      alignmentArray[0] = alignmentArray[1] = 0;
-      for (int i = 0; i < tokens.length; i++)
-          alignmentArray[i + 2] = (byte) (Short.parseShort(tokens[i]) + 1);
-      return alignmentArray;
-    });
-  }
-
-  @Override
-  public String getFeatureString() {
-    return this.sparseFeaturesStringSupplier.get();
-  }
-  
-  @Override
-  public byte[] getAlignment() {
-    return this.alignmentSupplier.get();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/ff/tm/Rule.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/tm/Rule.java b/src/joshua/decoder/ff/tm/Rule.java
deleted file mode 100644
index 9f1fb8f..0000000
--- a/src/joshua/decoder/ff/tm/Rule.java
+++ /dev/null
@@ -1,606 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.ff.tm;
-
-import java.util.ArrayList;
-import java.util.Arrays;  
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.regex.Pattern;
-
-import com.google.common.base.Supplier;
-import com.google.common.base.Suppliers;
-
-import joshua.corpus.Vocabulary;
-import joshua.decoder.Decoder;
-import joshua.decoder.ff.FeatureFunction;
-import joshua.decoder.ff.FeatureVector;
-import joshua.decoder.segment_file.Sentence;
-
-/**
- * This class defines the interface for Rule.
- * 
- * All feature scores are interpreted as negative log probabilities, and are therefore negated.
- * Note that not all features need to be negative log probs, but you should be aware that they
- * will be negated, so if you want a positive count, it should come in as negative.
- * 
- * @author Zhifei Li, <zh...@gmail.com>
- */
-
-
-/**
- * Normally, the feature score in the rule should be *cost* (i.e., -LogP), so that the feature
- * weight should be positive
- * 
- * @author Zhifei Li, <zh...@gmail.com>
- * @author Matt Post <po...@cs.jhu.edu>
- */
-public class Rule implements Comparator<Rule>, Comparable<Rule> {
-
-  private int lhs; // tag of this rule
-  private int[] pFrench; // pointer to the RuleCollection, as all the rules under it share the same
-                         // Source side
-  protected int arity;
-
-  // And a string containing the sparse ones
-  //protected final String sparseFeatureString;
-  protected final Supplier<String> sparseFeatureStringSupplier;
-  private final Supplier<FeatureVector> featuresSupplier;
-
-  /*
-   * a feature function will be fired for this rule only if the owner of the rule matches the owner
-   * of the feature function
-   */
-  private int owner = -1;
-
-  /**
-   * This is the cost computed only from the features present with the grammar rule. This cost is
-   * needed to sort the rules in the grammar for cube pruning, but isn't the full cost of applying
-   * the rule (which will include contextual features that can't be computed until the rule is
-   * applied).
-   */
-  private float estimatedCost = Float.NEGATIVE_INFINITY;
-
-  private float precomputableCost = Float.NEGATIVE_INFINITY;
-
-  private int[] english;
-
-  // The alignment string, e.g., 0-0 0-1 1-1 2-1
-  private String alignmentString;
-  private final Supplier<byte[]> alignmentSupplier;
-
-  /**
-   * Constructs a new rule using the provided parameters. Rule id for this rule is
-   * undefined. Note that some of the sparse features may be unlabeled, but they cannot be mapped to
-   * their default names ("tm_OWNER_INDEX") until later, when we know the owner of the rule. This is
-   * not known until the rule is actually added to a grammar in Grammar::addRule().
-   * 
-   * Constructor used by other constructors below;
-   * 
-   * @param lhs Left-hand side of the rule.
-   * @param sourceRhs Source language right-hand side of the rule.
-   * @param targetRhs Target language right-hand side of the rule.
-   * @param sparseFeatures Feature value scores for the rule.
-   * @param arity Number of nonterminals in the source language right-hand side.
-   * @param owner Owner of the rule (feature functions fire only for rules with a matching owner).
-   */
-  public Rule(int lhs, int[] sourceRhs, int[] targetRhs, String sparseFeatures, int arity, int owner) {
-    this.lhs = lhs;
-    this.pFrench = sourceRhs;
-    this.arity = arity;
-    this.owner = owner;
-    this.english = targetRhs;
-    this.sparseFeatureStringSupplier = Suppliers.memoize(() -> { return sparseFeatures; });
-    this.featuresSupplier = initializeFeatureSupplierFromString();
-    this.alignmentSupplier = initializeAlignmentSupplier();
-  }
-  
-  /**
-   * Constructor used by PackedGrammar's sortRules().
-   */
-  public Rule(int lhs, int[] sourceRhs, int[] targetRhs, FeatureVector features, int arity, int owner) {
-    this.lhs = lhs;
-    this.pFrench = sourceRhs;
-    this.arity = arity;
-    this.owner = owner;
-    this.english = targetRhs;
-    this.featuresSupplier = Suppliers.memoize(() -> { return features; });
-    this.sparseFeatureStringSupplier = initializeSparseFeaturesStringSupplier();
-    this.alignmentSupplier = initializeAlignmentSupplier();
-  }
-
-  /**
-   * Constructor used for SamtFormatReader and GrammarBuilderWalkerFunction's getRuleWithSpans()
-   * Owner set to -1
-   */
-  public Rule(int lhs, int[] sourceRhs, int[] targetRhs, String sparseFeatures, int arity) {
-    this(lhs, sourceRhs, targetRhs, sparseFeatures, arity, -1);
-  }
-
-  /**
-   * Constructor used for addOOVRules(), HieroFormatReader and PhraseRule.
-   */
-  public Rule(int lhs, int[] sourceRhs, int[] targetRhs, String sparseFeatures, int arity, String alignment) {
-    this(lhs, sourceRhs, targetRhs, sparseFeatures, arity);
-    this.alignmentString = alignment;
-  }
-  
-  /**
-   * Constructor (implicitly) used by PackedRule
-   */
-  public Rule() {
-    this.lhs = -1;
-    this.sparseFeatureStringSupplier = initializeSparseFeaturesStringSupplier();
-    this.featuresSupplier = initializeFeatureSupplierFromString();
-    this.alignmentSupplier = initializeAlignmentSupplier();
-  }
-
-  // ==========================================================================
-  // Lazy loading Suppliers for alignments, feature vector, and feature strings
-  // ==========================================================================
-  
-  private Supplier<byte[]> initializeAlignmentSupplier(){
-    return Suppliers.memoize(() ->{
-      byte[] alignment = null;
-      String alignmentString = getAlignmentString();
-      if (alignmentString != null) {
-        String[] tokens = alignmentString.split("[-\\s]+");
-        alignment = new byte[tokens.length];
-        for (int i = 0; i < tokens.length; i++)
-          alignment[i] = (byte) Short.parseShort(tokens[i]);
-      }
-      return alignment;
-    });
-  }
-  
-  /**
-   * If Rule was constructed with sparseFeatures String, we lazily populate the
-   * FeatureSupplier.
-   */
-  private Supplier<FeatureVector> initializeFeatureSupplierFromString(){
-    return Suppliers.memoize(() ->{
-      if (owner != -1) {
-        return new FeatureVector(getFeatureString(), "tm_" + Vocabulary.word(owner) + "_");
-      } else {
-        return new FeatureVector();
-      }
-    });
-  }
-  
-  /**
-   * If Rule was constructed with a FeatureVector, we lazily populate the sparseFeaturesStringSupplier.
-   */
-  private Supplier<String> initializeSparseFeaturesStringSupplier() {
-    return Suppliers.memoize(() -> {
-      return getFeatureVector().toString();
-    });
-  }
-
-  // ===============================================================
-  // Attributes
-  // ===============================================================
-
-  public void setEnglish(int[] eng) {
-    this.english = eng;
-  }
-
-  public int[] getEnglish() {
-    return this.english;
-  }
-
-  /**
-   * Two Rules are equal if they have the same LHS, the same source RHS and the same target
-   * RHS.
-   * 
-   * @param o the object to check for equality
-   * @return true if o is the same Rule as this rule, false otherwise
-   */
-  public boolean equals(Object o) {
-    if (!(o instanceof Rule)) {
-      return false;
-    }
-    Rule other = (Rule) o;
-    if (getLHS() != other.getLHS()) {
-      return false;
-    }
-    if (!Arrays.equals(getFrench(), other.getFrench())) {
-      return false;
-    }
-    if (!Arrays.equals(english, other.getEnglish())) {
-      return false;
-    }
-    return true;
-  }
-
-  public int hashCode() {
-    // I just made this up. If two rules are equal they'll have the
-    // same hashcode. Maybe someone else can do a better job though?
-    int frHash = Arrays.hashCode(getFrench());
-    int enHash = Arrays.hashCode(english);
-    return frHash ^ enHash ^ getLHS();
-  }
-
-  // ===============================================================
-  // Attributes
-  // ===============================================================
-
-  public void setArity(int arity) {
-    this.arity = arity;
-  }
-
-  public int getArity() {
-    return this.arity;
-  }
-
-  public void setOwner(int owner) {
-    this.owner = owner;
-  }
-
-  public int getOwner() {
-    return this.owner;
-  }
-
-  public void setLHS(int lhs) {
-    this.lhs = lhs;
-  }
-
-  public int getLHS() {
-    return this.lhs;
-  }
-
-  public void setFrench(int[] french) {
-    this.pFrench = french;
-  }
-
-  public int[] getFrench() {
-    return this.pFrench;
-  }
-
-  /**
-   * This function does the work of turning the string version of the sparse features (passed in
-   * when the rule was created) into an actual set of features. This is a bit complicated because we
-   * support intermingled labeled and unlabeled features, where the unlabeled features are mapped to
-   * a default name template of the form "tm_OWNER_INDEX".
-   * 
-   * This function returns the dense (phrasal) features discovered when the rule was loaded. Dense
-   * features are the list of unlabeled features that preceded labeled ones. They can also be
-   * specified as labeled features of the form "tm_OWNER_INDEX", but the former format is preferred.
-   */
-  public FeatureVector getFeatureVector() {
-    return featuresSupplier.get();
-  }
-
-  /**
-   * This function returns the estimated cost of a rule, which should have been computed when the
-   * grammar was first sorted via a call to Rule::estimateRuleCost(). This function is a getter
-   * only; it will not compute the value if it has not already been set. It is necessary in addition
-   * to estimateRuleCost(models) because sometimes the value needs to be retrieved from contexts
-   * that do not have access to the feature functions.
-   * 
-   * This function is called by the rule comparator when sorting the grammar. As such it may be
-   * called many times and any implementation of it should be a cached implementation.
-   * 
-   * @return the estimated cost of the rule (a lower bound on the true cost)
-   */
-  public float getEstimatedCost() {
-    return estimatedCost;
-  }
-
-  /**
-   * Precomputable costs is the inner product of the weights found on each grammar rule and the
-   * weight vector. This is slightly different from the estimated rule cost, which can include other
-   * features (such as a language model estimate). This getter and setter should also be cached, and
-   * is basically provided to allow the PhraseModel feature to cache its (expensive) computation for
-   * each rule.
-   * 
-   * @return the precomputable cost of each rule
-   */
-  public float getPrecomputableCost() {
-    return precomputableCost;
-  }
-
-  public float getDenseFeature(int k) {
-    return getFeatureVector().getDense(k);
-  }
-  
-  public void setPrecomputableCost(float[] phrase_weights, FeatureVector weights) {
-    float cost = 0.0f;
-    FeatureVector features = getFeatureVector();
-    for (int i = 0; i < features.getDenseFeatures().size() && i < phrase_weights.length; i++) {
-      cost += phrase_weights[i] * features.getDense(i);
-    }
-
-    for (String key: features.getSparseFeatures().keySet()) {
-      cost += weights.getSparse(key) * features.getSparse(key);
-    }
-    
-    this.precomputableCost = cost;
-  }
-
-  /**
-   * This function estimates the cost of a rule, which is used for sorting the rules for cube
-   * pruning. The estimated cost is basically the set of precomputable features (features listed
-   * along with the rule in the grammar file) along with any other estimates that other features
-   * would like to contribute (e.g., a language model estimate). This cost will be a lower bound on
-   * the rule's actual cost.
-   * 
-   * The value of this function is used only for sorting the rules. When the rule is later applied
-   * in context to particular hypernodes, the rule's actual cost is computed.
-   * 
-   * @param models the list of models available to the decoder
-   * @return estimated cost of the rule
-   */
-  public float estimateRuleCost(List<FeatureFunction> models) {
-    if (null == models)
-      return 0.0f;
-
-    if (this.estimatedCost <= Float.NEGATIVE_INFINITY) {
-      this.estimatedCost = 0.0f; // weights.innerProduct(computeFeatures());
-
-      if (Decoder.VERBOSE >= 4)
-        System.err.println(String.format("estimateCost(%s ;; %s)", getFrenchWords(), getEnglishWords()));
-      for (FeatureFunction ff : models) {
-        float val = ff.estimateCost(this, null);
-        if (Decoder.VERBOSE >= 4) 
-          System.err.println(String.format("  FEATURE %s -> %.3f", ff.getName(), val));
-        this.estimatedCost += val; 
-      }
-    }
-    
-    return estimatedCost;
-  }
-
-  // ===============================================================
-  // Methods
-  // ===============================================================
-
-  public String toString() {
-    StringBuffer sb = new StringBuffer();
-    sb.append(Vocabulary.word(this.getLHS()));
-    sb.append(" ||| ");
-    sb.append(getFrenchWords());
-    sb.append(" ||| ");
-    sb.append(getEnglishWords());
-    sb.append(" |||");
-    sb.append(" " + getFeatureVector());
-    sb.append(String.format(" ||| est=%.3f", getEstimatedCost()));
-    sb.append(String.format(" pre=%.3f", getPrecomputableCost()));
-    return sb.toString();
-  }
-  
-  /**
-   * Returns a version of the rule suitable for reading in from a text file.
-   * 
-   * @return
-   */
-  public String textFormat() {
-    StringBuffer sb = new StringBuffer();
-    sb.append(Vocabulary.word(this.getLHS()));
-    sb.append(" |||");
-    
-    int nt = 1;
-    for (int i = 0; i < getFrench().length; i++) {
-      if (getFrench()[i] < 0)
-        sb.append(" " + Vocabulary.word(getFrench()[i]).replaceFirst("\\]", String.format(",%d]", nt++)));
-      else
-        sb.append(" " + Vocabulary.word(getFrench()[i]));
-    }
-    sb.append(" |||");
-    nt = 1;
-    for (int i = 0; i < getEnglish().length; i++) {
-      if (getEnglish()[i] < 0)
-        sb.append(" " + Vocabulary.word(getEnglish()[i]).replaceFirst("\\]", String.format(",%d]", nt++)));
-      else
-        sb.append(" " + Vocabulary.word(getEnglish()[i]));
-    }
-    sb.append(" |||");
-    sb.append(" " + getFeatureString());
-    if (getAlignmentString() != null)
-      sb.append(" ||| " + getAlignmentString());
-    return sb.toString();
-  }
-
-  public String getFeatureString() {
-    return sparseFeatureStringSupplier.get();
-  }
-
-  /**
-   * Returns an alignment as a sequence of integers. The integers at positions i and i+1 are paired,
-   * with position i indexing the source and i+1 the target.
-   */
-  public byte[] getAlignment() {
-    return this.alignmentSupplier.get();
-  }
-  
-  public String getAlignmentString() {
-    return this.alignmentString;
-  }
-
-  /**
-   * The nonterminals on the English side are pointers to the source side nonterminals (-1 and -2),
-   * rather than being directly encoded. These numbers indicate the correspondence between the
-   * nonterminals on each side, introducing a level of indirection however when we want to resolve
-   * them. So to get the ID, we need to look up the corresponding source side ID.
-   * 
-   * @return The string of English words
-   */
-  public String getEnglishWords() {
-    int[] foreignNTs = getForeignNonTerminals();
-  
-    StringBuilder sb = new StringBuilder();
-    for (Integer index : getEnglish()) {
-      if (index >= 0)
-        sb.append(Vocabulary.word(index) + " ");
-      else
-        sb.append(Vocabulary.word(foreignNTs[-index - 1]).replace("]",
-            String.format(",%d] ", Math.abs(index))));
-    }
-  
-    return sb.toString().trim();
-  }
-
-  public boolean isTerminal() {
-    for (int i = 0; i < getEnglish().length; i++)
-      if (getEnglish()[i] < 0)
-        return false;
-  
-    return true;
-  }
-
-  /**
-   * Return the French (source) nonterminals as an int array (each negative source ID negated)
-   * 
-   * @return
-   */
-  public int[] getForeignNonTerminals() {
-    int[] nts = new int[getArity()];
-    int index = 0;
-    for (int id : getFrench())
-      if (id < 0)
-        nts[index++] = -id;
-    return nts;
-  }
-  
-  /**
-   * Returns an array of size getArity() containing the source indices of nonterminals.
-   */
-  public int[] getNonTerminalSourcePositions() {
-    int[] nonTerminalPositions = new int[getArity()];
-    int ntPos = 0;
-    for (int sourceIdx = 0; sourceIdx < getFrench().length; sourceIdx++) {
-      if (getFrench()[sourceIdx] < 0)
-        nonTerminalPositions[ntPos++] = sourceIdx;
-    }
-    return nonTerminalPositions;
-  }
-  
-  /**
-   * Parses the Alignment byte[] into a Map from target to (possibly a list of) source positions.
-   * Used by the WordAlignmentExtractor.
-   */
-  public Map<Integer, List<Integer>> getAlignmentMap() {
-    byte[] alignmentArray = getAlignment();
-    Map<Integer, List<Integer>> alignmentMap = new HashMap<Integer, List<Integer>>();
-    if (alignmentArray != null) {
-      for (int alignmentIdx = 0; alignmentIdx < alignmentArray.length; alignmentIdx += 2 ) {
-        int s = alignmentArray[alignmentIdx];
-        int t = alignmentArray[alignmentIdx + 1];
-        List<Integer> values = alignmentMap.get(t);
-        if (values == null)
-          alignmentMap.put(t, values = new ArrayList<Integer>());
-        values.add(s);
-      }
-    }
-    return alignmentMap;
-  }
-
-  /**
-   * Returns the target-side nonterminals in target order. Each negative entry -k of the English
-   * side points at the k-th source-side nonterminal, so it is resolved through
-   * getForeignNonTerminals(); values are the (negated) IDs exactly as that method stores them.
-   */
-  public int[] getEnglishNonTerminals() {
-    int[] nts = new int[getArity()];
-    int[] foreignNTs = getForeignNonTerminals();
-    int index = 0;
-  
-    for (int i : getEnglish()) {
-      if (i < 0)
-        nts[index++] = foreignNTs[Math.abs(i) - 1]; // i is the element itself, not a position
-    }
-  
-    return nts;
-  }
-
-  private int[] getNormalizedEnglishNonterminalIndices() {
-    int[] result = new int[getArity()];
-  
-    int ntIndex = 0;
-    for (Integer index : getEnglish()) {
-      if (index < 0)
-        result[ntIndex++] = -index - 1;
-    }
-  
-    return result;
-  }
-
-  public boolean isInverting() {
-    int[] normalizedEnglishNonTerminalIndices = getNormalizedEnglishNonterminalIndices();
-    if (normalizedEnglishNonTerminalIndices.length == 2) {
-      if (normalizedEnglishNonTerminalIndices[0] == 1) {
-        return true;
-      }
-    }
-    return false;
-  }
-
-  public String getFrenchWords() {
-    return Vocabulary.getWords(getFrench());
-  }
-
-  public static final String NT_REGEX = "\\[[^\\]]+?\\]";
-
-  private Pattern getPattern() {
-    String source = getFrenchWords();
-    String pattern = Pattern.quote(source);
-    pattern = pattern.replaceAll(NT_REGEX, "\\\\E.+\\\\Q");
-    pattern = pattern.replaceAll("\\\\Q\\\\E", "");
-    pattern = "(?:^|\\s)" + pattern + "(?:$|\\s)";
-    return Pattern.compile(pattern);
-  }
-
-  /**
-   * Matches the string representation of the rule's source side against a sentence
-   * 
-   * @param sentence
-   * @return
-   */
-  public boolean matches(Sentence sentence) {
-    boolean match = getPattern().matcher(sentence.fullSource()).find();
-    // System.err.println(String.format("match(%s,%s) = %s", Pattern.quote(getFrenchWords()),
-    // sentence.annotatedSource(), match));
-    return match;
-  }
-
-  /**
-   * This comparator is used for sorting the rules during cube pruning. An estimate of the cost
-   * of each rule is computed and used to sort. 
-   */
-  public static Comparator<Rule> EstimatedCostComparator = new Comparator<Rule>() {
-    public int compare(Rule rule1, Rule rule2) {
-      float cost1 = rule1.getEstimatedCost();
-      float cost2 = rule2.getEstimatedCost();
-      return Float.compare(cost2,  cost1);
-    }
-  };
-  
-  public int compare(Rule rule1, Rule rule2) {
-    return EstimatedCostComparator.compare(rule1, rule2);
-  }
-
-  public int compareTo(Rule other) {
-    return EstimatedCostComparator.compare(this, other);
-  }
-
-  public String getRuleString() {
-    return String.format("%s -> %s ||| %s", Vocabulary.word(getLHS()), getFrenchWords(), getEnglishWords());
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/ff/tm/RuleCollection.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/tm/RuleCollection.java b/src/joshua/decoder/ff/tm/RuleCollection.java
deleted file mode 100644
index 6812fd5..0000000
--- a/src/joshua/decoder/ff/tm/RuleCollection.java
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.ff.tm;
-
-import java.util.List;
-
-import joshua.decoder.ff.FeatureFunction;
-
-/**
- * A RuleCollection represents a set of rules that share the same source side (and hence the same
- * arity). These rules are likely stored together in a Trie data structure, although the interface
- * allows any implementation to be used.
- * 
- * @author Zhifei Li
- * @author Lane Schwartz
- * @author Matt Post <po...@cs.jhu.edu>
- */
-public interface RuleCollection {
-
-  /**
-   * Returns true if the rules are sorted. This is used to allow rules to be sorted in an amortized
-   * fashion; rather than sorting all trie nodes when the grammar is originally loaded, we sort them
-   * only as the decoder actually needs them.
-   */
-  boolean isSorted();
-
-  /**
-   * This returns a list of the rules, sorting them if necessary. 
-   * 
-   * Implementations of this function should be synchronized.  
-   */
-  List<Rule> getSortedRules(List<FeatureFunction> models);
-
-  /**
-   * Get the list of rules. There are no guarantees about whether they're sorted or not.
-   */
-  List<Rule> getRules();
-
-  /**
-   * Gets the source side for all rules in this RuleCollection. This source side is the same for all
-   * the rules in the RuleCollection.
-   * 
-   * @return the (common) source side for all rules in this RuleCollection
-   */
-  int[] getSourceSide();
-
-  /**
-   * Gets the number of nonterminals in the source side of the rules in this RuleCollection. The
-   * source side is the same for all the rules in the RuleCollection, so the arity will also be the
-   * same for all of these rules.
-   * 
-   * @return the (common) number of nonterminals in the source side of the rules in this
-   *         RuleCollection
-   */
-  int getArity();
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/ff/tm/SentenceFilteredGrammar.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/tm/SentenceFilteredGrammar.java b/src/joshua/decoder/ff/tm/SentenceFilteredGrammar.java
deleted file mode 100644
index d540727..0000000
--- a/src/joshua/decoder/ff/tm/SentenceFilteredGrammar.java
+++ /dev/null
@@ -1,373 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.ff.tm;
-
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.Map.Entry;
-
-import joshua.decoder.ff.tm.hash_based.ExtensionIterator;
-import joshua.decoder.ff.tm.hash_based.MemoryBasedBatchGrammar;
-import joshua.decoder.segment_file.Sentence;
-
-/**
- * This class implements dynamic sentence-level filtering. This is accomplished with a parallel
- * trie, a subset of the original trie, that only contains trie paths that are reachable from
- * traversals of the current sentence.
- * 
- * @author Matt Post <po...@cs.jhu.edu>
- */
-public class SentenceFilteredGrammar extends MemoryBasedBatchGrammar {
-  private AbstractGrammar baseGrammar;
-  private SentenceFilteredTrie filteredTrie;
-  private int[] tokens;
-  private Sentence sentence;
-
-  /**
-   * Construct a new sentence-filtered grammar. The main work is done in the enclosed trie (obtained
-   * from the base grammar, which contains the complete grammar).
-   * 
-   * @param baseGrammar
-   * @param sentence
-   */
-  SentenceFilteredGrammar(AbstractGrammar baseGrammar, Sentence sentence) {
-    super(baseGrammar.joshuaConfiguration);
-    this.baseGrammar = baseGrammar;
-    this.sentence = sentence;
-    this.tokens = sentence.getWordIDs();
-
-    int origCount = getNumRules(baseGrammar.getTrieRoot());
-    long startTime = System.currentTimeMillis();
-
-    /* Filter the rules; returns non-null object */
-    this.filteredTrie = filter(baseGrammar.getTrieRoot());
-    int filteredCount = getNumRules();
-
-    float seconds = (System.currentTimeMillis() - startTime) / 1000.0f;
-
-    System.err.println(String.format(
-        "Sentence-level filtering of sentence %d (%d -> %d rules) in %.3f seconds", sentence.id(),
-        origCount, filteredCount, seconds));
-  }
-
-  @Override
-  public Trie getTrieRoot() {
-    return filteredTrie;
-  }
-
-  /**
-   * This function is poorly named: it doesn't mean whether a rule exists in the grammar for the
-   * current span, but whether the grammar is permitted to apply rules to the current span (a
-   * grammar-level parameter). As such we can just chain to the underlying grammar.
-   */
-  @Override
-  public boolean hasRuleForSpan(int startIndex, int endIndex, int pathLength) {
-    return baseGrammar.hasRuleForSpan(startIndex, endIndex, pathLength);
-  }
-
-  @Override
-  public int getNumRules() {
-    return getNumRules(getTrieRoot());
-  }
-
-  /**
-   * A convenience function that counts the number of rules in a grammar's trie.
-   * 
-   * @param node
-   * @return
-   */
-  public int getNumRules(Trie node) {
-    int numRules = 0;
-    if (node != null) {
-      if (node.getRuleCollection() != null)
-        numRules += node.getRuleCollection().getRules().size();
-
-      if (node.getExtensions() != null)
-        for (Trie child : node.getExtensions())
-          numRules += getNumRules(child);
-    }
-
-    return numRules;
-  }
-
-  @Override
-  public Rule constructManualRule(int lhs, int[] sourceWords, int[] targetWords, float[] scores,
-      int aritity) {
-    // TODO Auto-generated method stub
-    return null;
-  }
-
-  @Override
-  public boolean isRegexpGrammar() {
-    return false;
-  }
-
-  /**
-   * What is the algorithm?
-   * 
-   * Take the first word of the sentence, and start at the root of the trie. There are two things to
-   * consider: (a) word matches and (b) nonterminal matches.
-   * 
-   * For a word match, simply follow that arc along the trie. We create a parallel arc in our
-   * filtered grammar to represent it. Each arc in the filtered trie knows about its
-   * corresponding/underlying node in the unfiltered grammar trie.
-   * 
-   * A nonterminal is always permitted to match. The question then is how much of the input sentence
-   * we imagine it consumed. The answer is that it could have been any amount. So the recursive call
-   * has to be a set of calls, one each to the next trie node with different lengths of the sentence
-   * remaining.
-   * 
-   * A problem occurs when we have multiple sequential nonterminals. For scope-3 grammars, there can
-   * be four sequential nonterminals (in the case when they are grounded by terminals on both ends
-   * of the nonterminal chain). We'd like to avoid looking at all possible ways to split up the
-   * subsequence, because with respect to filtering rules, they are all the same.
-   * 
-   * We accomplish this with the following restriction: for purposes of grammar filtering, only the
-   * first in a sequence of nonterminal traversals can consume more than one word. Each of the
-   * subsequent ones would have to consume just one word. We then just have to record in the
-   * recursive call whether the last traversal was a nonterminal or not.
-   * 
-   * @return the root of the filtered trie
-   */
-  private SentenceFilteredTrie filter(Trie unfilteredTrieRoot) {
-    SentenceFilteredTrie filteredTrieRoot = new SentenceFilteredTrie(unfilteredTrieRoot);
-
-    // System.err.println(String.format("FILTERING TO SENTENCE\n  %s\n",
-    // Vocabulary.getWords(tokens)));
-
-    /*
-     * The root of the trie is where rule applications start, so we simply try all possible
-     * positions in the sentence.
-     */
-    for (int i = 0; i < tokens.length; i++) {
-      filter(i, filteredTrieRoot, false);
-    }
-
-    return filteredTrieRoot;
-  }
-
-  /**
-   * Matches rules against the sentence. Intelligently handles chains of sequential nonterminals.
-   * Marks arcs that are traversable for this sentence.
-   * 
-   * @param i the position in the sentence to start matching
-   * @param trie the trie node to match against
-   * @param lastWasNT true if the match that brought us here was against a nonterminal
-   */
-  private void filter(int i, SentenceFilteredTrie trieNode, boolean lastWasNT) {
-    if (i >= tokens.length)
-      return;
-
-    /* Make sure the underlying unfiltered node has children. */
-    Trie unfilteredTrieNode = trieNode.unfilteredTrieNode;
-    if (unfilteredTrieNode.getChildren() == null) {
-      // trieNode.path.retreat();
-      return;
-    }
-
-    /* Match a word */
-    Trie trie = unfilteredTrieNode.match(tokens[i]);
-    if (trie != null) {
-      /*
-       * The current filtered node might already have an arc for this label. If so, retrieve it
-       * (since we still need to follow it); if not, create it.
-       */
-      SentenceFilteredTrie nextFilteredTrie = trieNode.match(tokens[i]);
-      if (nextFilteredTrie == null) {
-        nextFilteredTrie = new SentenceFilteredTrie(trie);
-        trieNode.children.put(tokens[i], nextFilteredTrie);
-      }
-
-      /*
-       * Now continue, trying to match the child node against the next position in the sentence. The
-       * third argument records that this match was not against a nonterminal.
-       */
-      filter(i + 1, nextFilteredTrie, false);
-    }
-
-    /*
-     * Now we attempt to match nonterminals. Any nonterminal is permitted to match any region of the
-     * sentence, up to the maximum span for that grammar. So we enumerate all children of the
-     * current (unfiltered) trie grammar node, looking for nonterminals (items whose label value is
-     * less than 0), then recurse.
-     * 
-     * There is one subtlety. Adjacent nonterminals in a grammar rule can match a span (i, j) in (j
-     * - i - 1) ways, but for purposes of determining whether a rule fits, this is all wasted
-     * effort. To handle this, we allow the first nonterminal in a sequence to record 1, 2, 3, ...
-     * terminals (up to the grammar's span limit, or the rest of the sentence, whichever is
-     * shorter). Subsequent adjacent nonterminals are permitted to consume only a single terminal.
-     */
-    HashMap<Integer, ? extends Trie> children = unfilteredTrieNode.getChildren();
-    if (children != null) {
-      for (int label : children.keySet()) {
-        if (label < 0) {
-          SentenceFilteredTrie nextFilteredTrie = trieNode.match(label);
-          if (nextFilteredTrie == null) {
-            nextFilteredTrie = new SentenceFilteredTrie(unfilteredTrieNode.match(label));
-            trieNode.children.put(label, nextFilteredTrie);
-          }
-
-          /*
-           * Recurse. If the last match was a nonterminal, we can only consume one more token.
-           * 
-           * TODO: This goes too far by looking at the whole sentence; each grammar has a maximum
-           * span limit which should be consulted. What we should be doing is passing the point
-           * where we started matching the current sentence, so we can apply this span limit, which
-           * is easily accessible (baseGrammar.spanLimit).
-           */
-          int maxJ = lastWasNT ? (i + 1) : tokens.length;
-          for (int j = i + 1; j <= maxJ; j++) {
-            filter(j, nextFilteredTrie, true);
-          }
-        }
-      }
-    }
-  }
-
-  /**
-   * Alternate filter that uses regular expressions, walking the grammar trie and matching the
-   * source side of each rule collection against the input sentence. Failed matches are discarded,
-   * and trie nodes extending from that position need not be explored.
-   * 
-   * @return the root of the filtered trie if any rules were retained, otherwise null
-   */
-  @SuppressWarnings("unused")
-  private SentenceFilteredTrie filter_regexp(Trie unfilteredTrie) {
-    SentenceFilteredTrie trie = null;
-
-    /* Case 1: keep the trie node if it has a rule collection that matches the sentence */
-    if (unfilteredTrie.hasRules())
-      if (matchesSentence(unfilteredTrie))
-        trie = new SentenceFilteredTrie(unfilteredTrie);
-      else
-        return null;
-
-    /* Case 2: keep the trie node if it has children who have valid rule collections */
-    if (unfilteredTrie.hasExtensions())
-      for (Entry<Integer, ? extends Trie> arc : unfilteredTrie.getChildren().entrySet()) {
-        Trie unfilteredChildTrie = arc.getValue();
-        SentenceFilteredTrie nextTrie = filter_regexp(unfilteredChildTrie);
-        if (nextTrie != null) {
-          if (trie == null)
-            trie = new SentenceFilteredTrie(unfilteredTrie);
-          trie.children.put(arc.getKey(), nextTrie);
-        }
-      }
-
-    return trie;
-  }
-
-  private boolean matchesSentence(Trie childTrie) {
-    Rule rule = childTrie.getRuleCollection().getRules().get(0);
-    return rule.matches(sentence);
-  }
-
-  /**
-   * Implements a filtered trie, by sitting on top of a base trie and annotating nodes that match
-   * the given input sentence.
-   * 
-   * @author Matt Post <po...@cs.jhu.edu>
-   * 
-   */
-  public class SentenceFilteredTrie implements Trie {
-
-    /* The underlying unfiltered trie node. */
-    private Trie unfilteredTrieNode;
-
-    /* The child nodes in the filtered trie. */
-    private HashMap<Integer, SentenceFilteredTrie> children = null;
-
-    /**
-     * Constructor.
-     * 
-     * @param trieRoot
-     * @param source
-     */
-    public SentenceFilteredTrie(Trie unfilteredTrieNode) {
-      this.unfilteredTrieNode = unfilteredTrieNode;
-      this.children = new HashMap<Integer, SentenceFilteredTrie>();
-    }
-
-    @Override
-    public SentenceFilteredTrie match(int wordID) {
-      if (children != null)
-        return children.get(wordID);
-      return null;
-    }
-
-    @Override
-    public boolean hasExtensions() {
-      return children != null;
-    }
-
-    @Override
-    public Collection<SentenceFilteredTrie> getExtensions() {
-      if (children != null)
-        return children.values();
-
-      return null;
-    }
-
-    @Override
-    public HashMap<Integer, SentenceFilteredTrie> getChildren() {
-      return children;
-    }
-
-    @Override
-    public boolean hasRules() {
-      // Chain to the underlying unfiltered node.
-      return unfilteredTrieNode.hasRules();
-    }
-
-    @Override
-    public RuleCollection getRuleCollection() {
-      // Chain to the underlying unfiltered node, since the rule collection just varies by target
-      // side.
-      return unfilteredTrieNode.getRuleCollection();
-    }
-
-    /**
-     * Counts the rules stored at this node plus, recursively, those at all filtered children.
-     * 
-     * @return the number of rules rooted at this node.
-     */
-    public int getNumRules() {
-      int numRules = 0;
-      // Count the rules stored at this node, not at the enclosing grammar's trie root.
-      RuleCollection rules = getRuleCollection();
-      if (rules != null)
-        numRules += rules.getRules().size();
-      for (SentenceFilteredTrie node : getExtensions())
-        numRules += node.getNumRules();
-
-      return numRules;
-    }
-
-    @Override
-    public Iterator<Integer> getTerminalExtensionIterator() {
-      return new ExtensionIterator(children, true);
-    }
-
-    @Override
-    public Iterator<Integer> getNonterminalExtensionIterator() {
-      return new ExtensionIterator(children, false);
-    }
-  }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/ff/tm/Trie.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/tm/Trie.java b/src/joshua/decoder/ff/tm/Trie.java
deleted file mode 100644
index df481d6..0000000
--- a/src/joshua/decoder/ff/tm/Trie.java
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.ff.tm;
-
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.Iterator;
-
-/**
- * An interface for trie-like data structures.
- * 
- * @author wren ng thornton <wr...@users.sourceforge.net>
- * @author Zhifei Li, <zh...@gmail.com>
- */
-public interface Trie {
-
-  /**
-   * Traverse one ply further down the trie. If there is no match, the result is null.
-   * 
-   * @param wordID
-   * @return Child node of this trie
-   */
-  Trie match(int wordID);
-
-  
-  /**
-   * Returns whether {@link #match(int)} could succeed for any symbol.
-   * 
-   * @return <code>true</code> if {@link #match(int)} could succeed for some symbol,
-   *         <code>false</code> otherwise
-   */
-  boolean hasExtensions();
-
-
-  /**
-   * If the trie node has extensions, then return a list of extended trie nodes, otherwise return
-   * null.
-   * 
-   * @return A list of extended <code>Trie</code> nodes if this node has extensions,
-   *         <code>null</code>
-   *         otherwise
-   */
-  Collection<? extends Trie> getExtensions();
-
-
-  /**
-   * If the trie node has extensions, get a list of their labels.
-   * 
-   * @return
-   */
-  HashMap<Integer,? extends Trie> getChildren();
-
-  /**
-   * Returns an iterator over the trie node's extensions with terminal labels.
-   * 
-   * @return
-   */
-  Iterator<Integer> getTerminalExtensionIterator();
-  
-  /**
-   * Returns an iterator over the trie node's extensions with nonterminal labels.
-   * 
-   * @return
-   */
-  Iterator<Integer> getNonterminalExtensionIterator();
-  
-  
-  /**
-   * Gets whether the current node/state is a "final state" that has matching rules.
-   * 
-   * @return <code>true</code> if the current node/state is a "final state" that has matching rules,
-   *         <code>false</code> otherwise
-   */
-  boolean hasRules();
-
-
-  /**
-   * Retrieve the rules at the current node/state. The implementation of this method must adhere to
-   * the following laws:
-   * 
-   * <ol>
-   * <li>The return value is always non-null. The collection may be empty however.</li>
-   * <li>The collection must be empty if hasRules() is false, and must be non-empty if hasRules() is
-   * true.</li>
-   * <li>The collection must be sorted (at least as used by TMGrammar)</li>
-   * </ol>
-   */
-  RuleCollection getRuleCollection();
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/ff/tm/UnsortedRuleCollectionException.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/tm/UnsortedRuleCollectionException.java b/src/joshua/decoder/ff/tm/UnsortedRuleCollectionException.java
deleted file mode 100644
index 71fe6b2..0000000
--- a/src/joshua/decoder/ff/tm/UnsortedRuleCollectionException.java
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.ff.tm;
-
-/**
- * Signals that a collection of rules is not sorted in the way required by the feature functions
- * currently in effect. Being a <code>RuntimeException</code>, it is unchecked and need not be
- * declared by the methods that throw it.
- *
- * @author Lane Schwartz
- */
-public class UnsortedRuleCollectionException extends RuntimeException {
-
-  /** Version identifier used by Java serialization. */
-  private static final long serialVersionUID = -4819014771607378835L;
-
-  /**
-   * Creates the exception with a detail message.
-   *
-   * @param message the detail message, later available through <code>getMessage()</code>
-   */
-  public UnsortedRuleCollectionException(final String message) {
-    super(message);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/ff/tm/format/HieroFormatReader.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/tm/format/HieroFormatReader.java b/src/joshua/decoder/ff/tm/format/HieroFormatReader.java
deleted file mode 100644
index a47813d..0000000
--- a/src/joshua/decoder/ff/tm/format/HieroFormatReader.java
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.ff.tm.format;
-
-import joshua.corpus.Vocabulary;
-import joshua.decoder.ff.tm.GrammarReader;
-import joshua.decoder.ff.tm.Rule;
-
-/**
- * This class implements reading files in the format defined by David Chiang for Hiero.
- * <p>
- * Each line is split on <code>" ||| "</code> (a <code>|||</code> surrounded by single whitespace
- * characters) into the following fields:
- *
- * <pre>
- *   LHS ||| source side ||| target side [||| features [||| alignment]]
- * </pre>
- *
- * The first three fields are mandatory; the feature and alignment fields are optional.
- *
- * @author Unknown
- * @author Matt Post <po...@cs.jhu.edu>
- */
-
-public class HieroFormatReader extends GrammarReader<Rule> {
-
-  // NOTE(review): these names are not declared in this class, so they appear to be static fields
-  // inherited from GrammarReader and therefore shared with every other reader that assigns them
-  // in its own static initializer (e.g. SamtFormatReader) -- confirm that this is intended.
-  static {
-    // " ||| ": three pipes with one whitespace character on either side
-    fieldDelimiter = "\\s\\|{3}\\s";
-    // a bracketed label followed by a comma and an optional index, e.g. "[X,1]"
-    nonTerminalRegEx = "^\\[[^\\s]+\\,[0-9]*\\]$";
-    // the ",<index>" suffix of a nonterminal
-    nonTerminalCleanRegEx = ",[0-9\\s]+";
-    description = "Original Hiero format";
-  }
-
-  /**
-   * Creates a reader that is not bound to a grammar file.
-   */
-  public HieroFormatReader() {
-    super();
-  }
-
-  /**
-   * Creates a reader for the given grammar file.
-   *
-   * @param grammarFile the grammar file, passed on unchanged to <code>GrammarReader</code>
-   */
-  public HieroFormatReader(String grammarFile) {
-    super(grammarFile);
-  }
-
-  /**
-   * Parses one line of a Hiero grammar into a rule.
-   *
-   * @param line a grammar line with at least the LHS, source-side and target-side fields
-   * @return the rule described by the line
-   * @throws RuntimeException if the line has fewer than three fields
-   */
-  @Override
-  public Rule parseLine(String line) {
-    String[] fields = line.split(fieldDelimiter);
-    if (fields.length < 3) {
-      // Only LHS, source and target are required; features and alignment are optional.
-      throw new RuntimeException(
-          String.format("Rule '%s' does not have at least three fields", line));
-    }
-
-    int lhs = Vocabulary.id(cleanNonTerminal(fields[0]));
-
-    int arity = 0;
-    // foreign side: every nonterminal increases the arity of the rule
-    String[] foreignWords = fields[1].split("\\s+");
-    int[] french = new int[foreignWords.length];
-    for (int i = 0; i < foreignWords.length; i++) {
-      french[i] = Vocabulary.id(foreignWords[i]);
-      if (Vocabulary.nt(french[i])) {
-        arity++;
-        french[i] = cleanNonTerminal(french[i]);
-      }
-    }
-
-    // English side: nonterminals are stored as the negated nonterminal index
-    String[] englishWords = fields[2].split("\\s+");
-    int[] english = new int[englishWords.length];
-    for (int i = 0; i < englishWords.length; i++) {
-      english[i] = Vocabulary.id(englishWords[i]);
-      if (Vocabulary.nt(english[i])) {
-        english[i] = -Vocabulary.getTargetNonterminalIndex(english[i]);
-      }
-    }
-
-    String sparse_features = (fields.length > 3 ? fields[3] : "");
-    String alignment = (fields.length > 4) ? fields[4] : null;
-
-    return new Rule(lhs, french, english, sparse_features, arity, alignment);
-  }
-
-  /**
-   * Formats a rule as <code>LHS ||| source ||| target ||| features</code>, using vocabulary words
-   * instead of integer ids.
-   *
-   * @param rule the rule to format
-   * @return the textual form of the rule, including its feature vector
-   */
-  @Override
-  public String toWords(Rule rule) {
-    StringBuilder sb = new StringBuilder();
-    sb.append(Vocabulary.word(rule.getLHS()));
-    sb.append(" ||| ");
-    sb.append(Vocabulary.getWords(rule.getFrench()));
-    sb.append(" ||| ");
-    sb.append(Vocabulary.getWords(rule.getEnglish()));
-    sb.append(" |||");
-    sb.append(" " + rule.getFeatureVector());
-
-    return sb.toString();
-  }
-
-  /**
-   * Formats a rule as <code>LHS ||| source ||| target |||</code>, leaving out the feature scores.
-   *
-   * @param rule the rule to format
-   * @return the textual form of the rule without its features
-   */
-  @Override
-  public String toWordsWithoutFeatureScores(Rule rule) {
-    StringBuilder sb = new StringBuilder();
-    // Print the LHS as a word, as toWords() does, rather than as its raw vocabulary id.
-    sb.append(Vocabulary.word(rule.getLHS()));
-    sb.append(" ||| ");
-    sb.append(Vocabulary.getWords(rule.getFrench()));
-    sb.append(" ||| ");
-    sb.append(Vocabulary.getWords(rule.getEnglish()));
-    sb.append(" |||");
-
-    return sb.toString();
-  }
-
-
-  /**
-   * Returns the regular expression currently used to split a grammar line into fields.
-   *
-   * @return the field delimiter regular expression
-   */
-  public static String getFieldDelimiter() {
-    return fieldDelimiter;
-  }
-
-  /**
-   * Tests whether a token is a nonterminal by delegating to
-   * <code>GrammarReader.isNonTerminal</code>.
-   *
-   * @param word the token to test
-   * @return the result of <code>GrammarReader.isNonTerminal(word)</code>
-   */
-  public static boolean isNonTerminal(final String word) {
-    return GrammarReader.isNonTerminal(word);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/ff/tm/format/PhraseFormatReader.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/tm/format/PhraseFormatReader.java b/src/joshua/decoder/ff/tm/format/PhraseFormatReader.java
deleted file mode 100644
index be4d522..0000000
--- a/src/joshua/decoder/ff/tm/format/PhraseFormatReader.java
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.ff.tm.format;
-
-import joshua.corpus.Vocabulary;
-import joshua.decoder.ff.tm.PhraseRule;
-import joshua.util.io.LineReader;
-
-/**
- * This class reads in the Moses phrase table format, with support for the source and target side,
- * list of features, and word alignments. It works by simply casting the phrase-based rules to
- * left-branching hierarchical rules: a nonterminal is prepended to both the source and the target
- * side of every phrase pair. The field delimiter is the one configured for its parent class,
- * {@link HieroFormatReader}.
- *
- * There is also a tool to convert the grammars directly, so that they can be suitably packed. Usage:
- *
- * <pre>
- *     cat PHRASE_TABLE | java -cp $JOSHUA/class joshua.decoder.ff.tm.format.PhraseFormatReader > grammar
- * </pre>
- *
- * @author Matt Post <po...@cs.jhu.edu>
- *
- */
-
-public class PhraseFormatReader extends HieroFormatReader {
-
-  /* Vocabulary id of "[X]", used as the LHS and as the leading source-side nonterminal */
-  private final int lhs;
-
-  /* Whether we are reading a Moses phrase table or Thrax phrase table */
-  private final boolean moses_format;
-
-  /**
-   * Creates a reader for the given phrase table.
-   *
-   * @param grammarFile the phrase table file, passed on unchanged to the parent class
-   * @param is_moses <code>true</code> if lines are in Moses format (no leading LHS field),
-   *        <code>false</code> if they are in Thrax format (leading LHS field that is skipped)
-   */
-  public PhraseFormatReader(String grammarFile, boolean is_moses) {
-    super(grammarFile);
-    this.lhs = Vocabulary.id("[X]");
-    this.moses_format = is_moses;
-  }
-
-  /**
-   * Creates a reader that is not bound to a file and that expects Thrax-format lines.
-   */
-  public PhraseFormatReader() {
-    super();
-    this.lhs = Vocabulary.id("[X]");
-    this.moses_format = false;
-  }
-
-  /**
-   * When dealing with Moses format, this munges a Moses-style phrase table into a grammar.
-   *
-   *    mots francaises ||| French words ||| 1 2 3 ||| 0-1 1-0
-   *
-   * becomes
-   *
-   *    [X] ||| [X,1] mots francaises ||| [X,1] French words ||| 1 2 3  ||| 0-1 1-0
-   *
-   * For thrax-extracted phrasal grammars, it transforms
-   *
-   *    [X] ||| mots francaises ||| French words ||| 1 2 3 ||| 0-1 1-0
-   *
-   * into
-   *
-   *    [X] ||| [X,1] mots francaises ||| [X,1] French words ||| 1 2 3 ||| 0-1 1-0
-   *
-   * @param line one line of the phrase table
-   * @return the phrase rule described by the line, always with arity 1
-   * @throws RuntimeException if the line lacks the source, target or feature field
-   */
-  @Override
-  public PhraseRule parseLine(String line) {
-    String[] fields = line.split(fieldDelimiter);
-
-    int arity = 1;
-
-    /* For Thrax phrase-based grammars, skip over the beginning nonterminal */
-    int fieldIndex = moses_format ? 0 : 1;
-
-    // Source, target and features are mandatory; fail with a readable message instead of an
-    // ArrayIndexOutOfBoundsException when one of them is missing.
-    int required = fieldIndex + 3;
-    if (fields.length < required) {
-      throw new RuntimeException(String.format(
-          "Phrase rule '%s' has %d fields but at least %d are required", line, fields.length,
-          required));
-    }
-
-    // foreign side, with the nonterminal prepended
-    String[] foreignWords = fields[fieldIndex].split("\\s+");
-    int[] french = new int[foreignWords.length + 1];
-    french[0] = lhs;
-    for (int i = 0; i < foreignWords.length; i++) {
-      french[i+1] = Vocabulary.id(foreignWords[i]);
-    }
-
-    // English side, with -1 (the first nonterminal) prepended
-    fieldIndex++;
-    String[] englishWords = fields[fieldIndex].split("\\s+");
-    int[] english = new int[englishWords.length + 1];
-    english[0] = -1;
-    for (int i = 0; i < englishWords.length; i++) {
-      english[i+1] = Vocabulary.id(englishWords[i]);
-    }
-
-    // feature values, passed on verbatim
-    fieldIndex++;
-    String sparse_features = fields[fieldIndex];
-
-    // alignments (optional)
-    fieldIndex++;
-    String alignment = (fields.length > fieldIndex) ? fields[fieldIndex] : null;
-
-    return new PhraseRule(lhs, french, english, sparse_features, arity, alignment);
-  }
-
-  /**
-   * Converts a Moses phrase table to a Joshua grammar: reads phrase table lines from standard
-   * input and prints the text format of each resulting rule to standard output.
-   *
-   * NOTE(review): the reader is built with the no-argument constructor, which leaves
-   * <code>moses_format</code> false, so each line is parsed as Thrax format (the first field is
-   * skipped). Confirm whether Moses-format parsing was intended here.
-   *
-   * @param args ignored
-   */
-  public static void main(String[] args) {
-    PhraseFormatReader reader = new PhraseFormatReader();
-    for (String line: new LineReader(System.in)) {
-      PhraseRule rule = reader.parseLine(line);
-      System.out.println(rule.textFormat());
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/ff/tm/format/SamtFormatReader.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/tm/format/SamtFormatReader.java b/src/joshua/decoder/ff/tm/format/SamtFormatReader.java
deleted file mode 100644
index 6539d38..0000000
--- a/src/joshua/decoder/ff/tm/format/SamtFormatReader.java
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.ff.tm.format;
-
-import java.util.logging.Logger;
-
-import joshua.corpus.Vocabulary;
-import joshua.decoder.ff.tm.Rule;
-import joshua.decoder.ff.tm.GrammarReader;
-
-public class SamtFormatReader extends GrammarReader<Rule> {
-
-  private static final Logger logger = Logger.getLogger(SamtFormatReader.class.getName());
-
-  private static final String samtNonTerminalMarkup;
-
-  static {
-    fieldDelimiter = "#";
-    nonTerminalRegEx = "^@[^\\s]+";
-    nonTerminalCleanRegEx = ",[0-9\\s]+";
-
-    samtNonTerminalMarkup = "@";
-
-    description = "Original SAMT format";
-  }
-
-  public SamtFormatReader(String grammarFile) {
-    super(grammarFile);
-  }
-
-  // Format example:
-  // @VZ-HD @APPR-DA+ART-DA minutes#@2 protokoll @1#@PP-MO+VZ-HD#0 1 1 -0 0.5 -0
-
-  @Override
-  protected Rule parseLine(String line) {
-    String[] fields = line.split(fieldDelimiter);
-    if (fields.length != 4) {
-      logger.severe("Rule line does not have four fields: " + line);
-      logger.severe("Skipped.");
-      return null;
-    }
-
-    int lhs = Vocabulary.id(adaptNonTerminalMarkup(fields[2]));
-
-    int arity = 0;
-
-    // foreign side
-    String[] foreignWords = fields[0].split("\\s+");
-    int[] french = new int[foreignWords.length];
-    for (int i = 0; i < foreignWords.length; i++) {
-      if (isNonTerminal(foreignWords[i])) {
-        arity++;
-        french[i] = Vocabulary.id(adaptNonTerminalMarkup(foreignWords[i], arity));
-      } else {
-        french[i] = Vocabulary.id(foreignWords[i]);
-      }
-    }
-
-    // english side
-    String[] englishWords = fields[1].split("\\s+");
-    int[] english = new int[englishWords.length];
-    for (int i = 0; i < englishWords.length; i++) {
-      if (isNonTerminal(englishWords[i])) {
-        english[i] = -Integer.parseInt(cleanSamtNonTerminal(englishWords[i]));
-      } else {
-        english[i] = Vocabulary.id(englishWords[i]);
-      }
-    }
-
-    // feature scores
-    String sparseFeatures = fields[3];
-
-    return new Rule(lhs, french, english, sparseFeatures, arity);
-  }
-
-  protected String cleanSamtNonTerminal(String word) {
-    // changes SAMT markup to Hiero-style
-    return word.replaceAll(samtNonTerminalMarkup, "");
-  }
-
-  protected String adaptNonTerminalMarkup(String word) {
-    // changes SAMT markup to Hiero-style
-    return "["
-        + word.replaceAll(",", "_COMMA_").replaceAll("\\$", "_DOLLAR_")
-            .replaceAll(samtNonTerminalMarkup, "") + "]";
-  }
-
-  protected String adaptNonTerminalMarkup(String word, int ntIndex) {
-    // changes SAMT markup to Hiero-style
-    return "["
-        + word.replaceAll(",", "_COMMA_").replaceAll("\\$", "_DOLLAR_")
-            .replaceAll(samtNonTerminalMarkup, "") + "," + ntIndex + "]";
-  }
-
-  @Override
-  public String toWords(Rule rule) {
-    StringBuffer sb = new StringBuffer();
-    sb.append(Vocabulary.word(rule.getLHS()));
-    sb.append(" ||| ");
-    sb.append(Vocabulary.getWords(rule.getFrench()));
-    sb.append(" ||| ");
-    sb.append(Vocabulary.getWords(rule.getEnglish()));
-    sb.append(" ||| " + rule.getFeatureString());
-
-    return sb.toString();
-  }
-
-  @Override
-  public String toWordsWithoutFeatureScores(Rule rule) {
-    StringBuffer sb = new StringBuffer();
-    sb.append(Vocabulary.word(rule.getLHS()));
-    sb.append(" ||| ");
-    sb.append(Vocabulary.getWords(rule.getFrench()));
-    sb.append(" ||| ");
-    sb.append(Vocabulary.getWords(rule.getEnglish()));
-    sb.append(" |||");
-
-    return sb.toString();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/ff/tm/hash_based/ExtensionIterator.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/tm/hash_based/ExtensionIterator.java b/src/joshua/decoder/ff/tm/hash_based/ExtensionIterator.java
deleted file mode 100644
index d6b5b97..0000000
--- a/src/joshua/decoder/ff/tm/hash_based/ExtensionIterator.java
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.ff.tm.hash_based;
-
-import java.util.HashMap;
-import java.util.Iterator;
-
-/**
- * Iterates over the <code>Integer</code> keys of a map, returning only the keys of one sign:
- * strictly positive keys when constructed with <code>terminal == true</code>, strictly negative
- * keys otherwise. A key of zero is never returned. The order of the keys is that of the map's key
- * set. Removal is not supported.
- */
-public class ExtensionIterator implements Iterator<Integer> {
-
-  /** Iterator over all keys of the map; <code>null</code> if no map was given. */
-  private final Iterator<Integer> iterator;
-  /** Selects positive keys (<code>true</code>) or negative keys (<code>false</code>). */
-  private final boolean terminal;
-  /** Set once there are no more matching keys. */
-  private boolean done;
-  /** The matching key that the next call to {@link #next()} returns; valid while not done. */
-  private int next;
-
-  /**
-   * Creates an iterator over the matching keys of the given map.
-   *
-   * @param map the map whose keys are iterated; <code>null</code> yields an empty iterator
-   * @param terminal <code>true</code> to iterate over the positive keys, <code>false</code> to
-   *        iterate over the negative keys
-   */
-  public ExtensionIterator(HashMap<Integer, ?> map, boolean terminal) {
-    this.terminal = terminal;
-    if (map == null) {
-      this.iterator = null;
-      this.done = true;
-    } else {
-      this.iterator = map.keySet().iterator();
-      this.done = false;
-      // Look ahead so that hasNext() can answer without advancing.
-      forward();
-    }
-  }
-
-  /**
-   * Advances to the next key of the requested sign and stores it in <code>next</code>, or marks
-   * the iterator as done if no such key is left.
-   */
-  private void forward() {
-    if (done) {
-      return;
-    }
-    while (iterator.hasNext()) {
-      int candidate = iterator.next();
-      if ((terminal && candidate > 0) || (!terminal && candidate < 0)) {
-        next = candidate;
-        return;
-      }
-    }
-    done = true;
-  }
-
-  /**
-   * @return <code>true</code> if another matching key is available
-   */
-  @Override
-  public boolean hasNext() {
-    return !done;
-  }
-
-  /**
-   * Returns the next matching key.
-   *
-   * @return the next key of the requested sign
-   * @throws java.util.NoSuchElementException if there are no more matching keys
-   */
-  @Override
-  public Integer next() {
-    if (done) {
-      // Required by the Iterator contract; it is a RuntimeException, so callers that caught the
-      // previously thrown bare RuntimeException keep working.
-      throw new java.util.NoSuchElementException();
-    }
-    int consumed = next;
-    forward();
-    return consumed;
-  }
-
-  /**
-   * Not supported.
-   *
-   * @throws UnsupportedOperationException always
-   */
-  @Override
-  public void remove() {
-    throw new UnsupportedOperationException();
-  }
-}