You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by le...@apache.org on 2016/05/16 06:26:54 UTC

[38/66] [partial] incubator-joshua git commit: JOSHUA-252 Make it possible to use Maven to build Joshua

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/phrase/PhraseTable.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/phrase/PhraseTable.java b/src/joshua/decoder/phrase/PhraseTable.java
deleted file mode 100644
index bcf7135..0000000
--- a/src/joshua/decoder/phrase/PhraseTable.java
+++ /dev/null
@@ -1,201 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.phrase;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.List;
-
-import joshua.corpus.Vocabulary;
-import joshua.decoder.JoshuaConfiguration;
-import joshua.decoder.ff.FeatureFunction;
-import joshua.decoder.ff.tm.Grammar;
-import joshua.decoder.ff.tm.Rule;
-import joshua.decoder.ff.tm.RuleCollection;
-import joshua.decoder.ff.tm.Trie;
-import joshua.decoder.ff.tm.hash_based.MemoryBasedBatchGrammar;
-import joshua.decoder.ff.tm.packed.PackedGrammar;
-
-/**
- * Represents a phrase table, and is implemented as a wrapper around either a {@link PackedGrammar}
- * or a {@link MemoryBasedBatchGrammar}.
- * 
- * TODO: this should all be implemented as a two-level trie (source trie and target trie).
- */
-public class PhraseTable implements Grammar {
-  
-  private JoshuaConfiguration config;
-  private Grammar backend;
-  
-  /**
-   * Chain to the super with a number of defaults. For example, we only use a single nonterminal,
-   * and there is no span limit.
-   * 
-   * @param grammarFile
-   * @param owner
-   * @param config
-   * @throws IOException
-   */
-  public PhraseTable(String grammarFile, String owner, String type, JoshuaConfiguration config, int maxSource) 
-      throws IOException {
-    this.config = config;
-    int spanLimit = 0;
-    
-    if (grammarFile != null && new File(grammarFile).isDirectory()) {
-      this.backend = new PackedGrammar(grammarFile, spanLimit, owner, type, config);
-      if (this.backend.getMaxSourcePhraseLength() == -1) {
-        System.err.println("FATAL: Using a packed grammar for a phrase table backend requires that you");
-        System.err.println("       packed the grammar with Joshua 6.0.2 or greater");
-        System.exit(-1);
-      }
-
-    } else {
-      this.backend = new MemoryBasedBatchGrammar(type, grammarFile, owner, "[X]", spanLimit, config);
-    }
-  }
-  
-  public PhraseTable(String owner, JoshuaConfiguration config) {
-    this.config = config;
-    
-    this.backend = new MemoryBasedBatchGrammar(owner, config);
-  }
-      
-  /**
-   * Returns the longest source phrase read. For {@link MemoryBasedBatchGrammar}s, we subtract 1
-   * since the grammar includes the nonterminal. For {@link PackedGrammar}s, the value was either
-   * in the packed config file (Joshua 6.0.2+) or was passed in via the TM config line.
-   * 
-   * @return
-   */
-  @Override
-  public int getMaxSourcePhraseLength() {
-    if (backend instanceof MemoryBasedBatchGrammar)
-      return this.backend.getMaxSourcePhraseLength() - 1;
-    else
-      return this.backend.getMaxSourcePhraseLength();
-  }
-
-  /**
-   * Collect the set of target-side phrases associated with a source phrase.
-   * 
-   * @param sourceWords the sequence of source words
-   * @return the rules
-   */
-  public RuleCollection getPhrases(int[] sourceWords) {
-    if (sourceWords.length != 0) {
-      Trie pointer = getTrieRoot();
-      if (! (backend instanceof PackedGrammar))
-        pointer = pointer.match(Vocabulary.id("[X]"));
-      int i = 0;
-      while (pointer != null && i < sourceWords.length)
-        pointer = pointer.match(sourceWords[i++]);
-
-      if (pointer != null && pointer.hasRules()) {
-        return pointer.getRuleCollection();
-      }
-    }
-
-    return null;
-  }
-
-  /**
-   * Adds a rule to the grammar. Only supported when the backend is a MemoryBasedBatchGrammar.
-   * 
-   * @param rule the rule to add
-   */
-  public void addRule(Rule rule) {
-    ((MemoryBasedBatchGrammar)backend).addRule(rule);
-  }
-  
-  @Override
-  public void addOOVRules(int sourceWord, List<FeatureFunction> featureFunctions) {
-    // TODO: _OOV shouldn't be outright added, since the word might not be OOV for the LM (but now almost
-    // certainly is)
-    int targetWord = config.mark_oovs
-        ? Vocabulary.id(Vocabulary.word(sourceWord) + "_OOV")
-        : sourceWord;   
-
-    int nt_i = Vocabulary.id("[X]");
-    Rule oovRule = new Rule(nt_i, new int[] { nt_i, sourceWord },
-        new int[] { -1, targetWord }, "", 1, null);
-    addRule(oovRule);
-    oovRule.estimateRuleCost(featureFunctions);
-        
-//    String ruleString = String.format("[X] ||| [X,1] %s ||| [X,1] %s", 
-//        Vocabulary.word(sourceWord), Vocabulary.word(targetWord));
-//    BilingualRule oovRule = new HieroFormatReader().parseLine(ruleString);
-//    oovRule.setOwner(Vocabulary.id("oov"));
-//    addRule(oovRule);
-//    oovRule.estimateRuleCost(featureFunctions);
-  }
-
-  @Override
-  public Trie getTrieRoot() {
-    return backend.getTrieRoot();
-  }
-
-  @Override
-  public void sortGrammar(List<FeatureFunction> models) {
-    backend.sortGrammar(models);    
-  }
-
-  @Override
-  public boolean isSorted() {
-    return backend.isSorted();
-  }
-
-  /**
-   * This should never be called. 
-   */
-  @Override
-  public boolean hasRuleForSpan(int startIndex, int endIndex, int pathLength) {
-    return true;
-  }
-
-  @Override
-  public int getNumRules() {
-    return backend.getNumRules();
-  }
-
-  @Override
-  public Rule constructManualRule(int lhs, int[] sourceWords, int[] targetWords, float[] scores,
-      int arity) {
-    return backend.constructManualRule(lhs,  sourceWords, targetWords, scores, arity);
-  }
-
-  @Override
-  public void writeGrammarOnDisk(String file) {
-    backend.writeGrammarOnDisk(file);
-  }
-
-  @Override
-  public boolean isRegexpGrammar() {
-    return backend.isRegexpGrammar();
-  }
-
-  @Override
-  public int getOwner() {
-    return backend.getOwner();
-  }
-
-  @Override
-  public int getNumDenseFeatures() {
-    return backend.getNumDenseFeatures();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/phrase/Stack.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/phrase/Stack.java b/src/joshua/decoder/phrase/Stack.java
deleted file mode 100644
index 88b529a..0000000
--- a/src/joshua/decoder/phrase/Stack.java
+++ /dev/null
@@ -1,234 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.phrase;
-
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.PriorityQueue;
-import java.util.Set;
-
-import joshua.decoder.Decoder;
-import joshua.decoder.JoshuaConfiguration;
-import joshua.decoder.chart_parser.ComputeNodeResult;
-import joshua.decoder.ff.FeatureFunction;
-import joshua.decoder.segment_file.Sentence;
-
-/**
- * Organizes all hypotheses containing the same number of source words. 
- *
- */
-public class Stack extends ArrayList<Hypothesis> {
-  
-  private static final long serialVersionUID = 7885252799032416068L;
-
-  private HashMap<Coverage, ArrayList<Hypothesis>> coverages;
-  
-  private Sentence sentence;
-  private List<FeatureFunction> featureFunctions;
-  private JoshuaConfiguration config;
-
-  /* The list of states we've already visited. */
-  private HashSet<Candidate> visitedStates;
-  
-  /* A list of candidates sorted for consideration for entry to the chart (for cube pruning) */
-  private PriorityQueue<Candidate> candidates;
-  
-  /* Short-circuits adding a cube-prune state more than once */
-  private HashMap<Hypothesis, Hypothesis> deduper;
-  
-  /**
-   * Create a new stack. Stacks are organized one for each number of source words that are covered.
-   * 
-   * @param featureFunctions
-   * @param sentence
-   * @param config
-   */
-  public Stack(List<FeatureFunction> featureFunctions, Sentence sentence, JoshuaConfiguration config) {
-    this.featureFunctions = featureFunctions;
-    this.sentence = sentence;
-    this.config = config;
-    
-    this.candidates = new PriorityQueue<Candidate>(1, new CandidateComparator());
-    this.coverages = new HashMap<Coverage, ArrayList<Hypothesis>>();
-    this.visitedStates = new HashSet<Candidate>();
-    this.deduper = new HashMap<Hypothesis,Hypothesis>();
-  }
-
-  /**
-   * A Stack is an ArrayList; here, we intercept the add so we can maintain a list of the items
-   * stored under each distinct coverage vector
-   */
-  @Override
-  public boolean add(Hypothesis hyp) {
-    
-    if (! coverages.containsKey((hyp.getCoverage())))
-      coverages.put(hyp.getCoverage(), new ArrayList<Hypothesis>()); 
-    coverages.get(hyp.getCoverage()).add(hyp);
-    
-    return super.add(hyp);
-  }
-  
-  /**
-   * Intercept calls to remove() so that we can reduce the coverage vector
-   */
-  @Override
-  public boolean remove(Object obj) {
-    boolean found = super.remove(obj);
-    if (found) {
-      Hypothesis item = (Hypothesis) obj;
-      Coverage cov = item.getCoverage();
-      assert coverages.get(cov).remove(obj);
-      if (coverages.get(cov).size() == 0)
-        coverages.remove(cov);
-    }
-    return found;
-  }
-  
-  /** 
-   * Returns the set of coverages contained in this stack. This is used to iterate over them
-   * in the main decoding loop in Stacks.java.
-   */
-  public Set<Coverage> getCoverages() {
-    return coverages.keySet();
-  }
-  
-  /**
-   * Get all items with the same coverage vector.
-   * 
-   * @param cov
-   * @return
-   */
-  public ArrayList<Hypothesis> get(Coverage cov) {
-    ArrayList<Hypothesis> list = coverages.get(cov);
-    Collections.sort(list);
-    return list;
-  }
-  
-  /**
-   * Receives a partially-initialized translation candidate and places it on the
-   * priority queue after scoring it with all of the feature functions. In this
-   * respect it is like {@link CubePruneState} (it could make use of that class with
-   * a little generalization of spans / coverage).
-   * 
-   * This function is also used to (fairly concisely) implement constrained decoding. Before
-   * adding a candidate, we ensure that the sequence of English words match the sentence. If not,
-   * the code extends the dot in the cube-pruning chart to the next phrase, since that one might
-   * be a match.
-   * 
-   * @param cand
-   */
-  public void addCandidate(Candidate cand) {
-    if (visitedStates.contains(cand))
-      return;
-    
-    visitedStates.add(cand);
-
-    // Constrained decoding
-    if (sentence.target() != null) {
-      String oldWords = cand.getHypothesis().bestHyperedge.getRule().getEnglishWords().replace("[X,1] ",  "");
-      String newWords = cand.getRule().getEnglishWords().replace("[X,1] ",  "");
-          
-      // If the string is not found in the target sentence, explore the cube neighbors
-      if (sentence.fullTarget().indexOf(oldWords + " " + newWords) == -1) {
-        Candidate next = cand.extendPhrase();
-        if (next != null)
-          addCandidate(next); 
-        return;
-      }
-    }
-
-    // TODO: sourcepath
-    ComputeNodeResult result = new ComputeNodeResult(this.featureFunctions, cand.getRule(),
-        cand.getTailNodes(), -1, cand.getSpan().end, null, this.sentence);
-    cand.setResult(result);
-    
-    candidates.add(cand);
-  }
-  
-  /**
-   * Cube pruning. Repeatedly pop the top candidate, creating a new hyperedge from it, adding it to
-   * the k-best list, and then extending the list of candidates with extensions of the current
-   * candidate.
-   * 
-   * @param context
-   * @param output
-   */
-  public void search() {
-    int to_pop = config.pop_limit;
-    
-    if (Decoder.VERBOSE >= 3) {
-      System.err.println("Stack::search(): pop: " + to_pop + " size: " + candidates.size());
-      for (Candidate c: candidates)
-        System.err.println("  " + c);
-    }
-    while (to_pop > 0 && !candidates.isEmpty()) {
-      Candidate got = candidates.poll();
-      if (got != null) {
-        addHypothesis(got);
-        --to_pop;
-        
-        for (Candidate c : got.extend())
-          if (c != null) {
-            addCandidate(c);
-          }
-      }
-    }
-  }
-
-  /**
-   * Adds a popped candidate to the chart / main stack. This is a candidate we have decided to
-   * keep around.
-   * 
-   */
-  public void addHypothesis(Candidate complete) {
-    Hypothesis added = new Hypothesis(complete);
-    
-    if (deduper.containsKey(added)) {
-      Hypothesis existing = deduper.get(added);
-      existing.absorb(added);
-      
-      if (Decoder.VERBOSE >= 3) {
-        System.err.println(String.format("recombining hypothesis from ( ... %s )", complete.getHypothesis().getRule().getEnglishWords()));
-        System.err.println(String.format("        base score %.3f", complete.getResult().getBaseCost()));
-        System.err.println(String.format("        covering %d-%d", complete.getSpan().start - 1, complete.getSpan().end - 2));
-        System.err.println(String.format("        translated as: %s", complete.getRule().getEnglishWords()));
-        System.err.println(String.format("        score %.3f + future cost %.3f = %.3f", 
-            complete.getResult().getTransitionCost(), complete.getFutureEstimate(),
-            complete.getResult().getTransitionCost() + complete.getFutureEstimate()));
-      }
-      
-    } else {
-      add(added);
-      deduper.put(added, added);
-      
-      if (Decoder.VERBOSE >= 3) {
-        System.err.println(String.format("creating new hypothesis from ( ... %s )", complete.getHypothesis().getRule().getEnglishWords()));
-        System.err.println(String.format("        base score %.3f", complete.getResult().getBaseCost()));
-        System.err.println(String.format("        covering %d-%d", complete.getSpan().start - 1, complete.getSpan().end - 2));
-        System.err.println(String.format("        translated as: %s", complete.getRule().getEnglishWords()));
-        System.err.println(String.format("        score %.3f + future cost %.3f = %.3f", 
-            complete.getResult().getTransitionCost(), complete.getFutureEstimate(),
-            complete.getResult().getTransitionCost() + complete.getFutureEstimate()));
-      }
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/phrase/Stacks.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/phrase/Stacks.java b/src/joshua/decoder/phrase/Stacks.java
deleted file mode 100644
index eda7d8b..0000000
--- a/src/joshua/decoder/phrase/Stacks.java
+++ /dev/null
@@ -1,266 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.phrase;
-
-/***
- * Entry point for phrase-based decoding, analogous to {@link Chart} for the CKY algorithm. This
- * class organizes all the stacks used for decoding, and is responsible for building them. Stack
- * construction is stack-centric: that is, we loop over the number of source words in increasing sizes;
- * at each step of this iteration, we break the search between smaller stack sizes and source-side
- * phrase sizes.
- * 
- * The end result of decoding is a {@link Hypergraph} with the same format as hierarchical decoding.
- * Phrases are treating as left-branching rules, and the span information (i,j) is overloaded so
- * that i means nothing and j represents the index of the last-translated source word in each
- * hypothesis. This means that most hypergraph code can work without modification. The algorithm 
- * ensures that the coverage vector is consistent but the resulting hypergraph may not be projective,
- * which is different from the CKY algorithm, which does produce projective derivations. 
- * 
- * Lattice decoding is not yet supported (March 2015).
- */
-
-import java.util.ArrayList;
-import java.util.List;
-
-import joshua.corpus.Span;
-import joshua.decoder.Decoder;
-import joshua.decoder.JoshuaConfiguration;
-import joshua.decoder.chart_parser.ComputeNodeResult;
-import joshua.decoder.ff.FeatureFunction;
-import joshua.decoder.ff.tm.AbstractGrammar;
-import joshua.decoder.ff.tm.Grammar;
-import joshua.decoder.hypergraph.HGNode;
-import joshua.decoder.hypergraph.HyperEdge;
-import joshua.decoder.hypergraph.HyperGraph;
-import joshua.decoder.segment_file.Sentence;
-
-public class Stacks {
-
-  // The list of stacks, grouped according to number of source words covered
-  private List<Stack> stacks;
-
-  // The end state
-  private Hypothesis end;
-  
-  List<FeatureFunction> featureFunctions;
-
-  private Sentence sentence;
-
-  private JoshuaConfiguration config;
-
-  /* Contains all the phrase tables */
-  private PhraseChart chart;
-  
-  /**
-   * Entry point. Initialize everything. Create pass-through (OOV) phrase table and glue phrase
-   * table (with start-of-sentence and end-of-sentence rules).
-   * 
-   * @param sentence
-   * @param featureFunctions
-   * @param grammars
-   * @param config
-   */
-  public Stacks(Sentence sentence, List<FeatureFunction> featureFunctions, Grammar[] grammars, 
-      JoshuaConfiguration config) {
-
-    this.sentence = sentence;
-    this.featureFunctions = featureFunctions;
-    this.config = config;
-    
-    int num_phrase_tables = 0;
-    for (int i = 0; i < grammars.length; i++)
-      if (grammars[i] instanceof PhraseTable)
-        ++num_phrase_tables;
-    
-    PhraseTable[] phraseTables = new PhraseTable[num_phrase_tables + 2];
-    for (int i = 0, j = 0; i < grammars.length; i++)
-      if (grammars[i] instanceof PhraseTable)
-        phraseTables[j++] = (PhraseTable) grammars[i];
-    
-    phraseTables[phraseTables.length - 2] = new PhraseTable("null", config);
-    phraseTables[phraseTables.length - 2].addRule(Hypothesis.END_RULE);
-    
-    phraseTables[phraseTables.length - 1] = new PhraseTable("oov", config);
-    AbstractGrammar.addOOVRules(phraseTables[phraseTables.length - 1], sentence.getLattice(), featureFunctions, config.true_oovs_only);
-    
-    this.chart = new PhraseChart(phraseTables, featureFunctions, sentence, config.num_translation_options);
-  }
-  
-  
-  /**
-   * The main algorithm. Returns a hypergraph representing the search space.
-   * 
-   * @return
-   */
-  public HyperGraph search() {
-    
-    long startTime = System.currentTimeMillis();
-    
-    Future future = new Future(chart);
-    stacks = new ArrayList<Stack>();
-    
-    // <s> counts as the first word. Pushing null lets us count from one.
-    stacks.add(null);
-
-    // Initialize root hypothesis with <s> context and future cost for everything.
-    ComputeNodeResult result = new ComputeNodeResult(this.featureFunctions, Hypothesis.BEGIN_RULE,
-        null, -1, 1, null, this.sentence);
-    Stack firstStack = new Stack(featureFunctions, sentence, config);
-    firstStack.add(new Hypothesis(result.getDPStates(), future.Full()));
-    stacks.add(firstStack);
-    
-    // Decode with increasing numbers of source words. 
-    for (int source_words = 2; source_words <= sentence.length(); ++source_words) {
-      Stack targetStack = new Stack(featureFunctions, sentence, config);
-      stacks.add(targetStack);
-
-      // Iterate over stacks to continue from.
-      for (int phrase_length = 1; phrase_length <= Math.min(source_words - 1, chart.MaxSourcePhraseLength());
-          phrase_length++) {
-        int from_stack = source_words - phrase_length;
-        Stack tailStack = stacks.get(from_stack);
-        
-        if (Decoder.VERBOSE >= 3)
-          System.err.println(String.format("\n  WORDS %d MAX %d (STACK %d phrase_length %d)", source_words,
-              chart.MaxSourcePhraseLength(), from_stack, phrase_length));
-        
-        // Iterate over antecedents in this stack.
-        for (Coverage coverage: tailStack.getCoverages()) {
-          ArrayList<Hypothesis> hypotheses = tailStack.get(coverage); 
-          
-          // the index of the starting point of the first possible phrase
-          int begin = coverage.firstZero();
-          
-          // the absolute position of the ending spot of the last possible phrase
-          int last_end = Math.min(coverage.firstZero() + config.reordering_limit, chart.SentenceLength());
-          int last_begin = (last_end > phrase_length) ? (last_end - phrase_length) : 0;
-
-          for (begin = coverage.firstZero(); begin <= last_begin; begin++) {
-            if (!coverage.compatible(begin, begin + phrase_length) ||
-                ! permissible(coverage, begin, begin + phrase_length)) {
-              continue;
-            }
-
-            Span span = new Span(begin, begin + phrase_length);
-
-            // Don't append </s> until the end
-            if (begin == sentence.length() - 1 && source_words != sentence.length()) 
-              continue;            
-
-            TargetPhrases phrases = chart.getRange(begin, begin + phrase_length);
-            if (phrases == null)
-              continue;
-
-            if (Decoder.VERBOSE >= 3)
-              System.err.println(String.format("  Applying %d target phrases over [%d,%d]", phrases.size(), begin, begin + phrase_length));
-            
-            // TODO: could also compute some number of features here (e.g., non-LM ones)
-            // float score_delta = context.GetScorer().transition(ant, phrases, begin, begin + phrase_length);
-            
-            // Future costs: remove span to be filled.
-            float future_delta = future.Change(coverage, begin, begin + phrase_length);
-            
-            /* This associates with each span a set of hypotheses that can be extended by
-             * phrases from that span. The hypotheses are wrapped in HypoState objects, which
-             * augment the hypothesis score with a future cost.
-             */
-            Candidate cand = new Candidate(hypotheses, phrases, span, future_delta);
-            targetStack.addCandidate(cand);
-          }
-        }
-      }
-
-      /* At this point, every vertex contains a list of all existing hypotheses that the target
-       * phrases in that vertex could extend. Now we need to create the search object, which
-       * implements cube pruning. There are up to O(n^2) cubes, n the size of the current stack,
-       * one cube each over each span of the input. Each "cube" has two dimensions: one representing
-       * the target phrases over the span, and one representing all of these incoming hypotheses.
-       * We seed the chart with the best item in each cube, and then repeatedly pop and extend.
-       */
-      
-//      System.err.println(String.format("\nBuilding cube-pruning chart for %d words", source_words));
-
-      targetStack.search();
-    }
-    
-    Decoder.LOG(1, String.format("Input %d: Search took %.3f seconds", sentence.id(),
-        (System.currentTimeMillis() - startTime) / 1000.0f));
-    
-    return createGoalNode();
-  }
-    
-  /**
-   * Enforces reordering constraints. Our version of Moses' ReorderingConstraint::Check() and
-   * SearchCubePruning::CheckDistortion(). 
-   * 
-   * @param coverage
-   * @param begin
-   * @param i
-   * @return
-   */
-  private boolean permissible(Coverage coverage, int begin, int end) {
-    int firstZero = coverage.firstZero();
-
-    if (config.reordering_limit < 0)
-      return true;
-    
-    /* We can always start with the first zero since it doesn't create a reordering gap
-     */
-    if (begin == firstZero)
-      return true;
-
-    /* If a gap is created by applying this phrase, make sure that you can reach the first
-     * zero later on without violating the distortion constraint.
-     */
-    if (end - firstZero > config.reordering_limit) {
-      return false;
-    }
-    
-    return true;
-  }
-
-
-  /**
-   * Searches through the goal stack, calling the final transition function on each node, and then returning
-   * the best item. Usually the final transition code doesn't add anything, because all features
-   * have already computed everything they need to. The standard exception is language models that
-   * have not yet computed their prefix probabilities (which is not the case with KenLM, the default).
-   * 
-   * @return
-   */
-  private HyperGraph createGoalNode() {
-    Stack lastStack = stacks.get(sentence.length());
-    
-    for (Hypothesis hyp: lastStack) {
-      float score = hyp.getScore();
-      List<HGNode> tailNodes = new ArrayList<HGNode>();
-      tailNodes.add(hyp);
-      
-      float finalTransitionScore = ComputeNodeResult.computeFinalCost(featureFunctions, tailNodes, 0, sentence.length(), null, sentence);
-
-      if (null == this.end)
-        this.end = new Hypothesis(null, score + finalTransitionScore, hyp, sentence.length(), null);
-
-      HyperEdge edge = new HyperEdge(null, score + finalTransitionScore, finalTransitionScore, tailNodes, null);
-      end.addHyperedgeInNode(edge);
-    }
-    
-    return new HyperGraph(end, -1, -1, this.sentence);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/phrase/TargetPhrases.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/phrase/TargetPhrases.java b/src/joshua/decoder/phrase/TargetPhrases.java
deleted file mode 100644
index 83b69d0..0000000
--- a/src/joshua/decoder/phrase/TargetPhrases.java
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.phrase;
-
-import java.util.ArrayList;	
-import java.util.Collections;
-import java.util.List;
-
-import joshua.decoder.ff.FeatureFunction;
-import joshua.decoder.ff.FeatureVector;
-import joshua.decoder.ff.tm.Rule;
-
-/**
- * Represents a sorted collection of target-side phrases. Typically, these are phrases
- * generated from the same source word sequence. The list of options is reduced to the number
- * of translation options.
- * 
- * @author Matt Post
- */
-
-public class TargetPhrases extends ArrayList<Rule> {
-
-  private static final long serialVersionUID = 1L;
-
-  public TargetPhrases() {
-    super();
-  }
-  
-  /**
-   * Initialize with a collection of rules.
-   * 
-   * @param list
-   */
-  public TargetPhrases(List<Rule> list) {
-    super();
-    
-    for (Rule rule: list) {
-      add(rule);
-    }
-  }
-  
-  /**
-   * Score the rules and sort them. Scoring is necessary because rules are only scored if they
-   * are used, in an effort to make reading in rules more efficient. This is starting to create
-   * some trouble and should probably be reworked.
-   */
-  public void finish(List<FeatureFunction> features, FeatureVector weights, int num_options) {
-    for (Rule rule: this) { 
-      rule.estimateRuleCost(features);
-//      System.err.println("TargetPhrases:finish(): " + rule);
-    }
-    Collections.sort(this, Rule.EstimatedCostComparator);
-    
-    if (this.size() > num_options)
-      this.removeRange(num_options, this.size());
-    
-//    System.err.println("TargetPhrases::finish()");
-//    for (Rule rule: this) 
-//      System.err.println("  " + rule);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/segment_file/ConstraintRule.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/segment_file/ConstraintRule.java b/src/joshua/decoder/segment_file/ConstraintRule.java
deleted file mode 100644
index 9968640..0000000
--- a/src/joshua/decoder/segment_file/ConstraintRule.java
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.segment_file;
-
-import javax.swing.text.Segment;
-
-
-/**
- * This interface is for an individual (partial) item to seed the chart with. All rules should be
- * flat (no hierarchical nonterminals).
- * <p>
- * The {@link Segment}, {@link ConstraintSpan}, and {@link ConstraintRule} interfaces are for
- * defining an interchange format between a SegmentFileParser and the Chart class. These interfaces
- * <emph>should not</emph> be used internally by the Chart. The objects returned by a
- * SegmentFileParser will not be optimal for use during decoding. The Chart should convert each of
- * these objects into its own internal representation during construction. That is the contract
- * described by these interfaces.
- * 
- * @see Type
- * 
- * @author wren ng thornton <wr...@users.sourceforge.net>
- * @version $LastChangedDate: 2009-03-26 15:06:57 -0400 (Thu, 26 Mar 2009) $
- */
-public interface ConstraintRule {
-
-  /**
-   * There are three types of ConstraintRule. The RULE type returns non-null values for all methods.
-   * The LHS type provides a (non-null) value for the lhs method, but returns null for everything
-   * else. And the RHS type provides a (non-null) value for nativeRhs and foreignRhs but returns
-   * null for the lhs and features.
-   * <p>
-   * The interpretation of a RULE is that it adds a new rule to the grammar which only applies to
-   * the associated span. If the associated span is hard, then the set of rules for that span will
-   * override the regular grammar.
-   * <p>
-   * The intepretation of a LHS is that it provides a hard constraint that the associated span be
-   * treated as the nonterminal for that span, thus filtering the regular grammar.
-   * <p>
-   * The interpretation of a RHS is that it provides a hard constraint to filter the regular grammar
-   * such that only rules generating the desired translation can be used.
-   */
-  public enum Type {
-    RULE, LHS, RHS
-  };
-
-  /** Return the type of this ConstraintRule. */
-  Type type();
-
-
-  /**
-   * Return the left hand side of the constraint rule. If this is null, then this object is
-   * specifying a translation for the span, but that translation may be derived from any
-   * nonterminal. The nonterminal here must be one used by the regular grammar.
-   */
-  String lhs();
-
-
-  /**
-   * Return the native right hand side of the constraint rule. If this is null, then the regular
-   * grammar will be used to fill in the derivation from the lhs.
-   */
-  String nativeRhs();
-
-
-  /**
-   * Return the foreign right hand side of the constraint rule. This must be consistent with the
-   * sentence for the associated span, and is provided as a convenience method.
-   */
-  String foreignRhs();
-
-
-  /**
-   * Return the grammar feature values for the RULE. The length of this array must be the same as
-   * for the regular grammar. We cannot enforce this requirement, but the
-   * {@link joshua.decoder.chart_parser.Chart} must throw an error if there is a mismatch.
-   */
-  float[] features();
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/segment_file/ConstraintSpan.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/segment_file/ConstraintSpan.java b/src/joshua/decoder/segment_file/ConstraintSpan.java
deleted file mode 100644
index c8087bd..0000000
--- a/src/joshua/decoder/segment_file/ConstraintSpan.java
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.segment_file;
-
-import java.util.List;
-
-import javax.swing.text.Segment;
-
-/**
- * This interface represents a collection of constraints for a given span in the associated segment.
- * Intuitively, each constraint corresponds to one or more items in the chart for parsing, except
- * that we pre-seed the chart with these items before beginning the parsing algorithm. Some
- * constraints can be "hard", in which case the regular grammar is not consulted for these spans. It
- * is an error to have hard constraints for overlapping spans.
- * <p>
- * Indices for the span boundaries mark the transitions between words. Thus, the 0 index occurs
- * before the first word, the 1 index occurs between the first and second words, 2 is between the
- * second and third, etc. Consequently, it is an error for the end index to be equal to or less than
- * the start index. It is also an error to have negative indices or to have indices larger than the
- * count of words in the segment. Clients may assume that no <code>ConstraintSpan</code> objects are
- * constructed which violate these laws.
- * <p>
- * The {@link Segment}, {@link ConstraintSpan}, and {@link ConstraintRule} interfaces are for
- * defining an interchange format between a SegmentFileParser and the Chart class. These interfaces
- * <emph>should not</emph> be used internally by the Chart. The objects returned by a
- * SegmentFileParser will not be optimal for use during decoding. The Chart should convert each of
- * these objects into its own internal representation during construction. That is the contract
- * described by these interfaces.
- * 
- * @author wren ng thornton <wr...@users.sourceforge.net>
- */
-public interface ConstraintSpan {
-
-  /**
-   * Return the starting index of the span covered by this constraint.
-   */
-  int start();
-
-  /**
-   * Return the ending index of the span covered by this constraint. Clients may assume
-   * <code>this.end() &gt;= 1 + this.start()</code>.
-   */
-  int end();
-
-  /**
-   * Return whether this is a hard constraint which should override the grammar. This value only
-   * really matters for sets of <code>RULE</code> type constraints.
-   */
-  boolean isHard();
-
-  /**
-   * Return a collection of the "rules" for this constraint span.
-   * <p>
-   * This return type is suboptimal for some SegmentFileParsers. It should be an
-   * {@link java.util.Iterator} instead in order to reduce the coupling between this class and
-   * Chart. See the note above about the fact that this interface should not be used internally by
-   * the Chart class because it will not be performant.
-   */
-  List<ConstraintRule> rules();
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/segment_file/ParseTreeInput.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/segment_file/ParseTreeInput.java b/src/joshua/decoder/segment_file/ParseTreeInput.java
deleted file mode 100644
index 5feb051..0000000
--- a/src/joshua/decoder/segment_file/ParseTreeInput.java
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.segment_file;
-
-import joshua.decoder.JoshuaConfiguration;
-
-public class ParseTreeInput extends Sentence {
-
-  public ParseTreeInput(String input, int id, JoshuaConfiguration joshuaConfiguration) {
-    super(input, id,joshuaConfiguration);
-  }
-
-  // looks_like_parse_tree = sentence.sentence().matches("^\\(+[A-Z]+ .*");
-
-  // private SyntaxTree syntax_tree;
-
-  // ParseTreeInput() {
-  // SyntaxTree syntax_tree = new ArraySyntaxTree(sentence.sentence(), Vocabulary);
-  // }
-
-  // public int[] int_sentence() {
-  // return syntax_tree.getTerminals();
-  // }
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/segment_file/ParsedSentence.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/segment_file/ParsedSentence.java b/src/joshua/decoder/segment_file/ParsedSentence.java
deleted file mode 100644
index 9273b96..0000000
--- a/src/joshua/decoder/segment_file/ParsedSentence.java
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.segment_file;
-
-import joshua.corpus.Vocabulary;
-import joshua.corpus.syntax.ArraySyntaxTree;
-import joshua.corpus.syntax.SyntaxTree;
-import joshua.decoder.JoshuaConfiguration;
-
-public class ParsedSentence extends Sentence {
-
-  private SyntaxTree syntaxTree = null;
-
-  public ParsedSentence(String input, int id,JoshuaConfiguration joshuaConfiguration) {
-    super(input, id, joshuaConfiguration);
-  }
-
-  public int[] getWordIDs() {
-    int[] terminals = syntaxTree().getTerminals();
-    int[] annotated = new int[terminals.length + 2];
-    System.arraycopy(terminals, 0, annotated, 1, terminals.length);
-    annotated[0] = Vocabulary.id(Vocabulary.START_SYM);
-    annotated[annotated.length - 1] = Vocabulary.id(Vocabulary.STOP_SYM);
-    return annotated;
-  }
-
-  public SyntaxTree syntaxTree() {
-    if (syntaxTree == null)
-      syntaxTree = new ArraySyntaxTree(this.source());
-    return syntaxTree;
-  }
-
-  public static boolean matches(String input) {
-    return input.matches("^\\(+[A-Z]+ .*");
-  }
-
-  public String fullSource() {
-    return Vocabulary.getWords(this.getWordIDs());
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/segment_file/Sentence.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/segment_file/Sentence.java b/src/joshua/decoder/segment_file/Sentence.java
deleted file mode 100644
index 588850b..0000000
--- a/src/joshua/decoder/segment_file/Sentence.java
+++ /dev/null
@@ -1,440 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.segment_file;
-
-import static joshua.util.FormatUtils.addSentenceMarkers;
-import static joshua.util.FormatUtils.escapeSpecialSymbols;
-
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.StringTokenizer;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import joshua.corpus.Vocabulary;
-import joshua.decoder.Decoder;
-import joshua.decoder.JoshuaConfiguration;	
-import joshua.decoder.ff.tm.Grammar;
-import joshua.lattice.Arc;
-import joshua.lattice.Lattice;
-import joshua.lattice.Node;
-import joshua.util.ChartSpan;
-import joshua.util.Regex;
-
-/**
- * This class represents lattice input. The lattice is contained on a single line and is represented
- * in PLF (Python Lattice Format), e.g.,
- * 
- * ((('ein',0.1,1),('dieses',0.2,1),('haus',0.4,2),),(('haus',0.8,1),),)
- * 
- * @author Matt Post <po...@cs.jhu.edu>
- */
-
-public class Sentence {
-
-  /* The sentence number. */
-  public int id = -1;
-
-  /*
-   * The source and target sides of the input sentence. Target sides are present when doing
-   * alignment or forced decoding.
-   */
-  protected String source = null;
-  protected String fullSource = null;
-  
-  protected String target = null;
-  protected String fullTarget = null;
-  protected String[] references = null;
-
-  /* Lattice representation of the source sentence. */
-  protected Lattice<Token> sourceLattice = null;
-
-  /* List of constraints */
-  private final List<ConstraintSpan> constraints;
-  
-  private JoshuaConfiguration config = null;
-
-  /**
-   * Constructor. Receives a string representing the input sentence. This string may be a
-   * string-encoded lattice or a plain text string for decoding.
-   * 
-   * @param inputString
-   * @param id
-   */
-  public Sentence(String inputString, int id, JoshuaConfiguration joshuaConfiguration) {
-  
-    inputString = Regex.spaces.replaceAll(inputString, " ").trim();
-    
-    config = joshuaConfiguration;
-    
-    this.constraints = new LinkedList<ConstraintSpan>();
-  
-    // Check if the sentence has SGML markings denoting the
-    // sentence ID; if so, override the id passed in to the
-    // constructor
-    Matcher start = SEG_START.matcher(inputString);
-    if (start.find()) {
-      source = SEG_END.matcher(start.replaceFirst("")).replaceFirst("");
-      String idstr = start.group(1);
-      this.id = Integer.parseInt(idstr);
-    } else {
-      if (inputString.indexOf(" ||| ") != -1) {
-        String[] pieces = inputString.split("\\s?\\|{3}\\s?");
-        source = pieces[0];
-        target = pieces[1];
-        if (target.equals(""))
-          target = null;
-        if (pieces.length > 2) {
-          references = new String[pieces.length - 2];
-          System.arraycopy(pieces, 2, references, 0, pieces.length - 2);
-        }
-      } else {
-        source = inputString;
-      }
-      this.id = id;
-    }
-    
-    // Only trim strings
-    if (! (joshuaConfiguration.lattice_decoding && source.startsWith("(((")))
-      adjustForLength(joshuaConfiguration.maxlen);
-  }
-  
-  /**
-   * Indicates whether the underlying lattice is a linear chain, i.e., a sentence.
-   * 
-   * @return true if this is a linear chain, false otherwise
-   */
-  public boolean isLinearChain() {
-    return ! this.getLattice().hasMoreThanOnePath();
-  }
-
-  // Matches the opening and closing <seg> tags, e.g.,
-  // <seg id="72">this is a test input sentence</seg>.
-  protected static final Pattern SEG_START = Pattern
-      .compile("^\\s*<seg\\s+id=\"?(\\d+)\"?[^>]*>\\s*");
-  protected static final Pattern SEG_END = Pattern.compile("\\s*</seg\\s*>\\s*$");
-
-  /**
-   * Returns the length of the sentence. For lattices, the length is the shortest path through the
-   * lattice. The length includes the <s> and </s> sentence markers. 
-   * 
-   * @return number of input tokens + 2 (for start and end of sentence markers)
-   */
-  public int length() {
-    return this.getLattice().getShortestDistance();
-  }
-
-  /**
-   * Returns the annotations for a specific word (specified by an index) in the 
-   * sentence
-   * 
-   * @param index The location of the word in the sentence
-   * @param key The annotation identity
-   * @return The annotations associated with this word
-   */
-  public String getAnnotation(int index, String key) {
-    return getTokens().get(index).getAnnotation(key);
-  }
-
-  /**
-   * This function computes the intersection of \sigma^+ (where \sigma is the terminal vocabulary)
-   * with all character-level segmentations of each OOV in the input sentence.
-   * 
-   * The idea is to break apart noun compounds in languages like German (such as the word "golfloch"
-   * = "golf" (golf) + "loch" (hole)), allowing them to be translated.
-   * 
-   * @param grammars a list of grammars to consult to find in- and out-of-vocabulary items
-   */
-  public void segmentOOVs(Grammar[] grammars) {
-    Lattice<Token> oldLattice = this.getLattice();
-
-    /* Build a list of terminals across all grammars */
-    HashSet<Integer> vocabulary = new HashSet<Integer>();
-    for (Grammar grammar : grammars) {
-      Iterator<Integer> iterator = grammar.getTrieRoot().getTerminalExtensionIterator();
-      while (iterator.hasNext())
-        vocabulary.add(iterator.next());
-    }
-
-    List<Node<Token>> oldNodes = oldLattice.getNodes();
-
-    /* Find all the subwords that appear in the vocabulary, and create the lattice */
-    for (int nodeid = oldNodes.size() - 3; nodeid >= 1; nodeid -= 1) {
-      if (oldNodes.get(nodeid).getOutgoingArcs().size() == 1) {
-        Arc<Token> arc = oldNodes.get(nodeid).getOutgoingArcs().get(0);
-        String word = Vocabulary.word(arc.getLabel().getWord());
-        if (!vocabulary.contains(arc.getLabel())) {
-          // System.err.println(String.format("REPL: '%s'", word));
-          List<Arc<Token>> savedArcs = oldNodes.get(nodeid).getOutgoingArcs();
-
-          char[] chars = word.toCharArray();
-          ChartSpan<Boolean> wordChart = new ChartSpan<Boolean>(chars.length + 1, false);
-          ArrayList<Node<Token>> nodes = new ArrayList<Node<Token>>(chars.length + 1);
-          nodes.add(oldNodes.get(nodeid));
-          for (int i = 1; i < chars.length; i++)
-            nodes.add(new Node<Token>(i));
-          nodes.add(oldNodes.get(nodeid + 1));
-          for (int width = 1; width <= chars.length; width++) {
-            for (int i = 0; i <= chars.length - width; i++) {
-              int j = i + width;
-              if (width != chars.length) {
-                Token token = new Token(word.substring(i, j), config);
-                if (vocabulary.contains(id)) {
-                  nodes.get(i).addArc(nodes.get(j), 0.0f, token);
-                  wordChart.set(i, j, true);
-                  //                    System.err.println(String.format("  FOUND '%s' at (%d,%d)", word.substring(i, j),
-                  //                        i, j));
-                }
-              }
-
-              for (int k = i + 1; k < j; k++) {
-                if (wordChart.get(i, k) && wordChart.get(k, j)) {
-                  wordChart.set(i, j, true);
-                  //                    System.err.println(String.format("    PATH FROM %d-%d-%d", i, k, j));
-                }
-              }
-            }
-          }
-
-          /* If there's a path from beginning to end */
-          if (wordChart.get(0, chars.length)) {
-            // Remove nodes not part of a complete path
-            HashSet<Node<Token>> deletedNodes = new HashSet<Node<Token>>();
-            for (int k = 1; k < nodes.size() - 1; k++)
-              if (!(wordChart.get(0, k) && wordChart.get(k, chars.length)))
-                nodes.set(k, null);
-
-            int delIndex = 1;
-            while (delIndex < nodes.size())
-              if (nodes.get(delIndex) == null) {
-                deletedNodes.add(nodes.get(delIndex));
-                nodes.remove(delIndex);
-              } else
-                delIndex++;
-
-            for (Node<Token> node : nodes) {
-              int arcno = 0;
-              while (arcno != node.getOutgoingArcs().size()) {
-                Arc<Token> delArc = node.getOutgoingArcs().get(arcno);
-                if (deletedNodes.contains(delArc.getHead()))
-                  node.getOutgoingArcs().remove(arcno);
-                else {
-                  arcno++;
-                  //                    System.err.println("           ARC: " + Vocabulary.word(delArc.getLabel()));
-                }
-              }
-            }
-
-            // Insert into the main lattice
-            this.getLattice().insert(nodeid, nodeid + 1, nodes);
-          } else {
-            nodes.get(0).setOutgoingArcs(savedArcs);
-          }
-        }
-      }
-    }
-  }
-
-  /**
-   * If the input sentence is too long (not counting the <s> and </s> tokens), it is truncated to
-   * the maximum length, specified with the "maxlen" parameter.
-   * 
-   * Note that this code assumes the underlying representation is a sentence, and not a lattice. Its
-   * behavior is undefined for lattices.
-   * 
-   * @param length
-   */
-  protected void adjustForLength(int length) {
-    int size = this.getLattice().size() - 2; // subtract off the start- and end-of-sentence tokens
-
-    if (size > length) {
-      Decoder.LOG(1, String.format("* WARNING: sentence %d too long (%d), truncating to length %d",
-          id(), size, length));
-
-      // Replace the input sentence (and target) -- use the raw string, not source()
-      String[] tokens = source.split("\\s+");
-      source = tokens[0];
-      for (int i = 1; i < length; i++)
-        source += " " + tokens[i];
-      sourceLattice = null;
-      if (target != null) {
-        target = "";
-      }
-    }
-  }
-
-  public boolean isEmpty() {
-    return source.matches("^\\s*$");
-  }
-
-  public int id() {
-    return id;
-  }
-
-  /**
-   * Returns the raw source-side input string.
-   */
-  public String rawSource() {
-    return source;
-  }
-  
-  /**
-   * Returns the source-side string with annotations --- if any --- stripped off.
-   * 
-   * @return
-   */
-  public String source() {
-    StringBuilder str = new StringBuilder();
-    int[] ids = getWordIDs();
-    for (int i = 1; i < ids.length - 1; i++) {
-      str.append(Vocabulary.word(ids[i])).append(" ");
-    }
-    return str.toString().trim();
-  }
-
-  /**
-   * Returns a sentence with the start and stop symbols added to the 
-   * beginning and the end of the sentence respectively
-   * 
-   * @return String The input sentence with start and stop symbols
-   */
-  public String fullSource() {
-    if (fullSource == null) {
-      fullSource = addSentenceMarkers(source());
-    }
-    return fullSource;  
-  }
-
-  /**
-   * If a target side was supplied with the sentence, this will be non-null. This is used when doing
-   * synchronous parsing or constrained decoding. The input format is:
-   * 
-   * Bill quiere ir a casa ||| Bill wants to go home
-   * 
-   * If the parameter parse=true is set, parsing will be triggered, otherwise constrained decoding.
-   * 
-   * @return
-   */
-  public String target() {
-    return target;
-  }
-
-  public String fullTarget() {
-    if (fullTarget == null) {
-      fullTarget = addSentenceMarkers(target());
-    }
-    return fullTarget; 
-  }
-
-  public String source(int i, int j) {
-    StringTokenizer st = new StringTokenizer(fullSource());
-    int index = 0;
-    StringBuilder substring = new StringBuilder();
-    while (st.hasMoreTokens()) {
-      String token = st.nextToken();
-      if (index >= j)
-        break;
-      if (index >= i)
-        substring.append(token).append(" ");
-      index++;
-    }
-    return substring.toString().trim();
-  }
-
-  public String[] references() {
-    return references;
-  }
-
-  /**
-   * Returns the sequence of tokens comprising the sentence. This assumes you've done the checking
-   * to makes sure the input string (the source side) isn't a PLF waiting to be parsed.
-   * 
-   * @return
-   */
-  public List<Token> getTokens() {
-    assert isLinearChain();
-    List<Token> tokens = new ArrayList<Token>();
-    for (Node<Token> node: getLattice().getNodes())
-      if (node != null && node.getOutgoingArcs().size() > 0) 
-        tokens.add(node.getOutgoingArcs().get(0).getLabel());
-    return tokens;
-  }
-  
-  /**
-   * Returns the sequence of word IDs comprising the input sentence. Assumes this is not a general
-   * lattice, but a linear chain.
-   */
-  public int[] getWordIDs() {
-    List<Token> tokens = getTokens();
-    int[] ids = new int[tokens.size()];
-    for (int i = 0; i < tokens.size(); i++)
-      ids[i] = tokens.get(i).getWord();
-    return ids;
-  }
-  
-  /**
-   * Returns the sequence of word ids comprising the sentence. Assumes this is a sentence and
-   * not a lattice.
-   *  
-   * @return
-   */
-  public Lattice<String> stringLattice() {
-    assert isLinearChain();
-    return Lattice.createStringLatticeFromString(source(), config);
-  }
-
-  public List<ConstraintSpan> constraints() {
-    return constraints;
-  }
-
-  public Lattice<Token> getLattice() {
-    if (this.sourceLattice == null) {
-      if (config.lattice_decoding && rawSource().startsWith("(((")) {
-        if (config.search_algorithm.equals("stack")) {
-          System.err.println("* FATAL: lattice decoding currently not supported for stack-based search algorithm.");
-          System.exit(12);
-        }
-        this.sourceLattice = Lattice.createTokenLatticeFromPLF(rawSource(), config);
-      } else
-        this.sourceLattice = Lattice.createTokenLatticeFromString(String.format("%s %s %s", Vocabulary.START_SYM,
-            rawSource(), Vocabulary.STOP_SYM), config);
-    }
-    return this.sourceLattice;
-  }
-
-  @Override
-  public String toString() {
-    StringBuilder sb = new StringBuilder(source());
-    if (target() != null) {
-      sb.append(" ||| " + target());
-    }
-    return sb.toString();
-  }
-
-  public boolean hasPath(int begin, int end) {
-    return getLattice().distance(begin, end) != -1;
-  }
-
-  public Node<Token> getNode(int i) {
-    return getLattice().getNode(i);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/segment_file/Token.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/segment_file/Token.java b/src/joshua/decoder/segment_file/Token.java
deleted file mode 100644
index bddfd68..0000000
--- a/src/joshua/decoder/segment_file/Token.java
+++ /dev/null
@@ -1,147 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.segment_file;
-
-import static joshua.util.FormatUtils.escapeSpecialSymbols;
-
-import java.util.HashMap;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import joshua.corpus.Vocabulary;
-import joshua.decoder.Decoder;
-import joshua.decoder.JoshuaConfiguration;
-import joshua.util.FormatUtils;
-
-/**
- * Stores the identity of a word and its annotations in a sentence.
-
- * @author "Gaurav Kumar"
- * @author Matt Post
- */
-public class Token {
-  // The token without the annotations
-  private String token; 
-  private int tokenID;
-
-  private HashMap<String,String> annotations = null;
-  private JoshuaConfiguration joshuaConfiguration;
-
-  /**
-   * Constructor : Creates a Token object from a raw word
-   * Extracts and assigns an annotation when available.
-   * Any word can be marked with annotations, which are arbitrary semicolon-delimited
-   * key[=value] pairs (the value is optional) listed in brackets after a word, e.g.,
-   * 
-   *    Je[ref=Samuel;PRO] voudrais[FUT;COND] ...
-   * 
-   * This will create a dictionary annotation on the word of the following form for "Je"
-   * 
-   *   ref -> Samuel
-   *   PRO -> PRO
-   *   
-   * and the following for "voudrais":
-   * 
-   *   FUT  -> FUT
-   *   COND -> COND
-   * 
-   * @param rawWord A word with annotation information (possibly)
-   *  
-   */
-  public Token(String rawWord, JoshuaConfiguration config) {
-    
-    this.joshuaConfiguration = config;
-    
-    annotations = new HashMap<String,String>();
-    
-    // Matches a word with an annotation
-    // Check guidelines in constructor description
-    Pattern pattern = Pattern.compile("(\\S+)\\[(\\S+)\\]");
-    Matcher tag = pattern.matcher(rawWord);
-    if (tag.find()) {
-      // Annotation match found
-      token = tag.group(1);
-      String tagStr = tag.group(2);
-
-      for (String annotation: tagStr.split(";")) {
-        int where = annotation.indexOf("=");
-        if (where != -1) {
-          annotations.put(annotation.substring(0, where), annotation.substring(where + 1));
-        } else {
-          annotations.put(annotation, annotation);
-        }
-      }
-    } else {
-      // No match found, which implies that this token does not have any annotations 
-      token = rawWord;
-    }
-
-    // Mask strings that cause problems for the decoder. This has to be done *after* parsing for
-    // annotations.
-    token = escapeSpecialSymbols(token);
-
-    if (joshuaConfiguration != null && joshuaConfiguration.lowercase) {
-      if (FormatUtils.ISALLUPPERCASE(token))
-        annotations.put("lettercase", "all-upper");
-      else if (Character.isUpperCase(token.charAt(0)))
-        annotations.put("lettercase",  "upper");
-      else
-        annotations.put("lettercase",  "lower");
-      
-      Decoder.LOG(2, String.format("TOKEN: %s -> %s (%s)", token, token.toLowerCase(), annotations.get("lettercase")));
-      token = token.toLowerCase(); 
-    }
-    
-    tokenID = Vocabulary.id(token);
-  }
-  
-  /**
-   * Returns the word ID (vocab ID) for this token
-   * 
-   * @return int A word ID
-   */
-  public int getWord() {
-    return tokenID;
-  }
-
-  /**
-   * Returns the string associated with this token
-   * @return String A word
-   */
-  public String getWordIdentity() {
-    return token;
-  }
-  
-  public String toString() {
-    return token;
-  }
-
-  /**
-   * Returns the annotationID (vocab ID)
-   * associated with this token
-   * @return int A type ID
-   */
-  public String getAnnotation(String key) {
-    if (annotations.containsKey(key)) {
-      return annotations.get(key);
-    }
-    
-    return null;
-  }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/segment_file/package.html
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/segment_file/package.html b/src/joshua/decoder/segment_file/package.html
deleted file mode 100644
index 8f06ebc..0000000
--- a/src/joshua/decoder/segment_file/package.html
+++ /dev/null
@@ -1,17 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
-<html>
-<head></head>
-<body bgcolor="white">
-
-<!--
-##### THIS IS THE TEMPLATE FOR THE PACKAGE DOC COMMENTS. #####
-##### TYPE YOUR PACKAGE COMMENTS HERE.  BEGIN WITH A     #####
-##### ONE-SENTENCE SUMMARY STARTING WITH A VERB LIKE:    #####
--->
-
-Provides common interfaces for parsing segment files (aka test corpora to be translated). In order to support constraint annotations, we provide a general API for use by JoshuaDecoder and Chart.
-
-<!-- Put @see and @since tags down here. -->
-
-</body>
-</html>

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/lattice/Arc.java
----------------------------------------------------------------------
diff --git a/src/joshua/lattice/Arc.java b/src/joshua/lattice/Arc.java
deleted file mode 100644
index 793a128..0000000
--- a/src/joshua/lattice/Arc.java
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.lattice;
-
-
-/**
- * An arc in a directed graph.
- * 
- * @author Lane Schwartz
- * @since 2008-07-08
- * 
- * @param Label Type of label associated with an arc.
- */
-public class Arc<Label> {
-
-  /**
-   * Weight of this arc.
-   */
-  private float cost;
-
-  /**
-   * Node where this arc ends. 
-   */
-  private Node<Label> head;
-
-  /**
-   * Node where this arc begins.
-   */
-  private Node<Label> tail;
-
-  /**
-   * Label associated with this arc.
-   */
-  private Label label;
-  
-  /**
-   * Creates an arc with the specified head, tail, cost, and label.
-   * 
-   * @param head The node where this arc begins.
-   * @param tail The node where this arc ends.
-   * @param cost The cost of this arc.
-   * @param label The label associated with this arc.
-   */
-  public Arc(Node<Label> tail, Node<Label> head, float cost, Label label) {
-    this.tail = tail;
-    this.head = head;
-    this.cost = cost;
-    this.label = label;
-  }
-
-  /**
-   * Gets the cost of this arc.
-   * 
-   * @return The cost of this arc.
-   */
-  public float getCost() {
-    return cost;
-  }
-
-  /**
-   * Gets the tail of this arc (the node where this arc begins).
-   * 
-   * @return The tail of this arc.
-   */
-  public Node<Label> getTail() {
-    return tail;
-  }
-
-  /**
-   * Gets the head of this arc (the node where this arc ends).
-   * 
-   * @return The head of this arc.
-   */
-  public Node<Label> getHead() {
-    return head;
-  }
-
-  /**
-   * Gets the label associated with this arc.
-   * 
-   * @return The label associated with this arc.
-   */
-  public Label getLabel() {
-    return label;
-  }
-
-  @Override
-  public String toString() {
-    StringBuilder s = new StringBuilder();
-
-    s.append(label.toString());
-    s.append("  :  ");
-    s.append(tail.toString());
-    s.append(" ==> ");
-    s.append(head.toString());
-    s.append("  :  ");
-    s.append(cost);
-
-    return s.toString();
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/lattice/Lattice.java
----------------------------------------------------------------------
diff --git a/src/joshua/lattice/Lattice.java b/src/joshua/lattice/Lattice.java
deleted file mode 100644
index b0ef40f..0000000
--- a/src/joshua/lattice/Lattice.java
+++ /dev/null
@@ -1,515 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.lattice;
-
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Stack;
-import java.util.logging.Logger;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import joshua.corpus.Vocabulary;
-import joshua.decoder.JoshuaConfiguration;
-import joshua.decoder.segment_file.Token;
-import joshua.util.ChartSpan;
-
-/**
- * A lattice representation of a directed graph.
- * 
- * @author Lane Schwartz
- * @author Matt Post <po...@cs.jhu.edu>
- * @since 2008-07-08
- * 
- * @param Label Type of label associated with an arc.
- */
-public class Lattice<Value> implements Iterable<Node<Value>> {
-
-  /**
-   * True if there is more than one path through the lattice.
-   */
-  private boolean latticeHasAmbiguity;
-
-  /**
-   * Costs of the best path between each pair of nodes in the lattice.
-   */
-  private ChartSpan<Integer> distances = null;
-
-  /**
-   * List of all nodes in the lattice. Nodes are assumed to be in topological order.
-   */
-  private List<Node<Value>> nodes;
-
-  /** Logger for this class. */
-  private static final Logger logger = Logger.getLogger(Lattice.class.getName());
-  
-  JoshuaConfiguration config = null;
-
-  /**
-   * Constructs a new lattice from an existing list of (connected) nodes.
-   * <p>
-   * The list of nodes must already be in topological order. If the list is not in topological
-   * order, the behavior of the lattice is not defined.
-   * 
-   * @param nodes A list of nodes which must be in topological order.
-   */
-  public Lattice(List<Node<Value>> nodes, JoshuaConfiguration config) {
-    this.nodes = nodes;
-//    this.distances = calculateAllPairsShortestPath();
-    this.latticeHasAmbiguity = true;
-  }
-
-  public Lattice(List<Node<Value>> nodes, boolean isAmbiguous, JoshuaConfiguration config) {
-    // Node<Value> sink = new Node<Value>(nodes.size());
-    // nodes.add(sink);
-    this.nodes = nodes;
-//    this.distances = calculateAllPairsShortestPath();
-    this.latticeHasAmbiguity = isAmbiguous;
-  }
-
-  /**
-   * Instantiates a lattice from a linear chain of values, i.e., a sentence.
-   * 
-   * @param linearChain a sequence of Value objects
-   */
-  public Lattice(Value[] linearChain, JoshuaConfiguration config) {
-    this.latticeHasAmbiguity = false;
-    this.nodes = new ArrayList<Node<Value>>();
-
-    Node<Value> previous = new Node<Value>(0);
-    nodes.add(previous);
-
-    int i = 1;
-
-    for (Value value : linearChain) {
-
-      Node<Value> current = new Node<Value>(i);
-      float cost = 0.0f;
-      // if (i > 4) cost = (float)i/1.53432f;
-      previous.addArc(current, cost, value);
-
-      nodes.add(current);
-
-      previous = current;
-      i++;
-    }
-
-//    this.distances = calculateAllPairsShortestPath();
-  }
-
-  public final boolean hasMoreThanOnePath() {
-    return latticeHasAmbiguity;
-  }
-
-  /**
-   * Computes the shortest distance between two nodes, which is used (perhaps among other places) in
-   * computing which rules can apply over which spans of the input
-   * 
-   * @param tail
-   * @param head
-   * @return the distance, a positive number, or -1 if there is no path between the nodes
-   */
-  public int distance(Arc<Value> arc) {
-    return this.getShortestPath(arc.getTail().getNumber(), arc.getHead().getNumber());
-  }
-
-  public int distance(int i, int j) {
-    return this.getShortestPath(i, j);
-  }
-
-  /**
-   * Convenience method to get a lattice from a linear sequence of {@link Token} objects.
-   * 
-   * @param linearChain
-   * @return Lattice representation of the linear chain.
-   */
-  public static Lattice<Token> createTokenLatticeFromString(String source, JoshuaConfiguration config) {
-    String[] tokens = source.split("\\s+");
-    Token[] integerSentence = new Token[tokens.length];
-    for (int i = 0; i < tokens.length; i++) {
-      integerSentence[i] = new Token(tokens[i], config);
-    }
-
-    return new Lattice<Token>(integerSentence, config);
-  }
-
-  public static Lattice<Token> createTokenLatticeFromPLF(String data, JoshuaConfiguration config) {
-    ArrayList<Node<Token>> nodes = new ArrayList<Node<Token>>();
-    
-    // This matches a sequence of tuples, which describe arcs leaving this node
-    Pattern nodePattern = Pattern.compile("(.+?)\\(\\s*(\\(.+?\\),\\s*)\\s*\\)(.*)");
-
-    /*
-     * This matches a comma-delimited, parenthesized tuple of a (a) single-quoted word (b) a number,
-     * optionally in scientific notation (c) an offset (how many states to jump ahead)
-     */
-    Pattern arcPattern = Pattern
-        .compile("\\s*\\('(.+?)',\\s*(-?\\d+\\.?\\d*?(?:[eE]-?\\d+)?),\\s*(\\d+)\\),\\s*(.*)");
-
-    Matcher nodeMatcher = nodePattern.matcher(data);
-
-    boolean latticeIsAmbiguous = false;
-
-    int nodeID = 0;
-    Node<Token> startNode = new Node<Token>(nodeID);
-    nodes.add(startNode);
-
-    while (nodeMatcher.matches()) {
-
-      String nodeData = nodeMatcher.group(2);
-      String remainingData = nodeMatcher.group(3);
-
-      nodeID++;
-
-      Node<Token> currentNode = null;
-      if (nodeID < nodes.size() && nodes.get(nodeID) != null) {
-        currentNode = nodes.get(nodeID);
-      } else {
-        currentNode = new Node<Token>(nodeID);
-        while (nodeID > nodes.size())
-          nodes.add(new Node<Token>(nodes.size()));
-        nodes.add(currentNode);
-      }
-
-      Matcher arcMatcher = arcPattern.matcher(nodeData);
-      int numArcs = 0;
-      if (!arcMatcher.matches()) {
-        throw new RuntimeException("Parse error!");
-      }
-      while (arcMatcher.matches()) {
-        numArcs++;
-        String arcLabel = arcMatcher.group(1);
-        float arcWeight = Float.parseFloat(arcMatcher.group(2));
-        int destinationNodeID = nodeID + Integer.parseInt(arcMatcher.group(3));
-
-        Node<Token> destinationNode;
-        if (destinationNodeID < nodes.size() && nodes.get(destinationNodeID) != null) {
-          destinationNode = nodes.get(destinationNodeID);
-        } else {
-          destinationNode = new Node<Token>(destinationNodeID);
-          while (destinationNodeID > nodes.size())
-            nodes.add(new Node<Token>(nodes.size()));
-          nodes.add(destinationNode);
-        }
-
-        String remainingArcs = arcMatcher.group(4);
-
-        Token arcToken = new Token(arcLabel, config);
-        currentNode.addArc(destinationNode, arcWeight, arcToken);
-
-        arcMatcher = arcPattern.matcher(remainingArcs);
-      }
-      if (numArcs > 1)
-        latticeIsAmbiguous = true;
-
-      nodeMatcher = nodePattern.matcher(remainingData);
-    }
-
-    /* Add <s> to the start of the lattice. */
-    if (nodes.size() > 1 && nodes.get(1) != null) {
-      Node<Token> firstNode = nodes.get(1);
-      startNode.addArc(firstNode, 0.0f, new Token(Vocabulary.START_SYM, config));
-    }
-
-    /* Add </s> as a final state, connect it to the previous end-state */
-    nodeID = nodes.get(nodes.size()-1).getNumber() + 1;
-    Node<Token> endNode = new Node<Token>(nodeID);
-    nodes.get(nodes.size()-1).addArc(endNode, 0.0f, new Token(Vocabulary.STOP_SYM, config));
-    nodes.add(endNode);
-
-    return new Lattice<Token>(nodes, latticeIsAmbiguous, config);
-  }
-
-  /**
-   * Constructs a lattice from a given string representation.
-   * 
-   * @param data String representation of a lattice.
-   * @return A lattice that corresponds to the given string.
-   */
-  public static Lattice<String> createStringLatticeFromString(String data, JoshuaConfiguration config) {
-
-    Map<Integer, Node<String>> nodes = new HashMap<Integer, Node<String>>();
-
-    Pattern nodePattern = Pattern.compile("(.+?)\\((\\(.+?\\),)\\)(.*)");
-    Pattern arcPattern = Pattern.compile("\\('(.+?)',(\\d+.\\d+),(\\d+)\\),(.*)");
-
-    Matcher nodeMatcher = nodePattern.matcher(data);
-
-    int nodeID = -1;
-
-    while (nodeMatcher.matches()) {
-
-      String nodeData = nodeMatcher.group(2);
-      String remainingData = nodeMatcher.group(3);
-
-      nodeID++;
-
-      Node<String> currentNode;
-      if (nodes.containsKey(nodeID)) {
-        currentNode = nodes.get(nodeID);
-      } else {
-        currentNode = new Node<String>(nodeID);
-        nodes.put(nodeID, currentNode);
-      }
-
-      logger.fine("Node " + nodeID + ":");
-
-      Matcher arcMatcher = arcPattern.matcher(nodeData);
-
-      while (arcMatcher.matches()) {
-        String arcLabel = arcMatcher.group(1);
-        float arcWeight = Float.valueOf(arcMatcher.group(2));
-        int destinationNodeID = nodeID + Integer.parseInt(arcMatcher.group(3));
-
-        Node<String> destinationNode;
-        if (nodes.containsKey(destinationNodeID)) {
-          destinationNode = nodes.get(destinationNodeID);
-        } else {
-          destinationNode = new Node<String>(destinationNodeID);
-          nodes.put(destinationNodeID, destinationNode);
-        }
-
-        String remainingArcs = arcMatcher.group(4);
-
-        logger.fine("\t" + arcLabel + " " + arcWeight + " " + destinationNodeID);
-
-        currentNode.addArc(destinationNode, arcWeight, arcLabel);
-
-        arcMatcher = arcPattern.matcher(remainingArcs);
-      }
-
-      nodeMatcher = nodePattern.matcher(remainingData);
-    }
-
-    List<Node<String>> nodeList = new ArrayList<Node<String>>(nodes.values());
-    Collections.sort(nodeList, new NodeIdentifierComparator());
-
-    logger.fine(nodeList.toString());
-
-    return new Lattice<String>(nodeList, config);
-  }
-
-  /**
-   * Gets the cost of the shortest path between two nodes.
-   * 
-   * @param from ID of the starting node.
-   * @param to ID of the ending node.
-   * @return The cost of the shortest path between the two nodes.
-   */
-  public int getShortestPath(int from, int to) {
-    // System.err.println(String.format("DISTANCE(%d,%d) = %f", from, to, costs[from][to]));
-    if (distances == null)
-      this.distances = calculateAllPairsShortestPath();
-    
-    return distances.get(from, to);
-  }
-
-  /**
-   * Gets the shortest distance through the lattice.
-   * 
-   */
-  public int getShortestDistance() {
-    if (distances == null)
-      distances = calculateAllPairsShortestPath();
-    return distances.get(0, nodes.size()-1);
-  }
-
-  /**
-   * Gets the node with a specified integer identifier. If the identifier is negative, we count
-   * backwards from the end of the array, Perl-style (-1 is the last element, -2 the penultimate,
-   * etc).
-   * 
-   * @param index Integer identifier for a node.
-   * @return The node with the specified integer identifier
-   */
-  public Node<Value> getNode(int index) {
-    if (index >= 0)
-      return nodes.get(index);
-    else
-      return nodes.get(size() + index);
-  }
-
-  public List<Node<Value>> getNodes() {
-    return nodes;
-  }
-
-  /**
-   * Returns an iterator over the nodes in this lattice.
-   * 
-   * @return An iterator over the nodes in this lattice.
-   */
-  public Iterator<Node<Value>> iterator() {
-    return nodes.iterator();
-  }
-
-  /**
-   * Returns the number of nodes in this lattice.
-   * 
-   * @return The number of nodes in this lattice.
-   */
-  public int size() {
-    return nodes.size();
-  }
-
-  /**
-   * Calculate the all-pairs shortest path for all pairs of nodes.
-   * <p>
-   * Note: This method assumes no backward arcs. If there are backward arcs, the returned shortest
-   * path costs for that node may not be accurate.
-   * 
-   * @param nodes A list of nodes which must be in topological order.
-   * @return The all-pairs shortest path for all pairs of nodes.
-   */
-  private ChartSpan<Integer> calculateAllPairsShortestPath() {
-
-    ChartSpan<Integer> distance = new ChartSpan<Integer>(nodes.size() - 1, Integer.MAX_VALUE);
-    distance.setDiagonal(0);
-
-    /* Mark reachability between immediate neighbors */
-    for (Node<Value> tail : nodes) {
-      for (Arc<Value> arc : tail.getOutgoingArcs()) {
-        Node<Value> head = arc.getHead();
-        distance.set(tail.id(), head.id(), 1);
-      }
-    }
-
-    int size = nodes.size();
-
-    for (int width = 2; width <= size; width++) {
-      for (int i = 0; i < size - width; i++) {
-        int j = i + width;
-        for (int k = i + 1; k < j; k++) {
-          distance.set(i, j, Math.min(distance.get(i, j), distance.get(i, k) + distance.get(k, j)));
-        }
-      }
-    }
-
-    return distance;
-  }
-
-  @Override
-  public String toString() {
-    StringBuilder s = new StringBuilder();
-
-    for (Node<Value> start : this) {
-      for (Arc<Value> arc : start.getOutgoingArcs()) {
-        s.append(arc.toString());
-        s.append('\n');
-      }
-    }
-
-    return s.toString();
-  }
-
-  public static void main(String[] args) {
-
-    List<Node<String>> nodes = new ArrayList<Node<String>>();
-    for (int i = 0; i < 4; i++) {
-      nodes.add(new Node<String>(i));
-    }
-
-    nodes.get(0).addArc(nodes.get(1), 1.0f, "x");
-    nodes.get(1).addArc(nodes.get(2), 1.0f, "y");
-    nodes.get(0).addArc(nodes.get(2), 1.5f, "a");
-    nodes.get(2).addArc(nodes.get(3), 3.0f, "b");
-    nodes.get(2).addArc(nodes.get(3), 5.0f, "c");
-
-    Lattice<String> graph = new Lattice<String>(nodes, null);
-
-    System.out.println("Shortest path from 0 to 3: " + graph.getShortestPath(0, 3));
-  }
-
-  /**
-   * Replaced the arc from node i to j with the supplied lattice. This is used to do OOV
-   * segmentation of words in a lattice.
-   * 
-   * @param i
-   * @param j
-   * @param lattice
-   */
-  public void insert(int i, int j, List<Node<Value>> newNodes) {
-    
-    nodes.get(i).setOutgoingArcs(newNodes.get(0).getOutgoingArcs());
-    
-    newNodes.remove(0);
-    nodes.remove(j);
-    Collections.reverse(newNodes);
-    
-    for (Node<Value> node: newNodes)
-      nodes.add(j, node);
-  
-    this.latticeHasAmbiguity = false;
-    for (int x = 0; x < nodes.size(); x++) {
-      nodes.get(x).setID(x);
-      this.latticeHasAmbiguity |= (nodes.get(x).getOutgoingArcs().size() > 1);
-    }
-    
-    this.distances = null;
-  }
-
-  /**
-   * Topologically sorts the nodes and reassigns their numbers. Assumes that the first node is the
-   * source, but otherwise assumes nothing about the input.
-   * 
-   * Probably correct, but untested.
-   */
-  @SuppressWarnings("unused")
-  private void topologicalSort() {
-    HashMap<Node<Value>, List<Arc<Value>>> outgraph = new HashMap<Node<Value>, List<Arc<Value>>>();
-    HashMap<Node<Value>, List<Arc<Value>>> ingraph = new HashMap<Node<Value>, List<Arc<Value>>>();
-    for (Node<Value> node: nodes) {
-      ArrayList<Arc<Value>> arcs = new ArrayList<Arc<Value>>();
-      for (Arc<Value> arc: node.getOutgoingArcs()) {
-        arcs.add(arc);
-        
-        if (! ingraph.containsKey(arc.getHead()))
-          ingraph.put(arc.getHead(), new ArrayList<Arc<Value>>());
-        ingraph.get(arc.getHead()).add(arc);
-        
-        outgraph.put(node, arcs);
-      }
-    }
-    
-    ArrayList<Node<Value>> sortedNodes = new ArrayList<Node<Value>>();
-    Stack<Node<Value>> stack = new Stack<Node<Value>>();
-    stack.push(nodes.get(0));
-    
-    while (! stack.empty()) {
-      Node<Value> node = stack.pop();
-      sortedNodes.add(node);
-      for (Arc<Value> arc: outgraph.get(node)) {
-        outgraph.get(node).remove(arc);
-        ingraph.get(arc.getHead()).remove(arc);
-        
-        if (ingraph.get(arc.getHead()).size() == 0)
-          sortedNodes.add(arc.getHead());
-      }
-    }
-    
-    int id = 0;
-    for (Node<Value> node : sortedNodes)
-      node.setID(id++);
-    
-    this.nodes = sortedNodes;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/lattice/Node.java
----------------------------------------------------------------------
diff --git a/src/joshua/lattice/Node.java b/src/joshua/lattice/Node.java
deleted file mode 100644
index 31dcea9..0000000
--- a/src/joshua/lattice/Node.java
+++ /dev/null
@@ -1,158 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.lattice;
-
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-
-/**
- * A node in a directed graph.
- * 
- * @author Lane Schwartz
- * @since 2008-07-08
- * 
- * @param <Label> Type of label associated with an arc.
- */
-public class Node<Label> {
-
-  // ===============================================================
-  // Member variables
-  // ===============================================================
-
-  /**
-   * Numeric integer identifier of this node. Package-private scope so that Lattice can quickly
-   * access this variable.
-   */
-  private Integer id;
-
-  /**
-   * Arcs which begin at this node. Package-private scope so that Lattice can quickly access this
-   * variable.
-   */
-  private List<Arc<Label>> outgoingArcs;
-
-
-  // ===============================================================
-  // Constructor(s)
-  // ===============================================================
-
-  /**
-   * Constructs a new node with the specified numeric identifier.
-   */
-  public Node(int id) {
-    this.id = id;
-    this.outgoingArcs = new ArrayList<Arc<Label>>();
-  }
-
-
-  // ===========================================================
-  // Accessor methods (set/get)
-  // ===========================================================
-
-  /**
-   * Gets the numeric integer identifier of this node.
-   * 
-   * @return Numeric integer identifier of this node.
-   */
-  public int getNumber() {
-    return id;
-    
-  }
-  
-  public int id() {
-    return id;
-  }
-  
-  public void setID(int i) {
-    this.id = i;
-  }
-
-  /**
-   * Gets the arcs that begin at this node.
-   * 
-   * @return The arcs that begin at this node.
-   */
-  public List<Arc<Label>> getOutgoingArcs() {
-    return outgoingArcs;
-  }
-
-  public void setOutgoingArcs(List<Arc<Label>> arcs) {
-    outgoingArcs = arcs;
-  }
-
-  /**
-   * Gets an iterable object capable of iterating over all nodes directly reachable from this node.
-   * This will be all nodes which are the target of an outgoing arc from this node.
-   * 
-   * @return An iterable object capable of iterating over all nodes directly reachable from this
-   *         node.
-   */
-  public Iterable<Node<Label>> reachableNodes() {
-    final Iterator<Arc<Label>> arcIterator = outgoingArcs.iterator();
-
-    return new Iterable<Node<Label>>() {
-      public Iterator<Node<Label>> iterator() {
-        return new Iterator<Node<Label>>() {
-
-          public boolean hasNext() {
-            return arcIterator.hasNext();
-          }
-
-          public Node<Label> next() {
-            return arcIterator.next().getHead();
-          }
-
-          public void remove() {
-            throw new UnsupportedOperationException();
-          }
-        };
-      }
-    };
-  }
-
-
-  /**
-   * Adds a new outgoing arc to this node that points to the specified destination. The new arc will
-   * have the specified weight and specified label.
-   * 
-   * @param destination Destination node of the new outgoing arc.
-   * @param weight Weight of the new outgoing arc.
-   * @param label Label of the new outgoing arc.
-   */
-  public void addArc(Node<Label> destination, float weight, Label label) {
-    outgoingArcs.add(new Arc<Label>(this, destination, weight, label));
-  }
-
-
-  /**
-   * Gets the number of outgoing arcs that begin at this node.
-   * 
-   * @return The number of outgoing arcs that begin at this node.
-   */
-  public int size() {
-    return outgoingArcs.size();
-  }
-
-  @Override
-  public String toString() {
-    return "Node-" + id;
-  }
-
-}