You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/08/23 22:17:55 UTC

[38/50] [abbrv] incubator-joshua git commit: Merge branch 'master' into 7

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dc756709/joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java
----------------------------------------------------------------------
diff --cc joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java
index 355a6f1,0000000..5c123f9
mode 100644,000000..100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java
@@@ -1,746 -1,0 +1,743 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.joshua.decoder.chart_parser;
 +
 +import java.util.ArrayList;
 +import java.util.Arrays;
 +import java.util.HashSet;
 +import java.util.List;
 +import java.util.PriorityQueue;
 +
 +import org.apache.joshua.corpus.Vocabulary;
 +import org.apache.joshua.decoder.JoshuaConfiguration;
 +import org.apache.joshua.decoder.chart_parser.DotChart.DotNode;
 +import org.apache.joshua.decoder.ff.FeatureFunction;
 +import org.apache.joshua.decoder.ff.SourceDependentFF;
 +import org.apache.joshua.decoder.ff.tm.AbstractGrammar;
 +import org.apache.joshua.decoder.ff.tm.Grammar;
 +import org.apache.joshua.decoder.ff.tm.Rule;
 +import org.apache.joshua.decoder.ff.tm.RuleCollection;
 +import org.apache.joshua.decoder.ff.tm.Trie;
 +import org.apache.joshua.decoder.ff.tm.hash_based.MemoryBasedBatchGrammar;
 +import org.apache.joshua.decoder.hypergraph.HGNode;
 +import org.apache.joshua.decoder.hypergraph.HyperGraph;
 +import org.apache.joshua.decoder.segment_file.Sentence;
 +import org.apache.joshua.decoder.segment_file.Token;
 +import org.apache.joshua.lattice.Arc;
 +import org.apache.joshua.lattice.Lattice;
 +import org.apache.joshua.lattice.Node;
 +import org.apache.joshua.util.ChartSpan;
 +import org.slf4j.Logger;
 +import org.slf4j.LoggerFactory;
 +
 +/**
 + * Chart class this class implements chart-parsing: (1) seeding the chart (2)
 + * cky main loop over bins, (3) identify applicable rules in each bin
 + * 
 + * Note: the combination operation will be done in Cell
 + * 
 + * Signatures of class: Cell: i, j SuperNode (used for CKY check): i,j, lhs
 + * HGNode ("or" node): i,j, lhs, edge ngrams HyperEdge ("and" node)
 + * 
 + * index of sentences: start from zero index of cell: cell (i,j) represent span
 + * of words indexed [i,j-1] where i is in [0,n-1] and j is in [1,n]
 + * 
 + * @author Zhifei Li, zhifei.work@gmail.com
 + * @author Matt Post post@cs.jhu.edu
 + */
 +
 +public class Chart {
 +
 +  private static final Logger LOG = LoggerFactory.getLogger(Chart.class);
 +  private final JoshuaConfiguration config;
 +  // ===========================================================
 +  // Statistics
 +  // ===========================================================
 +
 +  /**
 +   * how many items have been pruned away because its cost is greater than the
 +   * cutoff in calling chart.add_deduction_in_chart()
 +   */
 +  int nMerged = 0;
 +  int nAdded = 0;
 +  int nDotitemAdded = 0; // note: there is no pruning in dot-item
 +
 +  public Sentence getSentence() {
 +    return this.sentence;
 +  }
 +  
 +  // ===============================================================
 +  // Private instance fields (maybe could be protected instead)
 +  // ===============================================================
-   private ChartSpan<Cell> cells; // note that in some cell, it might be null
-   private int sourceLength;
-   private List<FeatureFunction> featureFunctions;
-   private Grammar[] grammars;
-   private DotChart[] dotcharts; // each grammar should have a dotchart associated with it
++  private final ChartSpan<Cell> cells; // note that in some cell, it might be null
++  private final int sourceLength;
++  private final List<FeatureFunction> featureFunctions;
++  private final Grammar[] grammars;
++  private final DotChart[] dotcharts; // each grammar should have a dotchart associated with it
 +  private Cell goalBin;
 +  private int goalSymbolID = -1;
-   private Lattice<Token> inputLattice;
++  private final Lattice<Token> inputLattice;
 +
 +  private Sentence sentence = null;
 +//  private SyntaxTree parseTree;
 +  private StateConstraint stateConstraint;
 +
 +
 +  // ===============================================================
 +  // Constructors
 +  // ===============================================================
 +
 +  /*
 +   * TODO: Once the Segment interface is adjusted to provide a Lattice<String>
 +   * for the sentence() method, we should just accept a Segment instead of the
 +   * sentence, segmentID, and constraintSpans parameters. We have the symbol
 +   * table already, so we can do the integerization here instead of in
 +   * DecoderThread. GrammarFactory.getGrammarForSentence will want the
 +   * integerized sentence as well, but then we'll need to adjust that interface
 +   * to deal with (non-trivial) lattices too. Of course, we get passed the
 +   * grammars too so we could move all of that into here.
 +   */
 +
 +  public Chart(Sentence sentence, List<FeatureFunction> featureFunctions, Grammar[] grammars,
 +      String goalSymbol, JoshuaConfiguration config) {
 +    this.config = config;
 +    this.inputLattice = sentence.getLattice();
 +    this.sourceLength = inputLattice.size() - 1;
 +    this.featureFunctions = featureFunctions;
 +
 +    this.sentence = sentence;
 +
 +    // TODO: OOV handling no longer handles parse tree input (removed after
 +    // commit 748eb69714b26dd67cba8e7c25a294347603bede)
 +//    this.parseTree = null;
 +//    if (sentence instanceof ParsedSentence)
 +//      this.parseTree = ((ParsedSentence) sentence).syntaxTree();
 +//
-     this.cells = new ChartSpan<Cell>(sourceLength, null);
++    this.cells = new ChartSpan<>(sourceLength, null);
 +
 +    this.goalSymbolID = Vocabulary.id(goalSymbol);
 +    this.goalBin = new Cell(this, this.goalSymbolID);
 +
 +    /* Create the grammars, leaving space for the OOV grammar. */
 +    this.grammars = new Grammar[grammars.length + 1];
-     for (int i = 0; i < grammars.length; i++)
-       this.grammars[i + 1] = grammars[i];
++    System.arraycopy(grammars, 0, this.grammars, 1, grammars.length);
 +
 +    MemoryBasedBatchGrammar oovGrammar = new MemoryBasedBatchGrammar("oov", this.config, 20);
 +    AbstractGrammar.addOOVRules(oovGrammar, sentence.getLattice(), featureFunctions,
 +        this.config.true_oovs_only);
 +    this.grammars[0] = oovGrammar;
 +
 +    // each grammar will have a dot chart
 +    this.dotcharts = new DotChart[this.grammars.length];
 +    for (int i = 0; i < this.grammars.length; i++)
 +      this.dotcharts[i] = new DotChart(this.inputLattice, this.grammars[i], this);
 +
 +    // Begin to do initialization work
 +
 +    stateConstraint = null;
 +    if (sentence.target() != null)
 +      // stateConstraint = new StateConstraint(sentence.target());
 +      stateConstraint = new StateConstraint(Vocabulary.START_SYM + " " + sentence.target() + " "
 +          + Vocabulary.STOP_SYM);
 +
 +    /* Find the SourceDependent feature and give it access to the sentence. */
-     for (FeatureFunction ff : this.featureFunctions)
-       if (ff instanceof SourceDependentFF)
-         ((SourceDependentFF) ff).setSource(sentence);
++    this.featureFunctions.stream().filter(ff -> ff instanceof SourceDependentFF)
++        .forEach(ff -> ((SourceDependentFF) ff).setSource(sentence));
 +
 +    LOG.debug("Finished seeding chart.");
 +  }
 +
 +  /**
 +   * Manually set the goal symbol ID. The constructor expects a String
 +   * representing the goal symbol, but there may be time (say, for example, in
 +   * the second pass of a synchronous parse) where we want to set the goal
 +   * symbol to a particular ID (regardless of String representation).
 +   * <p>
 +   * This method should be called before expanding the chart, as chart expansion
 +   * depends on the goal symbol ID.
 +   * 
 +   * @param i the id of the goal symbol to use
 +   */
 +  public void setGoalSymbolID(int i) {
 +    this.goalSymbolID = i;
 +    this.goalBin = new Cell(this, i);
-     return;
 +  }
 +
 +  // ===============================================================
 +  // The primary method for filling in the chart
 +  // ===============================================================
 +
 +  /**
 +   * Construct the hypergraph with the help from DotChart using cube pruning.
 +   * Cube pruning occurs at the span level, with all completed rules from the
 +   * dot chart competing against each other; that is, rules with different
 +   * source sides *and* rules sharing a source side but with different target
 +   * sides are all in competition with each other.
 +   * 
 +   * Terminal rules are added to the chart directly.
 +   * 
 +   * Rules with nonterminals are added to the list of candidates. The candidates
 +   * list is seeded with the list of all rules and, for each nonterminal in the
 +   * rule, the 1-best tail node for that nonterminal and subspan. If the maximum
 +   * arity of a rule is R, then the dimension of the hypercube is R + 1, since
 +   * the first dimension is used to record the rule.
 +   */
 +  private void completeSpan(int i, int j) {
 +
 +    /* STEP 1: create the heap, and seed it with all of the candidate states */
-     PriorityQueue<CubePruneState> candidates = new PriorityQueue<CubePruneState>();
++    PriorityQueue<CubePruneState> candidates = new PriorityQueue<>();
 +
 +    /*
 +     * Look at all the grammars, seeding the chart with completed rules from the
 +     * DotChart
 +     */
 +    for (int g = 0; g < grammars.length; g++) {
 +      if (!grammars[g].hasRuleForSpan(i, j, inputLattice.distance(i, j))
 +          || null == dotcharts[g].getDotCell(i, j))
 +        continue;
 +
 +      // for each rule with applicable rules
 +      for (DotNode dotNode : dotcharts[g].getDotCell(i, j).getDotNodes()) {
 +        RuleCollection ruleCollection = dotNode.getRuleCollection();
 +        if (ruleCollection == null)
 +          continue;
 +
 +        List<Rule> rules = ruleCollection.getSortedRules(this.featureFunctions);
 +        SourcePath sourcePath = dotNode.getSourcePath();
 +
 +        if (null == rules || rules.size() == 0)
 +          continue;
 +
 +        if (ruleCollection.getArity() == 0) {
 +          /*
 +           * The total number of arity-0 items (pre-terminal rules) that we add
 +           * is controlled by num_translation_options in the configuration.
 +           * 
 +           * We limit the translation options per DotNode; that is, per LHS.
 +           */
 +          int numTranslationsAdded = 0;
 +
 +          /* Terminal productions are added directly to the chart */
 +          for (Rule rule : rules) {
 +
 +            if (config.num_translation_options > 0
 +                && numTranslationsAdded >= config.num_translation_options) {
 +              break;
 +            }
 +
 +            ComputeNodeResult result = new ComputeNodeResult(this.featureFunctions, rule, null, i,
 +                j, sourcePath, this.sentence);
 +
 +            if (stateConstraint == null || stateConstraint.isLegal(result.getDPStates())) {
 +              getCell(i, j).addHyperEdgeInCell(result, rule, i, j, null, sourcePath, true);
 +              numTranslationsAdded++;
 +            }
 +          }
 +        } else {
 +          /* Productions with rank > 0 are subject to cube pruning */
 +
 +          Rule bestRule = rules.get(0);
 +
-           List<HGNode> currentTailNodes = new ArrayList<HGNode>();
++          List<HGNode> currentTailNodes = new ArrayList<>();
 +          List<SuperNode> superNodes = dotNode.getAntSuperNodes();
 +          for (SuperNode si : superNodes) {
 +            currentTailNodes.add(si.nodes.get(0));
 +          }
 +
 +          /*
 +           * `ranks` records the current position in the cube. the 0th index is
 +           * the rule, and the remaining indices 1..N correspond to the tail
 +           * nodes (= nonterminals in the rule). These tail nodes are
 +           * represented by SuperNodes, which group together items with the same
 +           * nonterminal but different DP state (e.g., language model state)
 +           */
 +          int[] ranks = new int[1 + superNodes.size()];
 +          Arrays.fill(ranks, 1);
 +
 +          ComputeNodeResult result = new ComputeNodeResult(featureFunctions, bestRule,
 +              currentTailNodes, i, j, sourcePath, sentence);
 +          CubePruneState bestState = new CubePruneState(result, ranks, rules, currentTailNodes,
 +              dotNode);
 +          candidates.add(bestState);
 +        }
 +      }
 +    }
 +
 +    applyCubePruning(i, j, candidates);
 +  }
 +
 +  /**
 +   * Applies cube pruning over a span.
 +   * 
 +   * @param i
 +   * @param j
 +   * @param stateConstraint
 +   * @param candidates
 +   */
 +  private void applyCubePruning(int i, int j, PriorityQueue<CubePruneState> candidates) {
 +
 +    // System.err.println(String.format("CUBEPRUNE: %d-%d with %d candidates",
 +    // i, j, candidates.size()));
 +    // for (CubePruneState cand: candidates) {
 +    // System.err.println(String.format("  CAND " + cand));
 +    // }
 +
 +    /*
 +     * There are multiple ways to reach each point in the cube, so short-circuit
 +     * that.
 +     */
-     HashSet<CubePruneState> visitedStates = new HashSet<CubePruneState>();
++    HashSet<CubePruneState> visitedStates = new HashSet<>();
 +
 +    int popLimit = config.pop_limit;
 +    int popCount = 0;
 +    while (candidates.size() > 0 && ((++popCount <= popLimit) || popLimit == 0)) {
 +      CubePruneState state = candidates.poll();
 +
 +      DotNode dotNode = state.getDotNode();
 +      List<Rule> rules = state.rules;
 +      SourcePath sourcePath = dotNode.getSourcePath();
 +      List<SuperNode> superNodes = dotNode.getAntSuperNodes();
 +
 +      /*
 +       * Add the hypothesis to the chart. This can only happen if (a) we're not
 +       * doing constrained decoding or (b) we are and the state is legal.
 +       */
 +      if (stateConstraint == null || stateConstraint.isLegal(state.getDPStates())) {
 +        getCell(i, j).addHyperEdgeInCell(state.computeNodeResult, state.getRule(), i, j,
 +            state.antNodes, sourcePath, true);
 +      }
 +
 +      /*
 +       * Expand the hypothesis by walking down a step along each dimension of
 +       * the cube, in turn. k = 0 means we extend the rule being used; k > 0
 +       * expands the corresponding tail node.
 +       */
 +
 +      for (int k = 0; k < state.ranks.length; k++) {
 +
 +        /* Copy the current ranks, then extend the one we're looking at. */
 +        int[] nextRanks = new int[state.ranks.length];
 +        System.arraycopy(state.ranks, 0, nextRanks, 0, state.ranks.length);
 +        nextRanks[k]++;
 +
 +        /*
 +         * We might have reached the end of something (list of rules or tail
 +         * nodes)
 +         */
 +        if (k == 0
 +            && (nextRanks[k] > rules.size() || (config.num_translation_options > 0 && nextRanks[k] > config.num_translation_options)))
 +          continue;
 +        else if ((k != 0 && nextRanks[k] > superNodes.get(k - 1).nodes.size()))
 +          continue;
 +
 +        /* Use the updated ranks to assign the next rule and tail node. */
 +        Rule nextRule = rules.get(nextRanks[0] - 1);
 +        // HGNode[] nextAntNodes = new HGNode[state.antNodes.size()];
-         List<HGNode> nextAntNodes = new ArrayList<HGNode>(state.antNodes.size());
++        List<HGNode> nextAntNodes = new ArrayList<>(state.antNodes.size());
 +        for (int x = 0; x < state.ranks.length - 1; x++)
 +          nextAntNodes.add(superNodes.get(x).nodes.get(nextRanks[x + 1] - 1));
 +
 +        /* Create the next state. */
 +        CubePruneState nextState = new CubePruneState(new ComputeNodeResult(featureFunctions,
 +            nextRule, nextAntNodes, i, j, sourcePath, this.sentence), nextRanks, rules,
 +            nextAntNodes, dotNode);
 +
 +        /* Skip states that have been explored before. */
 +        if (visitedStates.contains(nextState))
 +          continue;
 +
 +        visitedStates.add(nextState);
 +        candidates.add(nextState);
 +      }
 +    }
 +  }
 +
 +  /* Create a priority queue of candidates for each span under consideration */
 +  private PriorityQueue<CubePruneState>[] allCandidates;
 +
 +  private ArrayList<SuperNode> nodeStack;
 +
 +  /**
 +   * Translates the sentence using the CKY+ variation proposed in
 +   * "A CYK+ Variant for SCFG Decoding Without A Dot Chart" (Sennrich, SSST
 +   * 2014).
 +   */
 +  private int i = -1;
 +
 +  public HyperGraph expandSansDotChart() {
 +    for (i = sourceLength - 1; i >= 0; i--) {
 +      allCandidates = new PriorityQueue[sourceLength - i + 2];
 +      for (int id = 0; id < allCandidates.length; id++)
-         allCandidates[id] = new PriorityQueue<CubePruneState>();
++        allCandidates[id] = new PriorityQueue<>();
 +
-       nodeStack = new ArrayList<SuperNode>();
++      nodeStack = new ArrayList<>();
 +
 +      for (int j = i + 1; j <= sourceLength; j++) {
 +        if (!sentence.hasPath(i, j))
 +          continue;
 +
-         for (int g = 0; g < this.grammars.length; g++) {
++        for (Grammar grammar : this.grammars) {
 +          // System.err.println(String.format("\n*** I=%d J=%d GRAMMAR=%d", i, j, g));
 +
 +          if (j == i + 1) {
 +            /* Handle terminals */
 +            Node<Token> node = sentence.getNode(i);
 +            for (Arc<Token> arc : node.getOutgoingArcs()) {
 +              int word = arc.getLabel().getWord();
 +              // disallow lattice decoding for now
 +              assert arc.getHead().id() == j;
-               Trie trie = this.grammars[g].getTrieRoot().match(word);
++              Trie trie = grammar.getTrieRoot().match(word);
 +              if (trie != null && trie.hasRules())
 +                addToChart(trie, j, false);
 +            }
 +          } else {
 +            /* Recurse for non-terminal case */
-             consume(this.grammars[g].getTrieRoot(), i, j - 1);
++            consume(grammar.getTrieRoot(), i, j - 1);
 +          }
 +        }
 +
 +        // Now that we've accumulated all the candidates, apply cube pruning
 +        applyCubePruning(i, j, allCandidates[j - i]);
 +
 +        // Add unary nodes
 +        addUnaryNodes(this.grammars, i, j);
 +      }
 +    }
 +
 +    // transition_final: setup a goal item, which may have many deductions
 +    if (null == this.cells.get(0, sourceLength)
 +        || !this.goalBin.transitToGoal(this.cells.get(0, sourceLength), this.featureFunctions,
 +            this.sourceLength)) {
 +      LOG.warn("Input {}: Parse failure (either no derivations exist or pruning is too aggressive",
 +          sentence.id());
 +      return null;
 +    }
 +
 +    return new HyperGraph(this.goalBin.getSortedNodes().get(0), -1, -1, this.sentence);
 +  }
 +
 +  /**
 +   * Recursively consumes the trie, following input nodes, finding applicable
 +   * rules and adding them to bins for each span for later cube pruning.
 +   * 
 +   * @param dotNode data structure containing information about what's been
 +   *          already matched
 +   * @param l extension point we're looking at
 +   * 
 +   */
 +  private void consume(Trie trie, int j, int l) {
 +    /*
 +     * 1. If the trie node has any rules, we can add them to the collection
 +     * 
 +     * 2. Next, look at all the outgoing nonterminal arcs of the trie node. If
 +     * any of them match an existing chart item, then we know we can extend
 +     * (i,j) to (i,l). We then recurse for all m from l+1 to n (the end of the
 +     * sentence)
 +     * 
 +     * 3. We also try to match terminals if (j + 1 == l)
 +     */
 +
 +    // System.err.println(String.format("CONSUME %s / %d %d %d", dotNode,
 +    // dotNode.begin(), dotNode.end(), l));
 +
 +    // Try to match terminals
 +    if (inputLattice.distance(j, l) == 1) {
 +      // Get the current sentence node, and explore all outgoing arcs, since we
 +      // might be decoding
 +      // a lattice. For sentence decoding, this is trivial: there is only one
 +      // outgoing arc.
 +      Node<Token> inputNode = sentence.getNode(j);
 +      for (Arc<Token> arc : inputNode.getOutgoingArcs()) {
 +        int word = arc.getLabel().getWord();
 +        Trie nextTrie;
 +        if ((nextTrie = trie.match(word)) != null) {
 +          // add to chart item over (i, l)
 +          addToChart(nextTrie, arc.getHead().id(), i == j);
 +        }
 +      }
 +    }
 +
 +    // Now try to match nonterminals
 +    Cell cell = cells.get(j, l);
 +    if (cell != null) {
 +      for (int id : cell.getKeySet()) { // for each supernode (lhs), see if you
 +                                        // can match a trie
 +        Trie nextTrie = trie.match(id);
 +        if (nextTrie != null) {
 +          SuperNode superNode = cell.getSuperNode(id);
 +          nodeStack.add(superNode);
 +          addToChart(nextTrie, superNode.end(), i == j);
 +          nodeStack.remove(nodeStack.size() - 1);
 +        }
 +      }
 +    }
 +  }
 +
 +  /**
 +   * Adds all rules at a trie node to the chart, unless its a unary rule. A
 +   * unary rule is the first outgoing arc of a grammar's root trie. For
 +   * terminals, these are added during the seeding stage; for nonterminals,
 +   * these confuse cube pruning and can result in infinite loops, and are
 +   * handled separately (see addUnaryNodes());
 +   * 
 +   * @param trie the grammar node
 +   * @param isUnary whether the rules at this dotnode are unary
 +   */
 +  private void addToChart(Trie trie, int j, boolean isUnary) {
 +
 +    // System.err.println(String.format("ADD TO CHART %s unary=%s", dotNode,
 +    // isUnary));
 +
 +    if (!isUnary && trie.hasRules()) {
-       DotNode dotNode = new DotNode(i, j, trie, new ArrayList<SuperNode>(nodeStack), null);
++      DotNode dotNode = new DotNode(i, j, trie, new ArrayList<>(nodeStack), null);
 +
 +      addToCandidates(dotNode);
 +    }
 +
 +    for (int l = j + 1; l <= sentence.length(); l++)
 +      consume(trie, j, l);
 +  }
 +
 +  /**
 +   * Record the completed rule with backpointers for later cube-pruning.
 +   * 
 +   * @param width
 +   * @param rules
 +   * @param tailNodes
 +   */
 +  private void addToCandidates(DotNode dotNode) {
 +    // System.err.println(String.format("ADD TO CANDIDATES %s AT INDEX %d",
 +    // dotNode, dotNode.end() - dotNode.begin()));
 +
 +    // TODO: one entry per rule, or per rule instantiation (rule together with
 +    // unique matching of input)?
 +    List<Rule> rules = dotNode.getRuleCollection().getSortedRules(featureFunctions);
 +    Rule bestRule = rules.get(0);
 +    List<SuperNode> superNodes = dotNode.getAntSuperNodes();
 +
-     List<HGNode> tailNodes = new ArrayList<HGNode>();
++    List<HGNode> tailNodes = new ArrayList<>();
 +    for (SuperNode superNode : superNodes)
 +      tailNodes.add(superNode.nodes.get(0));
 +
 +    int[] ranks = new int[1 + superNodes.size()];
 +    Arrays.fill(ranks, 1);
 +
 +    ComputeNodeResult result = new ComputeNodeResult(featureFunctions, bestRule, tailNodes,
 +        dotNode.begin(), dotNode.end(), dotNode.getSourcePath(), sentence);
 +    CubePruneState seedState = new CubePruneState(result, ranks, rules, tailNodes, dotNode);
 +
 +    allCandidates[dotNode.end() - dotNode.begin()].add(seedState);
 +  }
 +
 +  /**
 +   * This function performs the main work of decoding.
 +   * 
 +   * @return the hypergraph containing the translated sentence.
 +   */
 +  public HyperGraph expand() {
 +
 +    for (int width = 1; width <= sourceLength; width++) {
 +      for (int i = 0; i <= sourceLength - width; i++) {
 +        int j = i + width;
 +        if (LOG.isDebugEnabled())
 +          LOG.debug("Processing span ({}, {})", i, j);
 +
 +        /* Skips spans for which no path exists (possible in lattices). */
 +        if (inputLattice.distance(i, j) == Float.POSITIVE_INFINITY) {
 +          continue;
 +        }
 +
 +        /*
 +         * 1. Expand the dot through all rules. This is a matter of (a) look for
 +         * rules over (i,j-1) that need the terminal at (j-1,j) and looking at
 +         * all split points k to expand nonterminals.
 +         */
 +        if (LOG.isDebugEnabled())
 +          LOG.debug("Expanding cell");
 +        for (int k = 0; k < this.grammars.length; k++) {
 +          /**
 +           * Each dotChart can act individually (without consulting other
 +           * dotCharts) because it either consumes the source input or the
 +           * complete nonTerminals, which are both grammar-independent.
 +           **/
 +          this.dotcharts[k].expandDotCell(i, j);
 +        }
 +
 +        /*
 +         * 2. The regular CKY part: add completed items onto the chart via cube
 +         * pruning.
 +         */
 +        if (LOG.isDebugEnabled())
 +          LOG.debug("Adding complete items into chart");
 +        completeSpan(i, j);
 +
 +        /* 3. Process unary rules. */
 +        if (LOG.isDebugEnabled())
 +          LOG.debug("Adding unary items into chart");
 +        addUnaryNodes(this.grammars, i, j);
 +
 +        // (4)=== in dot_cell(i,j), add dot-nodes that start from the /complete/
 +        // superIterms in
 +        // chart_cell(i,j)
 +        if (LOG.isDebugEnabled())
 +          LOG.debug("Initializing new dot-items that start from complete items in this cell");
 +        for (int k = 0; k < this.grammars.length; k++) {
 +          if (this.grammars[k].hasRuleForSpan(i, j, inputLattice.distance(i, j))) {
 +            this.dotcharts[k].startDotItems(i, j);
 +          }
 +        }
 +
 +        /*
 +         * 5. Sort the nodes in the cell.
 +         * 
 +         * Sort the nodes in this span, to make them usable for future
 +         * applications of cube pruning.
 +         */
 +        if (null != this.cells.get(i, j)) {
 +          this.cells.get(i, j).getSortedNodes();
 +        }
 +      }
 +    }
 +
 +    logStatistics();
 +
 +    // transition_final: setup a goal item, which may have many deductions
 +    if (null == this.cells.get(0, sourceLength)
 +        || !this.goalBin.transitToGoal(this.cells.get(0, sourceLength), this.featureFunctions,
 +            this.sourceLength)) {
 +      LOG.warn("Input {}: Parse failure (either no derivations exist or pruning is too aggressive",
 +          sentence.id());
 +      return null;
 +    }
 +
 +    if (LOG.isDebugEnabled())
 +      LOG.debug("Finished expand");
 +    return new HyperGraph(this.goalBin.getSortedNodes().get(0), -1, -1, this.sentence);
 +  }
 +
 +  /**
 +   * Get the requested cell, creating the entry if it doesn't already exist.
 +   * 
 +   * @param i span start
 +   * @param j span end
 +   * @return the cell item
 +   */
 +  public Cell getCell(int i, int j) {
 +    assert i >= 0;
 +    assert i <= sentence.length();
 +    assert i <= j;
 +    if (cells.get(i, j) == null)
 +      cells.set(i, j, new Cell(this, goalSymbolID));
 +
 +    return cells.get(i, j);
 +  }
 +
 +  // ===============================================================
 +  // Private methods
 +  // ===============================================================
 +
 +  private void logStatistics() {
 +    if (LOG.isDebugEnabled())
 +      LOG.debug("Input {}: Chart: added {} merged {} dot-items added: {}",
 +          this.sentence.id(), this.nAdded, this.nMerged, this.nDotitemAdded);
 +  }
 +
 +  /**
 +   * Handles expansion of unary rules. Rules are expanded in an agenda-based
 +   * manner to avoid constructing infinite unary chains. Assumes a triangle
 +   * inequality of unary rule expansion (e.g., A -> B will always be cheaper
 +   * than A -> C -> B), which is not a true assumption.
 +   * 
 +   * @param grammars A list of the grammars for the sentence
 +   * @param i
 +   * @param j
 +   * @return the number of nodes added
 +   */
 +  private int addUnaryNodes(Grammar[] grammars, int i, int j) {
 +
 +    Cell chartBin = this.cells.get(i, j);
 +    if (null == chartBin) {
 +      return 0;
 +    }
 +    int qtyAdditionsToQueue = 0;
-     ArrayList<HGNode> queue = new ArrayList<HGNode>(chartBin.getSortedNodes());
-     HashSet<Integer> seen_lhs = new HashSet<Integer>();
++    ArrayList<HGNode> queue = new ArrayList<>(chartBin.getSortedNodes());
++    HashSet<Integer> seen_lhs = new HashSet<>();
 +
 +    if (LOG.isDebugEnabled())
 +      LOG.debug("Adding unary to [{}, {}]", i, j);
 +
 +    while (queue.size() > 0) {
 +      HGNode node = queue.remove(0);
 +      seen_lhs.add(node.lhs);
 +
 +      for (Grammar gr : grammars) {
 +        if (!gr.hasRuleForSpan(i, j, inputLattice.distance(i, j)))
 +          continue;
 +
 +        /*
 +         * Match against the node's LHS, and then make sure the rule collection
 +         * has unary rules
 +         */
 +        Trie childNode = gr.getTrieRoot().match(node.lhs);
 +        if (childNode != null && childNode.getRuleCollection() != null
 +            && childNode.getRuleCollection().getArity() == 1) {
 +
-           ArrayList<HGNode> antecedents = new ArrayList<HGNode>();
++          ArrayList<HGNode> antecedents = new ArrayList<>();
 +          antecedents.add(node);
 +
 +          List<Rule> rules = childNode.getRuleCollection().getSortedRules(this.featureFunctions);
 +          for (Rule rule : rules) { // for each unary rules
 +
 +            ComputeNodeResult states = new ComputeNodeResult(this.featureFunctions, rule,
 +                antecedents, i, j, new SourcePath(), this.sentence);
 +            HGNode resNode = chartBin.addHyperEdgeInCell(states, rule, i, j, antecedents,
 +                new SourcePath(), true);
 +
 +            if (LOG.isDebugEnabled())
 +              LOG.debug("{}", rule);
 +            if (null != resNode && !seen_lhs.contains(resNode.lhs)) {
 +              queue.add(resNode);
 +              qtyAdditionsToQueue++;
 +            }
 +          }
 +        }
 +      }
 +    }
 +    return qtyAdditionsToQueue;
 +  }
 +
 +  /***
 +   * Add a terminal production (X -&gt; english phrase) to the hypergraph.
 +   * 
 +   * @param i the start index
 +   * @param j stop index
 +   * @param rule the terminal rule applied
 +   * @param srcPath the source path cost
 +   */
 +  public void addAxiom(int i, int j, Rule rule, SourcePath srcPath) {
 +    if (null == this.cells.get(i, j)) {
 +      this.cells.set(i, j, new Cell(this, this.goalSymbolID));
 +    }
 +
 +    this.cells.get(i, j).addHyperEdgeInCell(
 +        new ComputeNodeResult(this.featureFunctions, rule, null, i, j, srcPath, sentence), rule, i,
 +        j, null, srcPath, false);
 +
 +  }
 +}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dc756709/joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/ComputeNodeResult.java
----------------------------------------------------------------------
diff --cc joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/ComputeNodeResult.java
index 1fb1031,0000000..0e7cd6d
mode 100644,000000..100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/ComputeNodeResult.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/ComputeNodeResult.java
@@@ -1,226 -1,0 +1,223 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.joshua.decoder.chart_parser;
 +
 +import static org.apache.joshua.decoder.ff.FeatureMap.hashFeature;
 +
 +import java.util.ArrayList;
 +import java.util.List;
 +
 +import org.apache.joshua.decoder.Decoder;
 +import org.apache.joshua.decoder.ff.FeatureFunction;
 +import org.apache.joshua.decoder.ff.FeatureVector;
 +import org.apache.joshua.decoder.ff.StatefulFF;
 +import org.apache.joshua.decoder.ff.state_maintenance.DPState;
 +import org.apache.joshua.decoder.ff.tm.Rule;
 +import org.apache.joshua.decoder.hypergraph.HGNode;
 +import org.apache.joshua.decoder.hypergraph.HyperEdge;
 +import org.apache.joshua.decoder.segment_file.Sentence;
 +import org.slf4j.Logger;
 +import org.slf4j.LoggerFactory;
 +
 +/**
 + * This class computes the cost of applying a rule.
 + * 
 + * @author Matt Post post@cs.jhu.edu
 + * @author Zhifei Li, zhifei.work@gmail.com
 + */
 +
 +public class ComputeNodeResult {
 +
 +  private static final Logger LOG = LoggerFactory.getLogger(ComputeNodeResult.class);
 +
 +  // The cost incurred by the rule itself (and all associated feature functions)
 +  private float transitionCost;
 +
 +  // transitionCost + the Viterbi costs of the tail nodes.
 +  private float viterbiCost;
 +
-   // viterbiCost + a future estimate (outside cost estimate).
-   private float pruningCostEstimate;
- 
++  // The future or outside cost (estimated)
++  private float futureCostEstimate;
++  
 +  // The StateComputer objects themselves serve as keys.
-   private List<DPState> dpStates;
++  private final List<DPState> dpStates;
 +
 +  /**
 +   * Computes the new state(s) that are produced when applying the given rule to the list of tail
 +   * nodes. Also computes a range of costs of doing so (the transition cost, the total (Viterbi)
 +   * cost, and a score that includes a future cost estimate).
 +   * 
 +   * Old version that doesn't use the derivation state.
 +   * @param featureFunctions {@link java.util.List} of {@link org.apache.joshua.decoder.ff.FeatureFunction}'s
 +   * @param rule {@link org.apache.joshua.decoder.ff.tm.Rule} to use when computing th node result
 +   * @param tailNodes {@link java.util.List} of {@link org.apache.joshua.decoder.hypergraph.HGNode}'s
 +   * @param i todo
 +   * @param j todo
 +   * @param sourcePath information about a path taken through the source lattice
 +   * @param sentence the lattice input
 +   */
 +  public ComputeNodeResult(List<FeatureFunction> featureFunctions, Rule rule, List<HGNode> tailNodes,
 +      int i, int j, SourcePath sourcePath, Sentence sentence) {
 +
 +    // The total Viterbi cost of this edge. This is the Viterbi cost of the tail nodes, plus
 +    // whatever costs we incur applying this rule to create a new hyperedge.
-     float viterbiCost = 0.0f;
++    this.viterbiCost = 0.0f;
 +    
 +    if (LOG.isDebugEnabled()) {
 +      LOG.debug("ComputeNodeResult():");
 +      LOG.debug("-> RULE {}", rule);
 +    }
 +
 +    /*
 +     * Here we sum the accumulated cost of each of the tail nodes. The total cost of the new
 +     * hyperedge (the inside or Viterbi cost) is the sum of these nodes plus the cost of the
 +     * transition. Note that this could and should all be generalized to whatever semiring is being
 +     * used.
 +     */
 +    if (null != tailNodes) {
 +      for (HGNode item : tailNodes) {
 +        if (LOG.isDebugEnabled()) {
 +          LOG.debug("-> item.bestedge: {}", item);
 +          LOG.debug("-> TAIL NODE {}", item);
 +        }
 +        viterbiCost += item.bestHyperedge.getBestDerivationScore();
 +      }
 +    }
 +
-     List<DPState> allDPStates = new ArrayList<DPState>();
++    List<DPState> allDPStates = new ArrayList<>();
 +
 +    // The transition cost is the new cost incurred by applying this rule
-     float transitionCost = 0.0f;
++    this.transitionCost = 0.0f;
 +
 +    // The future cost estimate is a heuristic estimate of the outside cost of this edge.
-     float futureCostEstimate = 0.0f;
++    this.futureCostEstimate = 0.0f;
 +
 +    /*
 +     * We now iterate over all the feature functions, computing their cost and their expected future
 +     * cost.
 +     */
 +    for (FeatureFunction feature : featureFunctions) {
 +      FeatureFunction.ScoreAccumulator acc = feature.new ScoreAccumulator(); 
 +
 +      DPState newState = feature.compute(rule, tailNodes, i, j, sourcePath, sentence, acc);
-       transitionCost += acc.getScore();
++      this.transitionCost += acc.getScore();
 +
 +
 +      if (LOG.isDebugEnabled()) {
 +        LOG.debug("FEATURE {} = {} * {} = {}", feature.getName(),
 +            acc.getScore() / Decoder.weights.getOrDefault(hashFeature(feature.getName())),
 +            Decoder.weights.getOrDefault(hashFeature(feature.getName())), acc.getScore());
 +      }
 +
 +      if (feature.isStateful()) {
 +        futureCostEstimate += feature.estimateFutureCost(rule, newState, sentence);
 +        allDPStates.add(((StatefulFF)feature).getStateIndex(), newState);
 +      }
 +    }
-     viterbiCost += transitionCost;
++    this.viterbiCost += transitionCost;
 +    if (LOG.isDebugEnabled())
 +      LOG.debug("-> COST = {}", transitionCost);
-     // Set the final results.
-     this.pruningCostEstimate = viterbiCost + futureCostEstimate;
-     this.viterbiCost = viterbiCost;
-     this.transitionCost = transitionCost;
++
 +    this.dpStates = allDPStates;
 +  }
 +
 +  /**
 +   * This is called from {@link org.apache.joshua.decoder.chart_parser.Cell} 
 +   * when making the final transition to the goal state.
 +   * This is done to allow feature functions to correct for partial estimates, since
 +   * they now have the knowledge that the whole sentence is complete. Basically, this
 +   * is only used by LanguageModelFF, which does not score partial n-grams, and therefore
 +   * needs to correct for this when a short sentence ends. KenLMFF corrects for this by
 +   * always scoring partial hypotheses, and subtracting off the partial score when longer
 +   * context is available. This would be good to do for the LanguageModelFF feature function,
 +   * too: it makes search better (more accurate at the beginning, for example), and would
 +   * also do away with the need for the computeFinal* class of functions (and hooks in
 +   * the feature function interface).
 +   * 
 +   * @param featureFunctions {@link java.util.List} of {@link org.apache.joshua.decoder.ff.FeatureFunction}'s
 +   * @param tailNodes {@link java.util.List} of {@link org.apache.joshua.decoder.hypergraph.HGNode}'s
 +   * @param i todo
 +   * @param j todo
 +   * @param sourcePath information about a path taken through the source lattice
 +   * @param sentence the lattice input
 +   * @return the final cost for the Node
 +   */
 +  public static float computeFinalCost(List<FeatureFunction> featureFunctions,
 +      List<HGNode> tailNodes, int i, int j, SourcePath sourcePath, Sentence sentence) {
 +
 +    float cost = 0;
 +    for (FeatureFunction ff : featureFunctions) {
 +      cost += ff.computeFinalCost(tailNodes.get(0), i, j, sourcePath, sentence);
 +    }
 +    return cost;
 +  }
 +
 +  public static FeatureVector computeTransitionFeatures(List<FeatureFunction> featureFunctions,
 +      HyperEdge edge, int i, int j, Sentence sentence) {
 +
 +    // Initialize the set of features with those that were present with the rule in the grammar.
 +    FeatureVector featureDelta = new FeatureVector(featureFunctions.size());
 +
 +    // === compute feature logPs
 +    for (FeatureFunction ff : featureFunctions) {
 +      // A null rule signifies the final transition.
 +      if (edge.getRule() == null)
 +        featureDelta.addInPlace(ff.computeFinalFeatures(edge.getTailNodes().get(0), i, j, edge.getSourcePath(), sentence));
 +      else {
 +        featureDelta.addInPlace(ff.computeFeatures(edge.getRule(), edge.getTailNodes(), i, j, edge.getSourcePath(), sentence));
 +      }
 +    }
 +
 +    return featureDelta;
 +  }
 +
++  public float getFutureEstimate() {
++    return this.futureCostEstimate;
++  }
++  
 +  public float getPruningEstimate() {
-     return this.pruningCostEstimate;
++    return getViterbiCost() + getFutureEstimate();
 +  }
 +
 +  /**
-    *  The complete cost of the Viterbi derivation at this point
++   *  The complete cost of the Viterbi derivation at this point.
++   *  
 +   *  @return float representing cost
 +   */
 +  public float getViterbiCost() {
 +    return this.viterbiCost;
 +  }
 +
 +  public float getBaseCost() {
 +    return getViterbiCost() - getTransitionCost();
 +  }
 +
 +  /**
 +   * The cost incurred by this edge alone
 +   * 
 +   * @return float representing cost
 +   */
 +  public float getTransitionCost() {
 +    return this.transitionCost;
 +  }
 +
 +  public List<DPState> getDPStates() {
 +    return this.dpStates;
 +  }
- 
-   public void printInfo() {
-     System.out.println("scores: " + transitionCost + "; " + viterbiCost + "; "
-         + pruningCostEstimate);
-   }
 +}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dc756709/joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/CubePruneState.java
----------------------------------------------------------------------
diff --cc joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/CubePruneState.java
index d57a6a2,0000000..1f06d30
mode 100644,000000..100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/CubePruneState.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/CubePruneState.java
@@@ -1,114 -1,0 +1,112 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.joshua.decoder.chart_parser;
 +
 +import java.util.Arrays;
 +import java.util.List;
 +
 +import org.apache.joshua.decoder.hypergraph.HGNode;
 +import org.apache.joshua.decoder.chart_parser.DotChart.DotNode;
 +import org.apache.joshua.decoder.ff.state_maintenance.DPState;
 +import org.apache.joshua.decoder.ff.tm.Rule;
 +
 +// ===============================================================
 +// CubePruneState class
 +// ===============================================================
 +public class CubePruneState implements Comparable<CubePruneState> {
-   int[] ranks;
-   ComputeNodeResult computeNodeResult;
-   List<HGNode> antNodes;
-   List<Rule> rules;
++  final int[] ranks;
++  final ComputeNodeResult computeNodeResult;
++  final List<HGNode> antNodes;
++  final List<Rule> rules;
 +  private DotNode dotNode;
 +
 +  public CubePruneState(ComputeNodeResult score, int[] ranks, List<Rule> rules, List<HGNode> antecedents, DotNode dotNode) {
 +    this.computeNodeResult = score;
 +    this.ranks = ranks;
 +    this.rules = rules;
 +    this.antNodes = antecedents;
 +    this.dotNode = dotNode;
 +  }
 +
 +  /**
 +   * This returns the list of DP states associated with the result.
 +   * 
 +   * @return
 +   */
 +  List<DPState> getDPStates() {
 +    return this.computeNodeResult.getDPStates();
 +  }
 +  
 +  Rule getRule() {
 +    return this.rules.get(this.ranks[0]-1);
 +  }
 +
 +  public String toString() {
-     StringBuilder sb = new StringBuilder();
-     sb.append("STATE ||| rule=" + getRule() + " inside cost = " + computeNodeResult.getViterbiCost()
-         + " estimate = " + computeNodeResult.getPruningEstimate());
-     return sb.toString();
++    String sb = "STATE ||| rule=" + getRule() + " inside cost = " +
++        computeNodeResult.getViterbiCost() + " estimate = " +
++        computeNodeResult.getPruningEstimate();
++    return sb;
 +  }
 +
 +  public void setDotNode(DotNode node) {
 +    this.dotNode = node;
 +  }
 +
 +  public DotNode getDotNode() {
 +    return this.dotNode;
 +  }
 +
 +  public boolean equals(Object obj) {
 +    if (obj == null)
 +      return false;
 +    if (!this.getClass().equals(obj.getClass()))
 +      return false;
 +    CubePruneState state = (CubePruneState) obj;
 +    if (state.ranks.length != ranks.length)
 +      return false;
 +    for (int i = 0; i < ranks.length; i++)
 +      if (state.ranks[i] != ranks[i])
 +        return false;
-     if (getDotNode() != state.getDotNode())
-       return false;
++    return getDotNode() == state.getDotNode();
 +
-     return true;
 +  }
 +
 +  public int hashCode() {
 +    int hash = (dotNode != null) ? dotNode.hashCode() : 0;
 +    hash += Arrays.hashCode(ranks);
 +
 +    return hash;
 +  }
 +
 +  /**
 +   * Compares states by ExpectedTotalLogP, allowing states to be sorted according to their inverse
 +   * order (high-prob first).
 +   */
 +  public int compareTo(CubePruneState another) {
 +    if (this.computeNodeResult.getPruningEstimate() < another.computeNodeResult
 +        .getPruningEstimate()) {
 +      return 1;
 +    } else if (this.computeNodeResult.getPruningEstimate() == another.computeNodeResult
 +        .getPruningEstimate()) {
 +      return 0;
 +    } else {
 +      return -1;
 +    }
 +  }
 +}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dc756709/joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/DotChart.java
----------------------------------------------------------------------
diff --cc joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/DotChart.java
index 71c4f03,0000000..8b5c81a
mode 100644,000000..100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/DotChart.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/DotChart.java
@@@ -1,476 -1,0 +1,474 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.joshua.decoder.chart_parser;
 +
 +import java.util.ArrayList;
 +import java.util.HashMap;
 +import java.util.List;
 +import java.util.Map;
 +
 +import org.apache.joshua.corpus.Vocabulary;
 +import org.apache.joshua.decoder.ff.tm.Grammar;
 +import org.apache.joshua.decoder.ff.tm.Rule;
 +import org.apache.joshua.decoder.ff.tm.RuleCollection;
 +import org.apache.joshua.decoder.ff.tm.Trie;
 +import org.apache.joshua.decoder.segment_file.Token;
 +import org.apache.joshua.lattice.Arc;
 +import org.apache.joshua.lattice.Lattice;
 +import org.apache.joshua.lattice.Node;
 +import org.apache.joshua.util.ChartSpan;
 +import org.slf4j.Logger;
 +import org.slf4j.LoggerFactory;
 +
 +/**
 + * The DotChart handles Earley-style implicit binarization of translation rules.
 + * 
 + * The {@link DotNode} object represents the (possibly partial) application of a synchronous rule.
 + * The implicit binarization is maintained with a pointer to the {@link Trie} node in the grammar,
 + * for easy retrieval of the next symbol to be matched. At every span (i,j) of the input sentence,
 + * every incomplete DotNode is examined to see whether it (a) needs a terminal and matches against
 + * the final terminal of the span or (b) needs a nonterminal and matches against a completed
 + * nonterminal in the main chart at some split point (k,j).
 + * 
 + * Once a rule is completed, it is entered into the {@link DotChart}. {@link DotCell} objects are
 + * used to group completed DotNodes over a span.
 + * 
 + * There is a separate DotChart for every grammar.
 + * 
 + * @author Zhifei Li, <zh...@gmail.com>
 + * @author Matt Post <po...@cs.jhu.edu>
 + * @author Kristy Hollingshead Seitz
 + */
 +class DotChart {
 +
 +  // ===============================================================
 +  // Static fields
 +  // ===============================================================
 +
 +  private static final Logger LOG = LoggerFactory.getLogger(DotChart.class);
 +
 +
 +  // ===============================================================
 +  // Package-protected instance fields
 +  // ===============================================================
 +  /**
 +   * Two-dimensional chart of cells. Some cells might be null. This could definitely be represented
 +   * more efficiently, since only the upper half of this triangle is every used.
 +   */
-   private ChartSpan<DotCell> dotcells;
++  private final ChartSpan<DotCell> dotcells;
 +
 +  public DotCell getDotCell(int i, int j) {
 +    return dotcells.get(i, j);
 +  }
 +
 +  // ===============================================================
 +  // Private instance fields (maybe could be protected instead)
 +  // ===============================================================
 +
 +  /**
 +   * CKY+ style parse chart in which completed span entries are stored.
 +   */
-   private Chart dotChart;
++  private final Chart dotChart;
 +
 +  /**
 +   * Translation grammar which contains the translation rules.
 +   */
-   private Grammar pGrammar;
++  private final Grammar pGrammar;
 +
 +  /* Length of input sentence. */
 +  private final int sentLen;
 +
 +  /* Represents the input sentence being translated. */
 +  private final Lattice<Token> input;
 +
 +  // ===============================================================
 +  // Constructors
 +  // ===============================================================
 +
 +  // TODO: Maybe this should be a non-static inner class of Chart. That would give us implicit
 +  // access to all the arguments of this constructor. Though we would need to take an argument, i,
 +  // to know which Chart.this.grammars[i] to use.
 +
 +  /**
 +   * Constructs a new dot chart from a specified input lattice, a translation grammar, and a parse
 +   * chart.
 +   * 
 +   * @param input A lattice which represents an input sentence.
 +   * @param grammar A translation grammar.
 +   * @param chart A CKY+ style chart in which completed span entries are stored.
 +   */
 +  public DotChart(Lattice<Token> input, Grammar grammar, Chart chart) {
 +
 +    this.dotChart = chart;
 +    this.pGrammar = grammar;
 +    this.input = input;
 +    this.sentLen = input.size();
-     this.dotcells = new ChartSpan<DotCell>(sentLen, null);
++    this.dotcells = new ChartSpan<>(sentLen, null);
 +
 +    seed();
 +  }
 +
 +  /**
 +   * Add initial dot items: dot-items pointer to the root of the grammar trie.
 +   */
 +  void seed() {
 +    for (int j = 0; j <= sentLen - 1; j++) {
 +      if (pGrammar.hasRuleForSpan(j, j, input.distance(j, j))) {
 +        if (null == pGrammar.getTrieRoot()) {
 +          throw new RuntimeException("trie root is null");
 +        }
 +        addDotItem(pGrammar.getTrieRoot(), j, j, null, null, new SourcePath());
 +      }
 +    }
 +  }
 +
 +  /**
 +   * This function computes all possible expansions of all rules over the provided span (i,j). By
 +   * expansions, we mean the moving of the dot forward (from left to right) over a nonterminal or
 +   * terminal symbol on the rule's source side.
 +   * 
 +   * There are two kinds of expansions:
 +   * 
 +   * <ol>
 +   * <li>Expansion over a nonterminal symbol. For this kind of expansion, a rule has a dot
 +   * immediately prior to a source-side nonterminal. The main Chart is consulted to see whether
 +   * there exists a completed nonterminal with the same label. If so, the dot is advanced.
 +   * 
 +   * Discovering nonterminal expansions is a matter of enumerating all split points k such that i <
 +   * k and k < j. The nonterminal symbol must exist in the main Chart over (k,j).
 +   * 
 +   * <li>Expansion over a terminal symbol. In this case, expansion is a simple matter of determing
 +   * whether the input symbol at position j (the end of the span) matches the next symbol in the
 +   * rule. This is equivalent to choosing a split point k = j - 1 and looking for terminal symbols
 +   * over (k,j). Note that phrases in the input rule are handled one-by-one as we consider longer
 +   * spans.
 +   * </ol>
 +   */
 +  void expandDotCell(int i, int j) {
 +    if (LOG.isDebugEnabled())
 +      LOG.debug("Expanding dot cell ({}, {})", i, j);
 +
 +    /*
 +     * (1) If the dot is just to the left of a non-terminal variable, we look for theorems or axioms
 +     * in the Chart that may apply and extend the dot position. We look for existing axioms over all
 +     * spans (k,j), i < k < j.
 +     */
 +    for (int k = i + 1; k < j; k++) {
 +      extendDotItemsWithProvedItems(i, k, j, false);
 +    }
 +
 +    /*
 +     * (2) If the the dot-item is looking for a source-side terminal symbol, we simply match against
 +     * the input sentence and advance the dot.
 +     */
 +    Node<Token> node = input.getNode(j - 1);
 +    for (Arc<Token> arc : node.getOutgoingArcs()) {
 +
 +      int last_word = arc.getLabel().getWord();
 +      int arc_len = arc.getHead().getNumber() - arc.getTail().getNumber();
 +
 +      // int last_word=foreign_sent[j-1]; // input.getNode(j-1).getNumber(); //
 +
 +      if (null != dotcells.get(i, j - 1)) {
 +        // dotitem in dot_bins[i][k]: looking for an item in the right to the dot
 +
 +
 +        for (DotNode dotNode : dotcells.get(i, j - 1).getDotNodes()) {
 +
 +          // String arcWord = Vocabulary.word(last_word);
 +          // Assert.assertFalse(arcWord.endsWith("]"));
 +          // Assert.assertFalse(arcWord.startsWith("["));
 +          // logger.info("DotChart.expandDotCell: " + arcWord);
 +
 +          // List<Trie> child_tnodes = ruleMatcher.produceMatchingChildTNodesTerminalevel(dotNode,
 +          // last_word);
 +
 +          List<Trie> child_tnodes = null;
 +
 +          Trie child_node = dotNode.trieNode.match(last_word);
 +          if (null != child_node) {
 +            addDotItem(child_node, i, j - 1 + arc_len, dotNode.antSuperNodes, null,
 +                dotNode.srcPath.extend(arc));
 +          }
 +        }
 +      }
 +    }
 +  }
 +
 +  /**
 +   * note: (i,j) is a non-terminal, this cannot be a cn-side terminal, which have been handled in
 +   * case2 of dotchart.expand_cell add dotitems that start with the complete super-items in
 +   * cell(i,j)
 +   */
 +  void startDotItems(int i, int j) {
 +    extendDotItemsWithProvedItems(i, i, j, true);
 +  }
 +
 +  // ===============================================================
 +  // Private methods
 +  // ===============================================================
 +
 +  /**
 +   * Attempt to combine an item in the dot chart with an item in the main chart to create a new item
 +   * in the dot chart. The DotChart item is a {@link DotNode} begun at position i with the dot
 +   * currently at position k, that is, a partially-applied rule.
 +   * 
 +   * In other words, this method looks for (proved) theorems or axioms in the completed chart that
 +   * may apply and extend the dot position.
 +   * 
 +   * @param i Start index of a dot chart item
 +   * @param k End index of a dot chart item; start index of a completed chart item
 +   * @param j End index of a completed chart item
 +   * @param skipUnary if true, don't extend unary rules
 +   */
 +  private void extendDotItemsWithProvedItems(int i, int k, int j, boolean skipUnary) {
 +    if (this.dotcells.get(i, k) == null || this.dotChart.getCell(k, j) == null) {
 +      return;
 +    }
 +
 +    // complete super-items (items over the same span with different LHSs)
-     List<SuperNode> superNodes = new ArrayList<SuperNode>(this.dotChart.getCell(k, j)
-         .getSortedSuperItems().values());
++    List<SuperNode> superNodes = new ArrayList<>(this.dotChart.getCell(k, j).getSortedSuperItems().values());
 +
 +    /* For every partially complete item over (i,k) */
 +    for (DotNode dotNode : dotcells.get(i, k).dotNodes) {
 +      /* For every completed nonterminal in the main chart */
 +      for (SuperNode superNode : superNodes) {
 +
 +        // String arcWord = Vocabulary.word(superNode.lhs);
 +        // logger.info("DotChart.extendDotItemsWithProvedItems: " + arcWord);
 +        // Assert.assertTrue(arcWord.endsWith("]"));
 +        // Assert.assertTrue(arcWord.startsWith("["));
 +
 +        /*
 +         * Regular Expression matching allows for a regular-expression style rules in the grammar,
 +         * which allows for a very primitive treatment of morphology. This is an advanced,
 +         * undocumented feature that introduces a complexity, in that the next "word" in the grammar
 +         * rule might match more than one outgoing arc in the grammar trie.
 +         */
 +        Trie child_node = dotNode.getTrieNode().match(superNode.lhs);
 +        if (child_node != null) {
 +          if ((!skipUnary) || (child_node.hasExtensions())) {
 +            addDotItem(child_node, i, j, dotNode.getAntSuperNodes(), superNode, dotNode
 +                .getSourcePath().extendNonTerminal());
 +          }
 +        }
 +      }
 +    }
 +  }
 +
 +  /*
 +   * We introduced the ability to have regular expressions in rules for matching against terminals.
 +   * For example, you could have the rule
 +   * 
 +   * <pre> [X] ||| l?s herman?s ||| siblings </pre>
 +   * 
 +   * When this is enabled for a grammar, we need to test against *all* (positive) outgoing arcs of
 +   * the grammar trie node to see if any of them match, and then return the whole set. This is quite
 +   * expensive, which is why you should only enable regular expressions for small grammars.
 +   */
 +
 +  private ArrayList<Trie> matchAll(DotNode dotNode, int wordID) {
 +    ArrayList<Trie> trieList = new ArrayList<>();
 +    HashMap<Integer, ? extends Trie> childrenTbl = dotNode.trieNode.getChildren();
 +
 +    if (childrenTbl != null && wordID >= 0) {
 +      // get all the extensions, map to string, check for *, build regexp
 +      for (Map.Entry<Integer, ? extends Trie> entry : childrenTbl.entrySet()) {
 +        Integer arcID = entry.getKey();
 +        if (arcID == wordID) {
 +          trieList.add(entry.getValue());
 +        } else {
 +          String arcWord = Vocabulary.word(arcID);
 +          if (Vocabulary.word(wordID).matches(arcWord)) {
 +            trieList.add(entry.getValue());
 +          }
 +        }
 +      }
 +    }
 +    return trieList;
 +  }
 +
 +
 +  /**
 +   * Creates a {@link DotNode} and adds it into the {@link DotChart} at the correct place. These
 +   * are (possibly incomplete) rule applications. 
 +   * 
 +   * @param tnode the trie node pointing to the location ("dot") in the grammar trie
 +   * @param i
 +   * @param j
 +   * @param antSuperNodesIn the supernodes representing the rule's tail nodes
 +   * @param curSuperNode the lefthand side of the rule being created
 +   * @param srcPath the path taken through the input lattice
 +   */
 +  private void addDotItem(Trie tnode, int i, int j, ArrayList<SuperNode> antSuperNodesIn,
 +      SuperNode curSuperNode, SourcePath srcPath) {
-     ArrayList<SuperNode> antSuperNodes = new ArrayList<SuperNode>();
++    ArrayList<SuperNode> antSuperNodes = new ArrayList<>();
 +    if (antSuperNodesIn != null) {
 +      antSuperNodes.addAll(antSuperNodesIn);
 +    }
 +    if (curSuperNode != null) {
 +      antSuperNodes.add(curSuperNode);
 +    }
 +
 +    DotNode item = new DotNode(i, j, tnode, antSuperNodes, srcPath);
 +    if (dotcells.get(i, j) == null) {
 +      dotcells.set(i, j, new DotCell());
 +    }
 +    dotcells.get(i, j).addDotNode(item);
 +    dotChart.nDotitemAdded++;
 +
 +    if (LOG.isDebugEnabled()) {
 +      LOG.debug("Add a dotitem in cell ({}, {}), n_dotitem={}, {}", i, j,
 +          dotChart.nDotitemAdded, srcPath);
 +
 +      RuleCollection rules = tnode.getRuleCollection();
 +      if (rules != null) {
 +        for (Rule r : rules.getRules()) {
 +          // System.out.println("rule: "+r.toString());
 +          LOG.debug("{}", r);
 +        }
 +      }
 +    }
 +  }
 +
 +  // ===============================================================
 +  // Package-protected classes
 +  // ===============================================================
 +
 +  /**
 +   * A DotCell groups together DotNodes that have been applied over a particular span. A DotNode, in
 +   * turn, is a partially-applied grammar rule, represented as a pointer into the grammar trie
 +   * structure.
 +   */
 +  static class DotCell {
 +
 +    // Package-protected fields
-     private List<DotNode> dotNodes = new ArrayList<DotNode>();
++    private final List<DotNode> dotNodes = new ArrayList<>();
 +
 +    public List<DotNode> getDotNodes() {
 +      return dotNodes;
 +    }
 +
 +    private void addDotNode(DotNode dt) {
 +      /*
 +       * if(l_dot_items==null) l_dot_items= new ArrayList<DotItem>();
 +       */
 +      dotNodes.add(dt);
 +    }
 +  }
 +
 +  /**
 +   * A DotNode represents the partial application of a rule rooted to a particular span (i,j). It
 +   * maintains a pointer to the trie node in the grammar for efficient mapping.
 +   */
 +  static class DotNode {
 +
-     private int i, j;
++    private final int i;
++    private final int j;
 +    private Trie trieNode = null;
 +    
 +    /* A list of grounded (over a span) nonterminals that have been crossed in traversing the rule */
 +    private ArrayList<SuperNode> antSuperNodes = null;
 +    
 +    /* The source lattice cost of applying the rule */
-     private SourcePath srcPath;
++    private final SourcePath srcPath;
 +
 +    @Override
 +    public String toString() {
 +      int size = 0;
 +      if (trieNode != null && trieNode.getRuleCollection() != null)
 +        size = trieNode.getRuleCollection().getRules().size();
 +      return String.format("DOTNODE i=%d j=%d #rules=%d #tails=%d", i, j, size, antSuperNodes.size());
 +    }
 +    
 +    /**
 +     * Initialize a dot node with the span, grammar trie node, list of supernode tail pointers, and
 +     * the lattice sourcepath.
 +     * 
 +     * @param i
 +     * @param j
 +     * @param trieNode
 +     * @param antSuperNodes
 +     * @param srcPath
 +     */
 +    public DotNode(int i, int j, Trie trieNode, ArrayList<SuperNode> antSuperNodes, SourcePath srcPath) {
 +      this.i = i;
 +      this.j = j;
 +      this.trieNode = trieNode;
 +      this.antSuperNodes = antSuperNodes;
 +      this.srcPath = srcPath;
 +    }
 +
 +    public boolean equals(Object obj) {
 +      if (obj == null)
 +        return false;
 +      if (!this.getClass().equals(obj.getClass()))
 +        return false;
 +      DotNode state = (DotNode) obj;
 +
 +      /*
 +       * Technically, we should be comparing the span inforamtion as well, but that would require us
 +       * to store it, increasing memory requirements, and we should be able to guarantee that we
 +       * won't be comparing DotNodes across spans.
 +       */
 +      // if (this.i != state.i || this.j != state.j)
 +      // return false;
 +
-       if (this.trieNode != state.trieNode)
-         return false;
++      return this.trieNode == state.trieNode;
 +
-       return true;
 +    }
 +
 +    /**
 +     * Technically the hash should include the span (i,j), but since DotNodes are grouped by span,
 +     * this isn't necessary, and we gain something by not having to store the span.
 +     */
 +    public int hashCode() {
 +      return this.trieNode.hashCode();
 +    }
 +
 +    // convenience function
 +    public boolean hasRules() {
 +      return getTrieNode().getRuleCollection() != null && getTrieNode().getRuleCollection().getRules().size() != 0;
 +    }
 +    
 +    public RuleCollection getRuleCollection() {
 +      return getTrieNode().getRuleCollection();
 +    }
 +
 +    public Trie getTrieNode() {
 +      return trieNode;
 +    }
 +
 +    public SourcePath getSourcePath() {
 +      return srcPath;
 +    }
 +
 +    public ArrayList<SuperNode> getAntSuperNodes() {
 +      return antSuperNodes;
 +    }
 +
 +    public int begin() {
 +      return i;
 +    }
 +    
 +    public int end() {
 +      return j;
 +    }
 +  }
 +}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dc756709/joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/SourcePath.java
----------------------------------------------------------------------
diff --cc joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/SourcePath.java
index 1d96149,0000000..efc6688
mode 100644,000000..100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/SourcePath.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/SourcePath.java
@@@ -1,63 -1,0 +1,63 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.joshua.decoder.chart_parser;
 +
 +import org.apache.joshua.decoder.segment_file.Token;
 +import org.apache.joshua.lattice.Arc;
 +
 +/**
 + * This class represents information about a path taken through the source lattice.
 + * 
 + * <p>This implementation only tracks the source path cost which is assumed to be a scalar value.
 + *       If you need multiple values, or want to recover more detailed path statistics, you'll need
 + *       to update this code.
 + */
 +public class SourcePath {
 +
 +  private final float pathCost;
 +
 +  public SourcePath() {
 +    pathCost = 0.0f;
 +  }
 +
 +  private SourcePath(float cost) {
 +    pathCost = cost;
 +  }
 +
 +  public float getPathCost() {
 +    return pathCost;
 +  }
 +
 +  public SourcePath extend(Arc<Token> srcEdge) {
-     float tcost = (float) srcEdge.getCost();
++    float tcost = srcEdge.getCost();
 +    if (tcost == 0.0)
 +      return this;
 +    else
-       return new SourcePath(pathCost + (float) srcEdge.getCost());
++      return new SourcePath(pathCost + srcEdge.getCost());
 +  }
 +
 +  public SourcePath extendNonTerminal() {
 +    return this;
 +  }
 +
 +  public String toString() {
 +    return "SourcePath.cost=" + pathCost;
 +  }
 +
 +}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dc756709/joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/StateConstraint.java
----------------------------------------------------------------------
diff --cc joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/StateConstraint.java
index d21ceca,0000000..b6b27c8
mode 100644,000000..100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/StateConstraint.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/StateConstraint.java
@@@ -1,75 -1,0 +1,74 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.joshua.decoder.chart_parser;
 +
 +import java.util.Collection;
 +
 +import org.apache.joshua.corpus.Vocabulary;
 +import org.apache.joshua.decoder.ff.state_maintenance.DPState;
 +import org.apache.joshua.decoder.ff.state_maintenance.NgramDPState;
 +
 +/**
 + * This class provides constraints on the sorts of states that are permitted in the chart. Its
 + * original motivation was to be used as a means of doing forced decoding, which is accomplished by
 + * forcing all n-gram states that are created to match the target string.
 + * 
 + * @author Matt Post post@cs.jhu.edu
 + * 
 + */
 +public class StateConstraint {
 +  private String target = null;
 +
 +  public StateConstraint(String target) {
 +    this.target = " <s> " + target + " </s> ";
 +  }
 +
 +  /**
 +   * Determines if all of the states passed in are legal in light of the input that was passed
 +   * earlier. Currently only defined for n-gram states.
 +   * 
 +   * @param dpStates {@link java.util.Collection} of {@link org.apache.joshua.decoder.ff.state_maintenance.DPState}'s
 +   * @return whether the states are legal in light of the target side sentence
 +   */
 +  public boolean isLegal(Collection<DPState> dpStates) {
 +    /*
 +     * Iterate over all the state-contributing objects associated with the new state, querying
 +     * n-gram ones (of which there is probably only one), allowing them to veto the move.
 +     */
 +    for (DPState dpState : dpStates) {
 +      if (dpState instanceof NgramDPState) {
 +        // Build a regular expression out of the state context.
 +        String leftWords = " "
 +            + Vocabulary.getWords(((NgramDPState) dpState).getLeftLMStateWords()) + " ";
 +        String rightWords = " "
 +            + Vocabulary.getWords(((NgramDPState) dpState).getRightLMStateWords()) + " ";
 +
 +        int leftPos = this.target.indexOf(leftWords);
 +        int rightPos = this.target.lastIndexOf(rightWords);
 +
-         boolean legal = (leftPos != -1 && leftPos <= rightPos);
- //        System.err.println(String.format("  isLegal(%s @ %d,%s @ %d) = %s", leftWords, leftPos,
++        //        System.err.println(String.format("  isLegal(%s @ %d,%s @ %d) = %s", leftWords, leftPos,
 +//         rightWords, rightPos, legal));
 +
-         return legal;
++        return (leftPos != -1 && leftPos <= rightPos);
 +      }
 +    }
 +
 +    return true;
 +  }
 +}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dc756709/joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/SuperNode.java
----------------------------------------------------------------------
diff --cc joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/SuperNode.java
index a7c6e34,0000000..f228836
mode 100644,000000..100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/SuperNode.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/SuperNode.java
@@@ -1,62 -1,0 +1,62 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.joshua.decoder.chart_parser;
 +
 +import java.util.ArrayList;
 +import java.util.List;
 +
 +import org.apache.joshua.decoder.hypergraph.HGNode;
 +
 +/**
 + * Represents a list of items in the hypergraph that have the same left-hand side but may have
 + * different LM states.
 + * 
 + * @author Zhifei Li
 + */
 +class SuperNode {
 +
 +  /** Common left-hand side state. */
 +  final int lhs;
 +
 +  /**
 +   * List of hypergraph nodes, each of which has its own language model state.
 +   */
 +  final List<HGNode> nodes;
 +
 +  /**
 +   * All nodes in a SuperNode have the same start and end points, so we pick the first one and
 +   * return it.
 +   * 
 +   * @return
 +   */
 +  public int end() {
 +    return nodes.get(0).j;
 +  }
 +  
 +  
 +  /**
 +   * Constructs a super item defined by a common left-hand side.
 +   * 
 +   * @param lhs Left-hand side token
 +   */
 +  public SuperNode(int lhs) {
 +    this.lhs = lhs;
-     this.nodes = new ArrayList<HGNode>();
++    this.nodes = new ArrayList<>();
 +  }
 +}