You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/09/12 10:38:12 UTC

incubator-joshua git commit: Merge branch 'master' into 7

Repository: incubator-joshua
Updated Branches:
  refs/heads/7 b0b706272 -> bd09600be


Merge branch 'master' into 7

Some problems remain with phrase-based tests.

# Conflicts:
#	joshua-core/src/main/java/org/apache/joshua/decoder/ff/phrase/Distortion.java
#	joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
#	joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
#	joshua-core/src/test/resources/decoder/phrase/constrained/config
#	joshua-core/src/test/resources/decoder/phrase/include-align-index/log
#	joshua-core/src/test/resources/decoder/phrase/include-align-index/output
#	src/test/resources/decoder/phrase/constrained/config
#	src/test/resources/decoder/phrase/constrained/joshua.config


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/bd09600b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/bd09600b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/bd09600b

Branch: refs/heads/7
Commit: bd09600bee650ca04aa2e8519e3e4716fffec813
Parents: b0b7062
Author: Matt Post <po...@cs.jhu.edu>
Authored: Mon Sep 12 12:27:13 2016 +0200
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Mon Sep 12 12:27:13 2016 +0200

----------------------------------------------------------------------
 demo/README.md                                  |  28 ++++++++---
 examples/training/ghkm.sh                       |   3 +-
 examples/training/hiero-europarl.sh             |   3 +-
 examples/training/hiero.sh                      |   3 +-
 examples/training/phrase.sh                     |   3 +-
 examples/training/samt.sh                       |   3 +-
 .../joshua/decoder/ff/phrase/Distortion.java    |  35 ++++++-------
 .../decoder/ff/tm/packed/PackedGrammar.java     |  23 ++++++---
 .../hypergraph/OutputStringExtractor.java       |  22 ++++++--
 .../apache/joshua/decoder/phrase/Candidate.java |   2 +-
 .../joshua/decoder/phrase/Hypothesis.java       |   8 +--
 .../org/apache/joshua/decoder/phrase/Stack.java |  24 +++++----
 .../apache/joshua/decoder/phrase/Stacks.java    |   3 +-
 .../org/apache/joshua/tools/GrammarPacker.java  |   8 ++-
 .../decoder/phrase/decode/rules.packed/config   |   2 -
 .../decoder/phrase/decode/rules.packed/encoding | Bin 87 -> 0 bytes
 .../decode/rules.packed/slice_00000.features    | Bin 4128858 -> 0 bytes
 .../decode/rules.packed/slice_00000.source      | Bin 1982228 -> 0 bytes
 .../decode/rules.packed/slice_00000.target      | Bin 1463856 -> 0 bytes
 .../rules.packed/slice_00000.target.lookup      | Bin 28 -> 0 bytes
 .../phrase/decode/rules.packed/vocabulary       | Bin 169225 -> 0 bytes
 .../decoder/phrase/include-align-index/log      |  50 -------------------
 .../decoder/phrase/include-align-index/output   |   1 -
 .../decoder/phrase/include-align-index/test.sh  |   2 +-
 .../grammar/sparse-features/test-packed.sh      |   2 +-
 .../resources/grammar/sparse-features/test.sh   |   2 +-
 .../src/test/resources/server/http/test.sh      |   2 +-
 .../src/test/resources/server/tcp-text/test.sh  |   2 +-
 scripts/training/pipeline.pl                    |   7 ++-
 scripts/training/run_thrax.py                   |   7 +--
 scripts/training/templates/thrax-phrase.conf    |  14 ++----
 31 files changed, 120 insertions(+), 139 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/bd09600b/demo/README.md
----------------------------------------------------------------------
diff --git a/demo/README.md b/demo/README.md
index d086532..b2520ec 100644
--- a/demo/README.md
+++ b/demo/README.md
@@ -3,15 +3,22 @@ feed it sentences, add custom rules, and view the translations.
 
 There are two steps:
 
-1. Start Joshua in server mode
-
+1. Start Joshua in server mode. The minimal set of parameters needed
+   is:
+   
+       -server-type http -server-port 5674
+       
+   So, for example, if you have a pre-built model or language pack,
+   you can simply add these parameters to the invocation. To start a
+   server with an empty model, you can type the following:
+    
        $JOSHUA/bin/joshua -server-type http -server-port 5674 \
          -feature-function OOVPenalty \
          -feature-function "PhrasePenalty -owner custom" \
          -weight-overwrite "OOVPenalty 1 PhrasePenalty -1" \
          -mark-oovs -lowercase -projectcase -output-format %S
 
-   Alternately, you can use the config file in this directory, which
+   Equivalently, you can use the config file in this directory, which
    contains all the above parameters, and simply run it like this:
 
        $JOSHUA/bin/joshua -config demo.config
@@ -20,14 +27,19 @@ There are two steps:
    pre-built model, such as Joshua's language packs.
 
    Command-line arguments override values in the config file, so if
-   you need to change the port only, you can do:
+   you need to change the port only, you can use the following:
    
        $JOSHUA/bin/joshua -config demo.config -server-port 5675
        
-1. Load the index.html file, and make sure the values in the "Parameters"
-   tab match your server settings above. You can also pass these values
-   in the query string, e.g.,
+2. Next, load the index.html file, and make sure the values in the
+   "Parameters" tab match your server settings above. You can also
+   pass these values in the query string, e.g.,
 
        index.html?port=5674&server=localhost
    
-That's it!
+   The web demo will connect to the server via AJAX queries using
+   Joshua's RESTful interface. You can translate data, experiment with
+   runtime parameters, and add new rules.
+
+That's it! Please direct comments or questions to Joshua's user
+mailing list: user@joshua.apache.org.

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/bd09600b/examples/training/ghkm.sh
----------------------------------------------------------------------
diff --git a/examples/training/ghkm.sh b/examples/training/ghkm.sh
index 5ac77ec..18d36d6 100644
--- a/examples/training/ghkm.sh
+++ b/examples/training/ghkm.sh
@@ -45,5 +45,4 @@ $JOSHUA/scripts/training/pipeline.pl \
     --joshua-mem 20g \
     --packer-mem 8g \
     --tuner mira \
-    --maxlen 80 \
-    --optimizer-runs 5
+    --maxlen 80

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/bd09600b/examples/training/hiero-europarl.sh
----------------------------------------------------------------------
diff --git a/examples/training/hiero-europarl.sh b/examples/training/hiero-europarl.sh
index a76d519..e980533 100644
--- a/examples/training/hiero-europarl.sh
+++ b/examples/training/hiero-europarl.sh
@@ -38,5 +38,4 @@ $JOSHUA/bin/pipeline.pl \
     --tuner mert \
     --joshua-mem 20g \
     --packer-mem 16g \
-    --lmfile /path/to/gigaword/lmfile \
-    --optimizer-runs 5
+    --lmfile /path/to/gigaword/lmfile

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/bd09600b/examples/training/hiero.sh
----------------------------------------------------------------------
diff --git a/examples/training/hiero.sh b/examples/training/hiero.sh
index 67050e5..5062fb1 100644
--- a/examples/training/hiero.sh
+++ b/examples/training/hiero.sh
@@ -33,5 +33,4 @@ $JOSHUA/bin/pipeline.pl \
     --threads 8 \
     --tuner mert \
     --joshua-mem 10g \
-    --packer-mem 8g \
-    --optimizer-runs 5
+    --packer-mem 8g

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/bd09600b/examples/training/phrase.sh
----------------------------------------------------------------------
diff --git a/examples/training/phrase.sh b/examples/training/phrase.sh
index c99c732..223ae10 100644
--- a/examples/training/phrase.sh
+++ b/examples/training/phrase.sh
@@ -36,5 +36,4 @@ $JOSHUA/bin/pipeline.pl \
     --threads 8 \
     --tuner mert \
     --joshua-mem 5g \
-    --packer-mem 8g \
-    --optimizer-runs 5
+    --packer-mem 8g

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/bd09600b/examples/training/samt.sh
----------------------------------------------------------------------
diff --git a/examples/training/samt.sh b/examples/training/samt.sh
index f8ff7e0..d5e1ce7 100644
--- a/examples/training/samt.sh
+++ b/examples/training/samt.sh
@@ -36,5 +36,4 @@ $JOSHUA/bin/pipeline.pl \
     --threads 4 \
     --tuner mert \
     --joshua-mem 20g \
-    --packer-mem 8g \
-    --optimizer-runs 5
+    --packer-mem 8g

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/bd09600b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/phrase/Distortion.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/phrase/Distortion.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/phrase/Distortion.java
index 31635ef..4309820 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/phrase/Distortion.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/phrase/Distortion.java
@@ -46,29 +46,24 @@ public class Distortion extends StatelessFF {
   public DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j, SourcePath sourcePath,
       Sentence sentence, Accumulator acc) {
 
-    if (rule == Hypothesis.MONO_RULE || rule == Hypothesis.SWAP_RULE) {
-//        int start_point = j - rule.getFrench().length + rule.getArity();
-//        int jump_size = Math.abs(tailNodes.get(0).j - start_point);
+    if (rule == Hypothesis.INORDER_RULE) {
+      int last_phrase_end = tailNodes.get(0).j;
+      int new_phrase_start = tailNodes.get(1).i;
+      int jump_size = Math.abs(last_phrase_end - new_phrase_start);
 
-      if (rule == Hypothesis.MONO_RULE) {
-        int start_point = j - tailNodes.get(1).getHyperEdges().get(0).getRule().getSource().length;
-        int last_point = tailNodes.get(0).j;
-        int jump_size = Math.abs(start_point - last_point);
-      
-//        System.err.println(String.format("DISTORTION_mono(%d -> %d) = %d", 
-//            last_point, start_point, jump_size));
+      //        System.err.println(String.format("DISTORTION_mono(%d -> %d) = %d", 
+      //            last_phrase_end, new_phrase_start, jump_size));
 
-        acc.add(featureId, -jump_size);
-      } else {
-        int start_point = j - tailNodes.get(0).getHyperEdges().get(0).getRule().getSource().length;
-        int last_point = tailNodes.get(1).j;
-        int jump_size = Math.abs(start_point - last_point);
-      
-//        System.err.println(String.format("DISTORTION_swap(%d -> %d) = %d", 
-//            last_point, start_point, jump_size));
+      acc.add(featureId, -jump_size);
+    } else if (rule == Hypothesis.INVERTED_RULE) {
+      int last_phrase_end = tailNodes.get(1).j;
+      int new_phrase_start = tailNodes.get(0).i;
+      int jump_size = Math.abs(last_phrase_end - new_phrase_start);
 
-        acc.add(featureId, -jump_size);    
-      }
+      //        System.err.println(String.format("DISTORTION_swap(%d -> %d) = %d", 
+      //            last_phrase_end, new_phrase_start, jump_size));
+
+      acc.add(featureId, -jump_size);    
     }
     
     return null;

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/bd09600b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/packed/PackedGrammar.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/packed/PackedGrammar.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/packed/PackedGrammar.java
index bacd294..d98d76f 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/packed/PackedGrammar.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/packed/PackedGrammar.java
@@ -113,20 +113,20 @@ public class PackedGrammar extends AbstractGrammar {
 
   private final File vocabFile; // store path to vocabulary file
 
-  // The version number of the earliest supported grammar packer
-  public static final int SUPPORTED_VERSION = 3;
-
   // A rule cache for commonly used tries to avoid excess object allocations
   // Testing shows there's up to ~95% hit rate when cache size is 5000 Trie nodes.
   private final Cache<Trie, List<Rule>> cached_rules;
 
   private final String grammarDir;
+  
+  private JoshuaConfiguration config;
 
   public PackedGrammar(String grammar_dir, int span_limit, String owner, String type,
       JoshuaConfiguration joshuaConfiguration) throws IOException {
     super(owner, joshuaConfiguration, span_limit);
 
     this.grammarDir = grammar_dir;
+    this.config = joshuaConfiguration;
 
     // Read the vocabulary.
     vocabFile = new File(grammar_dir + File.separator + VOCABULARY_FILENAME);
@@ -976,7 +976,7 @@ public class PackedGrammar extends AbstractGrammar {
    * @throws IOException
    */
   private void readConfig(String config) throws IOException {
-    int version = 0;
+    int version = 2;
 
     for (String line: new LineReader(config)) {
       String[] tokens = line.split(" = ");
@@ -987,10 +987,19 @@ public class PackedGrammar extends AbstractGrammar {
       }
     }
 
-    if (version != 3) {
-      String message = String.format("The grammar at %s was packed with packer version %d, but the earliest supported version is %d",
-          this.grammarDir, version, SUPPORTED_VERSION);
+    if (! isSupportedVersion(version)) {
+      String message = String.format("The grammar at %s was packed with packer version %d, which is incompatible with the current config",
+          this.grammarDir, version);
       throw new RuntimeException(message);
     }
   }
+  
+  /*
+   * Determines whether the current grammar is a supported version. For hierarchical decoding,
+   * no changes have occurred, so any version from 2 (the default) onward is supported. For phrase-
+   * based decoding, version 4 is required.
+   */
+  private boolean isSupportedVersion(int version) {
+    return (config.search_algorithm.equals("cky") && version >= 2) || (version >= 4);
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/bd09600b/joshua-core/src/main/java/org/apache/joshua/decoder/hypergraph/OutputStringExtractor.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/hypergraph/OutputStringExtractor.java b/joshua-core/src/main/java/org/apache/joshua/decoder/hypergraph/OutputStringExtractor.java
index 9a59e3c..33409ec 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/hypergraph/OutputStringExtractor.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/hypergraph/OutputStringExtractor.java
@@ -37,6 +37,11 @@ public class OutputStringExtractor implements WalkerFunction, DerivationVisitor
   private final Stack<OutputString> outputStringStack = new Stack<>();
   private final boolean extractSource;
 
+  /* This comes from the WalkerFunction interface. It is applied at every HGNode in the
+   * hypergraph.
+   *
+   * @see org.apache.joshua.decoder.hypergraph.WalkerFunction#apply(org.apache.joshua.decoder.hypergraph.HGNode, int)
+   */
   @Override
   public void apply(HGNode node, int nodeIndex) {
     apply(node.bestHyperedge.getRule(), nodeIndex);
@@ -47,11 +52,24 @@ public class OutputStringExtractor implements WalkerFunction, DerivationVisitor
    * apply() for Viterbi extraction but using the edge from
    * the Derivation state.
    */
+  
+  /*
+   * (non-Javadoc)
+   * @see org.apache.joshua.decoder.hypergraph.KBestExtractor.DerivationVisitor#before(org.apache.joshua.decoder.hypergraph.KBestExtractor.DerivationState, int, int)
+   */
   @Override
   public void before(final DerivationState state, int level, int tailNodeIndex) {
       apply(state.edge.getRule(), tailNodeIndex);
   }
   
+  /* Nothing to do after the visit.
+   * 
+   * (non-Javadoc)
+   * @see org.apache.joshua.decoder.hypergraph.KBestExtractor.DerivationVisitor#after(org.apache.joshua.decoder.hypergraph.KBestExtractor.DerivationState, int, int)
+   */
+  @Override
+  public void after(DerivationState state, int level, int tailNodeIndex) {}
+
   private void apply(Rule rule, int nodeIndex) {
     if (rule != null) {
       final int[] words = extractSource ? rule.getSource() : rule.getTarget();
@@ -59,10 +77,6 @@ public class OutputStringExtractor implements WalkerFunction, DerivationVisitor
     }
   }
   
-  /** Nothing to do */
-  @Override
-  public void after(DerivationState state, int level, int tailNodeIndex) {}
-  
   private static int getSourceNonTerminalPosition(final int[] words, int nonTerminalIndex) {
     int nonTerminalsSeen = 0;
     for (int i = 0; i < words.length; i++) {

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/bd09600b/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java b/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
index 12f0217..c5f96e5 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
@@ -120,7 +120,7 @@ public class Candidate implements Comparable<Candidate> {
     this.phrases = phrases;
     this.future_delta = delta;
     this.ranks = ranks;
-    this.rule = isMonotonic() ? Hypothesis.MONO_RULE : Hypothesis.SWAP_RULE;
+    this.rule = isMonotonic() ? Hypothesis.INORDER_RULE : Hypothesis.INVERTED_RULE;
 //    this.score = hypotheses.get(ranks[0]).score + phrases.get(ranks[1]).getEstimatedCost();
 
     this.computedResult = null;

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/bd09600b/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java b/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
index 3c61a68..e8f956c 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
@@ -44,10 +44,10 @@ public class Hypothesis extends HGNode implements Comparable<Hypothesis> {
   // The hypothesis' coverage vector
   private final Coverage coverage;
 
-  public static Rule BEGIN_RULE = new HieroFormatReader(UNKNOWN_OWNER_ID).parseLine("[GOAL] ||| <s> ||| <s> ||| ");
-  public static Rule END_RULE   = new HieroFormatReader(UNKNOWN_OWNER_ID).parseLine("[GOAL] ||| </s> ||| </s> ||| ");
-  public static Rule MONO_RULE  = new HieroFormatReader(UNKNOWN_OWNER_ID).parseLine("[GOAL] ||| [GOAL,1] [X,2] ||| [GOAL,1] [X,2] |||   ||| 0-0 1-1");
-  public static Rule SWAP_RULE  = new HieroFormatReader(UNKNOWN_OWNER_ID).parseLine("[GOAL] ||| [X,1] [GOAL,2] ||| [GOAL,2] [X,1] |||   ||| 0-1 1-0");
+  public static Rule BEGIN_RULE = new HieroFormatReader(UNKNOWN_OWNER_ID).parseLine("[GOAL] ||| <s> ||| <s> |||   ||| 0-0");
+  public static Rule END_RULE   = new HieroFormatReader(UNKNOWN_OWNER_ID).parseLine("[GOAL] ||| </s> ||| </s> |||   ||| 0-0");
+  public static Rule INORDER_RULE  = new HieroFormatReader(UNKNOWN_OWNER_ID).parseLine("[GOAL] ||| [GOAL,1] [X,2] ||| [GOAL,1] [X,2] |||   ||| 0-0 1-1");
+  public static Rule INVERTED_RULE  = new HieroFormatReader(UNKNOWN_OWNER_ID).parseLine("[GOAL] ||| [X,1] [GOAL,2] ||| [GOAL,2] [X,1] |||   ||| 0-1 1-0");
   
   public String toString() {
     StringBuffer sb = new StringBuffer();

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/bd09600b/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Stack.java b/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
index 16f5b27..b867d10 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
@@ -26,6 +26,7 @@ import java.util.PriorityQueue;
 import java.util.Set;
 
 import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.ff.tm.Rule;
 import org.apache.joshua.decoder.segment_file.Sentence;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -143,25 +144,28 @@ public class Stack extends ArrayList<Hypothesis> {
 
     // Constrained decoding
     if (sentence.target() != null) {
-      throw new RuntimeException("* FATAL! Constrained decoding no longer works for the new phrase format");
-      // TODO: fix constrained decoding
-
-      /*
-      String oldWords = cand.getHypothesis().bestHyperedge.getRule().getTargetWords().replace("[X,1] ",  "");
-      String newWords = cand.getRule().getTargetWords().replace("[X,1] ",  "");
+      /* Get the rule. If it's an in-order or inverted glue rule, find the right backpointer */
+      Rule rule = cand.getHypothesis().getRule();
+      if (rule == Hypothesis.INORDER_RULE)
+        rule = cand.getHypothesis().bestHyperedge.getTailNodes().get(1).bestHyperedge.getRule();
+      else if (rule == Hypothesis.INVERTED_RULE)
+        rule = cand.getHypothesis().bestHyperedge.getTailNodes().get(0).bestHyperedge.getRule();
+      String oldWords = rule.getTargetWords();
 
+      String newWords = cand.getPhraseRule().getTargetWords();
+          
+      boolean allowed = sentence.fullTarget().contains(oldWords + " " + newWords);
+      
       // If the string is not found in the target sentence, explore the cube neighbors
-      if (!sentence.fullTarget().contains(oldWords + " " + newWords)) {
+      if (! allowed) {
         Candidate next = cand.extendPhrase();
         if (next != null)
           addCandidate(next); 
+        
         return;
       }
-      */
     }
 
-    // TODO: sourcepath
-    
     candidates.add(cand);
   }
   

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/bd09600b/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java b/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
index cfeaea2..d02ad1a 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
@@ -32,7 +32,7 @@ package org.apache.joshua.decoder.phrase;
  * ensures that the coverage vector is consistent but the resulting hypergraph may not be projective,
  * which is different from the CKY algorithm, which does produce projective derivations. 
  * 
- * TODO Lattice decoding is not yet supported (March 2015).
+ * TODO Lattice decoding is not yet supported.
  */
 
 import static org.apache.joshua.decoder.ff.tm.OwnerMap.UNKNOWN_OWNER;
@@ -40,7 +40,6 @@ import static org.apache.joshua.decoder.ff.tm.OwnerMap.UNKNOWN_OWNER;
 import java.util.ArrayList;
 import java.util.List;
 
-import org.apache.joshua.corpus.Span;
 import org.apache.joshua.decoder.JoshuaConfiguration;
 import org.apache.joshua.decoder.chart_parser.ComputeNodeResult;
 import org.apache.joshua.decoder.ff.FeatureFunction;

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/bd09600b/joshua-core/src/main/java/org/apache/joshua/tools/GrammarPacker.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/tools/GrammarPacker.java b/joshua-core/src/main/java/org/apache/joshua/tools/GrammarPacker.java
index 838279b..21fe1d2 100644
--- a/joshua-core/src/main/java/org/apache/joshua/tools/GrammarPacker.java
+++ b/joshua-core/src/main/java/org/apache/joshua/tools/GrammarPacker.java
@@ -59,12 +59,16 @@ public class GrammarPacker {
    * the documentation.
    *
    * Version history:
+   * 
+   * - 2. The default version.
    *
    * - 3 (May 2016). This was the first version that was marked. It removed the special phrase-
    * table packing that packed phrases without the [X,1] on the source and target sides, which
-   * then required special handling in the decoder to use for phrase-based decoding.
+   * then required special handling in the decoder to use for phrase-based decoding. So in this
+   * version, [X,1] is required to be present on the source and target sides, and phrase-based
+   * decoding is implemented as a left-branching grammar.
    *
-   * - 4 (August 2016). Phrase-based decoding rewritten to represent phrases without a builtin
+   * - 4 (August 2016). Phrase-based decoding was rewritten to represent phrases without a builtin
    * nonterminal. Instead, cost-less glue rules are used in phrase-based decoding. This eliminates
    * the need for special handling of phrase grammars (except for having to add a LHS), and lets
    * phrase grammars be used in both hierarchical and phrase-based decoding without conversion.

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/bd09600b/joshua-core/src/test/resources/decoder/phrase/decode/rules.packed/config
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/resources/decoder/phrase/decode/rules.packed/config b/joshua-core/src/test/resources/decoder/phrase/decode/rules.packed/config
deleted file mode 100644
index 2251fe6..0000000
--- a/joshua-core/src/test/resources/decoder/phrase/decode/rules.packed/config
+++ /dev/null
@@ -1,2 +0,0 @@
-version = 4
-max-source-len = 3

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/bd09600b/joshua-core/src/test/resources/decoder/phrase/decode/rules.packed/encoding
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/resources/decoder/phrase/decode/rules.packed/encoding b/joshua-core/src/test/resources/decoder/phrase/decode/rules.packed/encoding
deleted file mode 100644
index 57e7b75..0000000
Binary files a/joshua-core/src/test/resources/decoder/phrase/decode/rules.packed/encoding and /dev/null differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/bd09600b/joshua-core/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.features
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.features b/joshua-core/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.features
deleted file mode 100644
index 27fa07d..0000000
Binary files a/joshua-core/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.features and /dev/null differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/bd09600b/joshua-core/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.source
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.source b/joshua-core/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.source
deleted file mode 100644
index cdc98f6..0000000
Binary files a/joshua-core/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.source and /dev/null differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/bd09600b/joshua-core/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.target
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.target b/joshua-core/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.target
deleted file mode 100644
index fa82c0d..0000000
Binary files a/joshua-core/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.target and /dev/null differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/bd09600b/joshua-core/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.target.lookup
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.target.lookup b/joshua-core/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.target.lookup
deleted file mode 100644
index 3e8c294..0000000
Binary files a/joshua-core/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.target.lookup and /dev/null differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/bd09600b/joshua-core/src/test/resources/decoder/phrase/decode/rules.packed/vocabulary
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/resources/decoder/phrase/decode/rules.packed/vocabulary b/joshua-core/src/test/resources/decoder/phrase/decode/rules.packed/vocabulary
deleted file mode 100644
index ff62042..0000000
Binary files a/joshua-core/src/test/resources/decoder/phrase/decode/rules.packed/vocabulary and /dev/null differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/bd09600b/joshua-core/src/test/resources/decoder/phrase/include-align-index/log
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/resources/decoder/phrase/include-align-index/log b/joshua-core/src/test/resources/decoder/phrase/include-align-index/log
deleted file mode 100644
index 05cd80f..0000000
--- a/joshua-core/src/test/resources/decoder/phrase/include-align-index/log
+++ /dev/null
@@ -1,50 +0,0 @@
-Parameters read from configuration file:
-    tm = 'moses -owner pt -maxspan 0 -path rules.1.gz -max-source-len 5'
-    featurefunction = 'StateMinimizingLanguageModel -lm_order 5 -lm_file lm.1.gz'
-    search = 'stack'
-    markoovs = 'false'
-    poplimit = '10'
-    topn = '1'
-    outputformat = '%i ||| %s ||| %f ||| %c'
-    includealignindex = 'true'
-    reorderinglimit = '6'
-    featurefunction = 'OOVPenalty'
-    featurefunction = 'WordPenalty'
-    featurefunction = 'Distortion'
-    featurefunction = 'PhrasePenalty -owner pt'
-Parameters overridden from the command line:
-    threads = '1'
-    c = 'config'
-Read 9 weights (0 of them dense)
-Reading grammar from file rules.1.gz...
-........10........20........30........40........50........60........70........80........90.....100%
-MemoryBasedBatchGrammar: Read 165161 rules with 18 distinct source sides from 'rules.1.gz'
-Couldn't create a GrammarReader for file null with format phrase
-MemoryBasedBatchGrammar: Read 0 rules with 0 distinct source sides from 'null'
-Memory used 219.6 MB
-Grammar loading took: 0 seconds.
-Stateful object with state index 0
-Loading the LM will be faster if you build a binary file.
-Reading lm.1.gz
-----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100
-****************************************************************************************************
-FEATURE: tm_pt (weight 0.000)
-FEATURE: tm_custom (weight 0.000)
-FEATURE: lm_0, order 5 (weight 0.204)
-FEATURE: OOVPenalty (weight 1.000)
-FEATURE: WordPenalty (weight -0.202)
-FEATURE: Distortion (weight 0.115)
-FEATURE: PhrasePenalty (weight -0.237)
-Grammar sorting happening lazily on-demand.
-Model loading took 0 seconds
-Memory used 219.6 MB
-Input 0: <s> una estrategia republicana para obstaculizar la reelección de Obama </s>
-Input 0: Collecting options took 0.000 seconds
-Input 0: Search took 0.013 seconds
-Input 0: Translation took 1.532 seconds
-Input 0: Memory used is 392.5 MB
-Translation 0: -7.496 a strategy republican to hinder reelection Obama 
-Input 0: 1-best extraction took 0.026 seconds
-Decoding completed.
-Memory used 401.6 MB
-Total running time: 2 seconds

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/bd09600b/joshua-core/src/test/resources/decoder/phrase/include-align-index/output
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/resources/decoder/phrase/include-align-index/output b/joshua-core/src/test/resources/decoder/phrase/include-align-index/output
deleted file mode 100644
index 509a3de..0000000
--- a/joshua-core/src/test/resources/decoder/phrase/include-align-index/output
+++ /dev/null
@@ -1 +0,0 @@
-0 ||| a strategy |0-1| republican |2-2| to hinder |3-4| reelection |5-6| Obama |7-8| ||| tm_pt_0=-9.702 tm_pt_1=-10.800 tm_pt_2=-7.543 tm_pt_3=-8.555 lm_0=-19.117 OOVPenalty=0.000 WordPenalty=-3.040 Distortion=0.000 PhrasePenalty=5.000 ||| -7.496

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/bd09600b/joshua-core/src/test/resources/decoder/phrase/include-align-index/test.sh
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/resources/decoder/phrase/include-align-index/test.sh b/joshua-core/src/test/resources/decoder/phrase/include-align-index/test.sh
index 7703aa4..6bef145 100644
--- a/joshua-core/src/test/resources/decoder/phrase/include-align-index/test.sh
+++ b/joshua-core/src/test/resources/decoder/phrase/include-align-index/test.sh
@@ -17,7 +17,7 @@
 #
 set -u
 
-cat corpus.es | $JOSHUA/bin/joshua-decoder -threads 1 -c config > output 2> log
+cat corpus.es | $JOSHUA/bin/joshua-decoder -threads 1 -c joshua.config > output 2> log
 
 # Compare
 diff -u output output.gold > diff

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/bd09600b/joshua-core/src/test/resources/grammar/sparse-features/test-packed.sh
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/resources/grammar/sparse-features/test-packed.sh b/joshua-core/src/test/resources/grammar/sparse-features/test-packed.sh
index 04d7bb0..cf3c460 100755
--- a/joshua-core/src/test/resources/grammar/sparse-features/test-packed.sh
+++ b/joshua-core/src/test/resources/grammar/sparse-features/test-packed.sh
@@ -17,7 +17,7 @@
 #
 set -u
 
-echo el chico | $JOSHUA/bin/decoder -c joshua-packed.config -v 0 > output
+echo el chico | $JOSHUA/bin/joshua -c joshua-packed.config -v 0 > output
 
 # Compare
 diff -u output output.gold > diff

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/bd09600b/joshua-core/src/test/resources/grammar/sparse-features/test.sh
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/resources/grammar/sparse-features/test.sh b/joshua-core/src/test/resources/grammar/sparse-features/test.sh
index 93a871c..8560989 100755
--- a/joshua-core/src/test/resources/grammar/sparse-features/test.sh
+++ b/joshua-core/src/test/resources/grammar/sparse-features/test.sh
@@ -17,7 +17,7 @@
 #
 set -u
 
-echo el chico | $JOSHUA/bin/decoder -c joshua.config -v 0 > output 2> log
+echo el chico | $JOSHUA/bin/joshua -c joshua.config -v 0 > output 2> log
 
 # Compare
 diff -u output output.gold > diff

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/bd09600b/joshua-core/src/test/resources/server/http/test.sh
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/resources/server/http/test.sh b/joshua-core/src/test/resources/server/http/test.sh
index c0ae90e..01703a1 100755
--- a/joshua-core/src/test/resources/server/http/test.sh
+++ b/joshua-core/src/test/resources/server/http/test.sh
@@ -18,7 +18,7 @@
 
 # This test case starts a server and then throws 10 threads at it to make sure threading is working.
 
-$JOSHUA/bin/decoder -threads 4 -server-port 9010 -server-type http -mark-oovs true > server.log 2>&1 &
+$JOSHUA/bin/joshua -threads 4 -server-port 9010 -server-type http -mark-oovs true > server.log 2>&1 &
 serverpid=$!
 sleep 1
 

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/bd09600b/joshua-core/src/test/resources/server/tcp-text/test.sh
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/resources/server/tcp-text/test.sh b/joshua-core/src/test/resources/server/tcp-text/test.sh
index 66628ca..8bc4794 100755
--- a/joshua-core/src/test/resources/server/tcp-text/test.sh
+++ b/joshua-core/src/test/resources/server/tcp-text/test.sh
@@ -20,7 +20,7 @@
 
 port=9011
 
-$JOSHUA/bin/decoder -threads 4 -server-port $port -output-format "%i ||| %s" -mark-oovs true -v 1 > server.log 2>&1 &
+$JOSHUA/bin/joshua -threads 4 -server-port $port -output-format "%i ||| %s" -mark-oovs true -v 1 > server.log 2>&1 &
 serverpid=$!
 sleep 2
 

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/bd09600b/scripts/training/pipeline.pl
----------------------------------------------------------------------
diff --git a/scripts/training/pipeline.pl b/scripts/training/pipeline.pl
index c0e33d3..41a0cbb 100755
--- a/scripts/training/pipeline.pl
+++ b/scripts/training/pipeline.pl
@@ -1154,7 +1154,7 @@ if (! defined $GRAMMAR_FILE) {
     system("mv $thrax_file.tmp $thrax_file");
 
     $cachepipe->cmd("thrax-run",
-                    "hadoop jar $THRAX/bin/thrax.jar -D mapreduce.task.timeout=0 -D mapreduce.map.java.opts='-Xmx$HADOOP_MEM' -D mapreduce.reduce.java.opts='-Xmx$HADOOP_MEM' -D hadoop.tmp.dir=$TMPDIR $thrax_file $THRAXDIR > thrax.log 2>&1; rm -f grammar grammar.gz; hadoop fs -getmerge $THRAXDIR/final/ grammar.gz; hadoop fs -rm -r $THRAXDIR",
+                    "hadoop jar $THRAX/bin/thrax.jar -D mapreduce.task.timeout=0 -D mapreduce.map.java.opts='-Xmx$HADOOP_MEM' -D mapreduce.reduce.java.opts='-Xmx$HADOOP_MEM' -D hadoop.tmp.dir=$TMPDIR $thrax_file $THRAXDIR > thrax.log 2>&1; rm -f grammar grammar.gz; hadoop fs -getmerge $THRAXDIR/final/ grammar.gz",
                     "$DATA_DIRS{train}/thrax-input-file",
                     $thrax_file,
                     "grammar.gz");
@@ -1162,6 +1162,11 @@ if (! defined $GRAMMAR_FILE) {
 
     $GRAMMAR_FILE = "grammar.gz";
 
+    # cleanup if successful
+    if (-s $GRAMMAR_FILE) {
+      system("hadoop fs -rm -r $THRAXDIR");
+    }
+
   } else {
 
     print STDERR "* FATAL: There was no way to build a grammar, and none was passed in\n";

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/bd09600b/scripts/training/run_thrax.py
----------------------------------------------------------------------
diff --git a/scripts/training/run_thrax.py b/scripts/training/run_thrax.py
index e38f6b2..4457245 100755
--- a/scripts/training/run_thrax.py
+++ b/scripts/training/run_thrax.py
@@ -35,10 +35,11 @@ EXAMPLE = r"""
 Example invocation:
 
 $JOSHUA/scripts/support/run_thrax.py \
+  /path/to/thrax.config \
   /path/to/corpus.SOURCE \
   /path/to/corpus.TARGET \
   /path/to/alignment \
-  -c /path/to/thrax.config \
+  /path/to/thrax.config \
   -o grammar.gz
 """
 parser = argparse.ArgumentParser(description='Run thrax')
@@ -90,13 +91,13 @@ paste(args.source_corpus, args.target_corpus, args.alignment_file, thrax_file)
 run('%s/bin/hadoop fs -put %s %s/input-file' % (HADOOP, thrax_file, THRAXDIR))
 
 # Copy the template
-conf_file_name = 'thrax.conf'
-conf_file = open(conf_file_name, 'w')
+conf_file = tempfile.NamedTemporaryFile(prefix='thrax.conf')
 for line in open(args.thrax_config):
     if not line.startswith('input-file'):
         conf_file.write(line)
 conf_file.write('input-file %s/input-file\n' % (THRAXDIR))
 conf_file.close()
+conf_file_name = conf_file.name
 
 # Run Hadoop
 run('%s/bin/hadoop jar %s -D mapred.child.java.opts="-Xmx%s" -D hadoop.tmp.dir=%s %s %s > thrax.log 2>&1' % (HADOOP, THRAX_JAR, '4g', args.tmp_dir, conf_file_name, THRAXDIR))

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/bd09600b/scripts/training/templates/thrax-phrase.conf
----------------------------------------------------------------------
diff --git a/scripts/training/templates/thrax-phrase.conf b/scripts/training/templates/thrax-phrase.conf
index 630c76f..1585383 100644
--- a/scripts/training/templates/thrax-phrase.conf
+++ b/scripts/training/templates/thrax-phrase.conf
@@ -17,15 +17,11 @@ min-rule-count 1
 # the number of reducers
 reducers 16
 
-# not only do these next six options have the suggested values as given
-# in Chiang's "Hierarchical Phrase-based Translation" (CL), they are also
-# Thrax's default values! You could comment them out and the resulting grammar
-# would be identical.
-
-# maximum length of initial phrase pairs
-initial-phrase-length   10
-lex-source-words        10
-lex-target-words        10
+# Maximum length of initial phrase pairs. These are set to be shorter than
+# used by Hiero.
+initial-phrase-length   5
+lex-source-words        5
+lex-target-words        5
 
 # maximum number of NTs in a rule
 arity                   0