You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/05/31 15:14:35 UTC

[4/9] incubator-joshua git commit: StructuredTranslation objects can now be generated from KBest Derivations. This provides a way to expose k-best lists when Joshua is used as a library. Also fixed some code issues and tests.

StructuredTranslation objects can now be generated from KBest Derivations. This provides a way to expose k-best lists when Joshua is used as a library.
Also fixed some code issues and tests.


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/e3673e98
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/e3673e98
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/e3673e98

Branch: refs/heads/master
Commit: e3673e988d5d27f93e69cf270dd4056547a752b9
Parents: 4d73c17
Author: Felix Hieber <fh...@amazon.com>
Authored: Tue Mar 15 11:26:29 2016 +0100
Committer: Felix Hieber <fh...@amazon.com>
Committed: Mon May 30 09:09:10 2016 +0200

----------------------------------------------------------------------
 src/joshua/decoder/StructuredTranslation.java   |  67 ++++------
 .../decoder/StructuredTranslationFactory.java   | 101 +++++++++++++++
 src/joshua/decoder/Translation.java             | 123 ++++++++++++-------
 .../decoder/hypergraph/KBestExtractor.java      |  85 +++++++++----
 .../decoder/hypergraph/WordAlignmentState.java  | 103 +++++++++-------
 src/joshua/decoder/io/JSONMessage.java          |   2 +-
 tst/joshua/corpus/VocabularyTest.java           |  26 ++--
 .../kbest_extraction/KBestExtractionTest.java   |  10 +-
 .../ConstrainedPhraseDecodingTest.java          |  10 +-
 .../phrase/decode/PhraseDecodingTest.java       |  10 +-
 tst/joshua/system/KenLmTest.java                |   6 +-
 .../system/MultithreadedTranslationTests.java   |  10 +-
 tst/joshua/system/StructuredOutputTest.java     |  16 +--
 .../system/StructuredTranslationTest.java       | 107 ++++++++++++----
 tst/joshua/util/FormatUtilsTest.java            |   8 +-
 15 files changed, 446 insertions(+), 238 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e3673e98/src/joshua/decoder/StructuredTranslation.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/StructuredTranslation.java b/src/joshua/decoder/StructuredTranslation.java
index 7b2185f..2a7af73 100644
--- a/src/joshua/decoder/StructuredTranslation.java
+++ b/src/joshua/decoder/StructuredTranslation.java
@@ -18,27 +18,18 @@
  */
 package joshua.decoder;
 
-import static java.util.Arrays.asList;
-import static java.util.Collections.emptyList;
-import static joshua.decoder.hypergraph.ViterbiExtractor.getViterbiFeatures;
-import static joshua.decoder.hypergraph.ViterbiExtractor.getViterbiString;
-import static joshua.decoder.hypergraph.ViterbiExtractor.getViterbiWordAlignmentList;
-import static joshua.util.FormatUtils.removeSentenceMarkers;
-
 import java.util.List;
 import java.util.Map;
 
-import joshua.decoder.ff.FeatureFunction;
-import joshua.decoder.hypergraph.HyperGraph;
 import joshua.decoder.segment_file.Sentence;
 
 /**
- * structuredTranslation provides a more structured access to translation
- * results than the Translation class.
- * Members of instances of this class can be used upstream.
- * <br/>
- * TODO:
- * Enable K-Best extraction.
+ * A StructuredTranslation instance provides a more structured access to
+ * translation results than the string-based Translation class.
+ * This is useful if the decoder is encapsulated in a larger project, instead
+ * of simply writing to a file or stdout.
+ * StructuredTranslation encodes all relevant information about a derivation,
+ * namely output string, tokens, score, features, and word alignment.
  * 
  * @author fhieber
  */
@@ -52,39 +43,23 @@ public class StructuredTranslation {
   private final Map<String,Float> translationFeatures;
   private final float extractionTime;
   
-  public StructuredTranslation(final Sentence sourceSentence,
-      final HyperGraph hypergraph,
-      final List<FeatureFunction> featureFunctions) {
-    
-      final long startTime = System.currentTimeMillis();
-      
-      this.sourceSentence = sourceSentence;
-      this.translationString = removeSentenceMarkers(getViterbiString(hypergraph));
-      this.translationTokens = extractTranslationTokens();
-      this.translationScore = extractTranslationScore(hypergraph);
-      this.translationFeatures = getViterbiFeatures(hypergraph, featureFunctions, sourceSentence).getMap();
-      this.translationWordAlignments = getViterbiWordAlignmentList(hypergraph);
-      this.extractionTime = (System.currentTimeMillis() - startTime) / 1000.0f;
-  }
-  
-  private float extractTranslationScore(final HyperGraph hypergraph) {
-    if (hypergraph == null) {
-      return 0;
-    } else {
-      return hypergraph.goalNode.getScore();
-    }
+  public StructuredTranslation(
+      final Sentence sourceSentence,
+      final String translationString,
+      final List<String> translationTokens,
+      final float translationScore,
+      final List<List<Integer>> translationWordAlignments,
+      final Map<String,Float> translationFeatures,
+      final float extractionTime) {
+    this.sourceSentence = sourceSentence;
+    this.translationString = translationString;
+    this.translationTokens = translationTokens;
+    this.translationScore = translationScore;
+    this.translationWordAlignments = translationWordAlignments;
+    this.translationFeatures = translationFeatures;
+    this.extractionTime = extractionTime;
   }
   
-  private List<String> extractTranslationTokens() {
-    if (translationString.isEmpty()) {
-      return emptyList();
-    } else {
-      return asList(translationString.split("\\s+"));
-    }
-  }
-  
-  // Getters to use upstream
-  
   public Sentence getSourceSentence() {
     return sourceSentence;
   }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e3673e98/src/joshua/decoder/StructuredTranslationFactory.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/StructuredTranslationFactory.java b/src/joshua/decoder/StructuredTranslationFactory.java
new file mode 100644
index 0000000..c6bfb50
--- /dev/null
+++ b/src/joshua/decoder/StructuredTranslationFactory.java
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package joshua.decoder;
+
+import static java.util.Arrays.asList;
+import static java.util.Collections.emptyList;
+import static joshua.decoder.hypergraph.ViterbiExtractor.getViterbiFeatures;
+import static joshua.decoder.hypergraph.ViterbiExtractor.getViterbiString;
+import static joshua.decoder.hypergraph.ViterbiExtractor.getViterbiWordAlignmentList;
+import static joshua.util.FormatUtils.removeSentenceMarkers;
+
+import java.util.List;
+
+import joshua.decoder.ff.FeatureFunction;
+import joshua.decoder.hypergraph.HyperGraph;
+import joshua.decoder.hypergraph.KBestExtractor.DerivationState;
+import joshua.decoder.segment_file.Sentence;
+
+/**
+ * This factory provides methods to create StructuredTranslation objects
+ * from either Viterbi derivations or KBest derivations.
+ * 
+ * @author fhieber
+ */
+public class StructuredTranslationFactory {
+  
+  /**
+   * Returns a StructuredTranslation instance from the Viterbi derivation.
+   * @return A StructuredTranslation object representing the Viterbi derivation.
+   */
+  public static StructuredTranslation fromViterbiDerivation(
+      final Sentence sourceSentence,
+      final HyperGraph hypergraph,
+      final List<FeatureFunction> featureFunctions) {
+    final long startTime = System.currentTimeMillis();
+    final String translationString = removeSentenceMarkers(getViterbiString(hypergraph));
+    return new StructuredTranslation(
+        sourceSentence,
+        translationString,
+        extractTranslationTokens(translationString),
+        extractTranslationScore(hypergraph),
+        getViterbiWordAlignmentList(hypergraph),
+        getViterbiFeatures(hypergraph, featureFunctions, sourceSentence).getMap(),
+        (System.currentTimeMillis() - startTime) / 1000.0f);
+  }
+  
+  /**
+   * Returns a StructuredTranslation instance from a KBest DerivationState. 
+   * @param sourceSentence Sentence object representing the source.
+   * @param derivationState the KBest DerivationState.
+   * @return A StructuredTranslation object representing the derivation encoded by derivationState.
+   */
+  public static StructuredTranslation fromKBestDerivation(
+      final Sentence sourceSentence,
+      final DerivationState derivationState) {
+    final long startTime = System.currentTimeMillis();
+    final String translationString = removeSentenceMarkers(derivationState.getHypothesis());
+    return new StructuredTranslation(
+        sourceSentence,
+        translationString,
+        extractTranslationTokens(translationString),
+        derivationState.getModelCost(),
+        derivationState.getWordAlignmentList(),
+        derivationState.getFeatures().getMap(),
+        (System.currentTimeMillis() - startTime) / 1000.0f);
+  }
+  
+  private static float extractTranslationScore(final HyperGraph hypergraph) {
+    if (hypergraph == null) {
+      return 0;
+    } else {
+      return hypergraph.goalNode.getScore();
+    }
+  }
+  
+  private static List<String> extractTranslationTokens(final String translationString) {
+    if (translationString.isEmpty()) {
+      return emptyList();
+    } else {
+      return asList(translationString.split("\\s+"));
+    }
+  }
+  
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e3673e98/src/joshua/decoder/Translation.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/Translation.java b/src/joshua/decoder/Translation.java
index 8004d9f..03ea62f 100644
--- a/src/joshua/decoder/Translation.java
+++ b/src/joshua/decoder/Translation.java
@@ -18,6 +18,8 @@
  */
 package joshua.decoder;
 
+import static java.util.Arrays.asList;
+import static joshua.decoder.StructuredTranslationFactory.fromViterbiDerivation;
 import static joshua.decoder.hypergraph.ViterbiExtractor.getViterbiFeatures;
 import static joshua.decoder.hypergraph.ViterbiExtractor.getViterbiString;
 import static joshua.decoder.hypergraph.ViterbiExtractor.getViterbiWordAlignments;
@@ -42,6 +44,7 @@ import joshua.decoder.segment_file.Sentence;
  * DecoderThread instances to the InputHandler, where they are assembled in order for output.
  * 
  * @author Matt Post <po...@cs.jhu.edu>
+ * @author Felix Hieber <fh...@amazon.com>
  */
 
 public class Translation {
@@ -53,17 +56,44 @@ public class Translation {
    */
   private String output = null;
 
-  private StructuredTranslation structuredTranslation = null;
+  /**
+   * Stores the list of StructuredTranslations.
+   * If joshuaConfig.topN == 0, will only contain the Viterbi translation.
+   * Else it will use KBestExtractor to populate this list.
+   */
+  private List<StructuredTranslation> structuredTranslations = null;
   
   public Translation(Sentence source, HyperGraph hypergraph, 
       List<FeatureFunction> featureFunctions, JoshuaConfiguration joshuaConfiguration) {
     this.source = source;
     
+    /**
+     * Structured output from Joshua provides a way to programmatically access translation results
+     * from downstream applications, instead of writing results as strings to an output buffer.
+     */
     if (joshuaConfiguration.use_structured_output) {
       
-      structuredTranslation = new StructuredTranslation(
-          source, hypergraph, featureFunctions);
-      this.output = structuredTranslation.getTranslationString();
+      if (joshuaConfiguration.topN == 0) {
+        /*
+         * Obtain Viterbi StructuredTranslation
+         */
+        StructuredTranslation translation = fromViterbiDerivation(source, hypergraph, featureFunctions);
+        this.output = translation.getTranslationString();
+        structuredTranslations = asList(translation);
+        
+      } else {
+        /*
+         * Get K-Best list of StructuredTranslations
+         */
+        final KBestExtractor kBestExtractor = new KBestExtractor(source, featureFunctions, Decoder.weights, false, joshuaConfiguration);
+        structuredTranslations = kBestExtractor.KbestExtractOnHG(hypergraph, joshuaConfiguration.topN);
+        if (structuredTranslations.isEmpty()) {
+            this.output = "";
+        } else {
+            this.output = structuredTranslations.get(0).getTranslationString();
+        }
+        // TODO: We omit the BLEU rescoring for now since it is not clear whether it works at all and what the desired output is below.
+      }
       
     } else {
 
@@ -71,7 +101,9 @@ public class Translation {
       BufferedWriter out = new BufferedWriter(sw);
 
       try {
+        
         if (hypergraph != null) {
+          
           if (!joshuaConfiguration.hypergraphFilePattern.equals("")) {
             hypergraph.dump(String.format(joshuaConfiguration.hypergraphFilePattern, source.id()), featureFunctions);
           }
@@ -132,44 +164,26 @@ public class Translation {
           Decoder.LOG(1, String.format("Input %d: %d-best extraction took %.3f seconds", id(),
               joshuaConfiguration.topN, seconds));
 
-      } else {
-        
-        // Failed translations and blank lines get empty formatted outputs
-        // @formatter:off
-        String outputString = joshuaConfiguration.outputFormat
-            .replace("%s", source.source())
-            .replace("%e", "")
-            .replace("%S", "")
-            .replace("%t", "()")
-            .replace("%i", Integer.toString(source.id()))
-            .replace("%f", "")
-            .replace("%c", "0.000");
-        // @formatter:on
-
-        out.write(outputString);
-        out.newLine();
-      }
+        } else {
+          
+          // Failed translations and blank lines get empty formatted outputs
+          out.write(getFailedTranslationOutput(source, joshuaConfiguration));
+          out.newLine();
+          
+        }
 
         out.flush();
+        
       } catch (IOException e) {
-        e.printStackTrace();
-        System.exit(1);
+        throw new RuntimeException(e);
       }
       
       this.output = sw.toString();
       
     }
-
-    /*
-     * KenLM hack. If using KenLMFF, we need to tell KenLM to delete the pool used to create chart
-     * objects for this sentence.
-     */
-    for (FeatureFunction feature : featureFunctions) {
-      if (feature instanceof StateMinimizingLanguageModel) {
-        ((StateMinimizingLanguageModel) feature).destroyPool(getSourceSentence().id());
-        break;
-      }
-    }
+    
+    // remove state from StateMinimizingLanguageModel instances in features.
+    destroyKenLMStates(featureFunctions);
     
   }
 
@@ -186,17 +200,42 @@ public class Translation {
     return output;
   }
   
+  private String getFailedTranslationOutput(final Sentence source, final JoshuaConfiguration joshuaConfiguration) {
+    return joshuaConfiguration.outputFormat
+        .replace("%s", source.source())
+        .replace("%e", "")
+        .replace("%S", "")
+        .replace("%t", "()")
+        .replace("%i", Integer.toString(source.id()))
+        .replace("%f", "")
+        .replace("%c", "0.000");
+  }
+  
   /**
-   * Returns the StructuredTranslation object
-   * if JoshuaConfiguration.construct_structured_output == True.
-   * @throws RuntimeException if StructuredTranslation object not set.
-   * @return
+   * Returns the StructuredTranslations
+   * if JoshuaConfiguration.use_structured_output == True.
+   * @throws RuntimeException if JoshuaConfiguration.use_structured_output == False.
+   * @return List of StructuredTranslations.
    */
-  public StructuredTranslation getStructuredTranslation() {
-    if (structuredTranslation == null) {
-      throw new RuntimeException("No StructuredTranslation object created. You should set JoshuaConfigration.construct_structured_output = true");
+  public List<StructuredTranslation> getStructuredTranslations() {
+    if (structuredTranslations == null) {
+      throw new RuntimeException(
+          "No StructuredTranslation objects created. You should set JoshuaConfigration.use_structured_output = true");
+    }
+    return structuredTranslations;
+  }
+  
+  /**
+   * KenLM hack. If using KenLMFF, we need to tell KenLM to delete the pool used to create chart
+   * objects for this sentence.
+   */
+  private void destroyKenLMStates(final List<FeatureFunction> featureFunctions) {
+    for (FeatureFunction feature : featureFunctions) {
+      if (feature instanceof StateMinimizingLanguageModel) {
+        ((StateMinimizingLanguageModel) feature).destroyPool(getSourceSentence().id());
+        break;
+      }
     }
-    return structuredTranslation;
   }
   
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e3673e98/src/joshua/decoder/hypergraph/KBestExtractor.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/hypergraph/KBestExtractor.java b/src/joshua/decoder/hypergraph/KBestExtractor.java
index 6dd3207..d6e7c60 100644
--- a/src/joshua/decoder/hypergraph/KBestExtractor.java
+++ b/src/joshua/decoder/hypergraph/KBestExtractor.java
@@ -1,4 +1,4 @@
-/*
+            /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -18,15 +18,16 @@
  */
 package joshua.decoder.hypergraph;
 
-import static joshua.util.FormatUtils.unescapeSpecialSymbols;
+import static java.util.Collections.emptyList;
 import static joshua.util.FormatUtils.removeSentenceMarkers;
+import static joshua.util.FormatUtils.unescapeSpecialSymbols;
 
 import java.io.BufferedWriter;
 import java.io.IOException;
 import java.io.OutputStreamWriter;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Comparator;
-import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
@@ -35,6 +36,8 @@ import java.util.PriorityQueue;
 import joshua.corpus.Vocabulary;
 import joshua.decoder.BLEU;
 import joshua.decoder.JoshuaConfiguration;
+import joshua.decoder.StructuredTranslation;
+import joshua.decoder.StructuredTranslationFactory;
 import joshua.decoder.ff.FeatureFunction;
 import joshua.decoder.ff.FeatureVector;
 import joshua.decoder.ff.fragmentlm.Tree;
@@ -153,11 +156,49 @@ public class KBestExtractor {
    * @return the derivation object
    */
   public DerivationState getKthDerivation(HGNode node, int k) {
-    VirtualNode virtualNode = getVirtualNode(node);
+    final VirtualNode virtualNode = getVirtualNode(node);
     return virtualNode.lazyKBestExtractOnNode(this, k);
   }
   
   /**
+   * Returns the k-th Structured Translation.
+   */
+  public StructuredTranslation getKthStructuredTranslation(HGNode node, int k) {
+    StructuredTranslation result = null;
+    final DerivationState derivationState = getKthDerivation(node, k);
+    if (derivationState != null) {
+      result = StructuredTranslationFactory.fromKBestDerivation(sentence, derivationState);
+    }
+    return result;
+  }
+  
+  /**
+   * This is an entry point for extracting k-best hypotheses as StructuredTranslation objects.
+   * It computes all of them and returns a list of StructuredTranslation objects.
+   * These objects hold all translation information (string, tokens, features, alignments, score).
+   * 
+   * @param hg the hypergraph to extract from
+   * @param topN how many to extract
+   * @param out object to write to
+   * @return list of StructuredTranslation objects, empty if there is no HyperGraph goal node.
+   */
+  public List<StructuredTranslation> KbestExtractOnHG(HyperGraph hg, int topN) {
+    resetState();
+    if (hg.goalNode == null) {
+      return emptyList();
+    }
+    final List<StructuredTranslation> kbest = new ArrayList<>(topN);
+    for (int k = 1; k <= topN; k++) {
+      StructuredTranslation translation = getKthStructuredTranslation(hg.goalNode, k);
+      if (translation == null) {
+        break;
+      }
+      kbest.add(translation);
+    }
+    return kbest;
+  }
+  
+  /**
    * Compute the string that is output from the decoder, using the "output-format" config file
    * parameter as a template.
    * 
@@ -166,11 +207,7 @@ public class KBestExtractor {
   public String getKthHyp(HGNode node, int k) {
 
     String outputString = null;
-    
-    // Determine the k-best hypotheses at each HGNode
-    VirtualNode virtualNode = getVirtualNode(node);
-    DerivationState derivationState = virtualNode.lazyKBestExtractOnNode(this, k);
-//    DerivationState derivationState = getKthDerivation(node, k);
+    DerivationState derivationState = getKthDerivation(node, k);
     if (derivationState != null) {
       // ==== read the kbest from each hgnode and convert to output format
       String hypothesis = maybeProjectCase(
@@ -213,7 +250,7 @@ public class KBestExtractor {
       
       /* %a causes output of word level alignments between input and output hypothesis */
       if (outputFormat.contains("%a")) {
-        outputString = outputString.replace("%a",  derivationState.getWordAlignmentString());
+        outputString = outputString.replace("%a",  derivationState.getWordAlignment());
       }
       
     }
@@ -236,7 +273,7 @@ public class KBestExtractor {
 
     if (joshuaConfiguration.project_case) {
       String[] tokens = hypothesis.split("\\s+");
-      List<List<Integer>> points = state.getWordAlignment();
+      List<List<Integer>> points = state.getWordAlignmentList();
       for (int i = 0; i < points.size(); i++) {
         List<Integer> target = points.get(i);
         for (int source: target) {
@@ -763,42 +800,36 @@ public class KBestExtractor {
 
       return visitor;
     }
-
-    private String getWordAlignmentString() {
+    
+    public String getWordAlignment() {
       return visit(new WordAlignmentExtractor()).toString();
     }
     
-    private List<List<Integer>> getWordAlignment() {
-      WordAlignmentExtractor extractor = new WordAlignmentExtractor();
-      visit(extractor);
-      return extractor.getFinalWordAlignments();
+    public List<List<Integer>> getWordAlignmentList() {
+      final WordAlignmentExtractor visitor = new WordAlignmentExtractor();
+      visit(visitor);
+      return visitor.getFinalWordAlignments();
     }
 
-    private String getTree() {
+    public String getTree() {
       return visit(new TreeExtractor()).toString();
     }
     
-    private String getHypothesis() {
+    public String getHypothesis() {
       return getHypothesis(defaultSide);
     }
 
-    /**
-     * For stack decoding we keep using the old string-based
-     * HypothesisExtractor.
-     * For Hiero, we use a faster, int-based hypothesis extraction
-     * that is correct also for Side.SOURCE cases.
-     */
     private String getHypothesis(final Side side) {
       return visit(new OutputStringExtractor(side.equals(Side.SOURCE))).toString();
     }
 
-    private FeatureVector getFeatures() {
+    public FeatureVector getFeatures() {
       final FeatureVectorExtractor extractor = new FeatureVectorExtractor(featureFunctions, sentence);
       visit(extractor);
       return extractor.getFeatures();
     }
 
-    private String getDerivation() {
+    public String getDerivation() {
       return visit(new DerivationExtractor()).toString();
     }
 

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e3673e98/src/joshua/decoder/hypergraph/WordAlignmentState.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/hypergraph/WordAlignmentState.java b/src/joshua/decoder/hypergraph/WordAlignmentState.java
index 258e062..3430c5d 100644
--- a/src/joshua/decoder/hypergraph/WordAlignmentState.java
+++ b/src/joshua/decoder/hypergraph/WordAlignmentState.java
@@ -18,6 +18,8 @@
  */
 package joshua.decoder.hypergraph;
 
+import static java.lang.Integer.MAX_VALUE;
+
 import java.util.ArrayList;
 import java.util.LinkedList;
 import java.util.List;
@@ -40,8 +42,8 @@ public class WordAlignmentState {
    * rule. The values of the elements correspond to the aligned source token on
    * the source side of the rule.
    */
-  private LinkedList<AlignedSourceTokens> trgPoints;
-  private int srcStart;
+  private List<AlignedSourceTokens> trgPoints;
+  private final int srcStart;
   /** number of NTs we need to substitute. */
   private int numNT;
   /** grows with substitutions of child rules. Reaches original Rule span if substitutions are complete */
@@ -51,17 +53,17 @@ public class WordAlignmentState {
    * construct AlignmentState object from a virgin Rule and its source span.
    * Determines if state is complete (if no NT present)
    */
-  WordAlignmentState(Rule rule, int start) {
+  public WordAlignmentState(final Rule rule, final int start) {
     trgPoints = new LinkedList<AlignedSourceTokens>();
     srcLength = rule.getFrench().length;
     numNT = rule.getArity();
     srcStart = start;
-    Map<Integer, List<Integer>> alignmentMap = rule.getAlignmentMap();
-    int[] nonTermPositions = rule.getNonTerminalSourcePositions();
-    int[] trg = rule.getEnglish();
+    final Map<Integer, List<Integer>> alignmentMap = rule.getAlignmentMap();
+    final int[] nonTerminalSourcePositions = rule.getNonTerminalSourcePositions();
+    final int[] trg = rule.getEnglish();
     // for each target index, create a TargetAlignmentPoint
     for (int trgIndex = 0; trgIndex < trg.length; trgIndex++) {
-      AlignedSourceTokens trgPoint = new AlignedSourceTokens();
+      final AlignedSourceTokens trgPoint = new AlignedSourceTokens();
 
       if (trg[trgIndex] >= 0) { // this is a terminal symbol, check for alignment
         if (alignmentMap.containsKey(trgIndex)) {
@@ -72,9 +74,10 @@ public class WordAlignmentState {
         } else { // this target word is NULL-aligned
           trgPoint.setNull();
         }
-      } else { // this is a nonterminal ([X]) [actually its the (negative) index of the NT in the source
-        trgPoint.setNonTerminal();
-        trgPoint.add(srcStart + nonTermPositions[Math.abs(trg[trgIndex]) - 1]);
+      } else { // this is a nonterminal ([X]) [actually its the (negative) index of the NT in the source]
+        trgPoint.setNonTerminal(); // mark as non-terminal
+        final int absoluteNonTerminalSourcePosition = srcStart + nonTerminalSourcePositions[Math.abs(trg[trgIndex]) - 1];
+        trgPoint.add(absoluteNonTerminalSourcePosition);
       }
       trgPoints.add(trgPoint);
     }
@@ -93,17 +96,18 @@ public class WordAlignmentState {
    * trg. Sorted by trg indexes. Disregards the sentence markers.
    */
   public String toFinalString() {
-    StringBuilder sb = new StringBuilder();
+    final StringBuilder sb = new StringBuilder();
     int t = 0;
     for (AlignedSourceTokens pt : trgPoints) {
-      for (int s : pt)
-        sb.append(String.format(" %d-%d", s-1, t-1)); // disregard sentence
-                                                      // markers
+      for (int s : pt) {
+        sb.append(String.format(" %d-%d", s-1, t-1)); // disregard sentence markers
+      }
       t++;
     }
-    String result = sb.toString();
-    if (!result.isEmpty())
+    final String result = sb.toString();
+    if (!result.isEmpty()) {
       return result.substring(1);
+    }
     return result;
   }
   
@@ -113,18 +117,19 @@ public class WordAlignmentState {
    * First and last item in trgPoints is skipped.
    */
   public List<List<Integer>> toFinalList() {
-    assert (isComplete() == true);
-    List<List<Integer>> alignment = new ArrayList<List<Integer>> ();
-    if (trgPoints.isEmpty())
+    final List<List<Integer>> alignment = new ArrayList<List<Integer>>(trgPoints.size());
+    if (trgPoints.isEmpty()) {
       return alignment;
-    ListIterator<AlignedSourceTokens> it = trgPoints.listIterator();
+    }
+    final ListIterator<AlignedSourceTokens> it = trgPoints.listIterator();
     it.next(); // skip first item (sentence marker)
     while (it.hasNext()) {
-      AlignedSourceTokens alignedSourceTokens = it.next();
+      final AlignedSourceTokens alignedSourceTokens = it.next();
       if (it.hasNext()) { // if not last element in trgPoints
-        List<Integer> newAlignedSourceTokens = new ArrayList<Integer>();
-        for (Integer sourceIndex : alignedSourceTokens)
+        final List<Integer> newAlignedSourceTokens = new ArrayList<Integer>();
+        for (Integer sourceIndex : alignedSourceTokens) {
           newAlignedSourceTokens.add(sourceIndex - 1); // shift by one to disregard sentence marker
+        }
         alignment.add(newAlignedSourceTokens);
       }
     }
@@ -134,38 +139,46 @@ public class WordAlignmentState {
   /**
    * String representation for debugging.
    */
+  @Override
   public String toString() {
     return String.format("%s , len=%d start=%d, isComplete=%s",
         trgPoints.toString(), srcLength, srcStart, this.isComplete());
   }
 
   /**
-   * substitutes a child WorldAlignmentState into this instance at the first
-   * NT it finds. Also shifts the indeces in this instance by the span/width of the
+   * Substitutes a child WordAlignmentState into this instance at the next
+   * nonterminal slot. Also shifts the indices in this instance by the span/width of the
    * child that is to be substituted.
    * Substitution order is determined by the source-first traversal through the hypergraph.
    */
-  void substituteIn(WordAlignmentState child) {
-    // update existing indexes by length of child (has no effect on NULL and
-    // NonTerminal points)
-    for (AlignedSourceTokens trgPoint : trgPoints)
+  public void substituteIn(WordAlignmentState child) {
+    // find the index of the NonTerminal where we substitute the child targetPoints into.
+    // The correct NT is the first one on the SOURCE side.
+    // Also shift all trgPoints by the child length.
+    int substitutionIndex = 0;
+    int sourcePosition = MAX_VALUE;
+    for (final ListIterator<AlignedSourceTokens> trgPointsIterator = trgPoints.listIterator(); trgPointsIterator.hasNext();) {
+      final AlignedSourceTokens trgPoint = trgPointsIterator.next();
       trgPoint.shiftBy(child.srcStart, child.srcLength - 1);
-
-    // now substitute in the child at first NT, modifying the list
-    ListIterator<AlignedSourceTokens> it = trgPoints.listIterator();
-    while (it.hasNext()) {
-      AlignedSourceTokens trgPoint = it.next();
-      if (trgPoint.isNonTerminal()) { // found first NT
-        it.remove(); // remove NT symbol
-        for (AlignedSourceTokens childElement : child.trgPoints) {
-          childElement.setFinal(); // child source indexes are final, do not change them anymore
-          it.add(childElement);
-        }
-        this.srcLength += child.srcLength - 1; // -1 (NT)
-        this.numNT--;
-        break;
+      if (trgPoint.isNonTerminal() && trgPoint.get(0) < sourcePosition) {
+        sourcePosition = trgPoint.get(0);
+        substitutionIndex = trgPointsIterator.previousIndex();
       }
     }
+    
+    // point and remove NT element determined from above
+    final ListIterator<AlignedSourceTokens> insertionIterator = trgPoints.listIterator(substitutionIndex);
+    insertionIterator.next();
+    insertionIterator.remove();
+    
+    // insert child target points and set them to final.
+    for (AlignedSourceTokens childElement : child.trgPoints) {
+      childElement.setFinal();
+      insertionIterator.add(childElement);
+    }
+    
+    // update length and number of non terminal slots
+    this.srcLength += child.srcLength - 1; // -1 (NT)
+    this.numNT--;
   }
-
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e3673e98/src/joshua/decoder/io/JSONMessage.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/io/JSONMessage.java b/src/joshua/decoder/io/JSONMessage.java
index 2733db4..e373716 100644
--- a/src/joshua/decoder/io/JSONMessage.java
+++ b/src/joshua/decoder/io/JSONMessage.java
@@ -90,7 +90,7 @@ public class JSONMessage {
     JSONMessage message = new JSONMessage();
     String[] results = translation.toString().split("\\n");
     if (results.length > 0) {
-      JSONMessage.TranslationItem item = message.addTranslation(translation.getStructuredTranslation().getTranslationString());
+      JSONMessage.TranslationItem item = message.addTranslation(translation.getStructuredTranslations().get(0).getTranslationString());
 
       for (String result: results) {
         String[] tokens = result.split(" \\|\\|\\| ");

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e3673e98/tst/joshua/corpus/VocabularyTest.java
----------------------------------------------------------------------
diff --git a/tst/joshua/corpus/VocabularyTest.java b/tst/joshua/corpus/VocabularyTest.java
index 724d9c7..107f76f 100644
--- a/tst/joshua/corpus/VocabularyTest.java
+++ b/tst/joshua/corpus/VocabularyTest.java
@@ -1,11 +1,13 @@
 // Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 package joshua.corpus;
 
-import static org.junit.Assert.*;
+import static joshua.util.FormatUtils.isNonterminal;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
 
 import java.io.File;
 import java.io.IOException;
-import java.util.Arrays;
 
 import org.junit.After;
 import org.junit.Before;
@@ -53,21 +55,21 @@ public class VocabularyTest {
   @Test
   public void givenVocabulary_whenCheckingStringInBracketsOrNegativeNumber_thenIsNonTerminal() {
     //non-terminals
-    assertTrue(Vocabulary.nt(NON_TERMINAL));
+    assertTrue(isNonterminal(NON_TERMINAL));
     //terminals
-    assertFalse(Vocabulary.nt(WORD1));
-    assertFalse(Vocabulary.nt("[]"));
-    assertFalse(Vocabulary.nt("["));
-    assertFalse(Vocabulary.nt("]"));
-    assertFalse(Vocabulary.nt(""));
+    assertFalse(isNonterminal(WORD1));
+    assertFalse(isNonterminal("[]"));
+    assertFalse(isNonterminal("["));
+    assertFalse(isNonterminal("]"));
+    assertFalse(isNonterminal(""));
     
     //negative numbers indicate non-terminals
-    assertTrue(Vocabulary.nt(-1));
-    assertTrue(Vocabulary.nt(-5));
+    assertTrue(isNonterminal(-1));
+    assertTrue(isNonterminal(-5));
     
     //positive numbers indicate terminals:
-    assertFalse(Vocabulary.nt(0));
-    assertFalse(Vocabulary.nt(5));
+    assertFalse(isNonterminal(0));
+    assertFalse(isNonterminal(5));
 
     
   }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e3673e98/tst/joshua/decoder/kbest_extraction/KBestExtractionTest.java
----------------------------------------------------------------------
diff --git a/tst/joshua/decoder/kbest_extraction/KBestExtractionTest.java b/tst/joshua/decoder/kbest_extraction/KBestExtractionTest.java
index 26c503a..36b0bd3 100644
--- a/tst/joshua/decoder/kbest_extraction/KBestExtractionTest.java
+++ b/tst/joshua/decoder/kbest_extraction/KBestExtractionTest.java
@@ -18,11 +18,14 @@
  */
  package joshua.decoder.kbest_extraction;
 
+import static com.google.common.base.Charsets.UTF_8;
+import static java.nio.file.Files.readAllBytes;
+import static org.junit.Assert.assertEquals;
+
 import java.io.IOException;
 import java.nio.file.Path;
 import java.nio.file.Paths;
 
-import joshua.corpus.Vocabulary;
 import joshua.decoder.Decoder;
 import joshua.decoder.JoshuaConfiguration;
 import joshua.decoder.Translation;
@@ -32,11 +35,6 @@ import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
 
-import static com.google.common.base.Charsets.UTF_8;
-import static java.nio.file.Files.readAllBytes;
-import static joshua.decoder.ff.FeatureVector.DENSE_FEATURE_NAMES;
-import static org.junit.Assert.assertEquals;
-
 /**
  * Reimplements the kbest extraction regression test
  * TODO (fhieber): this test strangely only works with StateMinimizing KenLM.

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e3673e98/tst/joshua/decoder/phrase/constrained/ConstrainedPhraseDecodingTest.java
----------------------------------------------------------------------
diff --git a/tst/joshua/decoder/phrase/constrained/ConstrainedPhraseDecodingTest.java b/tst/joshua/decoder/phrase/constrained/ConstrainedPhraseDecodingTest.java
index 6abfbe2..14a87be 100644
--- a/tst/joshua/decoder/phrase/constrained/ConstrainedPhraseDecodingTest.java
+++ b/tst/joshua/decoder/phrase/constrained/ConstrainedPhraseDecodingTest.java
@@ -18,11 +18,14 @@
  */
  package joshua.decoder.phrase.constrained;
 
+import static com.google.common.base.Charsets.UTF_8;
+import static java.nio.file.Files.readAllBytes;
+import static org.junit.Assert.assertEquals;
+
 import java.io.IOException;
 import java.nio.file.Path;
 import java.nio.file.Paths;
 
-import joshua.corpus.Vocabulary;
 import joshua.decoder.Decoder;
 import joshua.decoder.JoshuaConfiguration;
 import joshua.decoder.Translation;
@@ -32,11 +35,6 @@ import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
 
-import static com.google.common.base.Charsets.UTF_8;
-import static java.nio.file.Files.readAllBytes;
-import static joshua.decoder.ff.FeatureVector.DENSE_FEATURE_NAMES;
-import static org.junit.Assert.assertEquals;
-
 /**
  * Reimplements the constrained phrase decoding test
  */

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e3673e98/tst/joshua/decoder/phrase/decode/PhraseDecodingTest.java
----------------------------------------------------------------------
diff --git a/tst/joshua/decoder/phrase/decode/PhraseDecodingTest.java b/tst/joshua/decoder/phrase/decode/PhraseDecodingTest.java
index 4785aff..621d80b 100644
--- a/tst/joshua/decoder/phrase/decode/PhraseDecodingTest.java
+++ b/tst/joshua/decoder/phrase/decode/PhraseDecodingTest.java
@@ -18,11 +18,14 @@
  */
  package joshua.decoder.phrase.decode;
 
+import static com.google.common.base.Charsets.UTF_8;
+import static java.nio.file.Files.readAllBytes;
+import static org.junit.Assert.assertEquals;
+
 import java.io.IOException;
 import java.nio.file.Path;
 import java.nio.file.Paths;
 
-import joshua.corpus.Vocabulary;
 import joshua.decoder.Decoder;
 import joshua.decoder.JoshuaConfiguration;
 import joshua.decoder.Translation;
@@ -32,11 +35,6 @@ import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
 
-import static com.google.common.base.Charsets.UTF_8;
-import static java.nio.file.Files.readAllBytes;
-import static joshua.decoder.ff.FeatureVector.DENSE_FEATURE_NAMES;
-import static org.junit.Assert.assertEquals;
-
 /**
  * Reimplements the constrained phrase decoding test
  */

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e3673e98/tst/joshua/system/KenLmTest.java
----------------------------------------------------------------------
diff --git a/tst/joshua/system/KenLmTest.java b/tst/joshua/system/KenLmTest.java
index dba74fc..5529fa7 100644
--- a/tst/joshua/system/KenLmTest.java
+++ b/tst/joshua/system/KenLmTest.java
@@ -20,10 +20,10 @@
 
 import static joshua.corpus.Vocabulary.registerLanguageModel;
 import static joshua.corpus.Vocabulary.unregisterLanguageModels;
-import static org.junit.Assert.*;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
 import joshua.corpus.Vocabulary;
-import joshua.decoder.Decoder;
-import joshua.decoder.JoshuaConfiguration;
 import joshua.decoder.ff.lm.KenLM;
 
 import org.junit.After;

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e3673e98/tst/joshua/system/MultithreadedTranslationTests.java
----------------------------------------------------------------------
diff --git a/tst/joshua/system/MultithreadedTranslationTests.java b/tst/joshua/system/MultithreadedTranslationTests.java
index b257aa6..f438ccd 100644
--- a/tst/joshua/system/MultithreadedTranslationTests.java
+++ b/tst/joshua/system/MultithreadedTranslationTests.java
@@ -20,7 +20,9 @@
 
 import static org.junit.Assert.assertTrue;
 
+import java.io.BufferedReader;
 import java.io.ByteArrayInputStream;
+import java.io.InputStreamReader;
 import java.nio.charset.Charset;
 import java.util.ArrayList;
 
@@ -29,7 +31,7 @@ import joshua.decoder.Decoder;
 import joshua.decoder.JoshuaConfiguration;
 import joshua.decoder.Translation;
 import joshua.decoder.Translations;
-import joshua.decoder.io.TranslationRequest;
+import joshua.decoder.io.TranslationRequestStream;
 
 import org.junit.After;
 import org.junit.Before;
@@ -108,7 +110,7 @@ public class MultithreadedTranslationTests {
     // GIVEN
 
     int inputLines = 10000;
-    joshuaConfig.construct_structured_output = true; // Enabled alignments.
+    joshuaConfig.use_structured_output = true; // Enabled alignments.
     StringBuilder sb = new StringBuilder();
     for (int i = 0; i < inputLines; i++) {
       sb.append(INPUT + "\n");
@@ -116,8 +118,8 @@ public class MultithreadedTranslationTests {
 
     // Append a large string together to simulate N requests to the decoding
     // engine.
-    TranslationRequest req = new TranslationRequest(new ByteArrayInputStream(sb.toString()
-        .getBytes(Charset.forName("UTF-8"))), joshuaConfig);
+    TranslationRequestStream req = new TranslationRequestStream(new BufferedReader(new InputStreamReader(new ByteArrayInputStream(sb.toString()
+        .getBytes(Charset.forName("UTF-8"))))), joshuaConfig);
 
     // WHEN
     // Translate all spans in parallel.

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e3673e98/tst/joshua/system/StructuredOutputTest.java
----------------------------------------------------------------------
diff --git a/tst/joshua/system/StructuredOutputTest.java b/tst/joshua/system/StructuredOutputTest.java
index 12e6e88..a1cdd82 100644
--- a/tst/joshua/system/StructuredOutputTest.java
+++ b/tst/joshua/system/StructuredOutputTest.java
@@ -63,8 +63,8 @@ public class StructuredOutputTest {
     joshuaConfig.use_unique_nbest = false;
     joshuaConfig.include_align_index = false;
     joshuaConfig.topN = 0;
-    joshuaConfig.tms.add("thrax pt 20 resources/wa_grammar");
-    joshuaConfig.tms.add("thrax glue -1 resources/grammar.glue");
+    joshuaConfig.tms.add("thrax -owner pt -maxspan 20 -path resources/wa_grammar");
+    joshuaConfig.tms.add("thrax -owner glue -maxspan -1 -path resources/grammar.glue");
     joshuaConfig.goal_symbol = "[GOAL]";
     joshuaConfig.default_non_terminal = "[X]";
     joshuaConfig.features.add("feature_function = OOVPenalty");
@@ -107,14 +107,14 @@ public class StructuredOutputTest {
     joshuaConfig.use_structured_output = true; // set structured output creation to true
     translation = decode(input);
     Assert
-        .assertEquals(expectedTranslation, translation.getTranslationString());
+        .assertEquals(expectedTranslation, translation.getStructuredTranslations().get(0).getTranslationString());
     Assert.assertEquals(Arrays.asList(expectedTranslation.split("\\s+")),
-        translation.getTranslationTokens());
-    Assert.assertEquals(expectedScore, translation.getTranslationScore(),
+        translation.getStructuredTranslations().get(0).getTranslationTokens());
+    Assert.assertEquals(expectedScore, translation.getStructuredTranslations().get(0).getTranslationScore(),
         0.00001);
-    Assert.assertEquals(expectedWordAlignment, translation.getWordAlignment());
-    Assert.assertEquals(translation.getWordAlignment().size(), translation
-        .getTranslationTokens().size());
+    Assert.assertEquals(expectedWordAlignment, translation.getStructuredTranslations().get(0).getTranslationWordAlignments());
+    Assert.assertEquals(translation.getStructuredTranslations().get(0).getTranslationWordAlignments().size(), translation
+        .getStructuredTranslations().get(0).getTranslationTokens().size());
 
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e3673e98/tst/joshua/system/StructuredTranslationTest.java
----------------------------------------------------------------------
diff --git a/tst/joshua/system/StructuredTranslationTest.java b/tst/joshua/system/StructuredTranslationTest.java
index 7460614..0608a65 100644
--- a/tst/joshua/system/StructuredTranslationTest.java
+++ b/tst/joshua/system/StructuredTranslationTest.java
@@ -19,7 +19,6 @@
  package joshua.system;
 
 import static java.util.Arrays.asList;
-import static joshua.decoder.ff.FeatureVector.DENSE_FEATURE_NAMES;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 
@@ -27,12 +26,10 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
-import joshua.corpus.Vocabulary;
 import joshua.decoder.Decoder;
 import joshua.decoder.JoshuaConfiguration;
 import joshua.decoder.StructuredTranslation;
 import joshua.decoder.Translation;
-import joshua.decoder.ff.FeatureVector;
 import joshua.decoder.segment_file.Sentence;
 
 import org.junit.After;
@@ -63,6 +60,7 @@ public class StructuredTranslationTest {
       asList(), asList(7));
   private static final double EXPECTED_SCORE = -17.0;
   private static final Map<String,Float> EXPECTED_FEATURES = new HashMap<>();
+  private static final int EXPECTED_NBEST_LIST_SIZE = 8;
   static {
     EXPECTED_FEATURES.put("tm_glue_0", 1.0f);
     EXPECTED_FEATURES.put("tm_pt_0", -3.0f);
@@ -115,7 +113,7 @@ public class StructuredTranslationTest {
   @Test
   public void givenInput_whenRegularOutputFormat_thenExpectedOutput() {
     // GIVEN
-    joshuaConfig.construct_structured_output = false;
+    joshuaConfig.use_structured_output = false;
     joshuaConfig.outputFormat = "%s | %a ";
     
     // WHEN
@@ -128,7 +126,7 @@ public class StructuredTranslationTest {
   @Test
   public void givenInput_whenRegularOutputFormatWithTopN1_thenExpectedOutput() {
     // GIVEN
-    joshuaConfig.construct_structured_output = false;
+    joshuaConfig.use_structured_output = false;
     joshuaConfig.outputFormat = "%s | %e | %a | %c";
     joshuaConfig.topN = 1;
     
@@ -141,19 +139,48 @@ public class StructuredTranslationTest {
   }
 
   @Test
-  public void givenInput_whenStructuredOutputFormat_thenExpectedOutput() {
+  public void givenInput_whenStructuredOutputFormatWithTopN0_thenExpectedOutput() {
     // GIVEN
-    joshuaConfig.construct_structured_output = true;
+    joshuaConfig.use_structured_output = true;
+    joshuaConfig.topN = 0;
+    
+    // WHEN
+    final Translation translation = decode(INPUT);
+    final StructuredTranslation structuredTranslation = translation.getStructuredTranslations().get(0);
+    final String translationString = structuredTranslation.getTranslationString();
+    final List<String> translatedTokens = structuredTranslation.getTranslationTokens();
+    final float translationScore = structuredTranslation.getTranslationScore();
+    final List<List<Integer>> wordAlignment = structuredTranslation.getTranslationWordAlignments();
+    final Map<String,Float> translationFeatures = structuredTranslation.getTranslationFeatures();
+    
+    // THEN
+    assertTrue(translation.getStructuredTranslations().size() == 1);
+    assertEquals(EXPECTED_TRANSLATION, translationString);
+    assertEquals(EXPECTED_TRANSLATED_TOKENS, translatedTokens);
+    assertEquals(EXPECTED_SCORE, translationScore, 0.00001);
+    assertEquals(EXPECTED_WORD_ALIGNMENT, wordAlignment);
+    assertEquals(wordAlignment.size(), translatedTokens.size());
+    assertEquals(EXPECTED_FEATURES.entrySet(), translationFeatures.entrySet());
+  }
+  
+  @Test
+  public void givenInput_whenStructuredOutputFormatWithTopN1_thenExpectedOutput() {
+    // GIVEN
+    joshuaConfig.use_structured_output = true;
+    joshuaConfig.topN = 1;
     
     // WHEN
-    final StructuredTranslation translation = decode(INPUT).getStructuredTranslation();
-    final String translationString = translation.getTranslationString();
-    final List<String> translatedTokens = translation.getTranslationTokens();
-    final float translationScore = translation.getTranslationScore();
-    final List<List<Integer>> wordAlignment = translation.getTranslationWordAlignments();
-    final Map<String,Float> translationFeatures = translation.getTranslationFeatures();
+    final Translation translation = decode(INPUT);
+    final List<StructuredTranslation> structuredTranslations = translation.getStructuredTranslations();
+    final StructuredTranslation structuredTranslation = structuredTranslations.get(0);
+    final String translationString = structuredTranslation.getTranslationString();
+    final List<String> translatedTokens = structuredTranslation.getTranslationTokens();
+    final float translationScore = structuredTranslation.getTranslationScore();
+    final List<List<Integer>> wordAlignment = structuredTranslation.getTranslationWordAlignments();
+    final Map<String,Float> translationFeatures = structuredTranslation.getTranslationFeatures();
     
     // THEN
+    assertTrue(structuredTranslations.size() == 1);
     assertEquals(EXPECTED_TRANSLATION, translationString);
     assertEquals(EXPECTED_TRANSLATED_TOKENS, translatedTokens);
     assertEquals(EXPECTED_SCORE, translationScore, 0.00001);
@@ -163,16 +190,43 @@ public class StructuredTranslationTest {
   }
   
   @Test
+  public void givenInput_whenStructuredOutputFormatWithKBest_thenExpectedOutput() {
+    // GIVEN
+    joshuaConfig.use_structured_output = true;
+    joshuaConfig.topN = 100;
+    
+    // WHEN
+    final Translation translation = decode(INPUT);
+    final List<StructuredTranslation> structuredTranslations = translation.getStructuredTranslations();
+    final StructuredTranslation viterbiTranslation = structuredTranslations.get(0);
+    final StructuredTranslation lastKBest = structuredTranslations.get(structuredTranslations.size() - 1);
+    
+    // THEN
+    assertEquals(structuredTranslations.size(), EXPECTED_NBEST_LIST_SIZE);
+    assertTrue(structuredTranslations.size() > 1);
+    assertEquals(EXPECTED_TRANSLATION, viterbiTranslation.getTranslationString());
+    assertEquals(EXPECTED_TRANSLATED_TOKENS, viterbiTranslation.getTranslationTokens());
+    assertEquals(EXPECTED_SCORE, viterbiTranslation.getTranslationScore(), 0.00001);
+    assertEquals(EXPECTED_WORD_ALIGNMENT, viterbiTranslation.getTranslationWordAlignments());
+    assertEquals(EXPECTED_FEATURES.entrySet(), viterbiTranslation.getTranslationFeatures().entrySet());
+    // last entry in KBEST is all input words untranslated, should have 8 OOVs.
+    assertEquals(INPUT, lastKBest.getTranslationString());
+    assertEquals(-800.0, lastKBest.getTranslationFeatures().get("OOVPenalty"), 0.0001);
+    
+  }
+  
+  @Test
   public void givenEmptyInput_whenStructuredOutputFormat_thenEmptyOutput() {
     // GIVEN
-    joshuaConfig.construct_structured_output = true;
+    joshuaConfig.use_structured_output = true;
     
     // WHEN
-    final StructuredTranslation translation = decode("").getStructuredTranslation();
-    final String translationString = translation.getTranslationString();
-    final List<String> translatedTokens = translation.getTranslationTokens();
-    final float translationScore = translation.getTranslationScore();
-    final List<List<Integer>> wordAlignment = translation.getTranslationWordAlignments();
+    final Translation translation = decode("");
+    final StructuredTranslation structuredTranslation = translation.getStructuredTranslations().get(0);
+    final String translationString = structuredTranslation.getTranslationString();
+    final List<String> translatedTokens = structuredTranslation.getTranslationTokens();
+    final float translationScore = structuredTranslation.getTranslationScore();
+    final List<List<Integer>> wordAlignment = structuredTranslation.getTranslationWordAlignments();
     
     // THEN
     assertEquals("", translationString);
@@ -184,15 +238,16 @@ public class StructuredTranslationTest {
   @Test
   public void givenOOVInput_whenStructuredOutputFormat_thenOOVOutput() {
     // GIVEN
-    joshuaConfig.construct_structured_output = true;
+    joshuaConfig.use_structured_output = true;
     final String input = "gabarbl";
     
     // WHEN
-    final StructuredTranslation translation = decode(input).getStructuredTranslation();
-    final String translationString = translation.getTranslationString();
-    final List<String> translatedTokens = translation.getTranslationTokens();
-    final float translationScore = translation.getTranslationScore();
-    final List<List<Integer>> wordAlignment = translation.getTranslationWordAlignments();
+    final Translation translation = decode(input);
+    final StructuredTranslation structuredTranslation = translation.getStructuredTranslations().get(0);
+    final String translationString = structuredTranslation.getTranslationString();
+    final List<String> translatedTokens = structuredTranslation.getTranslationTokens();
+    final float translationScore = structuredTranslation.getTranslationScore();
+    final List<List<Integer>> wordAlignment = structuredTranslation.getTranslationWordAlignments();
     
     // THEN
     assertEquals(input, translationString);
@@ -204,7 +259,7 @@ public class StructuredTranslationTest {
   @Test
   public void givenEmptyInput_whenRegularOutputFormat_thenNewlineOutput() {
     // GIVEN
-    joshuaConfig.construct_structured_output = false;
+    joshuaConfig.use_structured_output = false;
     
     // WHEN
     final Translation translation = decode("");

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e3673e98/tst/joshua/util/FormatUtilsTest.java
----------------------------------------------------------------------
diff --git a/tst/joshua/util/FormatUtilsTest.java b/tst/joshua/util/FormatUtilsTest.java
index 254522d..a1edc33 100644
--- a/tst/joshua/util/FormatUtilsTest.java
+++ b/tst/joshua/util/FormatUtilsTest.java
@@ -19,9 +19,9 @@
  package joshua.util;
 
 import static joshua.util.FormatUtils.cleanNonTerminal;
+import static joshua.util.FormatUtils.ensureNonTerminalBrackets;
 import static joshua.util.FormatUtils.escapeSpecialSymbols;
 import static joshua.util.FormatUtils.isNonterminal;
-import static joshua.util.FormatUtils.markup;
 import static joshua.util.FormatUtils.stripNonTerminalIndex;
 import static joshua.util.FormatUtils.unescapeSpecialSymbols;
 import static org.junit.Assert.*;
@@ -58,11 +58,7 @@ public class FormatUtilsTest {
   
   @Test
   public void givenTokens_whenMarkup_thenCorrectMarkup() {
-    assertEquals(markup("X"), "[X]");
-    assertEquals(markup("X", 1), "[X,1]");
-    assertEquals(markup("X", 15), "[X,15]");
-    assertEquals(markup("[X]", 1), "[X,1]");
-    assertEquals(markup("[X,1]", 4), "[X,4]");
+    assertEquals(ensureNonTerminalBrackets("X"), "[X]");
   }
   
   @Test