You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/04/02 00:56:24 UTC

[01/10] incubator-joshua git commit: Viterbi information is now extracted from the hypergraph using a more principled traversal functionality (WalkerFunction). Also updated the unit tests.

Repository: incubator-joshua
Updated Branches:
  refs/heads/master e7ead8fb3 -> 4c0b55337


Viterbi information is now extracted from the hypergraph using a more principled traversal functionality (WalkerFunction).
Also updated the unit tests.


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/244e6936
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/244e6936
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/244e6936

Branch: refs/heads/master
Commit: 244e6936d8e3e7b30ebbe49ff7a9a2bd0c0c9994
Parents: 9501535
Author: Felix Hieber <fh...@amazon.com>
Authored: Mon Aug 24 08:29:17 2015 +0200
Committer: Kellen Sunderland <ke...@amazon.com>
Committed: Thu Mar 31 10:44:42 2016 +0200

----------------------------------------------------------------------
 .../joshua/decoder/StructuredTranslation.java   | 143 ++++++++++++++
 .../ViterbiFeatureVectorWalkerFunction.java     |  44 +++++
 .../ViterbiOutputStringWalkerFunction.java      |  96 ++++++++++
 src/joshua/decoder/JoshuaConfiguration.java     |   5 +-
 .../system/StructuredTranslationTest.java       | 184 +++++++++++++++++++
 5 files changed, 470 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/244e6936/joshua-6/src/joshua/decoder/StructuredTranslation.java
----------------------------------------------------------------------
diff --git a/joshua-6/src/joshua/decoder/StructuredTranslation.java b/joshua-6/src/joshua/decoder/StructuredTranslation.java
new file mode 100644
index 0000000..1939ea0
--- /dev/null
+++ b/joshua-6/src/joshua/decoder/StructuredTranslation.java
@@ -0,0 +1,143 @@
+package joshua.decoder;
+
+import static java.util.Arrays.asList;
+import static java.util.Collections.emptyList;
+import static java.util.Collections.emptyMap;
+import static joshua.decoder.hypergraph.ViterbiExtractor.walk;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import joshua.decoder.ff.FeatureFunction;
+import joshua.decoder.hypergraph.HyperGraph;
+import joshua.decoder.hypergraph.ViterbiFeatureVectorWalkerFunction;
+import joshua.decoder.hypergraph.ViterbiOutputStringWalkerFunction;
+import joshua.decoder.hypergraph.WalkerFunction;
+import joshua.decoder.hypergraph.WordAlignmentExtractor;
+import joshua.decoder.segment_file.Sentence;
+
+/**
+ * Gives more structured access to a translation result than the
+ * Translation class does.
+ * Every value is extracted once, in the constructor, and can afterwards be
+ * read by upstream code through the getters.
+ * <p>
+ * TODO: enable k-best extraction.
+ *
+ * @author fhieber
+ */
+public class StructuredTranslation {
+
+  private final Sentence sourceSentence;
+  private final List<FeatureFunction> featureFunctions;
+
+  private final String translationString;
+  private final List<String> translationTokens;
+  private final float translationScore;
+  private List<List<Integer>> translationWordAlignments;
+  private Map<String,Float> translationFeatures;
+  private final float extractionTime;
+
+  /**
+   * Extracts string, tokens, score, features and word alignments from the
+   * hypergraph (which may be null) and measures the time this takes.
+   */
+  public StructuredTranslation(final Sentence sourceSentence,
+      final HyperGraph hypergraph,
+      final List<FeatureFunction> featureFunctions) {
+
+    final long extractionStart = System.currentTimeMillis();
+
+    // The order matters: the token list is derived from translationString, and
+    // feature extraction reads featureFunctions and sourceSentence.
+    this.sourceSentence = sourceSentence;
+    this.featureFunctions = featureFunctions;
+    this.translationString = extractViterbiString(hypergraph);
+    this.translationTokens = extractTranslationTokens();
+    this.translationScore = extractTranslationScore(hypergraph);
+    this.translationFeatures = extractViterbiFeatures(hypergraph);
+    this.translationWordAlignments = extractViterbiWordAlignment(hypergraph);
+    final long elapsedMillis = System.currentTimeMillis() - extractionStart;
+    this.extractionTime = elapsedMillis / 1000.0f;
+  }
+
+  /** Copies the feature map collected along the walk; empty without a hypergraph. */
+  private Map<String,Float> extractViterbiFeatures(final HyperGraph hypergraph) {
+    if (hypergraph == null) {
+      return emptyMap();
+    }
+    final ViterbiFeatureVectorWalkerFunction featureWalker =
+        new ViterbiFeatureVectorWalkerFunction(featureFunctions, sourceSentence);
+    walk(hypergraph.goalNode, featureWalker);
+    return new HashMap<String,Float>(featureWalker.getFeaturesMap());
+  }
+
+  /** Walks the hypergraph to collect word alignments; empty without a hypergraph. */
+  private List<List<Integer>> extractViterbiWordAlignment(final HyperGraph hypergraph) {
+    if (hypergraph == null) {
+      return emptyList();
+    }
+    final WordAlignmentExtractor alignmentWalker = new WordAlignmentExtractor();
+    walk(hypergraph.goalNode, alignmentWalker);
+    return alignmentWalker.getFinalWordAlignments();
+  }
+
+  /** Score of the goal node, or 0 without a hypergraph. */
+  private float extractTranslationScore(final HyperGraph hypergraph) {
+    if (hypergraph == null) {
+      return 0;
+    }
+    return hypergraph.goalNode.getScore();
+  }
+
+  /** Output string built by the string walker; the source text without a hypergraph. */
+  private String extractViterbiString(final HyperGraph hypergraph) {
+    if (hypergraph == null) {
+      return sourceSentence.source();
+    }
+    final WalkerFunction stringWalker = new ViterbiOutputStringWalkerFunction();
+    walk(hypergraph.goalNode, stringWalker);
+    return stringWalker.toString();
+  }
+
+  /** Splits the translation string on whitespace; an empty string gives no tokens. */
+  private List<String> extractTranslationTokens() {
+    if (translationString.isEmpty()) {
+      return emptyList();
+    }
+    return asList(translationString.split("\\s+"));
+  }
+
+  // Getters to use upstream
+
+  /** The sentence this translation was produced for. */
+  public Sentence getSourceSentence() {
+    return sourceSentence;
+  }
+
+  /** Id of the source sentence. */
+  public int getSentenceId() {
+    return sourceSentence.id();
+  }
+
+  /** The translation as one string. */
+  public String getTranslationString() {
+    return translationString;
+  }
+
+  /** The translation string split on whitespace. */
+  public List<String> getTranslationTokens() {
+    return translationTokens;
+  }
+
+  /** Score taken from the goal node of the hypergraph, or 0 if there was none. */
+  public float getTranslationScore() {
+    return translationScore;
+  }
+
+  /**
+   * Returns a list of target to source alignments.
+   */
+  public List<List<Integer>> getTranslationWordAlignments() {
+    return translationWordAlignments;
+  }
+
+  /** Feature values keyed by a string, as collected during the hypergraph walk. */
+  public Map<String,Float> getTranslationFeatures() {
+    return translationFeatures;
+  }
+
+  /**
+   * Time taken, in seconds, to build output information from the hypergraph.
+   */
+  public Float getExtractionTime() {
+    return extractionTime;
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/244e6936/joshua-6/src/joshua/decoder/hypergraph/ViterbiFeatureVectorWalkerFunction.java
----------------------------------------------------------------------
diff --git a/joshua-6/src/joshua/decoder/hypergraph/ViterbiFeatureVectorWalkerFunction.java b/joshua-6/src/joshua/decoder/hypergraph/ViterbiFeatureVectorWalkerFunction.java
new file mode 100644
index 0000000..5af6c4d
--- /dev/null
+++ b/joshua-6/src/joshua/decoder/hypergraph/ViterbiFeatureVectorWalkerFunction.java
@@ -0,0 +1,44 @@
+package joshua.decoder.hypergraph;
+
+import static joshua.decoder.chart_parser.ComputeNodeResult.computeTransitionFeatures;
+
+import java.util.List;
+import java.util.Map;
+
+import joshua.decoder.ff.FeatureFunction;
+import joshua.decoder.ff.FeatureVector;
+import joshua.decoder.segment_file.Sentence;
+
+/**
+ * Walker function that accumulates, in one feature vector, the transition
+ * features of the best hyperedge of every node it is applied to.
+ */
+public class ViterbiFeatureVectorWalkerFunction implements WalkerFunction {
+
+  private final FeatureVector features = new FeatureVector();
+  private final List<FeatureFunction> featureFunctions;
+  private final Sentence sourceSentence;
+
+  /**
+   * @param featureFunctions feature functions passed on to computeTransitionFeatures
+   * @param sourceSentence sentence passed on to computeTransitionFeatures
+   */
+  public ViterbiFeatureVectorWalkerFunction(
+      final List<FeatureFunction> featureFunctions,
+      final Sentence sourceSentence) {
+    this.featureFunctions = featureFunctions;
+    this.sourceSentence = sourceSentence;
+  }
+
+  /**
+   * Recomputes the feature values of the node's best hyperedge over the span
+   * (node.i, node.j) and adds them to the accumulated features.
+   */
+  @Override
+  public void apply(HGNode node) {
+    features.add(computeTransitionFeatures(
+        featureFunctions, node.bestHyperedge, node.i, node.j, sourceSentence));
+  }
+
+  /** The feature vector accumulated so far (the internal instance, not a copy). */
+  public FeatureVector getFeatures() {
+    return features;
+  }
+
+  /** Map view of the accumulated features, as returned by FeatureVector.getMap(). */
+  public Map<String,Float> getFeaturesMap() {
+    return features.getMap();
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/244e6936/joshua-6/src/joshua/decoder/hypergraph/ViterbiOutputStringWalkerFunction.java
----------------------------------------------------------------------
diff --git a/joshua-6/src/joshua/decoder/hypergraph/ViterbiOutputStringWalkerFunction.java b/joshua-6/src/joshua/decoder/hypergraph/ViterbiOutputStringWalkerFunction.java
new file mode 100644
index 0000000..0c84375
--- /dev/null
+++ b/joshua-6/src/joshua/decoder/hypergraph/ViterbiOutputStringWalkerFunction.java
@@ -0,0 +1,96 @@
+package joshua.decoder.hypergraph;
+
+import static java.lang.Integer.MAX_VALUE;
+import static joshua.corpus.Vocabulary.getWords;
+import static joshua.corpus.Vocabulary.nt;
+
+import java.util.Stack;
+
+import joshua.decoder.ff.tm.Rule;
+
+public class ViterbiOutputStringWalkerFunction implements WalkerFunction {
+  
+  private Stack<int[]> viterbiWords = new Stack<int[]>();
+
+  @Override
+  public void apply(HGNode node) {
+    final Rule rule = node.bestHyperedge.getRule();
+    if (rule != null) {
+      merge(rule.getEnglish());
+    }
+  }
+  
+  private boolean containsNonTerminals(final int[] ids) {
+    boolean hasNonTerminals = false;
+    for (int i = 0; i < ids.length; i++) {
+      if (nt(ids[i])) {
+        hasNonTerminals = true;
+        break;
+      }
+    }
+    return hasNonTerminals;
+  }
+  
+  /**
+   * Returns the index of the next non-terminal slot to fill.
+   * Since non-terminals in right hand sides of rules are indexed by
+   * their order on the source side, this function looks for the largest
+   * negative id in ids and returns its index. 
+   */
+  private int getNextNonTerminalIndexToFill(final int[] ids) {
+    int nextIndex = 0;
+    int nextNonTerminal = -MAX_VALUE;
+    for (int i = 0; i < ids.length; i++) {
+      if (nt(ids[i]) && ids[i] > nextNonTerminal) {
+        nextIndex = i;
+        nextNonTerminal = ids[i];
+      }
+    }
+    return nextIndex;
+  }
+  
+  private int[] substituteNonTerminal(final int[] parentWords, final int[] childWords) {
+    final int ntIndex = getNextNonTerminalIndexToFill(parentWords);
+    final int[] result = new int[parentWords.length + childWords.length - 1];
+    int resultIndex = 0;
+    for (int i = 0; i < ntIndex; i++) {
+      result[resultIndex++] = parentWords[i];
+    }
+    for (int i = 0; i < childWords.length; i++) {
+      result[resultIndex++] = childWords[i];
+    }
+    for (int i = ntIndex + 1; i < parentWords.length; i++) {
+      result[resultIndex++] = parentWords[i];
+    }
+    return result;
+  }
+
+  private void merge(final int[] words) {
+    if (!containsNonTerminals(words)
+        && !viterbiWords.isEmpty()
+        && containsNonTerminals(viterbiWords.peek())) {
+      merge(substituteNonTerminal(viterbiWords.pop(), words));
+    } else {
+      viterbiWords.add(words);
+    }
+  }
+  
+  @Override
+  public String toString() {
+    if (viterbiWords.isEmpty()) {
+      return "";
+    }
+    
+    if (viterbiWords.size() != 1) {
+      throw new RuntimeException(
+          String.format(
+              "Stack of ViterbiOutputStringWalker should contain only a single (last) element, but was size %d", viterbiWords.size()));
+    }
+    
+    String result = getWords(viterbiWords.peek());
+    // strip of sentence markers (<s>,</s>)
+    result = result.substring(result.indexOf(' ') + 1, result.lastIndexOf(' '));
+    return result.trim();
+  }
+  
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/244e6936/src/joshua/decoder/JoshuaConfiguration.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/JoshuaConfiguration.java b/src/joshua/decoder/JoshuaConfiguration.java
index 266198c..2eb24c4 100644
--- a/src/joshua/decoder/JoshuaConfiguration.java
+++ b/src/joshua/decoder/JoshuaConfiguration.java
@@ -30,8 +30,9 @@ import joshua.util.io.LineReader;
  */
 public class JoshuaConfiguration {
   
-  // whether to use structured output
-  public Boolean use_structured_output = false;
+  // whether to construct a StructuredTranslation object for each request instead of 
+  // printing to stdout. Used when the Decoder is used from Java directly.
+  public Boolean construct_structured_output = false;
 
   // List of grammar files to read
   public ArrayList<String> tms = new ArrayList<String>();

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/244e6936/tst/joshua/system/StructuredTranslationTest.java
----------------------------------------------------------------------
diff --git a/tst/joshua/system/StructuredTranslationTest.java b/tst/joshua/system/StructuredTranslationTest.java
new file mode 100644
index 0000000..821ceea
--- /dev/null
+++ b/tst/joshua/system/StructuredTranslationTest.java
@@ -0,0 +1,184 @@
+package joshua.system;
+
+import static java.util.Arrays.asList;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import joshua.corpus.Vocabulary;
+import joshua.decoder.Decoder;
+import joshua.decoder.JoshuaConfiguration;
+import joshua.decoder.StructuredTranslation;
+import joshua.decoder.Translation;
+import joshua.decoder.segment_file.Sentence;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Integration test for the complete Joshua decoder using a toy grammar that translates
+ * a bunch of capital letters to lowercase letters. Rules in the test grammar
+ * drop and generate additional words and simulate reordering of rules, so that
+ * proper extraction of word alignments and other information from the decoder
+ * can be tested. Each test gets a fresh decoder and a cleared Vocabulary (see setUp).
+ * 
+ * @author fhieber
+ */
+public class StructuredTranslationTest {
+
+  private JoshuaConfiguration joshuaConfig = null;
+  private Decoder decoder = null;
+  private static final String INPUT = "A K B1 U Z1 Z2 B2 C"; // source sentence to decode
+  private static final String EXPECTED_TRANSLATION = "a b n1 u z c1 k1 k2 k3 n1 n2 n3 c2";
+  private static final List<String> EXPECTED_TRANSLATED_TOKENS = asList(EXPECTED_TRANSLATION.split("\\s+"));
+  private static final String EXPECTED_WORD_ALIGNMENT_STRING = "0-0 2-1 6-1 3-3 4-4 5-4 7-5 1-6 1-7 1-8 7-12";
+  private static final List<List<Integer>> EXPECTED_WORD_ALIGNMENT = asList( // one entry per target token
+      asList(0), asList(2, 6), asList(), asList(3),
+      asList(4, 5), asList(7), asList(1),
+      asList(1), asList(1), asList(), asList(),
+      asList(), asList(7));
+  private static final double EXPECTED_SCORE = -17.0;
+  private static final Map<String,Float> EXPECTED_FEATURES = new HashMap<>();
+  static {
+    EXPECTED_FEATURES.put("tm_glue_0", 1.0f);
+    EXPECTED_FEATURES.put("tm_pt_0", -3.0f);
+    EXPECTED_FEATURES.put("tm_pt_1", -3.0f);
+    EXPECTED_FEATURES.put("tm_pt_2", -3.0f);
+    EXPECTED_FEATURES.put("tm_pt_3", -3.0f);
+    EXPECTED_FEATURES.put("tm_pt_4", -3.0f);
+    EXPECTED_FEATURES.put("tm_pt_5", -3.0f);
+    EXPECTED_FEATURES.put("OOV", 7.0f);
+  }
+
+  @Before
+  public void setUp() throws Exception {
+    Vocabulary.clear();
+    joshuaConfig = new JoshuaConfiguration();
+    joshuaConfig.search_algorithm = "cky";
+    joshuaConfig.mark_oovs = false;
+    joshuaConfig.pop_limit = 100;
+    joshuaConfig.use_unique_nbest = false;
+    joshuaConfig.include_align_index = false;
+    joshuaConfig.topN = 0;
+    joshuaConfig.tms.add("thrax -owner pt -maxspan 20 -path resources/wa_grammar");
+    joshuaConfig.tms.add("thrax -owner glue -maxspan -1 -path resources/grammar.glue");
+    joshuaConfig.goal_symbol = "[GOAL]";
+    joshuaConfig.default_non_terminal = "[X]";
+    joshuaConfig.features.add("feature_function = OOVPenalty");
+    joshuaConfig.weights.add("tm_pt_0 1");
+    joshuaConfig.weights.add("tm_pt_1 1");
+    joshuaConfig.weights.add("tm_pt_2 1");
+    joshuaConfig.weights.add("tm_pt_3 1");
+    joshuaConfig.weights.add("tm_pt_4 1");
+    joshuaConfig.weights.add("tm_pt_5 1");
+    joshuaConfig.weights.add("tm_glue_0 1");
+    joshuaConfig.weights.add("OOVPenalty 2");
+    decoder = new Decoder(joshuaConfig, ""); // second argument (configFile)
+                                             // is not even used by the
+                                             // constructor/initialize
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    Vocabulary.clear();
+    decoder.cleanUp();
+    decoder = null;
+  }
+
+  private Translation decode(String input) { // decodes input as sentence 0 with the shared config
+    Sentence sentence = new Sentence(input, 0, joshuaConfig);
+    return decoder.decode(sentence);
+  }
+  
+  @Test
+  public void givenInput_whenRegularOutputFormat_thenExpectedOutput() {
+    // GIVEN
+    joshuaConfig.construct_structured_output = false;
+    joshuaConfig.outputFormat = "%s | %a ";
+    
+    // WHEN
+    final String translation = decode(INPUT).toString().trim();
+    
+    // THEN
+    assertEquals(EXPECTED_TRANSLATION + " | " + EXPECTED_WORD_ALIGNMENT_STRING, translation);
+  }
+
+  @Test
+  public void givenInput_whenSaarStructuredOutputFormat_thenExpectedOutput() {
+    // GIVEN
+    joshuaConfig.construct_structured_output = true;
+    
+    // WHEN
+    final StructuredTranslation translation = decode(INPUT).getStructuredTranslation();
+    final String translationString = translation.getTranslationString();
+    final List<String> translatedTokens = translation.getTranslationTokens();
+    final float translationScore = translation.getTranslationScore();
+    final List<List<Integer>> wordAlignment = translation.getTranslationWordAlignments();
+    final Map<String,Float> translationFeatures = translation.getTranslationFeatures();
+    
+    // THEN
+    assertEquals(EXPECTED_TRANSLATION, translationString);
+    assertEquals(EXPECTED_TRANSLATED_TOKENS, translatedTokens);
+    assertEquals(EXPECTED_SCORE, translationScore, 0.00001);
+    assertEquals(EXPECTED_WORD_ALIGNMENT, wordAlignment);
+    assertEquals(wordAlignment.size(), translatedTokens.size()); // one alignment entry per token
+    assertEquals(EXPECTED_FEATURES.entrySet(), translationFeatures.entrySet());
+  }
+  
+  @Test
+  public void givenEmptyInput_whenSaarStructuredOutputFormat_thenEmptyOutput() {
+    // GIVEN
+    joshuaConfig.construct_structured_output = true;
+    
+    // WHEN
+    final StructuredTranslation translation = decode("").getStructuredTranslation();
+    final String translationString = translation.getTranslationString();
+    final List<String> translatedTokens = translation.getTranslationTokens();
+    final float translationScore = translation.getTranslationScore();
+    final List<List<Integer>> wordAlignment = translation.getTranslationWordAlignments();
+    
+    // THEN
+    assertEquals("", translationString);
+    assertTrue(translatedTokens.isEmpty());
+    assertEquals(0, translationScore, 0.00001);
+    assertTrue(wordAlignment.isEmpty());
+  }
+  
+  @Test
+  public void givenOOVInput_whenSaarStructuredOutputFormat_thenOOVOutput() {
+    // GIVEN
+    joshuaConfig.construct_structured_output = true;
+    final String input = "gabarbl"; // NOTE(review): presumably absent from both test grammars - confirm
+    
+    // WHEN
+    final StructuredTranslation translation = decode(input).getStructuredTranslation();
+    final String translationString = translation.getTranslationString();
+    final List<String> translatedTokens = translation.getTranslationTokens();
+    final float translationScore = translation.getTranslationScore();
+    final List<List<Integer>> wordAlignment = translation.getTranslationWordAlignments();
+    
+    // THEN
+    assertEquals(input, translationString); // the input word is expected back unchanged
+    assertTrue(translatedTokens.contains(input));
+    assertEquals(-199.0, translationScore, 0.00001);
+    assertTrue(wordAlignment.contains(asList(0)));
+  }
+  
+  @Test
+  public void givenEmptyInput_whenRegularOutputFormat_thenNewlineOutput() {
+    // GIVEN
+    joshuaConfig.construct_structured_output = false;
+    
+    // WHEN
+    final Translation translation = decode("");
+    final String translationString = translation.toString();
+    
+    // THEN
+    assertEquals("\n", translationString);
+  }
+
+}


[10/10] incubator-joshua git commit: Merge branch 'kellen-pull'

Posted by mj...@apache.org.
Merge branch 'kellen-pull'


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/4c0b5533
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/4c0b5533
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/4c0b5533

Branch: refs/heads/master
Commit: 4c0b55337f04f7ca31557667f0c369e32f21e0c4
Parents: e7ead8f a98a80b
Author: Matt Post <po...@cs.jhu.edu>
Authored: Fri Apr 1 18:46:52 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Fri Apr 1 18:46:52 2016 -0400

----------------------------------------------------------------------
 .gitignore                                      |   6 +
 bin/joshua-decoder                              |   2 +-
 build.xml                                       |   1 +
 .../joshua/decoder/StructuredTranslation.java   | 143 +++++++
 .../ViterbiFeatureVectorWalkerFunction.java     |  44 +++
 .../ViterbiOutputStringWalkerFunction.java      |  96 +++++
 lib/ivy.xml                                     |   1 +
 pom.xml                                         |   5 +
 resources/grammar.glue                          |   4 +
 resources/wa_grammar                            |   3 +
 resources/wa_grammar.packed/config              |   1 +
 resources/wa_grammar.packed/encoding            | Bin 0 -> 154 bytes
 .../wa_grammar.packed/slice_00000.alignments    | Bin 0 -> 45 bytes
 .../wa_grammar.packed/slice_00000.features      | Bin 0 -> 47 bytes
 resources/wa_grammar.packed/slice_00000.source  | Bin 0 -> 204 bytes
 resources/wa_grammar.packed/slice_00000.target  | Bin 0 -> 128 bytes
 .../wa_grammar.packed/slice_00000.target.lookup | Bin 0 -> 32 bytes
 resources/wa_grammar.packed/vocabulary          | Bin 0 -> 238 bytes
 src/joshua/decoder/Decoder.java                 |  12 +-
 src/joshua/decoder/JoshuaConfiguration.java     |  11 +
 src/joshua/decoder/Translation.java             | 122 +++++-
 src/joshua/decoder/ff/FeatureVector.java        |   5 +-
 src/joshua/decoder/ff/tm/PhraseRule.java        |  51 ++-
 src/joshua/decoder/ff/tm/Rule.java              | 162 ++++++--
 .../decoder/ff/tm/packed/PackedGrammar.java     | 371 ++++++++++++-------
 .../decoder/hypergraph/AlignedSourceTokens.java |  93 +++++
 .../decoder/hypergraph/KBestExtractor.java      |  10 +
 .../decoder/hypergraph/ViterbiExtractor.java    |  30 ++
 .../hypergraph/WordAlignmentExtractor.java      |  55 +++
 .../decoder/hypergraph/WordAlignmentState.java  | 154 ++++++++
 test/bn-en/packed/output.scores.gold            |  24 +-
 test/lattice-short/test.sh                      |   2 +-
 tst/joshua/system/AlignmentMapTest.java         |  53 +++
 .../system/MultithreadedTranslationTests.java   | 125 +++++++
 tst/joshua/system/StructuredOutputTest.java     | 103 +++++
 .../system/StructuredTranslationTest.java       | 184 +++++++++
 36 files changed, 1633 insertions(+), 240 deletions(-)
----------------------------------------------------------------------



[04/10] incubator-joshua git commit: Forced synchronization on method that still occasionally fails multi-threaded test. This is a fix for a very rare multithreading issue we've observed in Joshua. We have a test that is able to reproduce the error fairl

Posted by mj...@apache.org.
Forced synchronization on method that still occasionally fails multi-threaded test.
This is a fix for a very rare multithreading issue we've observed in Joshua. We have a test that is able to reproduce the error fairly often when run on a host with multiple physical cores. This patch fixes all errors seen in both the test and during runtime.


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/cadd987c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/cadd987c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/cadd987c

Branch: refs/heads/master
Commit: cadd987c16ff012298b42074fb96bab8697fa84f
Parents: cabb52c
Author: Kellen Sunderland <ke...@amazon.com>
Authored: Tue Mar 29 15:46:27 2016 +0200
Committer: Kellen Sunderland <ke...@amazon.com>
Committed: Thu Mar 31 10:44:43 2016 +0200

----------------------------------------------------------------------
 .../decoder/ff/tm/packed/PackedGrammar.java     | 26 +++++++++++++++++---
 1 file changed, 22 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/cadd987c/src/joshua/decoder/ff/tm/packed/PackedGrammar.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/tm/packed/PackedGrammar.java b/src/joshua/decoder/ff/tm/packed/PackedGrammar.java
index 2251f5a..18aa60e 100644
--- a/src/joshua/decoder/ff/tm/packed/PackedGrammar.java
+++ b/src/joshua/decoder/ff/tm/packed/PackedGrammar.java
@@ -46,6 +46,7 @@ import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.RandomAccessFile;
+import java.nio.BufferUnderflowException;
 import java.nio.IntBuffer;
 import java.nio.MappedByteBuffer;
 import java.nio.channels.FileChannel;
@@ -487,6 +488,7 @@ public class PackedGrammar extends AbstractGrammar {
 
       feature_position += EncoderConfiguration.ID_SIZE;
       StringBuilder sb = new StringBuilder();
+
       for (int i = 0; i < num_features; i++) {
         int feature_id = encoding.readId(features, feature_position);
         FloatEncoder encoder = encoding.encoder(feature_id);
@@ -505,18 +507,34 @@ public class PackedGrammar extends AbstractGrammar {
       return sb.toString().trim();
     }
 
-    private final byte[] getAlignmentArray(int block_id) {
+    /**
+     * Copies the alignment bytes of block {@code block_id} out of the shared alignments buffer.
+     * Synchronized because many PackedRule/PhrasePair objects share one PackedSlice: concurrent
+     * first getAlignments calls could otherwise alter each other's buffer position between
+     * position() and get(). Throws RuntimeException if no alignments are available.
+     */
+    private synchronized final byte[] getAlignmentArray(int block_id) {
       if (alignments == null)
         throw new RuntimeException("No alignments available.");
       int alignment_position = alignmentLookup[block_id];
       int num_points = (int) alignments.get(alignment_position);
       byte[] alignment = new byte[num_points * 2];
-      
+
       alignments.position(alignment_position + 1);
-      alignments.get(alignment, 0, num_points * 2);
+      try {
+        alignments.get(alignment, 0, num_points * 2);
+      } catch (BufferUnderflowException bue) {
+        Decoder.LOG(4, "Had an exception when accessing alignment mapped byte buffer");
+        Decoder.LOG(4, "Attempting to access alignments at position: " + (alignment_position + 1));
+        Decoder.LOG(4, "And to read this many bytes: " + (num_points * 2));
+        Decoder.LOG(4, "Buffer capacity is : " + alignments.capacity());
+        Decoder.LOG(4, "Buffer position is : " + alignments.position());
+        Decoder.LOG(4, "Buffer limit is : " + alignments.limit());
+        throw bue;
+      }
       return alignment;
     }
-    
+
     private final PackedTrie root() {
       return getTrie(0);
     }


[06/10] incubator-joshua git commit: Remove sorting which may rely on LOCALE of machine

Posted by mj...@apache.org.
Remove sorting which may rely on LOCALE of machine

Here we fixed an integration test that will fail on any machine with a console locale set to a euro-based numbering system. So for example de-DE and fr-FR locales would fail this test.


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/2cc9996b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/2cc9996b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/2cc9996b

Branch: refs/heads/master
Commit: 2cc9996b4ed9e71ae4998a0db3eaef9586b0c69d
Parents: cadd987
Author: Felix Hieber <fh...@amazon.com>
Authored: Tue Mar 29 16:55:07 2016 +0200
Committer: Kellen Sunderland <ke...@amazon.com>
Committed: Thu Mar 31 10:44:43 2016 +0200

----------------------------------------------------------------------
 test/lattice-short/test.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/2cc9996b/test/lattice-short/test.sh
----------------------------------------------------------------------
diff --git a/test/lattice-short/test.sh b/test/lattice-short/test.sh
index ecae1e2..c10b2a2 100755
--- a/test/lattice-short/test.sh
+++ b/test/lattice-short/test.sh
@@ -1,6 +1,6 @@
 #!/bin/sh
 
-cat input | $JOSHUA/bin/joshua-decoder -m 500m -config joshua.config 2> log | sort > output
+cat input | $JOSHUA/bin/joshua-decoder -m 500m -config joshua.config 2> log > output
 
 if [[ $? -ne 0 ]]; then
 	exit 1


[03/10] incubator-joshua git commit: Add an LRU cache from Google Guava to decrease allocations in the PackedGrammar getRules() call Results in a 1.5 times speedup in decoding and a large decrease in required garbage collection

Posted by mj...@apache.org.
Add an LRU cache from Google Guava to decrease allocations in the PackedGrammar getRules() call
Results in a 1.5 times speedup in decoding and a large decrease in required garbage collection


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/e70677d2
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/e70677d2
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/e70677d2

Branch: refs/heads/master
Commit: e70677d2eab23daa7082173e6fe337d68aa12230
Parents: 0990ebc
Author: Kellen Sunderland <ke...@amazon.com>
Authored: Tue Sep 22 13:37:54 2015 +0200
Committer: Kellen Sunderland <ke...@amazon.com>
Committed: Thu Mar 31 10:44:42 2016 +0200

----------------------------------------------------------------------
 bin/joshua-decoder                              |  2 +-
 build.xml                                       |  1 +
 lib/ivy.xml                                     |  1 +
 pom.xml                                         |  5 +++++
 src/joshua/decoder/JoshuaConfiguration.java     |  7 +++++++
 .../decoder/ff/tm/packed/PackedGrammar.java     | 20 +++++++++++++++++++-
 6 files changed, 34 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e70677d2/bin/joshua-decoder
----------------------------------------------------------------------
diff --git a/bin/joshua-decoder b/bin/joshua-decoder
index 57c09f1..cdb2cf4 100755
--- a/bin/joshua-decoder
+++ b/bin/joshua-decoder
@@ -27,7 +27,7 @@ set -u
 JOSHUA=$(dirname $0)/..
 
 exec java -Xmx${mem} \
- 	-cp $JOSHUA/class:$JOSHUA/ext/berkeleylm/jar/berkeleylm.jar:$JOSHUA/lib/gson-2.5.jar \
+ 	-cp $JOSHUA/class:$JOSHUA/ext/berkeleylm/jar/berkeleylm.jar:$JOSHUA/lib/gson-2.5.jar:$JOSHUA/lib/guava-19.0.jar \
 	-Dfile.encoding=utf8 \
 	-Djava.util.logging.config.file=${JOSHUA}/logging.properties \
 	-Djava.library.path=$JOSHUA/lib \

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e70677d2/build.xml
----------------------------------------------------------------------
diff --git a/build.xml b/build.xml
index 6456721..7095ca2 100644
--- a/build.xml
+++ b/build.xml
@@ -28,6 +28,7 @@
       <include name="collections-generic-4.01.jar" />
       <include name="args4j-2.0.29.jar" />
       <include name="gson-2.5.jar" />
+      <include name="guava-19.0.jar" />
     </fileset>
     <fileset dir="${thraxlib}">
       <include name="thrax.jar" />

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e70677d2/lib/ivy.xml
----------------------------------------------------------------------
diff --git a/lib/ivy.xml b/lib/ivy.xml
index 02f3ff7..d41595d 100644
--- a/lib/ivy.xml
+++ b/lib/ivy.xml
@@ -12,5 +12,6 @@
     <dependency org="net.sourceforge.collections" name="collections-generic" rev="4.01"/>
     <dependency org="args4j" name="args4j" rev="2.0.29" />
     <dependency org="com.google.code.gson" name="gson" rev="2.5"/>
+    <dependency org="com.google.guava" name="guava" rev="19.0"/>
   </dependencies>
 </ivy-module>

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e70677d2/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 3b4aac1..de75e80 100644
--- a/pom.xml
+++ b/pom.xml
@@ -122,5 +122,10 @@
       <version>4.10</version>
       <optional>true</optional>
     </dependency>
+    <dependency>
+        <groupId>com.google.guava</groupId>
+        <artifactId>guava</artifactId>
+        <version>19.0</version>
+    </dependency>
   </dependencies>
 </project>

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e70677d2/src/joshua/decoder/JoshuaConfiguration.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/JoshuaConfiguration.java b/src/joshua/decoder/JoshuaConfiguration.java
index ece18d2..49ab87d 100644
--- a/src/joshua/decoder/JoshuaConfiguration.java
+++ b/src/joshua/decoder/JoshuaConfiguration.java
@@ -33,6 +33,10 @@ public class JoshuaConfiguration {
   // List of grammar files to read
   public ArrayList<String> tms = new ArrayList<String>();
 
+  // A rule cache for commonly used tries to avoid excess object allocations
+  // Testing shows there's up to ~95% hit rate when cache size is 5000 Trie nodes.
+  public Integer cachedRuleSize = new Integer(5000);
+
   /*
    * The file to read the weights from (part of the sparse features implementation). Weights can
    * also just be listed in the main config file.
@@ -609,6 +613,9 @@ public class JoshuaConfiguration {
             // Check source sentence
             source_annotations = true;
 
+          } else if (parameter.equals(normalize_key("cached-rules-size"))) {
+              // Check source sentence
+              cachedRuleSize = Integer.parseInt(fds[1]);
           } else {
 
             if (parameter.equals(normalize_key("use-sent-specific-tm"))

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e70677d2/src/joshua/decoder/ff/tm/packed/PackedGrammar.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/tm/packed/PackedGrammar.java b/src/joshua/decoder/ff/tm/packed/PackedGrammar.java
index df5538a..dc72a4b 100644
--- a/src/joshua/decoder/ff/tm/packed/PackedGrammar.java
+++ b/src/joshua/decoder/ff/tm/packed/PackedGrammar.java
@@ -79,6 +79,9 @@ import joshua.util.encoding.EncoderConfiguration;
 import joshua.util.encoding.FloatEncoder;
 import joshua.util.io.LineReader;
 
+import com.google.common.cache.Cache;
+import com.google.common.cache.CacheBuilder;
+
 public class PackedGrammar extends AbstractGrammar {
 
   private EncoderConfiguration encoding;
@@ -92,6 +95,10 @@ public class PackedGrammar extends AbstractGrammar {
   // The grammar specification keyword (e.g., "thrax" or "moses")
   private String type;
 
+  // A rule cache for commonly used tries to avoid excess object allocations
+  // Testing shows there's up to ~95% hit rate when cache size is 5000 Trie nodes.
+  private final Cache<Trie, List<Rule>> cached_rules;
+
   public PackedGrammar(String grammar_dir, int span_limit, String owner, String type,
       JoshuaConfiguration joshuaConfiguration) throws FileNotFoundException, IOException {
     super(joshuaConfiguration);
@@ -132,6 +139,7 @@ public class PackedGrammar extends AbstractGrammar {
     for (PackedSlice s : slices)
       count += s.estimated.length;
     root = new PackedRoot(slices);
+    cached_rules = CacheBuilder.newBuilder().maximumSize(joshuaConfiguration.cachedRuleSize).build();
 
     Decoder.LOG(1, String.format("Loaded %d rules", count));
   }
@@ -618,17 +626,24 @@ public class PackedGrammar extends AbstractGrammar {
 
       @Override
       public List<Rule> getRules() {
+        List<Rule> rules = cached_rules.getIfPresent(this);
+        if (rules != null) {
+          return rules;
+        }
+
         int num_children = source[position];
         int rule_position = position + 2 * (num_children + 1);
         int num_rules = source[rule_position - 1];
 
-        ArrayList<Rule> rules = new ArrayList<Rule>(num_rules);
+        rules = new ArrayList<Rule>(num_rules);
         for (int i = 0; i < num_rules; i++) {
           if (type.equals("moses") || type.equals("phrase"))
             rules.add(new PackedPhrasePair(rule_position + 3 * i));
           else
             rules.add(new PackedRule(rule_position + 3 * i));
         }
+
+        cached_rules.put(this, rules);
         return rules;
       }
 
@@ -684,6 +699,9 @@ public class PackedGrammar extends AbstractGrammar {
         }
         for (int i = 0; i < sorted.length; i++)
           source[rule_position + i] = sorted[i];
+
+        // Replace rules in cache with their sorted values on next getRules()
+        cached_rules.invalidate(this);
         this.sorted = true;
       }
 


[09/10] incubator-joshua git commit: Merge branch 'master' of github.com:KellenSunderland/joshua into kellen-pull

Posted by mj...@apache.org.
Merge branch 'master' of github.com:KellenSunderland/joshua into kellen-pull


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/a98a80b4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/a98a80b4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/a98a80b4

Branch: refs/heads/master
Commit: a98a80b494384dd923da7f7e7dd22bd237732ae9
Parents: e7ead8f 5665f02
Author: Matt Post <po...@cs.jhu.edu>
Authored: Fri Apr 1 18:39:44 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Fri Apr 1 18:39:44 2016 -0400

----------------------------------------------------------------------
 .gitignore                                      |   6 +
 bin/joshua-decoder                              |   2 +-
 build.xml                                       |   1 +
 .../joshua/decoder/StructuredTranslation.java   | 143 +++++++
 .../ViterbiFeatureVectorWalkerFunction.java     |  44 +++
 .../ViterbiOutputStringWalkerFunction.java      |  96 +++++
 lib/ivy.xml                                     |   1 +
 pom.xml                                         |   5 +
 resources/grammar.glue                          |   4 +
 resources/wa_grammar                            |   3 +
 resources/wa_grammar.packed/config              |   1 +
 resources/wa_grammar.packed/encoding            | Bin 0 -> 154 bytes
 .../wa_grammar.packed/slice_00000.alignments    | Bin 0 -> 45 bytes
 .../wa_grammar.packed/slice_00000.features      | Bin 0 -> 47 bytes
 resources/wa_grammar.packed/slice_00000.source  | Bin 0 -> 204 bytes
 resources/wa_grammar.packed/slice_00000.target  | Bin 0 -> 128 bytes
 .../wa_grammar.packed/slice_00000.target.lookup | Bin 0 -> 32 bytes
 resources/wa_grammar.packed/vocabulary          | Bin 0 -> 238 bytes
 src/joshua/decoder/Decoder.java                 |  12 +-
 src/joshua/decoder/JoshuaConfiguration.java     |  11 +
 src/joshua/decoder/Translation.java             | 122 +++++-
 src/joshua/decoder/ff/FeatureVector.java        |   5 +-
 src/joshua/decoder/ff/tm/PhraseRule.java        |  51 ++-
 src/joshua/decoder/ff/tm/Rule.java              | 162 ++++++--
 .../decoder/ff/tm/packed/PackedGrammar.java     | 371 ++++++++++++-------
 .../decoder/hypergraph/AlignedSourceTokens.java |  93 +++++
 .../decoder/hypergraph/KBestExtractor.java      |  10 +
 .../decoder/hypergraph/ViterbiExtractor.java    |  30 ++
 .../hypergraph/WordAlignmentExtractor.java      |  55 +++
 .../decoder/hypergraph/WordAlignmentState.java  | 154 ++++++++
 test/bn-en/packed/output.scores.gold            |  24 +-
 test/lattice-short/test.sh                      |   2 +-
 tst/joshua/system/AlignmentMapTest.java         |  53 +++
 .../system/MultithreadedTranslationTests.java   | 125 +++++++
 tst/joshua/system/StructuredOutputTest.java     | 103 +++++
 .../system/StructuredTranslationTest.java       | 184 +++++++++
 36 files changed, 1633 insertions(+), 240 deletions(-)
----------------------------------------------------------------------



[08/10] incubator-joshua git commit: Removed slow and redundant feature string parsing when constructing rules from packed grammar (at sort time and at actual construction of feature vector).

Posted by mj...@apache.org.
Removed slow and redundant feature string parsing when constructing rules from packed grammar (at sort time and at actual construction of feature vector).

Gets rid of parsing feature strings over and over again, which turned out to be slow in profiling. The solution is not perfect, but we get a nice speedup of roughly a factor of 5. If JoshuaConfiguration.amortize is set to false, grammars are forced to be sorted at decoder startup. Here are the stats: New code: Took 561.64 seconds to load pipeline. Old code: Took 2688.60 seconds to load pipeline. Basically, we are significantly reducing the time for sorting the rules by getting rid of an intermediate string representation of the features in a rule. Since String parsing of floats is now removed, there was some float precision change in the regression test, for which I changed the gold output. This is fine.


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/5665f02f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/5665f02f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/5665f02f

Branch: refs/heads/master
Commit: 5665f02ff0385db4f77bf4493db2d96bc63355d8
Parents: 9448ba5
Author: Felix Hieber <fh...@amazon.com>
Authored: Tue Dec 1 13:34:47 2015 +0100
Committer: Kellen Sunderland <ke...@amazon.com>
Committed: Thu Mar 31 10:44:43 2016 +0200

----------------------------------------------------------------------
 src/joshua/decoder/Decoder.java                 |   8 +-
 src/joshua/decoder/JoshuaConfiguration.java     |   2 +-
 src/joshua/decoder/ff/FeatureVector.java        |   5 +-
 src/joshua/decoder/ff/tm/PhraseRule.java        |  48 +++++----
 src/joshua/decoder/ff/tm/Rule.java              | 100 +++++++++++++------
 .../decoder/ff/tm/packed/PackedGrammar.java     |  63 ++++++------
 test/bn-en/packed/output.scores.gold            |  24 ++---
 .../system/MultithreadedTranslationTests.java   |  12 +++
 8 files changed, 156 insertions(+), 106 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5665f02f/src/joshua/decoder/Decoder.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/Decoder.java b/src/joshua/decoder/Decoder.java
index 1a353ca..8e74d42 100644
--- a/src/joshua/decoder/Decoder.java
+++ b/src/joshua/decoder/Decoder.java
@@ -1,5 +1,7 @@
 package joshua.decoder;
 
+import static joshua.decoder.ff.FeatureVector.DENSE_FEATURE_NAMES;
+
 import java.io.BufferedWriter;	
 import java.io.File;
 import java.io.IOException;
@@ -627,7 +629,7 @@ public class Decoder {
       }
 
       Decoder.LOG(1, String.format("Read %d weights (%d of them dense)", weights.size(),
-          FeatureVector.DENSE_FEATURE_NAMES.size()));
+      DENSE_FEATURE_NAMES.size()));
 
       // Do this before loading the grammars and the LM.
       this.featureFunctions = new ArrayList<FeatureFunction>();
@@ -644,8 +646,8 @@ public class Decoder {
 
       // This is mostly for compatibility with the Moses tuning script
       if (joshuaConfiguration.show_weights_and_quit) {
-        for (int i = 0; i < FeatureVector.DENSE_FEATURE_NAMES.size(); i++) {
-          String name = FeatureVector.DENSE_FEATURE_NAMES.get(i);
+        for (int i = 0; i < DENSE_FEATURE_NAMES.size(); i++) {
+          String name = DENSE_FEATURE_NAMES.get(i);
           if (joshuaConfiguration.moses) 
             System.out.println(String.format("%s= %.5f", mosesize(name), weights.getDense(i)));
           else

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5665f02f/src/joshua/decoder/JoshuaConfiguration.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/JoshuaConfiguration.java b/src/joshua/decoder/JoshuaConfiguration.java
index 2eb24c4..b7be145 100644
--- a/src/joshua/decoder/JoshuaConfiguration.java
+++ b/src/joshua/decoder/JoshuaConfiguration.java
@@ -32,7 +32,7 @@ public class JoshuaConfiguration {
   
   // whether to construct a StructuredTranslation object for each request instead of 
   // printing to stdout. Used when the Decoder is used from Java directly.
-  public Boolean construct_structured_output = false;
+  public Boolean use_structured_output = false;
 
   // List of grammar files to read
   public ArrayList<String> tms = new ArrayList<String>();

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5665f02f/src/joshua/decoder/ff/FeatureVector.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/FeatureVector.java b/src/joshua/decoder/ff/FeatureVector.java
index a5526e4..50b2a3c 100644
--- a/src/joshua/decoder/ff/FeatureVector.java
+++ b/src/joshua/decoder/ff/FeatureVector.java
@@ -78,10 +78,7 @@ public class FeatureVector {
      * IMPORTANT: Note that, for historical reasons, the sign is reversed on all *dense* scores.
      * This is the source of *no end* of confusion and should be done away with.
      */
-    sparseFeatures = new HashMap<String, Float>();
-    denseFeatures = new ArrayList<Float>(DENSE_FEATURE_NAMES.size());
-    for (int i = 0; i < denseFeatures.size(); i++)
-      denseFeatures.set(i, 0.0f);
+    this();
     
     int denseFeatureIndex = 0;
 

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5665f02f/src/joshua/decoder/ff/tm/PhraseRule.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/tm/PhraseRule.java b/src/joshua/decoder/ff/tm/PhraseRule.java
index c178b31..8530aa0 100644
--- a/src/joshua/decoder/ff/tm/PhraseRule.java
+++ b/src/joshua/decoder/ff/tm/PhraseRule.java
@@ -21,44 +21,54 @@ import com.google.common.base.Suppliers;
  */
 public class PhraseRule extends Rule {
 
-  private String mosesFeatureString = null;
-  private Supplier<byte[]> alignmentSupplier;
+
+  private final String mosesFeatureString;
+  private final Supplier<byte[]> alignmentSupplier;
+  private final Supplier<String> sparseFeaturesStringSupplier;
   
   public PhraseRule(int lhs, int[] french, int[] english, String sparse_features, int arity,
       String alignment) {
     super(lhs, french, english, null, arity, alignment);
-    mosesFeatureString = sparse_features;
-    this.alignmentSupplier = Suppliers.memoize(() ->{
-        String[] tokens = getAlignmentString().split("[-\\s]+");
-        byte[] alignmentArray = new byte[tokens.length + 2];
-        alignmentArray[0] = alignmentArray[1] = 0;
-        for (int i = 0; i < tokens.length; i++)
-            alignmentArray[i + 2] = (byte) (Short.parseShort(tokens[i]) + 1);
-        return alignmentArray;
-    });
+    this.mosesFeatureString = sparse_features;
+    this.alignmentSupplier = initializeAlignmentSupplier();
+    this.sparseFeaturesStringSupplier = initializeSparseFeaturesStringSupplier();
   }
-
+  
   /** 
    * Moses features are probabilities; we need to convert them here by taking the negative log prob.
    * We do this only when the rule is used to amortize.
    */
-  @Override
-  public String getFeatureString() {
-    if (sparseFeatureString == null) {
+  private Supplier<String> initializeSparseFeaturesStringSupplier() {
+    return Suppliers.memoize(() ->{
       StringBuffer values = new StringBuffer();
       for (String value: mosesFeatureString.split(" ")) {
         float f = Float.parseFloat(value);
         values.append(String.format("%f ", f <= 0.0 ? -100 : -Math.log(f)));
       }
-      sparseFeatureString = values.toString().trim();
-    }
-    return sparseFeatureString;
+      return values.toString().trim();
+    });
   }
-  
+
   /**
    * This is the exact same as the parent implementation, but we need to add 1 to each alignment
    * point to account for the nonterminal [X] that was prepended to each rule. 
    */
+  private Supplier<byte[]> initializeAlignmentSupplier(){
+    return Suppliers.memoize(() ->{
+      String[] tokens = getAlignmentString().split("[-\\s]+");
+      byte[] alignmentArray = new byte[tokens.length + 2];
+      alignmentArray[0] = alignmentArray[1] = 0;
+      for (int i = 0; i < tokens.length; i++)
+          alignmentArray[i + 2] = (byte) (Short.parseShort(tokens[i]) + 1);
+      return alignmentArray;
+    });
+  }
+
+  @Override
+  public String getFeatureString() {
+    return this.sparseFeaturesStringSupplier.get();
+  }
+  
   @Override
   public byte[] getAlignment() {
     return this.alignmentSupplier.get();

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5665f02f/src/joshua/decoder/ff/tm/Rule.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/tm/Rule.java b/src/joshua/decoder/ff/tm/Rule.java
index 3d715ea..abef4b7 100644
--- a/src/joshua/decoder/ff/tm/Rule.java
+++ b/src/joshua/decoder/ff/tm/Rule.java
@@ -43,10 +43,9 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
   protected int arity;
 
   // And a string containing the sparse ones
-  protected FeatureVector features = null;
-  protected String sparseFeatureString;
-
-  private final Supplier<byte[]> alignmentSupplier;
+  //protected final String sparseFeatureString;
+  protected final Supplier<String> sparseFeatureStringSupplier;
+  private final Supplier<FeatureVector> featuresSupplier;
 
   /*
    * a feature function will be fired for this rule only if the owner of the rule matches the owner
@@ -68,13 +67,16 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
 
   // The alignment string, e.g., 0-0 0-1 1-1 2-1
   private String alignmentString;
+  private final Supplier<byte[]> alignmentSupplier;
 
   /**
-   * Constructs a new rule using the provided parameters. The owner and rule id for this rule are
+   * Constructs a new rule using the provided parameters. Rule id for this rule is
    * undefined. Note that some of the sparse features may be unlabeled, but they cannot be mapped to
    * their default names ("tm_OWNER_INDEX") until later, when we know the owner of the rule. This is
    * not known until the rule is actually added to a grammar in Grammar::addRule().
    * 
+   * Constructor used by other constructors below;
+   * 
    * @param lhs Left-hand side of the rule.
    * @param sourceRhs Source language right-hand side of the rule.
    * @param targetRhs Target language right-hand side of the rule.
@@ -82,34 +84,63 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
    * @param arity Number of nonterminals in the source language right-hand side.
    * @param owner
    */
-  public Rule(int lhs, int[] sourceRhs, int[] targetRhs, String sparseFeatures, int arity,
-      int owner) {
+  public Rule(int lhs, int[] sourceRhs, int[] targetRhs, String sparseFeatures, int arity, int owner) {
+    this.lhs = lhs;
+    this.pFrench = sourceRhs;
+    this.arity = arity;
+    this.owner = owner;
+    this.english = targetRhs;
+    this.sparseFeatureStringSupplier = Suppliers.memoize(() -> { return sparseFeatures; });
+    this.featuresSupplier = initializeFeatureSupplierFromString();
+    this.alignmentSupplier = initializeAlignmentSupplier();
+  }
+  
+  /**
+   * Constructor used by PackedGrammar's sortRules().
+   */
+  public Rule(int lhs, int[] sourceRhs, int[] targetRhs, FeatureVector features, int arity, int owner) {
     this.lhs = lhs;
     this.pFrench = sourceRhs;
-    this.sparseFeatureString = sparseFeatures;
     this.arity = arity;
     this.owner = owner;
     this.english = targetRhs;
-    alignmentSupplier = initializeAlignmentSupplier();
+    this.featuresSupplier = Suppliers.memoize(() -> { return features; });
+    this.sparseFeatureStringSupplier = initializeSparseFeaturesStringSupplier();
+    this.alignmentSupplier = initializeAlignmentSupplier();
   }
 
-  // Sparse feature version
+  /**
+   * Constructor used for SamtFormatReader and GrammarBuilderWalkerFunction's getRuleWithSpans()
+   * Owner set to -1
+   */
   public Rule(int lhs, int[] sourceRhs, int[] targetRhs, String sparseFeatures, int arity) {
     this(lhs, sourceRhs, targetRhs, sparseFeatures, arity, -1);
   }
 
+  /**
+   * Constructor used for addOOVRules(), HieroFormatReader and PhraseRule.
+   */
   public Rule(int lhs, int[] sourceRhs, int[] targetRhs, String sparseFeatures, int arity, String alignment) {
     this(lhs, sourceRhs, targetRhs, sparseFeatures, arity);
     this.alignmentString = alignment;
   }
   
+  /**
+   * Constructor (implicitly) used by PackedRule
+   */
   public Rule() {
     this.lhs = -1;
-    alignmentSupplier = initializeAlignmentSupplier();
+    this.sparseFeatureStringSupplier = initializeSparseFeaturesStringSupplier();
+    this.featuresSupplier = initializeFeatureSupplierFromString();
+    this.alignmentSupplier = initializeAlignmentSupplier();
   }
 
+  // ==========================================================================
+  // Lazy loading Suppliers for alignments, feature vector, and feature strings
+  // ==========================================================================
+  
   private Supplier<byte[]> initializeAlignmentSupplier(){
-    Supplier<byte[]> result = Suppliers.memoize(() ->{
+    return Suppliers.memoize(() ->{
       byte[] alignment = null;
       String alignmentString = getAlignmentString();
       if (alignmentString != null) {
@@ -120,7 +151,29 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
       }
       return alignment;
     });
-    return result;
+  }
+  
+  /**
+   * If Rule was constructed with sparseFeatures String, we lazily populate the
+   * FeatureSupplier.
+   */
+  private Supplier<FeatureVector> initializeFeatureSupplierFromString(){
+    return Suppliers.memoize(() ->{
+      if (owner != -1) {
+        return new FeatureVector(getFeatureString(), "tm_" + Vocabulary.word(owner) + "_");
+      } else {
+        return new FeatureVector();
+      }
+    });
+  }
+  
+  /**
+   * If Rule was constructed with a FeatureVector, we lazily populate the sparseFeaturesStringSupplier.
+   */
+  private Supplier<String> initializeSparseFeaturesStringSupplier() {
+    return Suppliers.memoize(() -> {
+      return getFeatureVector().toString();
+    });
   }
 
   // ===============================================================
@@ -214,18 +267,7 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
    * specified as labeled features of the form "tm_OWNER_INDEX", but the former format is preferred.
    */
   public FeatureVector getFeatureVector() {
-    /*
-     * Now read the feature scores, which can be any number of dense features and sparse features.
-     * Any unlabeled feature becomes a dense feature. By convention, dense features should precede
-     * sparse (labeled) ones, but it's not required.
-     */
-
-    if (features == null)
-      features = (owner != -1)
-        ? new FeatureVector(getFeatureString(), "tm_" + Vocabulary.word(owner) + "_")
-        : new FeatureVector();
-  
-    return features;
+    return featuresSupplier.get();
   }
 
   /**
@@ -263,21 +305,15 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
   
   public void setPrecomputableCost(float[] phrase_weights, FeatureVector weights) {
     float cost = 0.0f;
-
-//    System.err.println(String.format("// Setting precomputable cost for for %s/%s", getEnglishWords(), getFrenchWords()));
     FeatureVector features = getFeatureVector();
     for (int i = 0; i < features.getDenseFeatures().size() && i < phrase_weights.length; i++) {
-//      System.err.println(String.format("    %d -> %.5f", i, features.get(i)));
       cost += phrase_weights[i] * features.getDense(i);
     }
 
     for (String key: features.getSparseFeatures().keySet()) {
-//      System.err.println(String.format("    %s -> %.5f", key, features.get(key)));
       cost += weights.getSparse(key) * features.getSparse(key);
     }
     
-//    System.err.println(String.format("-> %f", cost));
-    
     this.precomputableCost = cost;
   }
 
@@ -365,7 +401,7 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
   }
 
   public String getFeatureString() {
-    return sparseFeatureString;
+    return sparseFeatureStringSupplier.get();
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5665f02f/src/joshua/decoder/ff/tm/packed/PackedGrammar.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/tm/packed/PackedGrammar.java b/src/joshua/decoder/ff/tm/packed/PackedGrammar.java
index 792a7ad..a4c47d2 100644
--- a/src/joshua/decoder/ff/tm/packed/PackedGrammar.java
+++ b/src/joshua/decoder/ff/tm/packed/PackedGrammar.java
@@ -468,46 +468,39 @@ public class PackedGrammar extends AbstractGrammar {
     }
 
     /**
-     * NEW VERSION
-     * 
-     * Returns a string version of the features associated with a rule (represented as a block ID).
+     * Returns the FeatureVector associated with a rule (represented as a block ID).
      * These features are in the form "feature1=value feature2=value...". By default, unlabeled
-     * features are named using the pattern
-     * 
-     * tm_OWNER_INDEX
-     * 
-     * where OWNER is the grammar's owner (Vocabulary.word(this.owner)) and INDEX is a 0-based index
-     * of the feature found in the grammar.
-     * 
+     * features are named using the pattern.
      * @param block_id
-     * @return
+     * @return feature vector
      */
 
-    private final String getFeatures(int block_id) {
-      int feature_position = featureLookup[block_id];
-
-      // The number of non-zero features stored with the rule.
-      int num_features = encoding.readId(features, feature_position);
-
-      feature_position += EncoderConfiguration.ID_SIZE;
-      StringBuilder sb = new StringBuilder();
-
-      for (int i = 0; i < num_features; i++) {
-        int feature_id = encoding.readId(features, feature_position);
-        FloatEncoder encoder = encoding.encoder(feature_id);
-
-        String feature_name = Vocabulary.word(encoding.outerId(feature_id));
+    private final FeatureVector loadFeatureVector(int block_id) {
+      int featurePosition = featureLookup[block_id];
+      final int numFeatures = encoding.readId(features, featurePosition);
+
+      featurePosition += EncoderConfiguration.ID_SIZE;
+      final FeatureVector featureVector = new FeatureVector();
+      FloatEncoder encoder;
+      String featureName;
+
+      for (int i = 0; i < numFeatures; i++) {
+        final int innerId = encoding.readId(features, featurePosition);
+        final int outerId = encoding.outerId(innerId);
+        encoder = encoding.encoder(innerId);
+        // TODO (fhieber): why on earth are dense feature ids (ints) encoded in the vocabulary?
+        featureName = Vocabulary.word(outerId);
+        final float value = encoder.read(features, featurePosition);
         try {
-          int index = Integer.parseInt(feature_name);
-          sb.append(String.format(" tm_%s_%d=%.5f", Vocabulary.word(owner), index,
-              -encoder.read(features, feature_position)));
+          int index = Integer.parseInt(featureName);
+          featureVector.increment(index, -value);
         } catch (NumberFormatException e) {
-          sb.append(String.format(" %s=%.5f", feature_name, encoder.read(features, feature_position)));
+          featureVector.increment(featureName, value);
         }
-
-        feature_position += EncoderConfiguration.ID_SIZE + encoder.size();
+        featurePosition += EncoderConfiguration.ID_SIZE + encoder.size();
       }
-      return sb.toString().trim();
+      
+      return featureVector;
     }
 
     /**
@@ -697,7 +690,7 @@ public class PackedGrammar extends AbstractGrammar {
           block_id = source[rules[i]];
 
           Rule rule = new Rule(source[rule_position + 3 * i], src,
-              getTarget(target_address), getFeatures(block_id), arity, owner);
+              getTarget(target_address), loadFeatureVector(block_id), arity, owner);
           estimated[block_id] = rule.estimateRuleCost(models);
           precomputable[block_id] = rule.getPrecomputableCost();
         }
@@ -923,8 +916,8 @@ public class PackedGrammar extends AbstractGrammar {
 
         private Supplier<FeatureVector> initializeFeatureVectorSupplier(){
           Supplier<FeatureVector> result = Suppliers.memoize(() ->{
-            return new FeatureVector(getFeatures(source[address + 2]), "tm_" + Vocabulary.word(owner) + "_");
-          });
+            return loadFeatureVector(source[address + 2]);
+         });
           return result;
         }
 

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5665f02f/test/bn-en/packed/output.scores.gold
----------------------------------------------------------------------
diff --git a/test/bn-en/packed/output.scores.gold b/test/bn-en/packed/output.scores.gold
index 6ef2c31..fd63d12 100644
--- a/test/bn-en/packed/output.scores.gold
+++ b/test/bn-en/packed/output.scores.gold
@@ -91,7 +91,7 @@
  in 2004 ম্যাসাচুসেটস অঙ্গরাজ্যের বস্টন in the city in ডেমোক্র্যাট as the national conference was he main speech -lrb- keynote speech -rrb- on the .  |||  -675.013
  in 2004 ম্যাসাচুসেটস অঙ্গরাজ্যের বস্টন in the city in ডেমোক্র্যাট as the national conference was he main speech -lrb- keynote speech -rrb- to the .  |||  -675.262
  in 2004 ম্যাসাচুসেটস অঙ্গরাজ্যের বস্টন in the city in ডেমোক্র্যাট the national conference was he main speech -lrb- keynote speech -rrb- on the .  |||  -675.282
- in 2004 ম্যাসাচুসেটস অঙ্গরাজ্যের বস্টন in the city in ডেমোক্র্যাট the national conference was he main speech -lrb- keynote speech -rrb- to the .  |||  -675.531
+ in 2004 ম্যাসাচুসেটস অঙ্গরাজ্যের বস্টন in the city in ডেমোক্র্যাট the national conference was he main speech -lrb- keynote speech -rrb- to the .  |||  -675.530
  in 2004 ম্যাসাচুসেটস অঙ্গরাজ্যের বস্টন in the city in ডেমোক্র্যাট as the national conference was he the speech -lrb- keynote speech -rrb- on the .  |||  -675.766
  in 2004 ম্যাসাচুসেটস অঙ্গরাজ্যের বস্টন in the city in ডেমোক্র্যাট as the national conference was he main speech -lrb- keynote speech -rrb- , the .  |||  -675.800
  in 2004 ম্যাসাচুসেটস অঙ্গরাজ্যের বস্টন in the city in ডেমোক্র্যাট party national conference was he main speech -lrb- keynote speech -rrb- on the .  |||  -675.864
@@ -166,14 +166,14 @@
  britain writers of written drama , novels , stories and recently scripts in আদৃত .  |||  -145.651
  britain writers written drama , novels , stories and in the recent script in the আদৃত .  |||  -145.717
  1919 , on may month it saogat magazine was published in the .  |||  -29.082
- 1919 on may month it saogat magazine was published in the .  |||  -29.174
+ 1919 on may month it saogat magazine was published in the .  |||  -29.173
  1919 , on may month it saogat magazine was published .  |||  -29.196
  1919 on may month it saogat magazine was published .  |||  -29.287
  1919 , on may month it is saogat magazine was published in the .  |||  -29.459
  1919 on may month it is saogat magazine was published in the .  |||  -29.550
  1919 , on may month it is saogat magazine was published .  |||  -29.572
  1919 on may month it is saogat magazine was published .  |||  -29.663
- 1919 , on may month it saogat magazine was published in .  |||  -29.880
+ 1919 , on may month it saogat magazine was published in .  |||  -29.879
  1919 on may month it saogat magazine was published in .  |||  -29.971
  in 2005 , উইমেনস tennis association tour টায়ার-থ্রি টুর্নামেন্ট সানফিস্ট open netaji indoor stadium was arranged .  |||  -460.093
  2005 , উইমেনস tennis association tour টায়ার-থ্রি টুর্নামেন্ট সানফিস্ট open netaji indoor stadium was arranged .  |||  -460.244
@@ -385,15 +385,15 @@
  মিনিক্সের of were smells টানেনবম , a famous operating system design প্রশিক্ষক .  |||  -345.940
  the টাইম্ of 's of ইন্ডিয়া-তে written in the " it is absurd to compare it with any other indian cinema ... pather panchali is pure cinema " -lrb- " it other by the indian films with compared to unreal ... pather panchali are pure film " -rrb- .  |||  -1735.945
  the টাইম্ of 's of ইন্ডিয়া-তে written in the " it is absurd to compare it with any other indian cinema ... pather panchali is pure cinema " -lrb- " it in other by the indian films with compared to unreal ... pather panchali are pure film " -rrb- .  |||  -1736.149
- the টাইম্ of 's of ইন্ডিয়া-তে written in that " it is absurd to compare it with any other indian cinema ... pather panchali is pure cinema " -lrb- " it other by the indian films with compared to unreal ... pather panchali are pure film " -rrb- .  |||  -1736.152
+ the টাইম্ of 's of ইন্ডিয়া-তে written in that " it is absurd to compare it with any other indian cinema ... pather panchali is pure cinema " -lrb- " it other by the indian films with compared to unreal ... pather panchali are pure film " -rrb- .  |||  -1736.151
  the টাইম্ of 's of ইন্ডিয়া-তে written in the " it is absurd to compare it with any other indian cinema ... pather panchali is pure cinema " -lrb- " it is the by the indian films with compared to unreal ... pather panchali are pure film " -rrb- .  |||  -1736.297
  the টাইম্ of 's of ইন্ডিয়া-তে written in that " it is absurd to compare it with any other indian cinema ... pather panchali is pure cinema " -lrb- " it in other by the indian films with compared to unreal ... pather panchali are pure film " -rrb- .  |||  -1736.355
  the টাইম্ of 's of ইন্ডিয়া-তে written in the " it is absurd to compare it with any other indian cinema ... pather panchali is pure cinema , " -lrb- " it other by the indian films with compared to unreal ... pather panchali are pure film " -rrb- .  |||  -1736.363
  the টাইম্ of 's of ইন্ডিয়া-তে written in the " it is absurd to compare it with any other indian cinema ... pather panchali is pure cinema " -lrb- " it other by the indian films with compared to unreal ... pather panchali is pure film " -rrb- .  |||  -1736.461
  the টাইম্ of 's of ইন্ডিয়া-তে written in that " it is absurd to compare it with any other indian cinema ... pather panchali is pure cinema " -lrb- " it is the by the indian films with compared to unreal ... pather panchali are pure film " -rrb- .  |||  -1736.503
- the টাইম্ of 's of ইন্ডিয়া-তে written in that , " it is absurd to compare it with any other indian cinema ... pather panchali is pure cinema " -lrb- " it other by the indian films with compared to unreal ... pather panchali are pure film " -rrb- .  |||  -1736.518
- the টাইম্ of 's of ইন্ডিয়া-তে written in the " it is absurd to compare it with any other indian cinema ... pather panchali is pure cinema ' -lrb- " it other by the indian films with compared to unreal ... pather panchali are pure film " -rrb- .  |||  -1736.519
- after this 1953 , in the month of may nazrul and প্রমীলা দেবীকে চিকিৎসার for london sent to .  |||  -345.818
+ the টাইম্ of 's of ইন্ডিয়া-তে written in that , " it is absurd to compare it with any other indian cinema ... pather panchali is pure cinema " -lrb- " it other by the indian films with compared to unreal ... pather panchali are pure film " -rrb- .  |||  -1736.517
+ the টাইম্ of 's of ইন্ডিয়া-তে written in the " it is absurd to compare it with any other indian cinema ... pather panchali is pure cinema ' -lrb- " it other by the indian films with compared to unreal ... pather panchali are pure film " -rrb- .  |||  -1736.518
+ after this 1953 , in the month of may nazrul and প্রমীলা দেবীকে চিকিৎসার for london sent to .  |||  -345.817
  after this 1953 on may month nazrul and প্রমীলা দেবীকে চিকিৎসার for london sent to .  |||  -345.874
  after that , 1953 may month nazrul and প্রমীলা দেবীকে চিকিৎসার for london sent to .  |||  -345.956
  after that 1953 on may month nazrul and প্রমীলা দেবীকে চিকিৎসার for london sent to .  |||  -346.040
@@ -409,7 +409,7 @@
  the southern and the east there is বিস্তীর্ণ land , west and on the north there is রুক্ষ mountain and mountain .  |||  -251.017
  the southern and the east there is বিস্তীর্ণ plain , west and in the north there are রুক্ষ mountain and mountain .  |||  -251.127
  the southern and the east there is বিস্তীর্ণ land , west and in the north there are রুক্ষ mountain and mountain .  |||  -251.145
- the south and the east there is বিস্তীর্ণ land , west and on the north there are রুক্ষ mountain and mountain .  |||  -251.256
+ the south and the east there is বিস্তীর্ণ land , west and on the north there are রুক্ষ mountain and mountain .  |||  -251.255
  the southern and the east there is বিস্তীর্ণ plain , west and in the north there is রুক্ষ mountain and mountain .  |||  -251.309
  the southern and the east there is বিস্তীর্ণ plain , west and on the north are রুক্ষ mountain and mountain .  |||  -251.317
  the southern and the east there is বিস্তীর্ণ land , west and in the north there is রুক্ষ mountain and mountain .  |||  -251.327
@@ -598,7 +598,7 @@
  open source or open source -lrb- open source -rrb- the money is computer software the source code or main সাংকেতিক language free way বিতরণ to .  |||  -471.716
  open source or open source -lrb- open source -rrb- , the money is computer software of the source code or main সাংকেতিক language open way বিতরণ to .  |||  -471.717
  open source or open source -lrb- open source -rrb- , the money is computer software the source code or main সাংকেতিক language free way বিতরণ to .  |||  -471.789
- open source or open source -lrb- open source -rrb- of the money is computer software of the source code or main সাংকেতিক language open way বিতরণ to .  |||  -471.789
+ open source or open source -lrb- open source -rrb- of the money is computer software of the source code or main সাংকেতিক language open way বিতরণ to .  |||  -471.790
  open source or open source -lrb- open source -rrb- the money is computer software of the source code or main সাংকেতিক language free way বিতরণ to .  |||  -471.794
  open source or open source -lrb- open source -rrb- in the money is computer software the source code or main সাংকেতিক language open way বিতরণ to .  |||  -471.850
  bangladesh অনলাইনে dhaka  |||  -109.639
@@ -635,11 +635,11 @@
  he was the military forces to আনফিট declared was .  |||  -123.198
  he was military forces for আনফিট declared was .  |||  -123.198
  he was armed forces for আনফিট was declared .  |||  -123.208
- he was the military forces for আনফিট declared in .  |||  -123.628
+ he was the military forces for আনফিট declared in .  |||  -123.629
  bhutto এ্যাসেম্বলি বয়কট to 2.5 with declared in the , yahya khan mujib was to form the government to জানালে he that government by নেবেন not .  |||  -492.585
  bhutto এ্যাসেম্বলি বয়কট to 2.5 with announced that the , yahya khan mujib was to form the government to জানালে he that government by নেবেন not .  |||  -492.686
  bhutto এ্যাসেম্বলি বয়কট to 2.5 to declared in the , yahya khan mujib was to form the government to জানালে he that government by নেবেন not .  |||  -492.687
- bhutto এ্যাসেম্বলি বয়কট to 2.5 with declared in the , yahya khan mujib was to form the government on to জানালে he that government by নেবেন not .  |||  -492.742
+ bhutto এ্যাসেম্বলি বয়কট to 2.5 with declared in the , yahya khan mujib was to form the government on to জানালে he that government by নেবেন not .  |||  -492.743
  bhutto এ্যাসেম্বলি বয়কট to 2.5 with declared in the , yahya khan mujib was to form the government for জানালে he that government by নেবেন not .  |||  -492.760
  bhutto এ্যাসেম্বলি বয়কট to 2.5 with announced that that , yahya khan mujib was to form the government to জানালে he that government by নেবেন not .  |||  -492.771
  bhutto এ্যাসেম্বলি বয়কট to 2.5 with announced that the , yahya khan mujib was to form the government on to জানালে he that government by নেবেন not .  |||  -492.843
@@ -806,7 +806,7 @@
  it is mainly শূকরের in the middle of was which can শূকরকে ইনফ্লুয়েঞ্জাতে infected by british .  |||  -349.610
  it is mainly শূকরের in between in was which can শূকরকে ইনফ্লুয়েঞ্জাতে affected by british .  |||  -349.627
  it is basically শূকরের in between in which was which can শূকরকে ইনফ্লুয়েঞ্জাতে affected by british .  |||  -349.663
- it is basically শূকরের in between in was which can শূকরকে ইনফ্লুয়েঞ্জাতে affected by british .  |||  -349.705
+ it is basically শূকরের in between in was which can শূকরকে ইনফ্লুয়েঞ্জাতে affected by british .  |||  -349.704
  these are একএ the mycelium structure .  |||  -221.617
  these একএ the mycelium structure .  |||  -221.656
  these are একএ to mycelium structure .  |||  -221.769

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5665f02f/tst/joshua/system/MultithreadedTranslationTests.java
----------------------------------------------------------------------
diff --git a/tst/joshua/system/MultithreadedTranslationTests.java b/tst/joshua/system/MultithreadedTranslationTests.java
index 4ff549c..b8d8af0 100644
--- a/tst/joshua/system/MultithreadedTranslationTests.java
+++ b/tst/joshua/system/MultithreadedTranslationTests.java
@@ -28,6 +28,8 @@ public class MultithreadedTranslationTests {
   private JoshuaConfiguration joshuaConfig = null;
   private Decoder decoder = null;
   private static final String INPUT = "A K B1 U Z1 Z2 B2 C";
+  private int previousLogLevel;
+  private final static long NANO_SECONDS_PER_SECOND = 1_000_000_000;
 
   @Before
   public void setUp() throws Exception {
@@ -62,6 +64,9 @@ public class MultithreadedTranslationTests {
                                                   // (configFile)
                                                   // is not even used by the
                                                   // constructor/initialize.
+
+    previousLogLevel = Decoder.VERBOSE;
+    Decoder.VERBOSE = 0;
   }
 
   @After
@@ -69,6 +74,7 @@ public class MultithreadedTranslationTests {
     Vocabulary.clear();
     this.decoder.cleanUp();
     this.decoder = null;
+    Decoder.VERBOSE = previousLogLevel;
   }
 
 
@@ -102,11 +108,17 @@ public class MultithreadedTranslationTests {
     Translations translations = this.decoder.decodeAll(req);
     ArrayList<Translation> translationResults = new ArrayList<Translation>();
 
+
+    final long translationStartTime = System.nanoTime();
     Translation t;
     while ((t = translations.next()) != null) {
       translationResults.add(t);
     }
 
+    final long translationEndTime = System.nanoTime();
+    final double pipelineLoadDurationInSeconds = (translationEndTime - translationStartTime) / ((double)NANO_SECONDS_PER_SECOND);
+    System.err.println(String.format("%.2f seconds", pipelineLoadDurationInSeconds));
+
     // THEN
     assertTrue(translationResults.size() == inputLines);
   }


[07/10] incubator-joshua git commit: Clean up Slice constructor, Fully loading source tries, lazy loading other structures

Posted by mj...@apache.org.
Clean up Slice constructor, Fully loading source tries, lazy loading other structures


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/9448ba55
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/9448ba55
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/9448ba55

Branch: refs/heads/master
Commit: 9448ba552cd03bacad81eb4b9b5e900db360c00e
Parents: 2cc9996
Author: Kellen Sunderland <ke...@amazon.com>
Authored: Tue Mar 29 17:23:23 2016 +0200
Committer: Kellen Sunderland <ke...@amazon.com>
Committed: Thu Mar 31 10:44:43 2016 +0200

----------------------------------------------------------------------
 .../decoder/ff/tm/packed/PackedGrammar.java     | 147 ++++++++++---------
 1 file changed, 75 insertions(+), 72 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9448ba55/src/joshua/decoder/ff/tm/packed/PackedGrammar.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/tm/packed/PackedGrammar.java b/src/joshua/decoder/ff/tm/packed/PackedGrammar.java
index 18aa60e..792a7ad 100644
--- a/src/joshua/decoder/ff/tm/packed/PackedGrammar.java
+++ b/src/joshua/decoder/ff/tm/packed/PackedGrammar.java
@@ -38,15 +38,13 @@ package joshua.decoder.ff.tm.packed;
 
 import static java.util.Collections.sort;
 
-import java.io.BufferedInputStream;
-import java.io.DataInputStream;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.RandomAccessFile;
 import java.nio.BufferUnderflowException;
+import java.nio.ByteBuffer;
 import java.nio.IntBuffer;
 import java.nio.MappedByteBuffer;
 import java.nio.channels.FileChannel;
@@ -75,7 +73,6 @@ import joshua.decoder.ff.tm.Rule;
 import joshua.decoder.ff.tm.RuleCollection;
 import joshua.decoder.ff.tm.Trie;
 import joshua.decoder.ff.tm.hash_based.ExtensionIterator;
-import joshua.decoder.ff.tm.packed.SliceAggregatingTrie;
 import joshua.util.encoding.EncoderConfiguration;
 import joshua.util.encoding.FloatEncoder;
 import joshua.util.io.LineReader;
@@ -322,20 +319,15 @@ public class PackedGrammar extends AbstractGrammar {
     private final String name;
 
     private final int[] source;
+    private final IntBuffer target;
+    private final ByteBuffer features;
+    private final ByteBuffer alignments;
 
-    private final int[] target;
     private final int[] targetLookup;
-
-    private MappedByteBuffer features;
     private int featureSize;
     private int[] featureLookup;
-    private RandomAccessFile featureFile;
-
     private float[] estimated;
     private float[] precomputable;
-    
-    private RandomAccessFile alignmentFile;
-    private MappedByteBuffer alignments;
     private int[] alignmentLookup;
 
     /**
@@ -352,81 +344,92 @@ public class PackedGrammar extends AbstractGrammar {
       File feature_file = new File(prefix + ".features");
       File alignment_file = new File(prefix + ".alignments");
 
-      // Get the channels etc.
-      FileInputStream source_fis = new FileInputStream(source_file);
-      FileChannel source_channel = source_fis.getChannel();
-      int source_size = (int) source_channel.size();
-
-      FileInputStream target_fis = new FileInputStream(target_file);
-      FileChannel target_channel = target_fis.getChannel();
-      int target_size = (int) target_channel.size();
+      source = fullyLoadFileToArray(source_file);
+      // First int specifies the size of this file, load from 1st int on
+      targetLookup = fullyLoadFileToArray(target_lookup_file, 1);
 
-      featureFile = new RandomAccessFile(feature_file, "r");
-      FileChannel feature_channel = featureFile.getChannel();
-      int feature_size = (int) feature_channel.size();
+      target = associateMemoryMappedFile(target_file).asIntBuffer();
+      features = associateMemoryMappedFile(feature_file);
+      initializeFeatureStructures();
 
-      IntBuffer source_buffer = source_channel.map(MapMode.READ_ONLY, 0, source_size).asIntBuffer();
-      source = new int[source_size / 4];
-      source_buffer.get(source);
-      source_fis.close();
-
-      IntBuffer target_buffer = target_channel.map(MapMode.READ_ONLY, 0, target_size).asIntBuffer();
-      target = new int[target_size / 4];
-      target_buffer.get(target);
-      target_fis.close();
-
-      features = feature_channel.map(MapMode.READ_ONLY, 0, feature_size);
-      features.load();
-      
       if (alignment_file.exists()) {
-        alignmentFile = new RandomAccessFile(alignment_file, "r");
-        FileChannel alignment_channel = alignmentFile.getChannel();
-        int alignment_size = (int) alignment_channel.size();
-        alignments = alignment_channel.map(MapMode.READ_ONLY, 0, alignment_size);
-        alignments.load();
-        
-        int num_blocks = alignments.getInt(0);
-        alignmentLookup = new int[num_blocks];
-        int header_pos = 8;
-        for (int i = 0; i < num_blocks; i++) {
-          alignmentLookup[i] = alignments.getInt(header_pos);
-          header_pos += 4;
-        }
+        alignments = associateMemoryMappedFile(alignment_file);
+        alignmentLookup = parseLookups(alignments);
       } else {
         alignments = null;
       }
 
+      tries = new HashMap<Integer, PackedTrie>();
+    }
+
+    /**
+     * Helper function to help create all the structures which describe features
+     * in the Slice. Only called during object construction.
+     */
+    private void initializeFeatureStructures(){
       int num_blocks = features.getInt(0);
-      featureLookup = new int[num_blocks];
       estimated = new float[num_blocks];
       precomputable = new float[num_blocks];
+      Arrays.fill(estimated, Float.NEGATIVE_INFINITY);
+      Arrays.fill(precomputable, Float.NEGATIVE_INFINITY);
+      featureLookup = parseLookups(features);
       featureSize = features.getInt(4);
-      int header_pos = 8;
-      for (int i = 0; i < num_blocks; i++) {
-        featureLookup[i] = features.getInt(header_pos);
-        estimated[i] = Float.NEGATIVE_INFINITY;
-        precomputable[i] = Float.NEGATIVE_INFINITY;
-        header_pos += 4;
+    }
+
+    // TODO: (kellens) see if we can remove these lookups as they're addressed
+    // predictably into already present data structures. Are they redundant?
+    /**
+     * Build lookup arrays for various buffers (features / alignments) Typically
+     * this is copying out some relevant information from a larger byte array
+     *
+     * @param buffer
+     *          the buffer parsed to find sub-elements
+     * @return an int array which can easily be accessed to find lookup values.
+     */
+    private int[] parseLookups(ByteBuffer buffer) {
+      int numBlocks = buffer.getInt(0);
+      int[] result = new int[numBlocks];
+      int headerPosition = 8;
+      for (int i = 0; i < numBlocks; i++) {
+        result[i] = buffer.getInt(headerPosition);
+        headerPosition += 4;
       }
+      return result;
+    }
 
-      DataInputStream target_lookup_stream = new DataInputStream(new BufferedInputStream(
-          new FileInputStream(target_lookup_file)));
-      targetLookup = new int[target_lookup_stream.readInt()];
-      for (int i = 0; i < targetLookup.length; i++)
-        targetLookup[i] = target_lookup_stream.readInt();
-      target_lookup_stream.close();
+    private int[] fullyLoadFileToArray(File file) throws IOException {
+      return fullyLoadFileToArray(file, 0);
+    }
 
-      tries = new HashMap<Integer, PackedTrie>();
+    /**
+     * This function will use a bulk loading method to fully populate a target
+     * array from file.
+     *
+     * @param file
+     *          File that will be read from disk.
+     * @param startIndex
+     *          an offset into the read file.
+     * @return an int array of size length(file) - offset containing ints in the
+     *         file.
+     * @throws IOException
+     */
+    private int[] fullyLoadFileToArray(File file, int startIndex) throws IOException {
+      IntBuffer buffer = associateMemoryMappedFile(file).asIntBuffer();
+      int size = (int) (file.length() - (4 * startIndex))/4;
+      int[] result = new int[size];
+      buffer.position(startIndex);
+      buffer.get(result, 0, size);
+      return result;
     }
 
-    @SuppressWarnings("unused")
-    private final Object guardian = new Object() {
-      @Override
-      // Finalizer object to ensure feature file handle get closed upon slice's dismissal.
-      protected void finalize() throws Throwable {
-        featureFile.close();
+    private ByteBuffer associateMemoryMappedFile(File file) throws IOException {
+      try(FileInputStream fileInputStream = new FileInputStream(file)) {
+        FileChannel fileChannel = fileInputStream.getChannel();
+        int size = (int) fileChannel.size();
+        MappedByteBuffer result = fileChannel.map(MapMode.READ_ONLY, 0, size);
+        return result;
       }
-    };
+    }
 
     private final int[] getTarget(int pointer) {
       // Figure out level.
@@ -437,9 +440,9 @@ public class PackedGrammar extends AbstractGrammar {
       int index = 0;
       int parent;
       do {
-        parent = target[pointer];
+        parent = target.get(pointer);
         if (parent != -1)
-          tgt[index++] = target[pointer + 1];
+          tgt[index++] = target.get(pointer + 1);
         pointer = parent;
       } while (pointer != -1);
       return tgt;


[02/10] incubator-joshua git commit: Decoder's Translation class now contains more members including the possibility to store word alignment from the derivation. Allows use of Joshua decoder class in a larger code project to extract information, rather t

Posted by mj...@apache.org.
Decoder's Translation class now contains more members including the possibility to store word alignment from the derivation. Allows use of Joshua decoder class in a larger code project to extract information, rather than relying on stdout. Also added a getter for JoshuaConfiguration in the Decoder.


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/9501535d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/9501535d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/9501535d

Branch: refs/heads/master
Commit: 9501535dcd67b89e821fd686089f621c5721497f
Parents: e70677d
Author: Felix Hieber <fh...@amazon.com>
Authored: Fri Feb 27 14:05:50 2015 +0100
Committer: Kellen Sunderland <ke...@amazon.com>
Committed: Thu Mar 31 10:44:42 2016 +0200

----------------------------------------------------------------------
 .gitignore                                      |   6 +
 resources/grammar.glue                          |   4 +
 resources/wa_grammar                            |   3 +
 src/joshua/decoder/Decoder.java                 |   4 +
 src/joshua/decoder/JoshuaConfiguration.java     |   3 +
 src/joshua/decoder/Translation.java             | 122 ++++++++++++---
 src/joshua/decoder/ff/tm/Rule.java              |  36 +++++
 .../decoder/hypergraph/AlignedSourceTokens.java |  93 +++++++++++
 .../decoder/hypergraph/KBestExtractor.java      |  10 ++
 .../decoder/hypergraph/ViterbiExtractor.java    |  30 ++++
 .../hypergraph/WordAlignmentExtractor.java      |  55 +++++++
 .../decoder/hypergraph/WordAlignmentState.java  | 154 +++++++++++++++++++
 tst/joshua/system/AlignmentMapTest.java         |  53 +++++++
 tst/joshua/system/StructuredOutputTest.java     | 103 +++++++++++++
 14 files changed, 658 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9501535d/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
index 869300e..1238e15 100644
--- a/.gitignore
+++ b/.gitignore
@@ -51,3 +51,9 @@ bin
 *~
 
 .philog.LOGFILE
+build
+**.history
+.settings
+/eclipse-bin/
+.classpath
+/target/

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9501535d/resources/grammar.glue
----------------------------------------------------------------------
diff --git a/resources/grammar.glue b/resources/grammar.glue
new file mode 100644
index 0000000..69e1520
--- /dev/null
+++ b/resources/grammar.glue
@@ -0,0 +1,4 @@
+[GOAL] ||| <s> ||| <s> ||| 0
+[GOAL] ||| [GOAL,1] [X,2] ||| [GOAL,1] [X,2] ||| -1
+[GOAL] ||| [GOAL,1] </s> ||| [GOAL,1] </s> ||| 0
+[GOAL] ||| <s> [X,1] </s> ||| <s> [X,1] </s> ||| 0

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9501535d/resources/wa_grammar
----------------------------------------------------------------------
diff --git a/resources/wa_grammar b/resources/wa_grammar
new file mode 100644
index 0000000..08c2c38
--- /dev/null
+++ b/resources/wa_grammar
@@ -0,0 +1,3 @@
+[X] ||| A [X,1] B1 [X,2] B2 C ||| a b [X,2] c1 [X,1] c2 ||| 1 1 1 1 1 1 ||| 0-0 2-1 4-1 5-3 5-5
+[X] ||| U Z1 Z2 ||| n1 u z ||| 1 1 1 1 1 1 ||| 0-1 1-2 2-2
+[X] ||| K ||| k1 k2 k3 n1 n2 n3 ||| 1 1 1 1 1 1 ||| 0-0 0-1 0-2

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9501535d/src/joshua/decoder/Decoder.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/Decoder.java b/src/joshua/decoder/Decoder.java
index a9d7ba9..1a353ca 100644
--- a/src/joshua/decoder/Decoder.java
+++ b/src/joshua/decoder/Decoder.java
@@ -64,6 +64,10 @@ public class Decoder {
 
   private final JoshuaConfiguration joshuaConfiguration;
 
+  public JoshuaConfiguration getJoshuaConfiguration() {
+    return joshuaConfiguration;
+  }
+
   /*
    * Many of these objects themselves are global objects. We pass them in when constructing other
    * objects, so that they all share pointers to the same object. This is good because it reduces

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9501535d/src/joshua/decoder/JoshuaConfiguration.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/JoshuaConfiguration.java b/src/joshua/decoder/JoshuaConfiguration.java
index 49ab87d..266198c 100644
--- a/src/joshua/decoder/JoshuaConfiguration.java
+++ b/src/joshua/decoder/JoshuaConfiguration.java
@@ -29,6 +29,9 @@ import joshua.util.io.LineReader;
  * @author Matt Post <po...@cs.jhu.edu>
  */
 public class JoshuaConfiguration {
+  
+  // whether to use structured output
+  public Boolean use_structured_output = false;
 
   // List of grammar files to read
   public ArrayList<String> tms = new ArrayList<String>();

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9501535d/src/joshua/decoder/Translation.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/Translation.java b/src/joshua/decoder/Translation.java
index fbf1571..a9f82f7 100644
--- a/src/joshua/decoder/Translation.java
+++ b/src/joshua/decoder/Translation.java
@@ -3,6 +3,7 @@ package joshua.decoder;
 import java.io.BufferedWriter;
 import java.io.IOException;
 import java.io.StringWriter;
+import java.util.Arrays;
 import java.util.List;
 
 import joshua.decoder.ff.FeatureFunction;
@@ -10,6 +11,7 @@ import joshua.decoder.ff.lm.StateMinimizingLanguageModel;
 import joshua.decoder.hypergraph.HyperGraph;
 import joshua.decoder.hypergraph.KBestExtractor;
 import joshua.decoder.hypergraph.ViterbiExtractor;
+import joshua.decoder.hypergraph.WordAlignmentState;
 import joshua.decoder.io.DeNormalize;
 import joshua.decoder.segment_file.Sentence;
 
@@ -39,20 +41,31 @@ public class Translation {
     return rawTranslation;
   }
 
+  private WordAlignmentState alignment = null;
+  private float score = 0;
+  private float translationTime;
+  
   public Translation(Sentence source, HyperGraph hypergraph, 
       List<FeatureFunction> featureFunctions, JoshuaConfiguration joshuaConfiguration) {
     this.source = source;
+    
+    if (joshuaConfiguration.use_structured_output) {
+      
+      // create structured output instead of the String manipulation below.
+      createStructuredOutput(source, hypergraph);
+      
+    } else {
+
+      StringWriter sw = new StringWriter();
+      BufferedWriter out = new BufferedWriter(sw);
+
+      try {
+        if (hypergraph != null) {
+          if (!joshuaConfiguration.hypergraphFilePattern.equals("")) {
+            hypergraph.dump(String.format(joshuaConfiguration.hypergraphFilePattern, source.id()), featureFunctions);
+          }
 
-    StringWriter sw = new StringWriter();
-    BufferedWriter out = new BufferedWriter(sw);
-
-    try {
-      if (hypergraph != null) {
-        if (!joshuaConfiguration.hypergraphFilePattern.equals("")) {
-          hypergraph.dump(String.format(joshuaConfiguration.hypergraphFilePattern, source.id()), featureFunctions);
-        }
-
-        long startTime = System.currentTimeMillis();
+          long startTime = System.currentTimeMillis();
 
         // We must put this weight as zero, otherwise we get an error when we try to retrieve it
         // without checking
@@ -75,9 +88,15 @@ public class Translation {
               .replace("%S", DeNormalize.processSingleLine(rawTranslation))
               .replace("%c", String.format("%.3f", hypergraph.goalNode.getScore()))
               .replace("%i", String.format("%d", source.id()));
+          
+          /* %a causes output of word level alignments between input and output hypothesis */
+          if (joshuaConfiguration.outputFormat.contains("%a")) {
+            translation = translation.replace("%a", ViterbiExtractor.extractViterbiAlignment(hypergraph.goalNode));
+          }
 
           out.write(translation);
           out.newLine();
+          
         } else  {
           KBestExtractor kBestExtractor = new KBestExtractor(source, featureFunctions, Decoder.weights, false, joshuaConfiguration);
           kBestExtractor.lazyKBestExtractOnHG(hypergraph, joshuaConfiguration.topN, out);
@@ -91,9 +110,11 @@ public class Translation {
           }
         }
 
-        float seconds = (float) (System.currentTimeMillis() - startTime) / 1000.0f;
-        Decoder.LOG(1, String.format("Input %d: %d-best extraction took %.3f seconds", id(),
-            joshuaConfiguration.topN, seconds));
+          float seconds = (float) (System.currentTimeMillis() - startTime) / 1000.0f;
+          Decoder.LOG(1, String.format("Input %d: %d-best extraction took %.3f seconds", id(),
+              joshuaConfiguration.topN, seconds));
+          this.translationTime = seconds;
+          
 
       } else {
         
@@ -113,10 +134,14 @@ public class Translation {
         out.newLine();
       }
 
-      out.flush();
-    } catch (IOException e) {
-      e.printStackTrace();
-      System.exit(1);
+        out.flush();
+      } catch (IOException e) {
+        e.printStackTrace();
+        System.exit(1);
+      }
+      
+      this.output = sw.toString();
+      
     }
 
     /*
@@ -129,8 +154,40 @@ public class Translation {
         break;
       }
     }
+    
+  }
 
-    this.output = sw.toString();
+  /**
+   * Fills the structured-output members (output, alignment, score,
+   * translationTime) from the Viterbi derivation instead of rendering one
+   * formatted string. A null hypergraph (no derivation) falls back to the
+   * source text without alignment and must not be dereferenced afterwards.
+   */
+  private void createStructuredOutput(Sentence source, HyperGraph hypergraph) {
+    
+    this.translationTime = 0;
+    
+    long startTime = System.currentTimeMillis();
+
+    if (hypergraph != null) {
+
+      this.output = ViterbiExtractor.extractViterbiString(hypergraph.goalNode).trim();
+      // drops the first and last token (presumably the sentence markers); same idiom as in existing Joshua code (65)
+      this.output = this.output.substring(this.output.indexOf(' ') + 1, this.output.lastIndexOf(' ')); 
+      this.alignment = ViterbiExtractor.buildViterbiAlignment(hypergraph.goalNode);
+      this.score = hypergraph.goalNode.getScore();
+
+    } else {
+      
+      this.output = this.source.source();
+      this.alignment = null;
+      
+    }
+    
+    this.translationTime = (System.currentTimeMillis() - startTime) / 1000.0f;
+    
+    Decoder.LOG(1, String.format("Translation %d: %.3f %s (%.3f)", source.id(), this.score, this.output, this.translationTime));
+    
+  }
 
   public Sentence getSourceSentence() {
@@ -145,4 +202,33 @@ public class Translation {
   public String toString() {
     return output;
   }
+  
+  public float getTranslationTime() {
+    return translationTime;
+  }
+
+  public String getTranslationString() {
+    return (output == null) ? "" : output.trim();
+  }
+
+  public List<List<Integer>> getWordAlignment() { // empty list when no alignment was built, mirroring getWordAlignmentString()
+    return (alignment == null) ? java.util.Collections.<List<Integer>>emptyList() : alignment.toFinalList();
+  }
+  
+  public String getWordAlignmentString() {
+    return (alignment == null) ? "" : alignment.toFinalString();
+  }
+
+  public float getTranslationScore() {
+    return score;
+  }
+  
+  public List<String> getTranslationTokens() {
+    return Arrays.asList(getTranslationString().split("\\s+"));
+  }
+  
+  public String getDeNormalizedTranslation() {
+    return DeNormalize.processSingleLine(getTranslationString());
+  }
+  
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9501535d/src/joshua/decoder/ff/tm/Rule.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/tm/Rule.java b/src/joshua/decoder/ff/tm/Rule.java
index 2f3ec47..90a44d5 100644
--- a/src/joshua/decoder/ff/tm/Rule.java
+++ b/src/joshua/decoder/ff/tm/Rule.java
@@ -1,8 +1,11 @@
 package joshua.decoder.ff.tm;
 
+import java.util.ArrayList;
 import java.util.Arrays;  
 import java.util.Comparator;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.regex.Pattern;
 
 import joshua.corpus.Vocabulary;
@@ -411,6 +414,39 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
         nts[index++] = -id;
     return nts;
   }
+  
+  /**
+   * Returns an array of size getArity() holding, in source order, the indices in getFrench() of the nonterminals (negative ids).
+   */
+  public int[] getNonTerminalSourcePositions() {
+    int[] nonTerminalPositions = new int[getArity()];
+    int ntPos = 0;
+    for (int sourceIdx = 0; sourceIdx < getFrench().length; sourceIdx++) {
+      if (getFrench()[sourceIdx] < 0)
+        nonTerminalPositions[ntPos++] = sourceIdx;
+    }
+    return nonTerminalPositions;
+  }
+  
+  /**
+   * Reads getAlignment() as (source, target) byte pairs and returns a map from target position to
+   * the list of aligned source positions (empty if there is no alignment). Used by WordAlignmentState.
+   */
+  public Map<Integer, List<Integer>> getAlignmentMap() {
+    byte[] alignmentArray = getAlignment();
+    Map<Integer, List<Integer>> alignmentMap = new HashMap<Integer, List<Integer>>();
+    if (alignmentArray != null) {
+      for (int alignmentIdx = 0; alignmentIdx < alignmentArray.length; alignmentIdx += 2 ) {
+        int s = alignmentArray[alignmentIdx];
+        int t = alignmentArray[alignmentIdx + 1];
+        List<Integer> values = alignmentMap.get(t);
+        if (values == null)
+          alignmentMap.put(t, values = new ArrayList<Integer>());
+        values.add(s);
+      }
+    }
+    return alignmentMap;
+  }
 
   /**
    * Return the English (target) nonterminals as list of Strings

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9501535d/src/joshua/decoder/hypergraph/AlignedSourceTokens.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/hypergraph/AlignedSourceTokens.java b/src/joshua/decoder/hypergraph/AlignedSourceTokens.java
new file mode 100644
index 0000000..eb92f0d
--- /dev/null
+++ b/src/joshua/decoder/hypergraph/AlignedSourceTokens.java
@@ -0,0 +1,93 @@
+package joshua.decoder.hypergraph;
+
+import java.util.LinkedList;
+import java.util.ListIterator;
+
+/**
+ * Class that represents a one to (possibly) many alignment from target to
+ * source. Extends from a LinkedList. Instances of this class are updated by
+ * WordAlignmentState.substituteIn(). The shiftBy method shifts the
+ * elements in the list by a scalar to reflect substitutions of non terminals in
+ * the rule. If indexes are final, i.e. the point instance has been substituted
+ * into a parent WordAlignmentState once, isFinal is set to true. This is
+ * necessary since the final source index of a point is known once we have
+ * substituted in a complete WordAlignmentState into its parent. If the point
+ * stands for a non terminal, isNonTerminal is true.
+ */
+class AlignedSourceTokens extends LinkedList<Integer> {
+
+  private static final long serialVersionUID = 1L;
+  /** whether this Point refers to a non terminal in source&target */
+  private boolean isNonTerminal = false;
+  /** whether this instance does not need to be updated anymore */
+  private boolean isFinal = false;
+  /** whether the word this Point corresponds to has no alignment in source */
+  private boolean isNull = false;
+
+  AlignedSourceTokens() {
+  }
+
+  void setFinal() {
+    isFinal = true;
+  }
+
+  void setNonTerminal() {
+    isNonTerminal = true;
+  }
+
+  void setNull() {
+    isNull = true;
+  }
+
+  /**
+   * Appends x unless this point is NULL-aligned or a non terminal; returns true if the element was added.
+   */
+  @Override
+  public boolean add(Integer x) {
+    if (isNull || isNonTerminal)
+      return false;
+    return super.add(x);
+  }
+
+  public boolean isNonTerminal() {
+    return isNonTerminal;
+  }
+
+  public boolean isFinal() {
+    return isFinal;
+  }
+
+  public boolean isNull() {
+    return isNull;
+  }
+
+  /**
+   * Adds shift to every item in the list that is strictly greater than start.
+   * Does nothing once this point is final or NULL-aligned.
+   */
+  void shiftBy(int start, int shift) {
+    if (!isFinal && !isNull) {
+      ListIterator<Integer> it = this.listIterator();
+      while (it.hasNext()) {
+        int x = it.next();
+        if (x > start) {
+          it.set(x + shift);
+        }
+      }
+    }
+  }
+
+  public String toString() {
+    StringBuilder sb = new StringBuilder();
+    if (isFinal)
+      sb.append("f");
+    if (isNull) {
+      sb.append("[NULL]");
+    } else {
+      sb.append(super.toString());
+    }
+    if (isNonTerminal)
+      sb.append("^");
+    return sb.toString();
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9501535d/src/joshua/decoder/hypergraph/KBestExtractor.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/hypergraph/KBestExtractor.java b/src/joshua/decoder/hypergraph/KBestExtractor.java
index 78cf6fe..46ab9ea 100644
--- a/src/joshua/decoder/hypergraph/KBestExtractor.java
+++ b/src/joshua/decoder/hypergraph/KBestExtractor.java
@@ -188,6 +188,12 @@ public class KBestExtractor {
       if (joshuaConfiguration.outputFormat.contains("%d")) {
         outputString = outputString.replace("%d", derivationState.getDerivation());
       }
+      
+      /* %a causes output of word level alignments between input and output hypothesis */
+      if (joshuaConfiguration.outputFormat.contains("%a")) {
+        outputString = outputString.replace("%a",  derivationState.getWordAlignment());
+      }
+      
     }
 
     return outputString;
@@ -581,6 +587,10 @@ public class KBestExtractor {
       bleu = 0.0f;
     }
 
+    public String getWordAlignment() {
+      return visit(new WordAlignmentExtractor()).toString();
+    }
+
     /**
      * Computes a scaled approximate BLEU from the accumulated statistics. We know the number of
      * words; to compute the effective reference length, we take the real reference length statistic

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9501535d/src/joshua/decoder/hypergraph/ViterbiExtractor.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/hypergraph/ViterbiExtractor.java b/src/joshua/decoder/hypergraph/ViterbiExtractor.java
index e19bb3f..95ec7c7 100644
--- a/src/joshua/decoder/hypergraph/ViterbiExtractor.java
+++ b/src/joshua/decoder/hypergraph/ViterbiExtractor.java
@@ -39,6 +39,36 @@ public class ViterbiExtractor {
     }
     return res.toString();
   }
+  
+  public static String extractViterbiAlignment(HGNode node) {
+    WordAlignmentState viterbiAlignment = buildViterbiAlignment(node);
+    return viterbiAlignment.toFinalString();
+  }
+  
+  // Recursively builds the word alignment state of the one-best (Viterbi) derivation rooted at node.
+  public static WordAlignmentState buildViterbiAlignment(HGNode node) {
+    HyperEdge edge = node.bestHyperedge;
+    Rule rl = edge.getRule();  
+    if (rl == null) { // edges under the "goal item" have no rule: descend into their single tail node
+      if (edge.getTailNodes().size() != 1)
+        throw new RuntimeException("deduction under goal item have not equal one item");
+      return buildViterbiAlignment(edge.getTailNodes().get(0));
+    }
+    WordAlignmentState waState = new WordAlignmentState(rl, node.i);
+    if (edge.getTailNodes() != null) {
+      int[] english = rl.getEnglish();
+      for (int c = 0; c < english.length; c++) {
+        if (Vocabulary.nt(english[c])) {
+          // english[c] is the negated, 1-based number of the nonterminal, so
+          // -(english[c] + 1) is the 0-based position of the matching tail node
+          // in edge.getTailNodes()
+          int index = -(english[c] + 1);
+          waState.substituteIn(buildViterbiAlignment(edge.getTailNodes().get(index)));
+        }
+      }
+    }
+    return waState;
+  }
 
   // ######## find 1best hypergraph#############
   public static HyperGraph getViterbiTreeHG(HyperGraph hg_in) {

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9501535d/src/joshua/decoder/hypergraph/WordAlignmentExtractor.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/hypergraph/WordAlignmentExtractor.java b/src/joshua/decoder/hypergraph/WordAlignmentExtractor.java
new file mode 100644
index 0000000..63619ee
--- /dev/null
+++ b/src/joshua/decoder/hypergraph/WordAlignmentExtractor.java
@@ -0,0 +1,55 @@
+package joshua.decoder.hypergraph;
+
+import java.util.Stack;
+
+import joshua.corpus.Vocabulary;
+import joshua.decoder.Decoder;
+import joshua.decoder.ff.tm.Rule;
+import joshua.decoder.hypergraph.KBestExtractor.DerivationState;
+import joshua.decoder.hypergraph.KBestExtractor.DerivationVisitor;
+
+/**
+ * Derivation visitor that assembles the word alignment of one derivation.
+ * before() is called for every visited edge; each edge that has a rule
+ * contributes a new WordAlignmentState.
+ * We place WordAlignmentStates on a stack and merge/substitute them into each
+ * other if possible. At the end, the remaining last state on the stack 
+ * should be complete (no NonTerminals to substitute anymore).
+ */
+public class WordAlignmentExtractor implements DerivationVisitor {
+
+  private Stack<WordAlignmentState> stack;
+
+  public WordAlignmentExtractor() {
+    stack = new Stack<WordAlignmentState>();
+  }
+
+  void merge(WordAlignmentState astate) {
+    // if alignment state has no NTs left AND stack is not empty
+    // AND parent object on stack still needs something to substitute
+    if (astate.isComplete() && stack.size() > 0 && !stack.peek().isComplete()) {
+      WordAlignmentState parentState = stack.pop();
+      parentState.substituteIn(astate);
+      merge(parentState);
+    } else {
+      stack.add(astate);
+    }
+  }
+
+  @Override
+  public void before(DerivationState state, int level) {
+    Rule rule = state.edge.getRule();
+    if (rule != null) {
+      merge(new WordAlignmentState(rule, state.parentNode.i));
+    }
+  }
+
+  @Override
+  public void after(DerivationState state, int level) {
+  }
+
+  public String toString() {
+    // peek, not pop: rendering must not consume the result; an empty stack yields an empty alignment
+    return stack.isEmpty() ? "" : stack.peek().toFinalString();
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9501535d/src/joshua/decoder/hypergraph/WordAlignmentState.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/hypergraph/WordAlignmentState.java b/src/joshua/decoder/hypergraph/WordAlignmentState.java
new file mode 100644
index 0000000..e3b9598
--- /dev/null
+++ b/src/joshua/decoder/hypergraph/WordAlignmentState.java
@@ -0,0 +1,154 @@
+package joshua.decoder.hypergraph;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.ListIterator;
+import java.util.Map;
+
+import joshua.decoder.ff.tm.Rule;
+
+/**
+ * This class encodes a derivation state in terms of a list of alignment points.
+ * Whenever a child instance is substituted into the parent instance, we need to
+ * adjust source indexes of the alignments.
+ * 
+ * @author fhieber
+ */
+public class WordAlignmentState {
+
+  /**
+   * each element in this list corresponds to a token on the target side of the
+   * rule. The values of the elements correspond to the aligned source token on
+   * the source side of the rule.
+   */
+  private LinkedList<AlignedSourceTokens> trgPoints;
+  private int srcStart;
+  /** number of NTs we need to substitute. */
+  private int numNT;
+  /** grows with substitutions of child rules. Reaches original Rule span if substitutions are complete */
+  private int srcLength;
+
+  /**
+   * Constructs the state for a fresh Rule whose source side starts at start.
+   * The state is complete right away if the rule has no NT (arity 0).
+   */
+  WordAlignmentState(Rule rule, int start) {
+    trgPoints = new LinkedList<AlignedSourceTokens>();
+    srcLength = rule.getFrench().length;
+    numNT = rule.getArity();
+    srcStart = start;
+    Map<Integer, List<Integer>> alignmentMap = rule.getAlignmentMap();
+    int[] nonTermPositions = rule.getNonTerminalSourcePositions();
+    int[] trg = rule.getEnglish();
+    // for each target index, create an AlignedSourceTokens point
+    for (int trgIndex = 0; trgIndex < trg.length; trgIndex++) {
+      AlignedSourceTokens trgPoint = new AlignedSourceTokens();
+
+      if (trg[trgIndex] >= 0) { // this is a terminal symbol, check for alignment
+        if (alignmentMap.containsKey(trgIndex)) {
+          // add source indexes (offset by srcStart) to the point
+          for (int srcIdx : alignmentMap.get(trgIndex)) {
+            trgPoint.add(srcStart + srcIdx);
+          }
+        } else { // this target word is NULL-aligned
+          trgPoint.setNull();
+        }
+      } else { // this is a nonterminal ([X]); its value is the negated, 1-based number of the NT in the source
+        trgPoint.setNonTerminal();
+        trgPoint.add(srcStart + nonTermPositions[Math.abs(trg[trgIndex]) - 1]); // NOTE(review): add() rejects values once setNonTerminal() was called, so nothing is stored here - confirm intent
+      }
+      trgPoints.add(trgPoint);
+    }
+  }
+
+  /**
+   * if there are no more NonTerminals to substitute,
+   * this state is said to be complete
+   */
+  public boolean isComplete() {
+    return numNT == 0;
+  }
+
+  /**
+   * builds the final alignment string in the standard alignment format: src -
+   * trg. Sorted by trg indexes. Disregards the sentence markers.
+   */
+  public String toFinalString() {
+    StringBuilder sb = new StringBuilder();
+    int t = 0;
+    for (AlignedSourceTokens pt : trgPoints) {
+      for (int s : pt)
+        sb.append(String.format(" %d-%d", s-1, t-1)); // shift both indexes by one to
+                                                      // disregard the sentence marker
+      t++;
+    }
+    String result = sb.toString();
+    if (!result.isEmpty())
+      return result.substring(1);
+    return result;
+  }
+  
+  /**
+   * builds the final alignment list.
+   * each entry in the list corresponds to a list of aligned source tokens.
+   * First and last item in trgPoints is skipped.
+   */
+  public List<List<Integer>> toFinalList() {
+    assert (isComplete() == true);
+    List<List<Integer>> alignment = new ArrayList<List<Integer>> ();
+    if (trgPoints.isEmpty())
+      return alignment;
+    ListIterator<AlignedSourceTokens> it = trgPoints.listIterator();
+    it.next(); // skip first item (sentence marker)
+    while (it.hasNext()) {
+      AlignedSourceTokens alignedSourceTokens = it.next();
+      if (it.hasNext()) { // if not last element in trgPoints
+        List<Integer> newAlignedSourceTokens = new ArrayList<Integer>();
+        for (Integer sourceIndex : alignedSourceTokens)
+          newAlignedSourceTokens.add(sourceIndex - 1); // shift by one to disregard sentence marker
+        alignment.add(newAlignedSourceTokens);
+      }
+    }
+    return alignment;
+  }
+
+  /**
+   * String representation for debugging.
+   */
+  public String toString() {
+    return String.format("%s , len=%d start=%d, isComplete=%s",
+        trgPoints.toString(), srcLength, srcStart, this.isComplete());
+  }
+
+  /**
+   * substitutes a child WordAlignmentState into this instance at the first
+   * NT it finds. Also shifts the indices in this instance by the span/width of the
+   * child that is to be substituted.
+   * Substitution order is determined by the architecture of Joshua's hypergraph.
+   */
+  void substituteIn(WordAlignmentState child) {
+    // update existing indexes by length of child (has no effect on NULL and
+    // NonTerminal points)
+    for (AlignedSourceTokens trgPoint : trgPoints)
+      trgPoint.shiftBy(child.srcStart, child.srcLength - 1);
+
+    // now substitute in the child at first NT, modifying the list
+    ListIterator<AlignedSourceTokens> it = trgPoints.listIterator();
+    while (it.hasNext()) {
+      AlignedSourceTokens trgPoint = it.next();
+      if (trgPoint.isNonTerminal()) { // found first NT
+        it.remove(); // remove NT symbol
+        for (AlignedSourceTokens childElement : child.trgPoints) {
+          childElement.setFinal(); // child source indexes are final, do not change them anymore
+          it.add(childElement);
+        }
+        this.srcLength += child.srcLength - 1; // -1 (NT)
+        this.numNT--;
+        break;
+      }
+    }
+  }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9501535d/tst/joshua/system/AlignmentMapTest.java
----------------------------------------------------------------------
diff --git a/tst/joshua/system/AlignmentMapTest.java b/tst/joshua/system/AlignmentMapTest.java
new file mode 100644
index 0000000..0eee8c8
--- /dev/null
+++ b/tst/joshua/system/AlignmentMapTest.java
@@ -0,0 +1,53 @@
+package joshua.system;
+
+import static org.junit.Assert.*;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import joshua.corpus.Vocabulary;
+import joshua.decoder.ff.tm.Rule;
+
+import org.junit.Before;
+import org.junit.Test;
+
+public class AlignmentMapTest {
+  
+  private Rule rule1 = null;
+  private Rule rule2 = null;
+  private static Map<Integer, List<Integer>> expectedAlignmentMap = null;
+  private static final int[] expectedNonTerminalPositions = {2,5};
+
+  @Before
+  public void setUp() throws Exception {
+    int[] sourceRhs = {Vocabulary.id("A1"),Vocabulary.id("A2"),-1,Vocabulary.id("B"),Vocabulary.id("C"),-2};
+    int[] targetRhs = {Vocabulary.id("c"),Vocabulary.id("b1"),-1,Vocabulary.id("b2"),-4,Vocabulary.id("a")};
+    int arity = 2; // 2 non terminals
+    String alignment = "0-5 1-5 3-1 3-3 4-0";
+    expectedAlignmentMap = new HashMap<Integer, List<Integer>>();
+    expectedAlignmentMap.put(0, Arrays.asList(4));
+    expectedAlignmentMap.put(5, Arrays.asList(0,1));
+    expectedAlignmentMap.put(1, Arrays.asList(3));
+    expectedAlignmentMap.put(3, Arrays.asList(3));
+    rule1 = new Rule(-1, sourceRhs, targetRhs, "", arity, alignment);
+    rule2 = new Rule(-1, sourceRhs, targetRhs, "", arity, null); // rule with no alignment
+  }
+
+  @Test
+  public void test() {
+    // test regular rule with arity 2
+    Map<Integer, List<Integer>> alignmentMap1 = rule1.getAlignmentMap();
+    assertEquals(expectedAlignmentMap, alignmentMap1);
+    int[] nonTerminalPositions1 = rule1.getNonTerminalSourcePositions();
+    assertArrayEquals(expectedNonTerminalPositions, nonTerminalPositions1);
+    
+    // test rule with no alignment
+    Map<Integer, List<Integer>> alignmentMap2 = rule2.getAlignmentMap();
+    assertTrue(alignmentMap2.isEmpty());
+    int[] nonTerminalPositions2 = rule2.getNonTerminalSourcePositions();
+    assertArrayEquals(expectedNonTerminalPositions, nonTerminalPositions2);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9501535d/tst/joshua/system/StructuredOutputTest.java
----------------------------------------------------------------------
diff --git a/tst/joshua/system/StructuredOutputTest.java b/tst/joshua/system/StructuredOutputTest.java
new file mode 100644
index 0000000..981f9d8
--- /dev/null
+++ b/tst/joshua/system/StructuredOutputTest.java
@@ -0,0 +1,103 @@
+package joshua.system;
+
+import java.util.Arrays;
+import java.util.List;
+
+import joshua.decoder.Decoder;
+import joshua.decoder.JoshuaConfiguration;
+import joshua.decoder.Translation;
+import joshua.decoder.segment_file.Sentence;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.Assert;
+
+/**
+ * Integration test for the complete Joshua decoder using a toy grammar that translates
+ * a bunch of capital letters to lowercase letters. Rules in the test grammar
+ * drop and generate additional words and simulate reordering of rules, so that
+ * proper extraction of word alignments can be tested.
+ * 
+ * @author fhieber
+ */
+public class StructuredOutputTest {
+
+  private JoshuaConfiguration joshuaConfig = null;
+  private Decoder decoder = null;
+  private Translation translation = null;
+  private static final String input = "A K B1 U Z1 Z2 B2 C";
+  private static final String expectedTranslation = "a b n1 u z c1 k1 k2 k3 n1 n2 n3 c2";
+  private static final String expectedWordAlignmentString = "0-0 2-1 6-1 3-3 4-4 5-4 7-5 1-6 1-7 1-8 7-12";
+  private static final List<List<Integer>> expectedWordAlignment = Arrays.asList(
+      Arrays.asList(0), Arrays.asList(2, 6), Arrays.asList(), Arrays.asList(3),
+      Arrays.asList(4, 5), Arrays.asList(7), Arrays.asList(1),
+      Arrays.asList(1), Arrays.asList(1), Arrays.asList(), Arrays.asList(),
+      Arrays.asList(), Arrays.asList(7));
+  private static final double expectedScore = -17.0;
+
+  @Before
+  public void setUp() throws Exception {
+    joshuaConfig = new JoshuaConfiguration();
+    joshuaConfig.search_algorithm = "cky";
+    joshuaConfig.mark_oovs = false;
+    joshuaConfig.pop_limit = 100;
+    joshuaConfig.use_unique_nbest = false;
+    joshuaConfig.include_align_index = false;
+    joshuaConfig.topN = 0;
+    joshuaConfig.tms.add("thrax pt 20 resources/wa_grammar");
+    joshuaConfig.tms.add("thrax glue -1 resources/grammar.glue");
+    joshuaConfig.goal_symbol = "[GOAL]";
+    joshuaConfig.default_non_terminal = "[X]";
+    joshuaConfig.features.add("feature_function = OOVPenalty");
+    joshuaConfig.weights.add("tm_pt_0 1");
+    joshuaConfig.weights.add("tm_pt_1 1");
+    joshuaConfig.weights.add("tm_pt_2 1");
+    joshuaConfig.weights.add("tm_pt_3 1");
+    joshuaConfig.weights.add("tm_pt_4 1");
+    joshuaConfig.weights.add("tm_pt_5 1");
+    joshuaConfig.weights.add("tm_glue_0 1");
+    joshuaConfig.weights.add("OOVPenalty 2");
+    decoder = new Decoder(joshuaConfig, ""); // second argument (configFile
+                                             // is not even used by the
+                                             // constructor/initialize)
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    decoder.cleanUp();
+    decoder = null;
+    translation = null;
+  }
+
+  private Translation decode(String input) {
+    Sentence sentence = new Sentence(input, 0, joshuaConfig);
+    return decoder.decode(sentence);
+  }
+
+  @Test
+  public void test() {
+
+    // test standard output
+    joshuaConfig.use_structured_output = false;
+    joshuaConfig.outputFormat = "%s | %a ";
+    translation = decode(input);
+    Assert.assertEquals(expectedTranslation + " | "
+        + expectedWordAlignmentString, translation.toString().trim());
+
+    // test structured output
+    joshuaConfig.use_structured_output = true; // set structured output creation to true
+    translation = decode(input);
+    Assert
+        .assertEquals(expectedTranslation, translation.getTranslationString());
+    Assert.assertEquals(Arrays.asList(expectedTranslation.split("\\s+")),
+        translation.getTranslationTokens());
+    Assert.assertEquals(expectedScore, translation.getTranslationScore(),
+        0.00001);
+    Assert.assertEquals(expectedWordAlignment, translation.getWordAlignment());
+    Assert.assertEquals(translation.getWordAlignment().size(), translation
+        .getTranslationTokens().size());
+
+  }
+
+}


[05/10] incubator-joshua git commit: kellens: Use Guava's memoize for expensive calls, removed unneeded members fhieber: Important bugfix for obtaining word alignments from packedRules in multi-threading environment

Posted by mj...@apache.org.
kellens: Use Guava's memoize for expensive calls, removed unneeded members
fhieber: Important bugfix for obtaining word alignments from packedRules in multi-threading environment


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/cabb52ca
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/cabb52ca
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/cabb52ca

Branch: refs/heads/master
Commit: cabb52cabd5a81088b21b9e01a4668ebb2a85ffa
Parents: 244e693
Author: Kellen Sunderland <ke...@amazon.com>
Authored: Wed Oct 14 17:13:18 2015 +0200
Committer: Kellen Sunderland <ke...@amazon.com>
Committed: Thu Mar 31 10:44:43 2016 +0200

----------------------------------------------------------------------
 resources/wa_grammar.packed/config              |   1 +
 resources/wa_grammar.packed/encoding            | Bin 0 -> 154 bytes
 .../wa_grammar.packed/slice_00000.alignments    | Bin 0 -> 45 bytes
 .../wa_grammar.packed/slice_00000.features      | Bin 0 -> 47 bytes
 resources/wa_grammar.packed/slice_00000.source  | Bin 0 -> 204 bytes
 resources/wa_grammar.packed/slice_00000.target  | Bin 0 -> 128 bytes
 .../wa_grammar.packed/slice_00000.target.lookup | Bin 0 -> 32 bytes
 resources/wa_grammar.packed/vocabulary          | Bin 0 -> 238 bytes
 src/joshua/decoder/ff/tm/PhraseRule.java        |  21 ++--
 src/joshua/decoder/ff/tm/Rule.java              |  42 ++++---
 .../decoder/ff/tm/packed/PackedGrammar.java     | 121 +++++++++++++------
 .../system/MultithreadedTranslationTests.java   | 113 +++++++++++++++++
 12 files changed, 240 insertions(+), 58 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/cabb52ca/resources/wa_grammar.packed/config
----------------------------------------------------------------------
diff --git a/resources/wa_grammar.packed/config b/resources/wa_grammar.packed/config
new file mode 100644
index 0000000..ebd1bf1
--- /dev/null
+++ b/resources/wa_grammar.packed/config
@@ -0,0 +1 @@
+max-source-len = 6

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/cabb52ca/resources/wa_grammar.packed/encoding
----------------------------------------------------------------------
diff --git a/resources/wa_grammar.packed/encoding b/resources/wa_grammar.packed/encoding
new file mode 100644
index 0000000..630f69f
Binary files /dev/null and b/resources/wa_grammar.packed/encoding differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/cabb52ca/resources/wa_grammar.packed/slice_00000.alignments
----------------------------------------------------------------------
diff --git a/resources/wa_grammar.packed/slice_00000.alignments b/resources/wa_grammar.packed/slice_00000.alignments
new file mode 100644
index 0000000..f1425eb
Binary files /dev/null and b/resources/wa_grammar.packed/slice_00000.alignments differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/cabb52ca/resources/wa_grammar.packed/slice_00000.features
----------------------------------------------------------------------
diff --git a/resources/wa_grammar.packed/slice_00000.features b/resources/wa_grammar.packed/slice_00000.features
new file mode 100644
index 0000000..5a4c774
Binary files /dev/null and b/resources/wa_grammar.packed/slice_00000.features differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/cabb52ca/resources/wa_grammar.packed/slice_00000.source
----------------------------------------------------------------------
diff --git a/resources/wa_grammar.packed/slice_00000.source b/resources/wa_grammar.packed/slice_00000.source
new file mode 100644
index 0000000..4607b89
Binary files /dev/null and b/resources/wa_grammar.packed/slice_00000.source differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/cabb52ca/resources/wa_grammar.packed/slice_00000.target
----------------------------------------------------------------------
diff --git a/resources/wa_grammar.packed/slice_00000.target b/resources/wa_grammar.packed/slice_00000.target
new file mode 100644
index 0000000..fe11a38
Binary files /dev/null and b/resources/wa_grammar.packed/slice_00000.target differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/cabb52ca/resources/wa_grammar.packed/slice_00000.target.lookup
----------------------------------------------------------------------
diff --git a/resources/wa_grammar.packed/slice_00000.target.lookup b/resources/wa_grammar.packed/slice_00000.target.lookup
new file mode 100644
index 0000000..7d82179
Binary files /dev/null and b/resources/wa_grammar.packed/slice_00000.target.lookup differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/cabb52ca/resources/wa_grammar.packed/vocabulary
----------------------------------------------------------------------
diff --git a/resources/wa_grammar.packed/vocabulary b/resources/wa_grammar.packed/vocabulary
new file mode 100644
index 0000000..637651e
Binary files /dev/null and b/resources/wa_grammar.packed/vocabulary differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/cabb52ca/src/joshua/decoder/ff/tm/PhraseRule.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/tm/PhraseRule.java b/src/joshua/decoder/ff/tm/PhraseRule.java
index e42a7ee..c178b31 100644
--- a/src/joshua/decoder/ff/tm/PhraseRule.java
+++ b/src/joshua/decoder/ff/tm/PhraseRule.java
@@ -1,5 +1,8 @@
 package joshua.decoder.ff.tm;
 
+import com.google.common.base.Supplier;
+import com.google.common.base.Suppliers;
+
 /***
  * A class for reading in rules from a Moses phrase table. Most of the conversion work is done
  * in {@link joshua.decoder.ff.tm.format.PhraseFormatReader}. This includes prepending every
@@ -19,11 +22,20 @@ package joshua.decoder.ff.tm;
 public class PhraseRule extends Rule {
 
   private String mosesFeatureString = null;
+  private Supplier<byte[]> alignmentSupplier;
   
   public PhraseRule(int lhs, int[] french, int[] english, String sparse_features, int arity,
       String alignment) {
     super(lhs, french, english, null, arity, alignment);
     mosesFeatureString = sparse_features;
+    this.alignmentSupplier = Suppliers.memoize(() ->{
+        String[] tokens = getAlignmentString().split("[-\\s]+");
+        byte[] alignmentArray = new byte[tokens.length + 2];
+        alignmentArray[0] = alignmentArray[1] = 0;
+        for (int i = 0; i < tokens.length; i++)
+            alignmentArray[i + 2] = (byte) (Short.parseShort(tokens[i]) + 1);
+        return alignmentArray;
+    });
   }
 
   /** 
@@ -49,13 +61,6 @@ public class PhraseRule extends Rule {
    */
   @Override
   public byte[] getAlignment() {
-    if (alignment == null) {
-      String[] tokens = getAlignmentString().split("[-\\s]+");
-      alignment = new byte[tokens.length + 2];
-      alignment[0] = alignment[1] = 0;
-      for (int i = 0; i < tokens.length; i++)
-        alignment[i + 2] = (byte) (Short.parseShort(tokens[i]) + 1);
-    }
-    return alignment;
+    return this.alignmentSupplier.get();
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/cabb52ca/src/joshua/decoder/ff/tm/Rule.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/tm/Rule.java b/src/joshua/decoder/ff/tm/Rule.java
index 90a44d5..3d715ea 100644
--- a/src/joshua/decoder/ff/tm/Rule.java
+++ b/src/joshua/decoder/ff/tm/Rule.java
@@ -8,6 +8,9 @@ import java.util.List;
 import java.util.Map;
 import java.util.regex.Pattern;
 
+import com.google.common.base.Supplier;
+import com.google.common.base.Suppliers;
+
 import joshua.corpus.Vocabulary;
 import joshua.decoder.Decoder;
 import joshua.decoder.ff.FeatureFunction;
@@ -43,6 +46,8 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
   protected FeatureVector features = null;
   protected String sparseFeatureString;
 
+  private final Supplier<byte[]> alignmentSupplier;
+
   /*
    * a feature function will be fired for this rule only if the owner of the rule matches the owner
    * of the feature function
@@ -63,7 +68,6 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
 
   // The alignment string, e.g., 0-0 0-1 1-1 2-1
   private String alignmentString;
-  protected byte[] alignment = null;
 
   /**
    * Constructs a new rule using the provided parameters. The owner and rule id for this rule are
@@ -86,16 +90,12 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
     this.arity = arity;
     this.owner = owner;
     this.english = targetRhs;
+    alignmentSupplier = initializeAlignmentSupplier();
   }
 
   // Sparse feature version
   public Rule(int lhs, int[] sourceRhs, int[] targetRhs, String sparseFeatures, int arity) {
-    this.lhs = lhs;
-    this.pFrench = sourceRhs;
-    this.sparseFeatureString = sparseFeatures;
-    this.arity = arity;
-    this.owner = -1;
-    this.english = targetRhs;
+    this(lhs, sourceRhs, targetRhs, sparseFeatures, arity, -1);
   }
 
   public Rule(int lhs, int[] sourceRhs, int[] targetRhs, String sparseFeatures, int arity, String alignment) {
@@ -105,6 +105,22 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
   
   public Rule() {
     this.lhs = -1;
+    alignmentSupplier = initializeAlignmentSupplier();
+  }
+
+  private Supplier<byte[]> initializeAlignmentSupplier(){
+    Supplier<byte[]> result = Suppliers.memoize(() ->{
+      byte[] alignment = null;
+      String alignmentString = getAlignmentString();
+      if (alignmentString != null) {
+        String[] tokens = alignmentString.split("[-\\s]+");
+        alignment = new byte[tokens.length];
+        for (int i = 0; i < tokens.length; i++)
+          alignment[i] = (byte) Short.parseShort(tokens[i]);
+      }
+      return alignment;
+    });
+    return result;
   }
 
   // ===============================================================
@@ -351,23 +367,17 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
   public String getFeatureString() {
     return sparseFeatureString;
   }
-  
+
   /**
    * Returns an alignment as a sequence of integers. The integers at positions i and i+1 are paired,
    * with position i indexing the source and i+1 the target.
    */
   public byte[] getAlignment() {
-    if (alignment == null && getAlignmentString() != null) {
-      String[] tokens = getAlignmentString().split("[-\\s]+");
-      alignment = new byte[tokens.length];
-      for (int i = 0; i < tokens.length; i++)
-        alignment[i] = (byte) Short.parseShort(tokens[i]);
-    }
-    return alignment;
+    return this.alignmentSupplier.get();
   }
   
   public String getAlignmentString() {
-    return alignmentString;
+    return this.alignmentString;
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/cabb52ca/src/joshua/decoder/ff/tm/packed/PackedGrammar.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/tm/packed/PackedGrammar.java b/src/joshua/decoder/ff/tm/packed/PackedGrammar.java
index dc72a4b..2251f5a 100644
--- a/src/joshua/decoder/ff/tm/packed/PackedGrammar.java
+++ b/src/joshua/decoder/ff/tm/packed/PackedGrammar.java
@@ -79,6 +79,8 @@ import joshua.util.encoding.EncoderConfiguration;
 import joshua.util.encoding.FloatEncoder;
 import joshua.util.io.LineReader;
 
+import com.google.common.base.Supplier;
+import com.google.common.base.Suppliers;
 import com.google.common.cache.Cache;
 import com.google.common.cache.CacheBuilder;
 
@@ -790,32 +792,63 @@ public class PackedGrammar extends AbstractGrammar {
        *
        */
       public final class PackedPhrasePair extends PackedRule {
+
+        private final Supplier<int[]> englishSupplier;
+        private final Supplier<byte[]> alignmentSupplier;
+
         public PackedPhrasePair(int address) {
           super(address);
+          englishSupplier = initializeEnglishSupplier();
+          alignmentSupplier = initializeAlignmentSupplier();
         }
 
         @Override
         public int getArity() {
           return PackedTrie.this.getArity() + 1;
         }
-        
+
         /**
-         * Take the English phrase of the underlying rule and prepend an [X].
-         * 
-         * @return
+         * Initialize a number of suppliers which get evaluated when their respective getters
+         * are called.
+         * The inner lambda functions are guaranteed to be called only once; because of this,
+         * the underlying structures are accessed in a thread-safe way.
+         * Guava's implementation makes sure only one read of a volatile variable occurs per get.
+         * This means this implementation should be as thread-safe and performant as possible.
          */
-        @Override
-        public int[] getEnglish() {
-          if (tgt == null) {
+
+        private Supplier<int[]> initializeEnglishSupplier(){
+          Supplier<int[]> result = Suppliers.memoize(() ->{
             int[] phrase = getTarget(source[address + 1]);
-            tgt = new int[phrase.length + 1];
+            int[] tgt = new int[phrase.length + 1];
             tgt[0] = -1;
             for (int i = 0; i < phrase.length; i++)
               tgt[i+1] = phrase[i];
-          }
-          return tgt;
+            return tgt;
+          });
+          return result;
         }
 
+        private Supplier<byte[]> initializeAlignmentSupplier(){
+          Supplier<byte[]> result = Suppliers.memoize(() ->{
+            byte[] raw_alignment = getAlignmentArray(source[address + 2]);
+            byte[] points = new byte[raw_alignment.length + 2];
+            points[0] = points[1] = 0;
+            for (int i = 0; i < raw_alignment.length; i++)
+              points[i + 2] = (byte) (raw_alignment[i] + 1);
+            return points;
+          });
+          return result;
+        }
+
+        /**
+         * Take the English phrase of the underlying rule and prepend an [X].
+         * 
+         * @return
+         */
+        @Override
+        public int[] getEnglish() {
+          return this.englishSupplier.get();
+        }
         
         /**
          * Take the French phrase of the underlying rule and prepend an [X].
@@ -838,27 +871,51 @@ public class PackedGrammar extends AbstractGrammar {
          */
         @Override
         public byte[] getAlignment() {
-          // alignments is the underlying raw alignment data
-          if (alignments != null) {
-            byte[] a = getAlignmentArray(source[address + 2]);
-            byte[] points = new byte[a.length + 2];
-            points[0] = points[1] = 0;
-            for (int i = 0; i < a.length; i++)
-              points[i + 2] = (byte) (a[i] + 1);
-            return points;
+          // if no alignments in grammar do not fail
+          if (alignments == null) {
+            return null;
           }
-          return null;
+
+          return this.alignmentSupplier.get();
         }
       }
 
       public class PackedRule extends Rule {
         protected final int address;
-
-        protected int[] tgt = null;
-        private FeatureVector features = null;
+        private final Supplier<int[]> englishSupplier;
+        private final Supplier<FeatureVector> featureVectorSupplier;
+        private final Supplier<byte[]> alignmentsSupplier;
 
         public PackedRule(int address) {
           this.address = address;
+          this.englishSupplier = intializeEnglishSupplier();
+          this.featureVectorSupplier = initializeFeatureVectorSupplier();
+          this.alignmentsSupplier = initializeAlignmentsSupplier();
+        }
+
+        private Supplier<int[]> intializeEnglishSupplier(){
+          Supplier<int[]> result = Suppliers.memoize(() ->{
+            return getTarget(source[address + 1]);
+          });
+          return result;
+        }
+
+        private Supplier<FeatureVector> initializeFeatureVectorSupplier(){
+          Supplier<FeatureVector> result = Suppliers.memoize(() ->{
+            return new FeatureVector(getFeatures(source[address + 2]), "tm_" + Vocabulary.word(owner) + "_");
+          });
+          return result;
+        }
+
+        private Supplier<byte[]> initializeAlignmentsSupplier(){
+          Supplier<byte[]> result = Suppliers.memoize(()->{
+            // if no alignments in grammar do not fail
+            if (alignments == null){
+              return null;
+            }
+            return getAlignmentArray(source[address + 2]);
+          });
+          return result;
         }
 
         @Override
@@ -894,10 +951,7 @@ public class PackedGrammar extends AbstractGrammar {
 
         @Override
         public int[] getEnglish() {
-          if (tgt == null) {
-            tgt = getTarget(source[address + 1]);
-          }
-          return tgt;
+          return this.englishSupplier.get();
         }
 
         @Override
@@ -911,18 +965,17 @@ public class PackedGrammar extends AbstractGrammar {
 
         @Override
         public FeatureVector getFeatureVector() {
-          if (features == null) {
-            features = new FeatureVector(getFeatures(source[address + 2]), "tm_" + Vocabulary.word(owner) + "_");
-          }
-
-          return features;
+          return this.featureVectorSupplier.get();
         }
         
         @Override
         public byte[] getAlignment() {
-          if (alignments != null)
-            return getAlignmentArray(source[address + 2]);
-          return null;
+          return this.alignmentsSupplier.get();
+        }
+        
+        @Override
+        public String getAlignmentString() {
+            throw new RuntimeException("AlignmentString not implemented for PackedRule!");
         }
 
         @Override

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/cabb52ca/tst/joshua/system/MultithreadedTranslationTests.java
----------------------------------------------------------------------
diff --git a/tst/joshua/system/MultithreadedTranslationTests.java b/tst/joshua/system/MultithreadedTranslationTests.java
new file mode 100644
index 0000000..4ff549c
--- /dev/null
+++ b/tst/joshua/system/MultithreadedTranslationTests.java
@@ -0,0 +1,113 @@
+package joshua.system;
+
+import static org.junit.Assert.assertTrue;
+
+import java.io.ByteArrayInputStream;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+
+import joshua.corpus.Vocabulary;
+import joshua.decoder.Decoder;
+import joshua.decoder.JoshuaConfiguration;
+import joshua.decoder.Translation;
+import joshua.decoder.Translations;
+import joshua.decoder.io.TranslationRequest;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Integration test for the multithreaded Joshua decoder. The grammar used is a
+ * toy packed grammar.
+ *
+ * @author kellens
+ */
+public class MultithreadedTranslationTests {
+
+  private JoshuaConfiguration joshuaConfig = null;
+  private Decoder decoder = null;
+  private static final String INPUT = "A K B1 U Z1 Z2 B2 C";
+
+  @Before
+  public void setUp() throws Exception {
+    Vocabulary.clear();
+    joshuaConfig = new JoshuaConfiguration();
+    joshuaConfig.search_algorithm = "cky";
+    joshuaConfig.mark_oovs = false;
+    joshuaConfig.pop_limit = 100;
+    joshuaConfig.use_unique_nbest = false;
+    joshuaConfig.include_align_index = false;
+    joshuaConfig.topN = 0;
+    joshuaConfig.tms.add("thrax -owner pt -maxspan 20 -path resources/wa_grammar.packed");
+    joshuaConfig.tms.add("thrax -owner glue -maxspan -1 -path resources/grammar.glue");
+    joshuaConfig.goal_symbol = "[GOAL]";
+    joshuaConfig.default_non_terminal = "[X]";
+    joshuaConfig.features.add("feature_function = OOVPenalty");
+    joshuaConfig.weights.add("tm_pt_0 1");
+    joshuaConfig.weights.add("tm_pt_1 1");
+    joshuaConfig.weights.add("tm_pt_2 1");
+    joshuaConfig.weights.add("tm_pt_3 1");
+    joshuaConfig.weights.add("tm_pt_4 1");
+    joshuaConfig.weights.add("tm_pt_5 1");
+    joshuaConfig.weights.add("tm_glue_0 1");
+    joshuaConfig.weights.add("OOVPenalty 2");
+    joshuaConfig.num_parallel_decoders = 500; // This will enable 500 parallel
+                                              // decoders to run at once.
+                                              // Useful to help flush out
+                                              // concurrency errors in
+                                              // underlying
+                                              // data-structures.
+    this.decoder = new Decoder(joshuaConfig, ""); // Second argument
+                                                  // (configFile)
+                                                  // is not even used by the
+                                                  // constructor/initialize.
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    Vocabulary.clear();
+    this.decoder.cleanUp();
+    this.decoder = null;
+  }
+
+
+
+  // This test was created specifically to reproduce a multithreading issue
+  // related to mapped byte array access in the PackedGrammar getAlignmentArray
+  // function.
+
+  // We'll test the decoding engine using N = 10,000 identical inputs. This
+  // should be sufficient to induce concurrent data access for many shared
+  // data structures.
+
+  @Test
+  public void givenPackedGrammar_whenNTranslationsCalledConcurrently_thenReturnNResults() {
+    // GIVEN
+
+    int inputLines = 10000;
+    joshuaConfig.construct_structured_output = true; // Enabled alignments.
+    StringBuilder sb = new StringBuilder();
+    for (int i = 0; i < inputLines; i++) {
+      sb.append(INPUT + "\n");
+    }
+
+    // Feed all lines as one large string to simulate N requests to the
+    // decoding engine.
+    TranslationRequest req = new TranslationRequest(new ByteArrayInputStream(sb.toString()
+        .getBytes(Charset.forName("UTF-8"))), joshuaConfig);
+
+    // WHEN
+    // Translate all spans in parallel.
+    Translations translations = this.decoder.decodeAll(req);
+    ArrayList<Translation> translationResults = new ArrayList<Translation>();
+
+    Translation t;
+    while ((t = translations.next()) != null) {
+      translationResults.add(t);
+    }
+
+    // THEN
+    assertTrue(translationResults.size() == inputLines);
+  }
+}