You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/06/01 02:52:10 UTC

[75/94] [abbrv] incubator-joshua git commit: Merge remote-tracking branch 'origin/master' into JOSHUA-252

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/91400fe2/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
----------------------------------------------------------------------
diff --cc src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
index 12891ee,0000000..794ecd5
mode 100644,000000..100644
--- a/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
+++ b/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
@@@ -1,77 -1,0 +1,75 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 + package org.apache.joshua.decoder.phrase.decode;
 +
++import static com.google.common.base.Charsets.UTF_8;
++import static java.nio.file.Files.readAllBytes;
++import static org.junit.Assert.assertEquals;
++
 +import java.io.IOException;
 +import java.nio.file.Path;
 +import java.nio.file.Paths;
 +
- import org.apache.joshua.corpus.Vocabulary;
 +import org.apache.joshua.decoder.Decoder;
 +import org.apache.joshua.decoder.JoshuaConfiguration;
 +import org.apache.joshua.decoder.Translation;
 +import org.apache.joshua.decoder.segment_file.Sentence;
 +
 +import org.junit.After;
 +import org.junit.Before;
 +import org.junit.Test;
 +
- import static com.google.common.base.Charsets.UTF_8;
- import static java.nio.file.Files.readAllBytes;
- import static org.apache.joshua.decoder.ff.FeatureVector.DENSE_FEATURE_NAMES;
- import static org.junit.Assert.assertEquals;
- 
 +/**
 + * Reimplements the constrained phrase decoding test
 + */
 +public class PhraseDecodingTest {
 +  
 +  private static final String CONFIG = "resources/phrase_decoder/config";
 +  private static final String INPUT = "una estrategia republicana para obstaculizar la reelección de Obama";
 +  private static final Path GOLD_PATH = Paths.get("resources/phrase_decoder/output.gold");
 +  
 +  private JoshuaConfiguration joshuaConfig = null;
 +  private Decoder decoder = null;
 +  
 +  @Before
 +  public void setUp() throws Exception {
 +    joshuaConfig = new JoshuaConfiguration();
 +    joshuaConfig.readConfigFile(CONFIG);
 +    decoder = new Decoder(joshuaConfig, "");
 +  }
 +  
 +  @After
 +  public void tearDown() throws Exception {
 +    decoder.cleanUp();
 +    decoder = null;
 +  }
 +  
 +  @Test
 +  public void givenInput_whenPhraseDecoding_thenOutputIsAsExpected() throws IOException {
 +    final String translation = decode(INPUT).toString();
 +    final String gold = new String(readAllBytes(GOLD_PATH), UTF_8);
 +    assertEquals(gold, translation);
 +  }
 +  
 +  private Translation decode(String input) {
 +    final Sentence sentence = new Sentence(input, 0, joshuaConfig);
 +    return decoder.decode(sentence);
 +  }
 +
 +}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/91400fe2/src/test/java/org/apache/joshua/system/KenLmTest.java
----------------------------------------------------------------------
diff --cc src/test/java/org/apache/joshua/system/KenLmTest.java
index 6c05a58,0000000..d61e303
mode 100644,000000..100644
--- a/src/test/java/org/apache/joshua/system/KenLmTest.java
+++ b/src/test/java/org/apache/joshua/system/KenLmTest.java
@@@ -1,95 -1,0 +1,93 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 + package org.apache.joshua.system;
 +
 +import static org.apache.joshua.corpus.Vocabulary.registerLanguageModel;
 +import static org.apache.joshua.corpus.Vocabulary.unregisterLanguageModels;
 +import static org.junit.Assert.*;
 +import org.apache.joshua.corpus.Vocabulary;
- import org.apache.joshua.decoder.Decoder;
- import org.apache.joshua.decoder.JoshuaConfiguration;
 +import org.apache.joshua.decoder.ff.lm.KenLM;
 +
 +import org.junit.After;
 +import org.junit.Before;
 +import org.junit.Test;
 +
 +/**
 + * KenLM JNI interface tests.
 + * Loads libken.{so,dylib}.
 + * If run in Eclipse, add -Djava.library.path=build/lib to JVM arguments
 + * of the run configuration.
 + */
 +public class KenLmTest {
 +
 +  private static final String LANGUAGE_MODEL_PATH = "resources/kenlm/oilers.kenlm";
 +
 +  @Test
 +  public void givenKenLm_whenQueryingForNgramProbability_thenProbIsCorrect() {
 +    // GIVEN
 +    KenLM kenLm = new KenLM(3, LANGUAGE_MODEL_PATH);
 +    int[] words = Vocabulary.addAll("Wayne Gretzky");
 +    registerLanguageModel(kenLm);
 +
 +    // WHEN
 +    float probability = kenLm.prob(words);
 +
 +    // THEN
 +    assertEquals("Found the wrong probability for 2-gram \"Wayne Gretzky\"", -0.99f, probability,
 +        Float.MIN_VALUE);
 +  }
 +  
 +  @Test
 +  public void givenKenLm_whenQueryingForNgramProbability_thenIdAndStringMethodsReturnTheSame() {
 +    // GIVEN
 +    KenLM kenLm = new KenLM(LANGUAGE_MODEL_PATH);
 +    registerLanguageModel(kenLm);
 +    String sentence = "Wayne Gretzky";
 +    String[] words = sentence.split("\\s+");
 +    int[] ids = Vocabulary.addAll(sentence);
 +
 +    // WHEN
 +    float prob_string = kenLm.prob(words);
 +    float prob_id = kenLm.prob(ids);
 +
 +    // THEN
 +    assertEquals("ngram probabilities differ for word and id based n-gram query", prob_string, prob_id,
 +            Float.MIN_VALUE);
 +
 +  }
 +
 +  @Test
 +  public void givenKenLm_whenIsKnownWord_thenReturnValuesAreCorrect() {
 +    KenLM kenLm = new KenLM(LANGUAGE_MODEL_PATH);
 +    assertTrue(kenLm.isKnownWord("Wayne"));
 +    assertFalse(kenLm.isKnownWord("Wayne2222"));
 +  }
 +
 +  @Before
 +  public void setUp() throws Exception {
 +    Vocabulary.clear();
 +    unregisterLanguageModels();
 +  }
 +
 +  @After
 +  public void tearDown() throws Exception {
 +    Vocabulary.clear();
 +    unregisterLanguageModels();
 +  }
 +}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/91400fe2/src/test/java/org/apache/joshua/system/MultithreadedTranslationTests.java
----------------------------------------------------------------------
diff --cc src/test/java/org/apache/joshua/system/MultithreadedTranslationTests.java
index 194be6f,0000000..f006363
mode 100644,000000..100644
--- a/src/test/java/org/apache/joshua/system/MultithreadedTranslationTests.java
+++ b/src/test/java/org/apache/joshua/system/MultithreadedTranslationTests.java
@@@ -1,164 -1,0 +1,164 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 + package org.apache.joshua.system;
 +
 +import static org.junit.Assert.assertTrue;
 +
 +import java.io.BufferedReader;
 +import java.io.ByteArrayInputStream;
 +import java.io.ByteArrayOutputStream;
 +import java.io.IOException;
 +import java.io.InputStreamReader;
 +import java.nio.charset.Charset;
 +import java.util.ArrayList;
 +
 +import org.apache.joshua.decoder.Decoder;
 +import org.apache.joshua.decoder.JoshuaConfiguration;
 +import org.apache.joshua.decoder.MetaDataException;
 +import org.apache.joshua.decoder.io.TranslationRequestStream;
 +import org.apache.joshua.decoder.segment_file.Sentence;
++
 +import org.junit.After;
 +import org.junit.Before;
 +import org.junit.Test;
 +
 +/**
 + * Integration test for multithreaded Joshua decoder tests. Grammar used is a
 + * toy packed grammar.
 + *
 + * @author kellens
 + */
 +public class MultithreadedTranslationTests {
 +
 +  private JoshuaConfiguration joshuaConfig = null;
 +  private Decoder decoder = null;
 +  private static final String INPUT = "A K B1 U Z1 Z2 B2 C";
 +  private int previousLogLevel;
 +  private final static long NANO_SECONDS_PER_SECOND = 1_000_000_000;
 +
 +  @Before
 +  public void setUp() throws Exception {
 +    joshuaConfig = new JoshuaConfiguration();
 +    joshuaConfig.search_algorithm = "cky";
 +    joshuaConfig.mark_oovs = false;
 +    joshuaConfig.pop_limit = 100;
 +    joshuaConfig.use_unique_nbest = false;
 +    joshuaConfig.include_align_index = false;
 +    joshuaConfig.topN = 0;
 +    joshuaConfig.tms.add("thrax -owner pt -maxspan 20 -path resources/wa_grammar.packed");
 +    joshuaConfig.tms.add("thrax -owner glue -maxspan -1 -path resources/grammar.glue");
 +    joshuaConfig.goal_symbol = "[GOAL]";
 +    joshuaConfig.default_non_terminal = "[X]";
 +    joshuaConfig.features.add("feature_function = OOVPenalty");
 +    joshuaConfig.weights.add("tm_pt_0 1");
 +    joshuaConfig.weights.add("tm_pt_1 1");
 +    joshuaConfig.weights.add("tm_pt_2 1");
 +    joshuaConfig.weights.add("tm_pt_3 1");
 +    joshuaConfig.weights.add("tm_pt_4 1");
 +    joshuaConfig.weights.add("tm_pt_5 1");
 +    joshuaConfig.weights.add("tm_glue_0 1");
 +    joshuaConfig.weights.add("OOVPenalty 2");
 +    joshuaConfig.num_parallel_decoders = 500; // This will enable 500 parallel
 +                                              // decoders to run at once.
 +                                              // Useful to help flush out
 +                                              // concurrency errors in
 +                                              // underlying
 +                                              // data-structures.
 +    this.decoder = new Decoder(joshuaConfig, ""); // Second argument
 +                                                  // (configFile)
 +                                                  // is not even used by the
 +                                                  // constructor/initialize.
 +
 +    previousLogLevel = Decoder.VERBOSE;
 +    Decoder.VERBOSE = 0;
 +  }
 +
 +  @After
 +  public void tearDown() throws Exception {
 +    this.decoder.cleanUp();
 +    this.decoder = null;
 +    Decoder.VERBOSE = previousLogLevel;
 +  }
 +
 +
 +
 +  // This test was created specifically to reproduce a multithreaded issue
 +  // related to mapped byte array access in the PackedGrammar getAlignmentArray
 +  // function.
 +
 +  // We'll test the decoding engine using N = 10,000 identical inputs. This
 +  // should be sufficient to induce concurrent data access for many shared
 +  // data structures.
 +
 +  @Test
 +  public void givenPackedGrammar_whenNTranslationsCalledConcurrently_thenReturnNResults() {
 +    // GIVEN
 +
 +    int inputLines = 10000;
-     //joshuaConfig.construct_structured_output = true; // Enabled alignments.
++    joshuaConfig.use_structured_output = true; // Enabled alignments.
 +    StringBuilder sb = new StringBuilder();
 +    for (int i = 0; i < inputLines; i++) {
 +      sb.append(INPUT + "\n");
 +    }
 +
 +    // Feed the concatenated input through a single request stream to simulate N
 +    // requests to the decoding engine.
 +    TranslationRequestStream req = new TranslationRequestStream(
 +        new BufferedReader(new InputStreamReader(new ByteArrayInputStream(sb.toString()
 +        .getBytes(Charset.forName("UTF-8"))))), joshuaConfig);
 +    
 +    ByteArrayOutputStream output = new ByteArrayOutputStream();
 +
- 
 +    // WHEN
 +    // Translate all spans in parallel.
 +    try {
 +      this.decoder.decodeAll(req, output);
 +    } catch (IOException e) {
 +      // TODO Auto-generated catch block
 +      e.printStackTrace();
 +    }
 +    ArrayList<Sentence> translationResults = new ArrayList<Sentence>();
 +
 +
 +    final long translationStartTime = System.nanoTime();
 +    Sentence t;
 +    try {
 +      while ((t = req.next()) != null) {
 +        translationResults.add(t);
 +      }
 +    } catch (MetaDataException e) {
 +      e.printStackTrace();
 +    } finally {
 +      if (output != null) {
 +        try {
 +          output.close();
 +        } catch (IOException e) {
 +          e.printStackTrace();
 +        }
 +      }
 +    }
 +
 +    final long translationEndTime = System.nanoTime();
 +    final double pipelineLoadDurationInSeconds = (translationEndTime - translationStartTime) / ((double)NANO_SECONDS_PER_SECOND);
 +    System.err.println(String.format("%.2f seconds", pipelineLoadDurationInSeconds));
 +
 +    // THEN
 +    assertTrue(translationResults.size() == inputLines);
 +  }
 +}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/91400fe2/src/test/java/org/apache/joshua/system/StructuredOutputTest.java
----------------------------------------------------------------------
diff --cc src/test/java/org/apache/joshua/system/StructuredOutputTest.java
index 99d89f9,0000000..b8a2496
mode 100644,000000..100644
--- a/src/test/java/org/apache/joshua/system/StructuredOutputTest.java
+++ b/src/test/java/org/apache/joshua/system/StructuredOutputTest.java
@@@ -1,122 -1,0 +1,118 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.joshua.system;
 +
 +import java.util.Arrays;
 +import java.util.List;
 +
 +import org.apache.joshua.decoder.Decoder;
 +import org.apache.joshua.decoder.JoshuaConfiguration;
 +import org.apache.joshua.decoder.Translation;
 +import org.apache.joshua.decoder.segment_file.Sentence;
 +
 +import org.junit.After;
 +import org.junit.Before;
 +import org.junit.Test;
 +import org.junit.Assert;
 +
 +/**
 + * Integration test for the complete Joshua decoder using a toy grammar that translates
 + * a bunch of capital letters to lowercase letters. Rules in the test grammar
 + * drop and generate additional words and simulate reordering of rules, so that
 + * proper extraction of word alignments can be tested.
 + * 
 + * @author fhieber
 + */
 +public class StructuredOutputTest {
 +
 +  private JoshuaConfiguration joshuaConfig = null;
 +  private Decoder decoder = null;
 +  private Translation translation = null;
 +  private static final String input = "A K B1 U Z1 Z2 B2 C";
 +  private static final String expectedTranslation = "a b n1 u z c1 k1 k2 k3 n1 n2 n3 c2";
 +  private static final String expectedWordAlignmentString = "0-0 2-1 6-1 3-3 4-4 5-4 7-5 1-6 1-7 1-8 7-12";
 +  private static final List<List<Integer>> expectedWordAlignment = Arrays.asList(
 +      Arrays.asList(0), Arrays.asList(2, 6), Arrays.asList(), Arrays.asList(3),
 +      Arrays.asList(4, 5), Arrays.asList(7), Arrays.asList(1),
 +      Arrays.asList(1), Arrays.asList(1), Arrays.asList(), Arrays.asList(),
 +      Arrays.asList(), Arrays.asList(7));
 +  private static final double expectedScore = -17.0;
 +
 +  @Before
 +  public void setUp() throws Exception {
 +    joshuaConfig = new JoshuaConfiguration();
 +    joshuaConfig.search_algorithm = "cky";
 +    joshuaConfig.mark_oovs = false;
 +    joshuaConfig.pop_limit = 100;
 +    joshuaConfig.use_unique_nbest = false;
 +    joshuaConfig.include_align_index = false;
 +    joshuaConfig.topN = 0;
-     joshuaConfig.tms.add("thrax pt 20 resources/wa_grammar");
-     joshuaConfig.tms.add("thrax glue -1 resources/grammar.glue");
++    joshuaConfig.tms.add("thrax -owner pt -maxspan 20 -path resources/wa_grammar");
++    joshuaConfig.tms.add("thrax -owner glue -maxspan -1 -path resources/grammar.glue");
 +    joshuaConfig.goal_symbol = "[GOAL]";
 +    joshuaConfig.default_non_terminal = "[X]";
 +    joshuaConfig.features.add("feature_function = OOVPenalty");
 +    joshuaConfig.weights.add("tm_pt_0 1");
 +    joshuaConfig.weights.add("tm_pt_1 1");
 +    joshuaConfig.weights.add("tm_pt_2 1");
 +    joshuaConfig.weights.add("tm_pt_3 1");
 +    joshuaConfig.weights.add("tm_pt_4 1");
 +    joshuaConfig.weights.add("tm_pt_5 1");
 +    joshuaConfig.weights.add("tm_glue_0 1");
 +    joshuaConfig.weights.add("OOVPenalty 2");
 +    decoder = new Decoder(joshuaConfig, ""); // second argument (configFile
 +                                             // is not even used by the
 +                                             // constructor/initialize)
 +  }
 +
 +  @After
 +  public void tearDown() throws Exception {
 +    decoder.cleanUp();
 +    decoder = null;
 +    translation = null;
 +  }
 +
 +  private Translation decode(String input) {
 +    Sentence sentence = new Sentence(input, 0, joshuaConfig);
 +    return decoder.decode(sentence);
 +  }
 +
 +  @Test
 +  public void test() {
 +
 +    // test standard output
 +    joshuaConfig.use_structured_output = false;
 +    joshuaConfig.outputFormat = "%s | %a ";
 +    translation = decode(input);
 +    Assert.assertEquals(expectedTranslation + " | "
 +        + expectedWordAlignmentString, translation.toString().trim());
 +
 +    // test structured output
 +    joshuaConfig.use_structured_output = true; // set structured output creation to true
 +    translation = decode(input);
-     Assert
-         .assertEquals(expectedTranslation, translation.getStructuredTranslation().getTranslationString());
++    Assert.assertEquals(expectedTranslation, translation.getStructuredTranslations().get(0).getTranslationString());
 +    Assert.assertEquals(Arrays.asList(expectedTranslation.split("\\s+")),
-         translation.getStructuredTranslation().getTranslationTokens());
-     Assert.assertEquals(expectedScore, translation.getStructuredTranslation().getTranslationScore(),
++        translation.getStructuredTranslations().get(0).getTranslationTokens());
++    Assert.assertEquals(expectedScore, translation.getStructuredTranslations().get(0).getTranslationScore(),
 +        0.00001);
-     Assert.assertEquals(expectedWordAlignment, translation.getStructuredTranslation()
-         .getTranslationWordAlignments().get(0));
-     Assert.assertEquals(translation.getStructuredTranslation().getTranslationWordAlignments().size(), translation.
-         getStructuredTranslation().getTranslationTokens().size());
- 
++    Assert.assertEquals(expectedWordAlignment, translation.getStructuredTranslations().get(0).getTranslationWordAlignments());
++    Assert.assertEquals(translation.getStructuredTranslations().get(0).getTranslationWordAlignments().size(), translation
++        .getStructuredTranslations().get(0).getTranslationTokens().size());
 +  }
- 
 +}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/91400fe2/src/test/java/org/apache/joshua/system/StructuredTranslationTest.java
----------------------------------------------------------------------
diff --cc src/test/java/org/apache/joshua/system/StructuredTranslationTest.java
index 1cab690,0000000..a78a4a1
mode 100644,000000..100644
--- a/src/test/java/org/apache/joshua/system/StructuredTranslationTest.java
+++ b/src/test/java/org/apache/joshua/system/StructuredTranslationTest.java
@@@ -1,217 -1,0 +1,272 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.joshua.system;
 +
 +import static java.util.Arrays.asList;
- import static org.apache.joshua.decoder.ff.FeatureVector.DENSE_FEATURE_NAMES;
 +import static org.junit.Assert.assertEquals;
 +import static org.junit.Assert.assertTrue;
 +
 +import java.util.HashMap;
 +import java.util.List;
 +import java.util.Map;
 +
- import org.apache.joshua.corpus.Vocabulary;
 +import org.apache.joshua.decoder.Decoder;
 +import org.apache.joshua.decoder.JoshuaConfiguration;
 +import org.apache.joshua.decoder.StructuredTranslation;
 +import org.apache.joshua.decoder.Translation;
- import org.apache.joshua.decoder.ff.FeatureVector;
 +import org.apache.joshua.decoder.segment_file.Sentence;
 +
 +import org.junit.After;
 +import org.junit.Before;
 +import org.junit.Test;
 +
 +/**
 + * Integration test for the complete Joshua decoder using a toy grammar that translates
 + * a bunch of capital letters to lowercase letters. Rules in the test grammar
 + * drop and generate additional words and simulate reordering of rules, so that
 + * proper extraction of word alignments and other information from the decoder
 + * can be tested.
 + * 
 + * @author fhieber
 + */
 +public class StructuredTranslationTest {
 +
 +  private JoshuaConfiguration joshuaConfig = null;
 +  private Decoder decoder = null;
 +  private static final String INPUT = "A K B1 U Z1 Z2 B2 C";
 +  private static final String EXPECTED_TRANSLATION = "a b n1 u z c1 k1 k2 k3 n1 n2 n3 c2";
 +  private static final List<String> EXPECTED_TRANSLATED_TOKENS = asList(EXPECTED_TRANSLATION.split("\\s+"));
 +  private static final String EXPECTED_WORD_ALIGNMENT_STRING = "0-0 2-1 6-1 3-3 4-4 5-4 7-5 1-6 1-7 1-8 7-12";
 +  private static final List<List<Integer>> EXPECTED_WORD_ALIGNMENT = asList(
 +      asList(0), asList(2, 6), asList(), asList(3),
 +      asList(4, 5), asList(7), asList(1),
 +      asList(1), asList(1), asList(), asList(),
 +      asList(), asList(7));
 +  private static final double EXPECTED_SCORE = -17.0;
 +  private static final Map<String,Float> EXPECTED_FEATURES = new HashMap<>();
++  private static final int EXPECTED_NBEST_LIST_SIZE = 8;
 +  static {
 +    EXPECTED_FEATURES.put("tm_glue_0", 1.0f);
 +    EXPECTED_FEATURES.put("tm_pt_0", -3.0f);
 +    EXPECTED_FEATURES.put("tm_pt_1", -3.0f);
 +    EXPECTED_FEATURES.put("tm_pt_2", -3.0f);
 +    EXPECTED_FEATURES.put("tm_pt_3", -3.0f);
 +    EXPECTED_FEATURES.put("tm_pt_4", -3.0f);
 +    EXPECTED_FEATURES.put("tm_pt_5", -3.0f);
 +    EXPECTED_FEATURES.put("OOV", 7.0f);
 +  }
 +
 +  @Before
 +  public void setUp() throws Exception {
 +    joshuaConfig = new JoshuaConfiguration();
 +    joshuaConfig.search_algorithm = "cky";
 +    joshuaConfig.mark_oovs = false;
 +    joshuaConfig.pop_limit = 100;
 +    joshuaConfig.use_unique_nbest = false;
 +    joshuaConfig.include_align_index = false;
 +    joshuaConfig.topN = 0;
 +    joshuaConfig.tms.add("thrax -owner pt -maxspan 20 -path resources/wa_grammar");
 +    joshuaConfig.tms.add("thrax -owner glue -maxspan -1 -path resources/grammar.glue");
 +    joshuaConfig.goal_symbol = "[GOAL]";
 +    joshuaConfig.default_non_terminal = "[X]";
 +    joshuaConfig.features.add("feature_function = OOVPenalty");
 +    joshuaConfig.weights.add("tm_pt_0 1");
 +    joshuaConfig.weights.add("tm_pt_1 1");
 +    joshuaConfig.weights.add("tm_pt_2 1");
 +    joshuaConfig.weights.add("tm_pt_3 1");
 +    joshuaConfig.weights.add("tm_pt_4 1");
 +    joshuaConfig.weights.add("tm_pt_5 1");
 +    joshuaConfig.weights.add("tm_glue_0 1");
 +    joshuaConfig.weights.add("OOVPenalty 1");
 +    decoder = new Decoder(joshuaConfig, ""); // second argument (configFile
 +                                             // is not even used by the
 +                                             // constructor/initialize)
 +  }
 +
 +  @After
 +  public void tearDown() throws Exception {
 +    decoder.cleanUp();
 +    decoder = null;
 +  }
 +
 +  private Translation decode(String input) {
 +    Sentence sentence = new Sentence(input, 0, joshuaConfig);
 +    return decoder.decode(sentence);
 +  }
 +  
 +  @Test
 +  public void givenInput_whenRegularOutputFormat_thenExpectedOutput() {
 +    // GIVEN
-     //joshuaConfig.construct_structured_output = false;
++    joshuaConfig.use_structured_output = false;
 +    joshuaConfig.outputFormat = "%s | %a ";
 +    
 +    // WHEN
 +    final String translation = decode(INPUT).toString().trim();
 +    
 +    // THEN
 +    assertEquals(EXPECTED_TRANSLATION + " | " + EXPECTED_WORD_ALIGNMENT_STRING, translation);
 +  }
 +  
 +  @Test
 +  public void givenInput_whenRegularOutputFormatWithTopN1_thenExpectedOutput() {
 +    // GIVEN
-     //joshuaConfig.construct_structured_output = false;
++    joshuaConfig.use_structured_output = false;
 +    joshuaConfig.outputFormat = "%s | %e | %a | %c";
 +    joshuaConfig.topN = 1;
 +    
 +    // WHEN
 +    final String translation = decode(INPUT).toString().trim();
 +    
 +    // THEN
 +    assertEquals(EXPECTED_TRANSLATION + " | " + INPUT + " | " + EXPECTED_WORD_ALIGNMENT_STRING + String.format(" | %.3f", EXPECTED_SCORE),
 +        translation);
 +  }
 +
 +  @Test
-   public void givenInput_whenStructuredOutputFormat_thenExpectedOutput() {
++  public void givenInput_whenStructuredOutputFormatWithTopN0_thenExpectedOutput() {
 +    // GIVEN
-     //joshuaConfig.construct_structured_output = true;
++    joshuaConfig.use_structured_output = true;
++    joshuaConfig.topN = 0;
++    
++    // WHEN
++    final Translation translation = decode(INPUT);
++    final StructuredTranslation structuredTranslation = translation.getStructuredTranslations().get(0);
++    final String translationString = structuredTranslation.getTranslationString();
++    final List<String> translatedTokens = structuredTranslation.getTranslationTokens();
++    final float translationScore = structuredTranslation.getTranslationScore();
++    final List<List<Integer>> wordAlignment = structuredTranslation.getTranslationWordAlignments();
++    final Map<String,Float> translationFeatures = structuredTranslation.getTranslationFeatures();
++    
++    // THEN
++    assertTrue(translation.getStructuredTranslations().size() == 1);
++    assertEquals(EXPECTED_TRANSLATION, translationString);
++    assertEquals(EXPECTED_TRANSLATED_TOKENS, translatedTokens);
++    assertEquals(EXPECTED_SCORE, translationScore, 0.00001);
++    assertEquals(EXPECTED_WORD_ALIGNMENT, wordAlignment);
++    assertEquals(wordAlignment.size(), translatedTokens.size());
++    assertEquals(EXPECTED_FEATURES.entrySet(), translationFeatures.entrySet());
++  }
++  
++  @Test
++  public void givenInput_whenStructuredOutputFormatWithTopN1_thenExpectedOutput() {
++    // GIVEN
++    joshuaConfig.use_structured_output = true;
++    joshuaConfig.topN = 1;
 +    
 +    // WHEN
-     final StructuredTranslation translation = decode(INPUT).getStructuredTranslation();
-     final String translationString = translation.getTranslationString();
-     final List<String> translatedTokens = translation.getTranslationTokens();
-     final float translationScore = translation.getTranslationScore();
-     final List<List<Integer>> wordAlignment = translation.getTranslationWordAlignments();
-     final Map<String,Float> translationFeatures = translation.getTranslationFeatures();
++    final Translation translation = decode(INPUT);
++    final List<StructuredTranslation> structuredTranslations = translation.getStructuredTranslations();
++    final StructuredTranslation structuredTranslation = structuredTranslations.get(0);
++    final String translationString = structuredTranslation.getTranslationString();
++    final List<String> translatedTokens = structuredTranslation.getTranslationTokens();
++    final float translationScore = structuredTranslation.getTranslationScore();
++    final List<List<Integer>> wordAlignment = structuredTranslation.getTranslationWordAlignments();
++    final Map<String,Float> translationFeatures = structuredTranslation.getTranslationFeatures();
 +    
 +    // THEN
++    assertTrue(structuredTranslations.size() == 1);
 +    assertEquals(EXPECTED_TRANSLATION, translationString);
 +    assertEquals(EXPECTED_TRANSLATED_TOKENS, translatedTokens);
 +    assertEquals(EXPECTED_SCORE, translationScore, 0.00001);
 +    assertEquals(EXPECTED_WORD_ALIGNMENT, wordAlignment);
 +    assertEquals(wordAlignment.size(), translatedTokens.size());
 +    assertEquals(EXPECTED_FEATURES.entrySet(), translationFeatures.entrySet());
 +  }
 +  
 +  @Test
++  public void givenInput_whenStructuredOutputFormatWithKBest_thenExpectedOutput() {
++    // GIVEN
++    joshuaConfig.use_structured_output = true;
++    joshuaConfig.topN = 100;
++    
++    // WHEN
++    final Translation translation = decode(INPUT);
++    final List<StructuredTranslation> structuredTranslations = translation.getStructuredTranslations();
++    final StructuredTranslation viterbiTranslation = structuredTranslations.get(0);
++    final StructuredTranslation lastKBest = structuredTranslations.get(structuredTranslations.size() - 1);
++    
++    // THEN
++    assertEquals(structuredTranslations.size(), EXPECTED_NBEST_LIST_SIZE);
++    assertTrue(structuredTranslations.size() > 1);
++    assertEquals(EXPECTED_TRANSLATION, viterbiTranslation.getTranslationString());
++    assertEquals(EXPECTED_TRANSLATED_TOKENS, viterbiTranslation.getTranslationTokens());
++    assertEquals(EXPECTED_SCORE, viterbiTranslation.getTranslationScore(), 0.00001);
++    assertEquals(EXPECTED_WORD_ALIGNMENT, viterbiTranslation.getTranslationWordAlignments());
++    assertEquals(EXPECTED_FEATURES.entrySet(), viterbiTranslation.getTranslationFeatures().entrySet());
++    // last entry in KBEST is all input words untranslated, should have 8 OOVs.
++    assertEquals(INPUT, lastKBest.getTranslationString());
++    assertEquals(-800.0, lastKBest.getTranslationFeatures().get("OOVPenalty"), 0.0001);
++    
++  }
++  
++  @Test
 +  public void givenEmptyInput_whenStructuredOutputFormat_thenEmptyOutput() {
 +    // GIVEN
-     //joshuaConfig.construct_structured_output = true;
++    joshuaConfig.use_structured_output = true;
 +    
 +    // WHEN
-     final StructuredTranslation translation = decode("").getStructuredTranslation();
-     final String translationString = translation.getTranslationString();
-     final List<String> translatedTokens = translation.getTranslationTokens();
-     final float translationScore = translation.getTranslationScore();
-     final List<List<Integer>> wordAlignment = translation.getTranslationWordAlignments();
++    final Translation translation = decode("");
++    final StructuredTranslation structuredTranslation = translation.getStructuredTranslations().get(0);
++    final String translationString = structuredTranslation.getTranslationString();
++    final List<String> translatedTokens = structuredTranslation.getTranslationTokens();
++    final float translationScore = structuredTranslation.getTranslationScore();
++    final List<List<Integer>> wordAlignment = structuredTranslation.getTranslationWordAlignments();
 +    
 +    // THEN
 +    assertEquals("", translationString);
 +    assertTrue(translatedTokens.isEmpty());
 +    assertEquals(0, translationScore, 0.00001);
 +    assertTrue(wordAlignment.isEmpty());
 +  }
 +  
 +  @Test
 +  public void givenOOVInput_whenStructuredOutputFormat_thenOOVOutput() {
 +    // GIVEN
-     //joshuaConfig.construct_structured_output = true;
++    joshuaConfig.use_structured_output = true;
 +    final String input = "gabarbl";
 +    
 +    // WHEN
-     final StructuredTranslation translation = decode(input).getStructuredTranslation();
-     final String translationString = translation.getTranslationString();
-     final List<String> translatedTokens = translation.getTranslationTokens();
-     final float translationScore = translation.getTranslationScore();
-     final List<List<Integer>> wordAlignment = translation.getTranslationWordAlignments();
++    final Translation translation = decode(input);
++    final StructuredTranslation structuredTranslation = translation.getStructuredTranslations().get(0);
++    final String translationString = structuredTranslation.getTranslationString();
++    final List<String> translatedTokens = structuredTranslation.getTranslationTokens();
++    final float translationScore = structuredTranslation.getTranslationScore();
++    final List<List<Integer>> wordAlignment = structuredTranslation.getTranslationWordAlignments();
 +    
 +    // THEN
 +    assertEquals(input, translationString);
 +    assertTrue(translatedTokens.contains(input));
 +    assertEquals(-99.0, translationScore, 0.00001);
 +    assertTrue(wordAlignment.contains(asList(0)));
 +  }
 +  
 +  @Test
 +  public void givenEmptyInput_whenRegularOutputFormat_thenNewlineOutput() {
 +    // GIVEN
-     //joshuaConfig.construct_structured_output = false;
++    joshuaConfig.use_structured_output = false;
 +    
 +    // WHEN
 +    final Translation translation = decode("");
 +    final String translationString = translation.toString();
 +    
 +    // THEN
 +    assertEquals("\n", translationString);
 +  }
 +
 +}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/91400fe2/src/test/java/org/apache/joshua/util/FormatUtilsTest.java
----------------------------------------------------------------------
diff --cc src/test/java/org/apache/joshua/util/FormatUtilsTest.java
index 51f22c6,0000000..84b418b
mode 100644,000000..100644
--- a/src/test/java/org/apache/joshua/util/FormatUtilsTest.java
+++ b/src/test/java/org/apache/joshua/util/FormatUtilsTest.java
@@@ -1,80 -1,0 +1,74 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 + package org.apache.joshua.util;
 +
 +import static org.apache.joshua.util.FormatUtils.cleanNonTerminal;
 +import static org.apache.joshua.util.FormatUtils.escapeSpecialSymbols;
 +import static org.apache.joshua.util.FormatUtils.isNonterminal;
 +import static org.apache.joshua.util.FormatUtils.ensureNonTerminalBrackets;
 +import static org.apache.joshua.util.FormatUtils.stripNonTerminalIndex;
 +import static org.apache.joshua.util.FormatUtils.unescapeSpecialSymbols;
 +import static org.junit.Assert.*;
 +
 +import org.junit.Test;
 +
 +public class FormatUtilsTest {
 +  
 +  @Test
 +  public void givenTokens_whenIsNonTerminal_thenTokensCorrectlyClassified() {
 +    assertTrue(isNonterminal("[X]"));
 +    assertTrue(isNonterminal("[X,1]"));
 +    assertFalse(isNonterminal("[]"));
 +    assertFalse(isNonterminal("[X)"));
 +  }
 +  
 +  @Test
 +  public void givenTokens_whenCleanNonTerminal_thenCorrectlyCleaned() {
 +    assertEquals(cleanNonTerminal("[GOAL]"), "GOAL");
 +    assertEquals(cleanNonTerminal("[X]"), "X");
 +    assertEquals(cleanNonTerminal("[X,1]"), "X");
 +    assertEquals(cleanNonTerminal("bla"), "bla");
 +    assertEquals(cleanNonTerminal("[bla"), "[bla");
 +  }
 +  
 +  @Test
 +  public void givenTokens_whenStripNonTerminalIndex_thenCorrectlyStripped() {
 +    assertEquals(stripNonTerminalIndex("[X,1]"), "[X]");
 +    assertEquals(stripNonTerminalIndex("[X,114]"), "[X]");
 +    assertEquals(stripNonTerminalIndex("[X,]"), "[X]");
 +    assertEquals(stripNonTerminalIndex("[X]"), "[X]");
 +    assertEquals(stripNonTerminalIndex("[X"), "[[X]");
 +  }
 +  
 +  @Test
 +  public void givenTokens_whenMarkup_thenCorrectMarkup() {
 +    assertEquals(ensureNonTerminalBrackets("X"), "[X]");
-     /*
-     assertEquals(markup("X", 1), "[X,1]");
-     assertEquals(markup("X", 15), "[X,15]");
-     assertEquals(markup("[X]", 1), "[X,1]");
-     assertEquals(markup("[X,1]", 4), "[X,4]");
-     */
 +  }
 +  
 +  @Test
 +  public void givenSpecialSymbols_whenEscapeSpecialSymbols_thenCorrectlyEscaped() {
 +    assertEquals(escapeSpecialSymbols("[ ] | ["), "-lsb- -rsb- -pipe- -lsb-");
 +  }
 +  
 +  @Test
 +  public void givenEscapedSpecialSymbols_whenUnEscapeSpecialSymbols_thenCorrectlyUnEscaped() {
 +    assertEquals(unescapeSpecialSymbols("-lsb- -rsb- -pipe- -lsb-"), "[ ] | [");
 +  }
 +
 +}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/91400fe2/src/test/resources/packed-grammar/test.sh
----------------------------------------------------------------------
diff --cc src/test/resources/packed-grammar/test.sh
index 9842f83,0000000..db58484
mode 100755,000000..100755
--- a/src/test/resources/packed-grammar/test.sh
+++ b/src/test/resources/packed-grammar/test.sh
@@@ -1,38 -1,0 +1,38 @@@
 +#!/bin/bash
 +#
 +# Licensed to the Apache Software Foundation (ASF) under one or more
 +# contributor license agreements.  See the NOTICE file distributed with
 +# this work for additional information regarding copyright ownership.
 +# The ASF licenses this file to You under the Apache License, Version 2.0
 +# (the "License"); you may not use this file except in compliance with
 +# the License.  You may obtain a copy of the License at
 +#
 +#     http://www.apache.org/licenses/LICENSE-2.0
 +#
 +# Unless required by applicable law or agreed to in writing, software
 +# distributed under the License is distributed on an "AS IS" BASIS,
 +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 +# See the License for the specific language governing permissions and
 +# limitations under the License.
 +#
 +set -u
 +
 +# pack the grammar
 +rm -rf grammar.packed
 +$JOSHUA/scripts/support/grammar-packer.pl -v -g grammar.gz -o grammar.packed 2> packer.log
 +
 +# generate the glue grammar
- java -Xmx2g -cp $JOSHUA/lib/args4j-2.0.29.jar:$JOSHUA/class joshua.decoder.ff.tm.CreateGlueGrammar -g grammar.packed > grammar.glue 2> glue.log
++$JOSHUA/scripts/support/create_glue_grammar.sh grammar.packed > grammar.glue 2> glue.log
 +
 +# decode
 +cat input.bn | $JOSHUA/bin/joshua-decoder -m 1g -threads 2 -c joshua.config > output 2> log
 +
 +diff -u output output.gold > diff
 +
 +if [ $? -eq 0 ]; then
 +	#rm -f packer.log diff log output.bleu output grammar.glue glue.log
 +	rm -rf grammar.packed
 +	exit 0
 +else
 +	exit 1
 +fi

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/91400fe2/src/test/resources/thrax/filtering/test-exact.sh
----------------------------------------------------------------------
diff --cc src/test/resources/thrax/filtering/test-exact.sh
index 44c7338,0000000..44c7338
mode 100755,000000..100644
--- a/src/test/resources/thrax/filtering/test-exact.sh
+++ b/src/test/resources/thrax/filtering/test-exact.sh

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/91400fe2/src/test/resources/thrax/filtering/test-fast.sh
----------------------------------------------------------------------
diff --cc src/test/resources/thrax/filtering/test-fast.sh
index 1c1bd4f,0000000..1c1bd4f
mode 100755,000000..100644
--- a/src/test/resources/thrax/filtering/test-fast.sh
+++ b/src/test/resources/thrax/filtering/test-fast.sh

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/91400fe2/src/test/resources/thrax/filtering/test-loose.sh
----------------------------------------------------------------------
diff --cc src/test/resources/thrax/filtering/test-loose.sh
index 8a3b759,0000000..8a3b759
mode 100755,000000..100644
--- a/src/test/resources/thrax/filtering/test-loose.sh
+++ b/src/test/resources/thrax/filtering/test-loose.sh