You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/09/15 13:44:13 UTC

[14/15] incubator-joshua git commit: Merge branch 'master' into 7

Merge branch 'master' into 7

Moved into proper location under joshua-core

# Conflicts:
#	joshua-core/src/test/resources/decoder/num_translation_options/joshua.config.packed
#	joshua-core/src/test/resources/decoder/num_translation_options/test.sh
#	joshua-core/src/test/resources/decoder/oov-list/config
#	joshua-core/src/test/resources/decoder/oov-list/test.sh
#	joshua-core/src/test/resources/decoder/phrase/constrained/test.sh
#	joshua-core/src/test/resources/decoder/phrase/decode/corpus.es
#	joshua-core/src/test/resources/decoder/phrase/decode/lm.1.gz
#	joshua-core/src/test/resources/decoder/phrase/decode/rules.1.gz
#	joshua-core/src/test/resources/decoder/phrase/include-align-index/README
#	joshua-core/src/test/resources/decoder/phrase/include-align-index/config
#	joshua-core/src/test/resources/decoder/phrase/include-align-index/output.gold
#	joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/README
#	joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/corpus.es
#	joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/lm.1.gz
#	joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/output.gold
#	joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/rules.1.gz
#	joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/test.sh
#	joshua-core/src/test/resources/decoder/rescoring/test.sh
#	joshua-core/src/test/resources/decoder/segment-oovs/config
#	joshua-core/src/test/resources/decoder/segment-oovs/input.txt
#	joshua-core/src/test/resources/decoder/segment-oovs/output.expected
#	joshua-core/src/test/resources/decoder/segment-oovs/test.sh
#	joshua-core/src/test/resources/decoder/source-annotations/test.sh
#	joshua-core/src/test/resources/decoder/target-bigram/out.gold
#	joshua-core/src/test/resources/decoder/target-bigram/test.sh
#	joshua-core/src/test/resources/decoder/too-long/output.gold
#	joshua-core/src/test/resources/decoder/too-long/test.sh
#	joshua-core/src/test/resources/decoder/tree-output/fragment-map.txt
#	joshua-core/src/test/resources/decoder/tree-output/test.sh
#	src/test/resources/decoder/num_translation_options/joshua-packed.config
#	src/test/resources/decoder/num_translation_options/joshua.config.packed
#	src/test/resources/decoder/oov-list/config
#	src/test/resources/decoder/oov-list/joshua.config


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/93055fd5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/93055fd5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/93055fd5

Branch: refs/heads/7
Commit: 93055fd5692d068b3932f6ae20480f41d9fc8b91
Parents: 5f46639 5d69748
Author: Matt Post <po...@cs.jhu.edu>
Authored: Thu Sep 15 15:43:20 2016 +0200
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Thu Sep 15 15:43:20 2016 +0200

----------------------------------------------------------------------
 .../org/apache/joshua/decoder/cky/NAryTest.java |  18 ++++
 .../decoder/cky/NumTranslationOptionsTest.java  | 106 +++++++++++++++++++
 .../apache/joshua/decoder/cky/OOVListTest.java  |  66 ++++++++++++
 .../joshua/decoder/cky/RescoringTest.java       |  67 ++++++++++++
 .../decoder/cky/SourceAnnotationsTest.java      |  70 ++++++++++++
 .../apache/joshua/decoder/cky/TargetBigram.java |  75 +++++++++++++
 .../apache/joshua/decoder/cky/TooLongTest.java  |  86 +++++++++++++++
 .../joshua/decoder/cky/TreeOutputTest.java      |  65 ++++++++++++
 .../decoder/cky/UniqueHypothesesTest.java       |  74 +++++++++++++
 .../joshua-packed.config                        |  30 ++++++
 .../num_translation_options/joshua.config       |   6 +-
 .../output-no-dot-chart.gold                    |   4 +
 .../num_translation_options/output-packed.gold  |   4 +
 .../decoder/num_translation_options/output.gold |   8 --
 .../resources/decoder/oov-list/joshua.config    |  31 ++++++
 .../src/test/resources/decoder/oov-list/lm.gz   | Bin 0 -> 2466496 bytes
 .../test/resources/decoder/oov-list/output.gold |   6 +-
 .../phrase/unique-hypotheses/joshua.config      |   4 +-
 .../decoder/phrase/unique-hypotheses/lm.1.gz    | Bin 17 -> 2235 bytes
 .../decoder/phrase/unique-hypotheses/rules.1.gz | Bin 20 -> 2998042 bytes
 .../resources/decoder/rescoring/joshua.config   |   8 +-
 .../src/test/resources/decoder/rescoring/lm.gz  | Bin 0 -> 2466496 bytes
 .../resources/decoder/rescoring/output.gold     |  24 ++---
 .../decoder/source-annotations/joshua.config    |   8 +-
 .../resources/decoder/tree-output/joshua.config |   8 +-
 .../resources/decoder/tree-output/output.gold   |  10 +-
 26 files changed, 733 insertions(+), 45 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/java/org/apache/joshua/decoder/cky/NAryTest.java
----------------------------------------------------------------------
diff --cc joshua-core/src/test/java/org/apache/joshua/decoder/cky/NAryTest.java
index 5440407,0000000..6a2071a
mode 100644,000000..100644
--- a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/NAryTest.java
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/NAryTest.java
@@@ -1,64 -1,0 +1,82 @@@
++/*
++ * Licensed to the Apache Software Foundation (ASF) under one
++ * or more contributor license agreements.  See the NOTICE file
++ * distributed with this work for additional information
++ * regarding copyright ownership.  The ASF licenses this file
++ * to you under the Apache License, Version 2.0 (the
++ * "License"); you may not use this file except in compliance
++ * with the License.  You may obtain a copy of the License at
++ *
++ *  http://www.apache.org/licenses/LICENSE-2.0
++ *
++ * Unless required by applicable law or agreed to in writing,
++ * software distributed under the License is distributed on an
++ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
++ * KIND, either express or implied.  See the License for the
++ * specific language governing permissions and limitations
++ * under the License.
++ */
 +package org.apache.joshua.decoder.cky;
 +
 +import static org.apache.joshua.decoder.cky.TestUtil.decodeList;
 +import static org.apache.joshua.decoder.cky.TestUtil.loadStringsFromFile;
 +import static org.testng.Assert.assertEquals;
 +
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +import java.util.List;
 +
 +import org.apache.joshua.decoder.Decoder;
 +import org.apache.joshua.decoder.JoshuaConfiguration;
 +import org.apache.joshua.util.io.KenLmTestUtil;
 +import org.testng.annotations.AfterMethod;
 +import org.testng.annotations.Test;
 +
 +/**
 + * Decodes the lines of the n-ary input file with a decoder built from the n-ary
 + * configuration and asserts that the result equals the lines of the gold file.
 + */
 +public class NAryTest {
 +  private static final String INPUT_PATH = "src/test/resources/decoder/n-ary/input.txt";
 +  private static final String CONFIG_PATH = "src/test/resources/decoder/n-ary/joshua.config";
 +  private static final String GOLD_PATH = "src/test/resources/decoder/n-ary/output.gold";
 +
 +  private JoshuaConfiguration joshuaConfig;
 +  private Decoder decoder;
 +
 +  /**
 +   * Reads the given config file into a new configuration and creates the decoder
 +   * from it; the construction is wrapped in {@code KenLmTestUtil.Guard}.
 +   *
 +   * @param pathToConfig path of the Joshua config file to read
 +   */
 +  public void configureDecoder(String pathToConfig) throws Exception {
 +    joshuaConfig = new JoshuaConfiguration();
 +    joshuaConfig.readConfigFile(pathToConfig);
 +    KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig));
 +  }
 +
 +  @Test
 +  public void givenInput_whenNAryDecoding_thenScoreAndTranslationCorrect() throws Exception {
 +    // Given: the lines of the input file
 +    List<String> sourceLines = loadStringsFromFile(INPUT_PATH);
 +
 +    // When: they are decoded with the n-ary configuration
 +    configureDecoder(CONFIG_PATH);
 +    List<String> actual = decodeList(sourceLines, decoder, joshuaConfig);
 +
 +    // Then: the decoded list equals the gold list
 +    List<String> expected = loadStringsFromFile(GOLD_PATH);
 +    assertEquals(actual, expected);
 +  }
 +
 +  /** Cleans up the decoder after each test method, if one was created. */
 +  @AfterMethod
 +  public void tearDown() throws Exception {
 +    if (decoder == null) {
 +      return;
 +    }
 +    decoder.cleanUp();
 +    decoder = null;
 +  }
 +}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/java/org/apache/joshua/decoder/cky/NumTranslationOptionsTest.java
----------------------------------------------------------------------
diff --cc joshua-core/src/test/java/org/apache/joshua/decoder/cky/NumTranslationOptionsTest.java
index 0000000,0000000..ec6f02d
new file mode 100644
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/NumTranslationOptionsTest.java
@@@ -1,0 -1,0 +1,106 @@@
++/*
++ * Licensed to the Apache Software Foundation (ASF) under one
++ * or more contributor license agreements.  See the NOTICE file
++ * distributed with this work for additional information
++ * regarding copyright ownership.  The ASF licenses this file
++ * to you under the Apache License, Version 2.0 (the
++ * "License"); you may not use this file except in compliance
++ * with the License.  You may obtain a copy of the License at
++ *
++ *  http://www.apache.org/licenses/LICENSE-2.0
++ *
++ * Unless required by applicable law or agreed to in writing,
++ * software distributed under the License is distributed on an
++ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
++ * KIND, either express or implied.  See the License for the
++ * specific language governing permissions and limitations
++ * under the License.
++ */
++package org.apache.joshua.decoder.cky;
++
++import static org.apache.joshua.decoder.cky.TestUtil.decodeList;
++import static org.apache.joshua.decoder.cky.TestUtil.loadStringsFromFile;
++import static org.testng.Assert.assertEquals;
++
++import java.util.List;
++
++import org.apache.joshua.decoder.Decoder;
++import org.apache.joshua.decoder.JoshuaConfiguration;
++import org.apache.joshua.util.io.KenLmTestUtil;
++import org.testng.annotations.AfterMethod;
++import org.testng.annotations.Test;
++
++/**
++ * Tests that num_translation_options is enforced for hierarchical decoders.
++ *
++ * <p>Each test decodes the same input file with a different config file and/or
++ * {@code use_dot_chart} setting and compares the result with its own gold file.
++ */
++public class NumTranslationOptionsTest {
++  private static final String INPUT_PATH =
++      "src/test/resources/decoder/num_translation_options/input";
++
++  private JoshuaConfiguration joshuaConfig;
++  private Decoder decoder;
++
++  /** Cleans up the decoder after each test method, if one was created. */
++  @AfterMethod
++  public void tearDown() throws Exception {
++    if (decoder == null) {
++      return;
++    }
++    decoder.cleanUp();
++    decoder = null;
++  }
++
++  @Test
++  public void givenInput_whenDecodingWithNumTranslationOptions3_thenScoreAndTranslationCorrect()
++      throws Exception {
++    assertDecodingMatchesGold(
++        "src/test/resources/decoder/num_translation_options/joshua.config",
++        true,
++        "src/test/resources/decoder/num_translation_options/output.gold");
++  }
++
++  @Test
++  public void givenInput_whenDecodingWithNumTranslationOptions3AndNoDotChart_thenScoreAndTranslationCorrect()
++      throws Exception {
++    assertDecodingMatchesGold(
++        "src/test/resources/decoder/num_translation_options/joshua.config",
++        false,
++        "src/test/resources/decoder/num_translation_options/output-no-dot-chart.gold");
++  }
++
++  @Test
++  public void givenInput_whenDecodingWithNumTranslationOptions3AndPacked_thenScoreAndTranslationCorrect()
++      throws Exception {
++    assertDecodingMatchesGold(
++        "src/test/resources/decoder/num_translation_options/joshua-packed.config",
++        true,
++        "src/test/resources/decoder/num_translation_options/output-packed.gold");
++  }
++
++  /**
++   * Reads the given config file into a new configuration, overrides
++   * {@code use_dot_chart}, and creates the decoder inside {@code KenLmTestUtil.Guard}.
++   *
++   * @param pathToConfig path of the Joshua config file to read
++   * @param useDotChart value assigned to {@code use_dot_chart} after the file is read
++   */
++  public void configureDecoder(String pathToConfig, boolean useDotChart) throws Exception {
++    joshuaConfig = new JoshuaConfiguration();
++    joshuaConfig.readConfigFile(pathToConfig);
++    joshuaConfig.use_dot_chart = useDotChart;
++    KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
++  }
++
++  /**
++   * Shared given/when/then sequence: load the input, configure the decoder, decode,
++   * load the gold file, and compare the two lists.
++   */
++  private void assertDecodingMatchesGold(String configPath, boolean useDotChart, String goldPath)
++      throws Exception {
++    // Given
++    List<String> sourceLines = loadStringsFromFile(INPUT_PATH);
++
++    // When
++    configureDecoder(configPath, useDotChart);
++    List<String> actual = decodeList(sourceLines, decoder, joshuaConfig);
++
++    // Then
++    List<String> expected = loadStringsFromFile(goldPath);
++    assertEquals(actual, expected);
++  }
++}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/java/org/apache/joshua/decoder/cky/OOVListTest.java
----------------------------------------------------------------------
diff --cc joshua-core/src/test/java/org/apache/joshua/decoder/cky/OOVListTest.java
index 0000000,0000000..29ec23e
new file mode 100644
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/OOVListTest.java
@@@ -1,0 -1,0 +1,66 @@@
++/*
++ * Licensed to the Apache Software Foundation (ASF) under one
++ * or more contributor license agreements.  See the NOTICE file
++ * distributed with this work for additional information
++ * regarding copyright ownership.  The ASF licenses this file
++ * to you under the Apache License, Version 2.0 (the
++ * "License"); you may not use this file except in compliance
++ * with the License.  You may obtain a copy of the License at
++ *
++ *  http://www.apache.org/licenses/LICENSE-2.0
++ *
++ * Unless required by applicable law or agreed to in writing,
++ * software distributed under the License is distributed on an
++ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
++ * KIND, either express or implied.  See the License for the
++ * specific language governing permissions and limitations
++ * under the License.
++ */
++package org.apache.joshua.decoder.cky;
++
++import static org.apache.joshua.decoder.cky.TestUtil.decodeList;
++import static org.apache.joshua.decoder.cky.TestUtil.loadStringsFromFile;
++import static org.testng.Assert.assertEquals;
++
++import java.util.List;
++
++import org.apache.joshua.decoder.Decoder;
++import org.apache.joshua.decoder.JoshuaConfiguration;
++import org.apache.joshua.util.io.KenLmTestUtil;
++import org.testng.annotations.AfterMethod;
++import org.testng.annotations.Test;
++
++/**
++ * Decodes the lines of the oov-list input file with a decoder built from the oov-list
++ * configuration and asserts that the result equals the lines of the gold file.
++ */
++public class OOVListTest {
++  private static final String INPUT_PATH = "src/test/resources/decoder/oov-list/input.txt";
++  private static final String CONFIG_PATH = "src/test/resources/decoder/oov-list/joshua.config";
++  private static final String GOLD_PATH = "src/test/resources/decoder/oov-list/output.gold";
++
++  private JoshuaConfiguration joshuaConfig;
++  private Decoder decoder;
++
++  /**
++   * Reads the given config file into a new configuration and creates the decoder
++   * from it; the construction is wrapped in {@code KenLmTestUtil.Guard}.
++   *
++   * @param pathToConfig path of the Joshua config file to read
++   */
++  public void configureDecoder(String pathToConfig) throws Exception {
++    joshuaConfig = new JoshuaConfiguration();
++    joshuaConfig.readConfigFile(pathToConfig);
++    KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
++  }
++
++  @Test
++  public void givenInput_whenDecodingWithOOVList_thenScoreAndTranslationCorrect() throws Exception {
++    // Given: the lines of the input file
++    List<String> sourceLines = loadStringsFromFile(INPUT_PATH);
++
++    // When: they are decoded with the oov-list configuration
++    configureDecoder(CONFIG_PATH);
++    List<String> actual = decodeList(sourceLines, decoder, joshuaConfig);
++
++    // Then: the decoded list equals the gold list
++    List<String> expected = loadStringsFromFile(GOLD_PATH);
++    assertEquals(actual, expected);
++  }
++
++  /** Cleans up the decoder after each test method, if one was created. */
++  @AfterMethod
++  public void tearDown() throws Exception {
++    if (decoder == null) {
++      return;
++    }
++    decoder.cleanUp();
++    decoder = null;
++  }
++}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/java/org/apache/joshua/decoder/cky/RescoringTest.java
----------------------------------------------------------------------
diff --cc joshua-core/src/test/java/org/apache/joshua/decoder/cky/RescoringTest.java
index 0000000,0000000..a12a47b
new file mode 100644
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/RescoringTest.java
@@@ -1,0 -1,0 +1,67 @@@
++/*
++ * Licensed to the Apache Software Foundation (ASF) under one
++ * or more contributor license agreements.  See the NOTICE file
++ * distributed with this work for additional information
++ * regarding copyright ownership.  The ASF licenses this file
++ * to you under the Apache License, Version 2.0 (the
++ * "License"); you may not use this file except in compliance
++ * with the License.  You may obtain a copy of the License at
++ *
++ *  http://www.apache.org/licenses/LICENSE-2.0
++ *
++ * Unless required by applicable law or agreed to in writing,
++ * software distributed under the License is distributed on an
++ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
++ * KIND, either express or implied.  See the License for the
++ * specific language governing permissions and limitations
++ * under the License.
++ */
++package org.apache.joshua.decoder.cky;
++
++import static org.apache.joshua.decoder.cky.TestUtil.decodeList;
++import static org.apache.joshua.decoder.cky.TestUtil.loadStringsFromFile;
++import static org.testng.Assert.assertEquals;
++
++import java.util.List;
++
++import org.apache.joshua.decoder.Decoder;
++import org.apache.joshua.decoder.JoshuaConfiguration;
++import org.apache.joshua.util.io.KenLmTestUtil;
++import org.testng.annotations.AfterMethod;
++import org.testng.annotations.Test;
++
++/**
++ * Decodes the lines of the rescoring input file with a decoder built from the rescoring
++ * configuration and asserts that the result equals the lines of the gold file.
++ */
++public class RescoringTest {
++  private static final String INPUT_PATH = "src/test/resources/decoder/rescoring/input.txt";
++  private static final String CONFIG_PATH = "src/test/resources/decoder/rescoring/joshua.config";
++  private static final String GOLD_PATH = "src/test/resources/decoder/rescoring/output.gold";
++
++  private JoshuaConfiguration joshuaConfig;
++  private Decoder decoder;
++
++  /**
++   * Reads the given config file into a new configuration and creates the decoder
++   * from it; the construction is wrapped in {@code KenLmTestUtil.Guard}.
++   *
++   * @param pathToConfig path of the Joshua config file to read
++   */
++  public void configureDecoder(String pathToConfig) throws Exception {
++    joshuaConfig = new JoshuaConfiguration();
++    joshuaConfig.readConfigFile(pathToConfig);
++    KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
++  }
++
++  @Test
++  public void givenInput_whenDecodingWithRescoring_thenScoreAndTranslationCorrect()
++      throws Exception {
++    // Given: the lines of the input file
++    List<String> sourceLines = loadStringsFromFile(INPUT_PATH);
++
++    // When: they are decoded with the rescoring configuration
++    configureDecoder(CONFIG_PATH);
++    List<String> actual = decodeList(sourceLines, decoder, joshuaConfig);
++
++    // Then: the decoded list equals the gold list
++    List<String> expected = loadStringsFromFile(GOLD_PATH);
++    assertEquals(actual, expected);
++  }
++
++  /** Cleans up the decoder after each test method, if one was created. */
++  @AfterMethod
++  public void tearDown() throws Exception {
++    if (decoder == null) {
++      return;
++    }
++    decoder.cleanUp();
++    decoder = null;
++  }
++}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/java/org/apache/joshua/decoder/cky/SourceAnnotationsTest.java
----------------------------------------------------------------------
diff --cc joshua-core/src/test/java/org/apache/joshua/decoder/cky/SourceAnnotationsTest.java
index 0000000,0000000..ce09506
new file mode 100644
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/SourceAnnotationsTest.java
@@@ -1,0 -1,0 +1,70 @@@
++/*
++ * Licensed to the Apache Software Foundation (ASF) under one
++ * or more contributor license agreements.  See the NOTICE file
++ * distributed with this work for additional information
++ * regarding copyright ownership.  The ASF licenses this file
++ * to you under the Apache License, Version 2.0 (the
++ * "License"); you may not use this file except in compliance
++ * with the License.  You may obtain a copy of the License at
++ *
++ *  http://www.apache.org/licenses/LICENSE-2.0
++ *
++ * Unless required by applicable law or agreed to in writing,
++ * software distributed under the License is distributed on an
++ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
++ * KIND, either express or implied.  See the License for the
++ * specific language governing permissions and limitations
++ * under the License.
++ */
++package org.apache.joshua.decoder.cky;
++
++import static org.apache.joshua.decoder.cky.TestUtil.translate;
++import static org.testng.Assert.assertEquals;
++
++import org.apache.joshua.decoder.Decoder;
++import org.apache.joshua.decoder.JoshuaConfiguration;
++import org.apache.joshua.util.io.KenLmTestUtil;
++import org.testng.annotations.AfterMethod;
++import org.testng.annotations.Test;
++
++/**
++ * Translates one annotated input string with {@code source_annotations} switched off
++ * and on, and compares each trimmed output with its expected string.
++ */
++public class SourceAnnotationsTest {
++
++  private static final String JOSHUA_CONFIG_PATH = "src/test/resources/decoder/source-annotations/joshua.config";
++
++  private static final String INPUT = "mis[tag=ADJ;num=PL;class=OOV] amigos me llaman";
++  private static final String GOLD_WITHOUT_ANNOTATIONS = "my friends call me ||| tm_pt_0=-3.000 tm_glue_0=3.000 lm_0=-11.974 OOVPenalty=0.000 WordPenalty=-2.606 ||| -7.650";
++  private static final String GOLD_WITH_ANNOTATIONS = "my friends call me ||| tm_pt_0=-3.000 tm_glue_0=3.000 lm_0=-111.513 OOVPenalty=0.000 WordPenalty=-2.606 ||| -107.189";
++
++  private JoshuaConfiguration joshuaConfig;
++  private Decoder decoder;
++
++  /**
++   * Reads the config file into a new configuration, overrides
++   * {@code source_annotations}, and creates the decoder inside {@code KenLmTestUtil.Guard}.
++   *
++   * @param sourceAnnotations value assigned to {@code source_annotations} after the file is read
++   */
++  public void setUp(boolean sourceAnnotations) throws Exception {
++    joshuaConfig = new JoshuaConfiguration();
++    joshuaConfig.readConfigFile(JOSHUA_CONFIG_PATH);
++    joshuaConfig.source_annotations = sourceAnnotations;
++    KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
++  }
++
++  @Test
++  public void givenInput_whenNotUsingSourceAnnotations_thenOutputCorrect() throws Exception {
++    setUp(false);
++    String translation = translate(INPUT, decoder, joshuaConfig).trim();
++    assertEquals(translation, GOLD_WITHOUT_ANNOTATIONS);
++  }
++
++  @Test
++  public void givenInput_whenUsingSourceAnnotations_thenOutputCorrect() throws Exception {
++    setUp(true);
++    String translation = translate(INPUT, decoder, joshuaConfig).trim();
++    assertEquals(translation, GOLD_WITH_ANNOTATIONS);
++  }
++
++  /** Cleans up the decoder after each test method, if one was created. */
++  @AfterMethod
++  public void tearDown() throws Exception {
++    if (decoder == null) {
++      return;
++    }
++    decoder.cleanUp();
++    decoder = null;
++  }
++
++}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/java/org/apache/joshua/decoder/cky/TargetBigram.java
----------------------------------------------------------------------
diff --cc joshua-core/src/test/java/org/apache/joshua/decoder/cky/TargetBigram.java
index 0000000,0000000..bce34ca
new file mode 100644
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/TargetBigram.java
@@@ -1,0 -1,0 +1,75 @@@
++/*
++ * Licensed to the Apache Software Foundation (ASF) under one
++ * or more contributor license agreements.  See the NOTICE file
++ * distributed with this work for additional information
++ * regarding copyright ownership.  The ASF licenses this file
++ * to you under the Apache License, Version 2.0 (the
++ * "License"); you may not use this file except in compliance
++ * with the License.  You may obtain a copy of the License at
++ *
++ *  http://www.apache.org/licenses/LICENSE-2.0
++ *
++ * Unless required by applicable law or agreed to in writing,
++ * software distributed under the License is distributed on an
++ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
++ * KIND, either express or implied.  See the License for the
++ * specific language governing permissions and limitations
++ * under the License.
++ */
++package org.apache.joshua.decoder.cky;
++
++import static org.apache.joshua.decoder.cky.TestUtil.translate;
++import static org.testng.Assert.assertEquals;
++
++import org.apache.joshua.decoder.Decoder;
++import org.apache.joshua.decoder.JoshuaConfiguration;
++import org.testng.annotations.AfterMethod;
++import org.testng.annotations.Test;
++
++/**
++ * Translates a single input string with the {@code TargetBigram} feature function
++ * configured with different {@code -top-n} / {@code -threshold} options and compares
++ * each trimmed output with its expected string.
++ *
++ * <p>NOTE(review): unlike the sibling classes in this package, the class name has no
++ * {@code Test} suffix; confirm the build's test include pattern picks it up.
++ *
++ * <p>NOTE(review): the test method names mention source annotations although the bodies
++ * only vary TargetBigram options; this looks copied from SourceAnnotationsTest. The names
++ * are kept so that anything selecting tests by name keeps working.
++ */
++public class TargetBigram {
++
++  private static final String INPUT = "this is a test";
++  private static final String GOLD_TOPN2 = "this is a test ||| tm_glue_0=4.000 TargetBigram_<s>_this=1.000 TargetBigram_UNK_</s>=1.000 TargetBigram_UNK_UNK=1.000 TargetBigram_is_UNK=1.000 TargetBigram_this_is=1.000 ||| 0.000";
++  private static final String GOLD_TOPN3_THRESHOLD20 = "this is a test ||| tm_glue_0=4.000 TargetBigram_<s>_UNK=1.000 TargetBigram_UNK_</s>=1.000 TargetBigram_UNK_UNK=1.000 TargetBigram_UNK_a=1.000 TargetBigram_a_UNK=1.000 ||| 0.000";
++  private static final String GOLD_THRESHOLD10 = "this is a test ||| tm_glue_0=4.000 TargetBigram_<s>_UNK=1.000 TargetBigram_UNK_</s>=1.000 TargetBigram_UNK_is=1.000 TargetBigram_a_UNK=1.000 TargetBigram_is_a=1.000 ||| 0.000";
++
++  private static final String VOCAB_PATH = "src/test/resources/decoder/target-bigram/vocab";
++
++  private JoshuaConfiguration joshuaConfig;
++  private Decoder decoder;
++
++  /** Feature configured with {@code -top-n 2}. */
++  @Test
++  public void givenInput_whenNotUsingSourceAnnotations_thenOutputCorrect() throws Exception {
++    setUp("TargetBigram -vocab " + VOCAB_PATH + " -top-n 2");
++    String output = translate(INPUT, decoder, joshuaConfig);
++    assertEquals(output.trim(), GOLD_TOPN2);
++  }
++
++  /** Feature configured with {@code -top-n 3 -threshold 20}. */
++  @Test
++  public void givenInput_whenUsingSourceAnnotations_thenOutputCorrect() throws Exception {
++    setUp("TargetBigram -vocab " + VOCAB_PATH + " -top-n 3 -threshold 20");
++    String output = translate(INPUT, decoder, joshuaConfig);
++    assertEquals(output.trim(), GOLD_TOPN3_THRESHOLD20);
++  }
++
++  /** Feature configured with {@code -threshold 10}. */
++  @Test
++  public void givenInput_whenUsingSourceAnnotations_thenOutputCorrect2() throws Exception {
++    setUp("TargetBigram -vocab " + VOCAB_PATH + " -threshold 10");
++    String output = translate(INPUT, decoder, joshuaConfig);
++    assertEquals(output.trim(), GOLD_THRESHOLD10);
++  }
++
++  /**
++   * Creates a fresh configuration containing only the given feature line and the
++   * output format used by the gold strings, then builds the decoder from it.
++   *
++   * @param featureFunction feature line added to the configuration's feature list
++   */
++  public void setUp(String featureFunction) throws Exception {
++    joshuaConfig = new JoshuaConfiguration();
++    joshuaConfig.features.add(featureFunction);
++    joshuaConfig.outputFormat = "%s ||| %f ||| %c";
++    decoder = new Decoder(joshuaConfig, "");
++  }
++
++  /**
++   * Cleans up the decoder after each test method, if one was created.
++   *
++   * <p>The null check matters when {@code setUp} fails before the decoder is assigned:
++   * without it this method would throw a NullPointerException on top of the real failure.
++   */
++  @AfterMethod
++  public void tearDown() throws Exception {
++    if (decoder != null) {
++      decoder.cleanUp();
++      decoder = null;
++    }
++  }
++
++}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/java/org/apache/joshua/decoder/cky/TooLongTest.java
----------------------------------------------------------------------
diff --cc joshua-core/src/test/java/org/apache/joshua/decoder/cky/TooLongTest.java
index 0000000,0000000..0d4f7ce
new file mode 100644
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/TooLongTest.java
@@@ -1,0 -1,0 +1,86 @@@
++/*
++ * Licensed to the Apache Software Foundation (ASF) under one
++ * or more contributor license agreements.  See the NOTICE file
++ * distributed with this work for additional information
++ * regarding copyright ownership.  The ASF licenses this file
++ * to you under the Apache License, Version 2.0 (the
++ * "License"); you may not use this file except in compliance
++ * with the License.  You may obtain a copy of the License at
++ *
++ *  http://www.apache.org/licenses/LICENSE-2.0
++ *
++ * Unless required by applicable law or agreed to in writing,
++ * software distributed under the License is distributed on an
++ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
++ * KIND, either express or implied.  See the License for the
++ * specific language governing permissions and limitations
++ * under the License.
++ */
++package org.apache.joshua.decoder.cky;
++
++import static org.apache.joshua.decoder.cky.TestUtil.translate;
++import static org.testng.Assert.assertEquals;
++
++import org.apache.joshua.decoder.Decoder;
++import org.apache.joshua.decoder.JoshuaConfiguration;
++import org.testng.annotations.AfterMethod;
++import org.testng.annotations.Test;
++
++/**
++ * Ensures that the decoder trims inputs when and only when it should.
++ *
++ * <p>Each test builds a decoder with a specific {@code maxlen} and
++ * {@code lattice_decoding} setting, translates one input string, and compares the
++ * trimmed output with the expected string.
++ */
++public class TooLongTest {
++  private static final String INPUT1 = "as kingfishers draw fire";
++  private static final String GOLD1 = "as kingfishers ||| tm_glue_0=2.000 ||| 0.000";
++  private static final String INPUT2 = "dragonflies draw flame";
++  private static final String GOLD2 = "dragonflies ||| tm_glue_0=1.000 ||| 0.000";
++  private static final String INPUT3 = "(((as tumbled over rim in roundy wells stones ring";
++  private static final String GOLD3 = "(((as tumbled over rim in roundy wells stones ||| tm_glue_0=8.000 ||| 0.000";
++  private static final String INPUT4 = "(((like each tucked string tells";
++  private static final String GOLD4 = "|||  ||| 0.000";
++
++  private JoshuaConfiguration joshuaConfig;
++  private Decoder decoder;
++
++  @Test
++  public void givenInput_whenMaxLen2_thenOutputCorrect() throws Exception {
++    setUp(2, false);
++    String output = translate(INPUT1, decoder, joshuaConfig);
++    assertEquals(output.trim(), GOLD1);
++  }
++
++  @Test
++  public void givenInput_whenMaxLen1AndLatticeDecoding_thenOutputCorrect() throws Exception {
++    setUp(1, true);
++    String output = translate(INPUT2, decoder, joshuaConfig);
++    assertEquals(output.trim(), GOLD2);
++  }
++
++  @Test
++  public void givenInput_whenMaxLen8_thenOutputCorrect() throws Exception {
++    setUp(8, false);
++    String output = translate(INPUT3, decoder, joshuaConfig);
++    assertEquals(output.trim(), GOLD3);
++  }
++
++  @Test
++  public void givenInput_whenMaxLen3AndLatticeDecoding_thenOutputCorrect() throws Exception {
++    setUp(3, true);
++    String output = translate(INPUT4, decoder, joshuaConfig);
++    assertEquals(output.trim(), GOLD4);
++  }
++
++  /**
++   * Creates a fresh configuration with the output format used by the gold strings and
++   * the given length/lattice settings, then builds the decoder from it.
++   *
++   * @param maxLen value assigned to {@code maxlen}
++   * @param latticeDecoding value assigned to {@code lattice_decoding}
++   */
++  public void setUp(int maxLen, boolean latticeDecoding) throws Exception {
++    joshuaConfig = new JoshuaConfiguration();
++    joshuaConfig.outputFormat = "%s ||| %f ||| %c";
++    joshuaConfig.maxlen = maxLen;
++    joshuaConfig.lattice_decoding = latticeDecoding;
++    decoder = new Decoder(joshuaConfig, "");
++  }
++
++  /**
++   * Cleans up the decoder after each test method, if one was created.
++   *
++   * <p>The null check matters when {@code setUp} fails before the decoder is assigned:
++   * without it this method would throw a NullPointerException on top of the real failure.
++   */
++  @AfterMethod
++  public void tearDown() throws Exception {
++    if (decoder != null) {
++      decoder.cleanUp();
++      decoder = null;
++    }
++  }
++}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/java/org/apache/joshua/decoder/cky/TreeOutputTest.java
----------------------------------------------------------------------
diff --cc joshua-core/src/test/java/org/apache/joshua/decoder/cky/TreeOutputTest.java
index 0000000,0000000..f5e1005
new file mode 100644
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/TreeOutputTest.java
@@@ -1,0 -1,0 +1,65 @@@
++/*
++ * Licensed to the Apache Software Foundation (ASF) under one
++ * or more contributor license agreements.  See the NOTICE file
++ * distributed with this work for additional information
++ * regarding copyright ownership.  The ASF licenses this file
++ * to you under the Apache License, Version 2.0 (the
++ * "License"); you may not use this file except in compliance
++ * with the License.  You may obtain a copy of the License at
++ *
++ *  http://www.apache.org/licenses/LICENSE-2.0
++ *
++ * Unless required by applicable law or agreed to in writing,
++ * software distributed under the License is distributed on an
++ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
++ * KIND, either express or implied.  See the License for the
++ * specific language governing permissions and limitations
++ * under the License.
++ */
++package org.apache.joshua.decoder.cky;
++
++import static org.apache.joshua.decoder.cky.TestUtil.decodeList;
++import static org.apache.joshua.decoder.cky.TestUtil.loadStringsFromFile;
++import static org.testng.Assert.assertEquals;
++
++import java.util.List;
++
++import org.apache.joshua.decoder.Decoder;
++import org.apache.joshua.decoder.JoshuaConfiguration;
++import org.apache.joshua.util.io.KenLmTestUtil;
++import org.testng.annotations.AfterMethod;
++import org.testng.annotations.Test;
++
++public class TreeOutputTest {
++  private JoshuaConfiguration joshuaConfig;
++  private Decoder decoder;
++
++  @AfterMethod
++  public void tearDown() throws Exception {
++    if (decoder != null) {
++      decoder.cleanUp();
++      decoder = null;
++    }
++  }
++
++  @Test
++  public void givenInput_whenDecodingWithTreeOutput_thenOutputCorrect() throws Exception {
++    // Given
++    List<String> inputStrings = loadStringsFromFile("src/test/resources/decoder/tree-output/input");
++
++    // When
++    configureDecoder("src/test/resources/decoder/tree-output/joshua.config");
++    List<String> decodedStrings = decodeList(inputStrings, decoder, joshuaConfig);
++
++    // Then
++    List<String> goldStrings = loadStringsFromFile(
++        "src/test/resources/decoder/tree-output/output.gold");
++    assertEquals(decodedStrings, goldStrings);
++  }
++
++  public void configureDecoder(String pathToConfig) throws Exception {
++    joshuaConfig = new JoshuaConfiguration();
++    joshuaConfig.readConfigFile(pathToConfig);
++    KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
++  }
++}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/java/org/apache/joshua/decoder/cky/UniqueHypothesesTest.java
----------------------------------------------------------------------
diff --cc joshua-core/src/test/java/org/apache/joshua/decoder/cky/UniqueHypothesesTest.java
index 0000000,0000000..bf65c5e
new file mode 100644
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/UniqueHypothesesTest.java
@@@ -1,0 -1,0 +1,74 @@@
++/*
++ * Licensed to the Apache Software Foundation (ASF) under one
++ * or more contributor license agreements.  See the NOTICE file
++ * distributed with this work for additional information
++ * regarding copyright ownership.  The ASF licenses this file
++ * to you under the Apache License, Version 2.0 (the
++ * "License"); you may not use this file except in compliance
++ * with the License.  You may obtain a copy of the License at
++ *
++ *  http://www.apache.org/licenses/LICENSE-2.0
++ *
++ * Unless required by applicable law or agreed to in writing,
++ * software distributed under the License is distributed on an
++ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
++ * KIND, either express or implied.  See the License for the
++ * specific language governing permissions and limitations
++ * under the License.
++ */
++package org.apache.joshua.decoder.cky;
++
++import static org.apache.joshua.decoder.cky.TestUtil.decodeList;
++import static org.testng.Assert.assertEquals;
++
++import java.util.Arrays;
++import java.util.HashSet;
++import java.util.List;
++import java.util.Set;
++
++import org.apache.joshua.decoder.Decoder;
++import org.apache.joshua.decoder.JoshuaConfiguration;
++import org.apache.joshua.util.io.KenLmTestUtil;
++import org.testng.annotations.AfterMethod;
++import org.testng.annotations.Test;
++
++/**
++ * Ensures that derivations are unique for the phrase-based decoder.
++ */
++public class UniqueHypothesesTest {
++
++  public static final String INPUT = "una estrategia republicana para obstaculizar la reelección de Obama";
++
++  private JoshuaConfiguration joshuaConfig = null;
++  private Decoder decoder = null;
++
++  @Test
++  public void givenInputSentence_whenDecodingWithUniqueHypotheses_thenAllHypothesesUnique()
++      throws Exception {
++    configureDecoder("src/test/resources/decoder/phrase/unique-hypotheses/joshua.config");
++    List<String> decodedStrings = decodeList(Arrays.asList(new String[] { INPUT }), decoder,
++        joshuaConfig);
++
++    assertEquals(decodedStrings.size(), 300);
++
++    // if all strings are unique then the set should have the same size as the
++    // list
++    Set<String> uniqueDecodedStrings = new HashSet<>(decodedStrings);
++    assertEquals(decodedStrings.size(), uniqueDecodedStrings.size());
++  }
++
++  public void configureDecoder(String pathToConfig) throws Exception {
++    joshuaConfig = new JoshuaConfiguration();
++    joshuaConfig.readConfigFile(pathToConfig);
++    KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
++  }
++
++  @AfterMethod
++  public void tearDown() throws Exception {
++    if (decoder != null) {
++      decoder.cleanUp();
++      decoder = null;
++    }
++  }
++
++}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/num_translation_options/joshua-packed.config
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/num_translation_options/joshua-packed.config
index 0000000,0000000..681c4d9
new file mode 100644
--- /dev/null
+++ b/joshua-core/src/test/resources/decoder/num_translation_options/joshua-packed.config
@@@ -1,0 -1,0 +1,30 @@@
++num_translation_options = 3
++
++lm = kenlm 5 false false 100 src/test/resources/decoder/num_translation_options/lm.gz
++
++tm = thrax pt 12 src/test/resources/decoder/num_translation_options/grammar.packed
++tm = thrax glue -1 src/test/resources/decoder/num_translation_options/glue-grammar
++
++mark_oovs = false
++
++default-non-terminal = X
++goalSymbol = GOAL
++
++#pruning config
++pop-limit = 100
++
++output-format = %c ||| %s ||| %f
++
++#nbest config
++use_unique_nbest = true
++top_n = 5
++
++feature-function = WordPenalty
++feature-function = OOVPenalty
++
++lm_0 1.2373676802179452
++
++tm_pt_0 1
++tm_glue_0 1
++WordPenalty 1
++OOVPenalty 1.0

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/num_translation_options/joshua.config
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/num_translation_options/joshua.config
index e37855c,0000000..88b0290
mode 100644,000000..100644
--- a/joshua-core/src/test/resources/decoder/num_translation_options/joshua.config
+++ b/joshua-core/src/test/resources/decoder/num_translation_options/joshua.config
@@@ -1,30 -1,0 +1,30 @@@
 +num_translation_options = 3
 +
- lm = kenlm 5 false false 100 lm.gz
++lm = kenlm 5 false false 100 src/test/resources/decoder/num_translation_options/lm.gz
 +
- tm = thrax pt 12 grammar.gz
- tm = thrax glue -1 glue-grammar
++tm = thrax pt 12 src/test/resources/decoder/num_translation_options/grammar.gz
++tm = thrax glue -1 src/test/resources/decoder/num_translation_options/glue-grammar
 +
 +mark_oovs = false
 +
 +default-non-terminal = X
 +goalSymbol = GOAL
 +
 +#pruning config
 +pop-limit = 100
 +
 +output-format = %c ||| %s ||| %f
 +
 +#nbest config
 +use_unique_nbest = true
 +top_n = 5
 +
 +feature-function = WordPenalty
 +feature-function = OOVPenalty
 +
 +lm_0 1.2373676802179452
 +
 +tm_pt_0 1
 +tm_glue_0 1
 +WordPenalty 1
 +OOVPenalty 1.0

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/num_translation_options/output-no-dot-chart.gold
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/num_translation_options/output-no-dot-chart.gold
index 0000000,0000000..686122c
new file mode 100644
--- /dev/null
+++ b/joshua-core/src/test/resources/decoder/num_translation_options/output-no-dot-chart.gold
@@@ -1,0 -1,0 +1,4 @@@
++-19.196 ||| i like taco bell ||| tm_pt_0=4.000 tm_glue_0=1.000 lm_0=-17.449 WordPenalty=-2.606 OOVPenalty=0.000
++-19.733 ||| i love taco bell ||| tm_pt_0=5.000 tm_glue_0=1.000 lm_0=-18.690 WordPenalty=-2.606 OOVPenalty=0.000
++-22.883 ||| i appreciate taco bell ||| tm_pt_0=3.000 tm_glue_0=1.000 lm_0=-19.620 WordPenalty=-2.606 OOVPenalty=0.000
++-424.954 ||| yo quiero taco bell ||| tm_pt_0=0.000 tm_glue_0=4.000 lm_0=-21.293 WordPenalty=-2.606 OOVPenalty=-400.000

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/num_translation_options/output-packed.gold
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/num_translation_options/output-packed.gold
index 0000000,0000000..686122c
new file mode 100644
--- /dev/null
+++ b/joshua-core/src/test/resources/decoder/num_translation_options/output-packed.gold
@@@ -1,0 -1,0 +1,4 @@@
++-19.196 ||| i like taco bell ||| tm_pt_0=4.000 tm_glue_0=1.000 lm_0=-17.449 WordPenalty=-2.606 OOVPenalty=0.000
++-19.733 ||| i love taco bell ||| tm_pt_0=5.000 tm_glue_0=1.000 lm_0=-18.690 WordPenalty=-2.606 OOVPenalty=0.000
++-22.883 ||| i appreciate taco bell ||| tm_pt_0=3.000 tm_glue_0=1.000 lm_0=-19.620 WordPenalty=-2.606 OOVPenalty=0.000
++-424.954 ||| yo quiero taco bell ||| tm_pt_0=0.000 tm_glue_0=4.000 lm_0=-21.293 WordPenalty=-2.606 OOVPenalty=-400.000

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/num_translation_options/output.gold
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/num_translation_options/output.gold
index 4203822,0000000..686122c
mode 100644,000000..100644
--- a/joshua-core/src/test/resources/decoder/num_translation_options/output.gold
+++ b/joshua-core/src/test/resources/decoder/num_translation_options/output.gold
@@@ -1,12 -1,0 +1,4 @@@
 +-19.196 ||| i like taco bell ||| tm_pt_0=4.000 tm_glue_0=1.000 lm_0=-17.449 WordPenalty=-2.606 OOVPenalty=0.000
 +-19.733 ||| i love taco bell ||| tm_pt_0=5.000 tm_glue_0=1.000 lm_0=-18.690 WordPenalty=-2.606 OOVPenalty=0.000
 +-22.883 ||| i appreciate taco bell ||| tm_pt_0=3.000 tm_glue_0=1.000 lm_0=-19.620 WordPenalty=-2.606 OOVPenalty=0.000
 +-424.954 ||| yo quiero taco bell ||| tm_pt_0=0.000 tm_glue_0=4.000 lm_0=-21.293 WordPenalty=-2.606 OOVPenalty=-400.000
- -19.196 ||| i like taco bell ||| tm_pt_0=4.000 tm_glue_0=1.000 lm_0=-17.449 WordPenalty=-2.606 OOVPenalty=0.000
- -19.733 ||| i love taco bell ||| tm_pt_0=5.000 tm_glue_0=1.000 lm_0=-18.690 WordPenalty=-2.606 OOVPenalty=0.000
- -22.883 ||| i appreciate taco bell ||| tm_pt_0=3.000 tm_glue_0=1.000 lm_0=-19.620 WordPenalty=-2.606 OOVPenalty=0.000
- -424.954 ||| yo quiero taco bell ||| tm_pt_0=0.000 tm_glue_0=4.000 lm_0=-21.293 WordPenalty=-2.606 OOVPenalty=-400.000
- -19.196 ||| i like taco bell ||| tm_pt_0=4.000 tm_glue_0=1.000 lm_0=-17.449 WordPenalty=-2.606 OOVPenalty=0.000
- -19.733 ||| i love taco bell ||| tm_pt_0=5.000 tm_glue_0=1.000 lm_0=-18.690 WordPenalty=-2.606 OOVPenalty=0.000
- -22.883 ||| i appreciate taco bell ||| tm_pt_0=3.000 tm_glue_0=1.000 lm_0=-19.620 WordPenalty=-2.606 OOVPenalty=0.000
- -424.954 ||| yo quiero taco bell ||| tm_pt_0=0.000 tm_glue_0=4.000 lm_0=-21.293 WordPenalty=-2.606 OOVPenalty=-400.000

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/oov-list/joshua.config
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/oov-list/joshua.config
index 0000000,0000000..8809206
new file mode 100644
--- /dev/null
+++ b/joshua-core/src/test/resources/decoder/oov-list/joshua.config
@@@ -1,0 -1,0 +1,31 @@@
++lm = kenlm 5 false false 100 src/test/resources/decoder/oov-list/lm.gz
++
++tm = thrax phrase 20 src/test/resources/decoder/oov-list/grammar
++tm = thrax glue -1 src/test/resources/decoder/oov-list/glue-grammar
++
++mark_oovs = true
++
++default-non-terminal = X
++goalSymbol = GOAL
++
++#pruning config
++pop-limit = 100
++
++#nbest config
++use_unique_nbest = true
++use_tree_nbest = false
++top_n = 1
++
++oov-list = CD 0.0488752 JJ 0.186114 NN 0.291795 NNS 0.0894967 NP 0.117171 OOV 0.033015 VB 0.0313967 VBG 0.0404596 VBN 0.0317203 
++
++output-format=%s ||| %f ||| %c
++
++feature-function = WordPenalty
++feature-function = OOVPenalty
++
++lm_0 1.2373676802179452
++
++tm_phrase_0 1
++tm_glue_0 1
++WordPenalty -3.6942747832593694
++OOVPenalty 1.0

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/oov-list/lm.gz
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/oov-list/lm.gz
index 0000000,0000000..a26335e
new file mode 100644
Binary files differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/oov-list/output.gold
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/oov-list/output.gold
index d911c52,0000000..ee44a51
mode 100644,000000..100644
--- a/joshua-core/src/test/resources/decoder/oov-list/output.gold
+++ b/joshua-core/src/test/resources/decoder/oov-list/output.gold
@@@ -1,3 -1,0 +1,3 @@@
- 0 ||| Goats eat cheese ||| tm_phrase_0=0.000 tm_glue_0=1.000 lm_0=-16.587 WordPenalty=-2.171 OOVPenalty=0.000 ||| -11.503
- 1 ||| i will go home ||| tm_phrase_0=0.000 tm_glue_0=1.000 lm_0=-12.155 WordPenalty=-2.606 OOVPenalty=0.000 ||| -4.414
- 2 ||| goets_OOV eet_OOV cheez_OOV ||| tm_phrase_0=0.000 tm_glue_0=1.000 lm_0=-17.700 WordPenalty=-2.171 OOVPenalty=-7.749 ||| -20.629
++Goats eat cheese ||| tm_phrase_0=0.000 tm_glue_0=1.000 lm_0=-16.587 WordPenalty=-2.171 OOVPenalty=0.000 ||| -11.503
++i will go home ||| tm_phrase_0=0.000 tm_glue_0=1.000 lm_0=-12.155 WordPenalty=-2.606 OOVPenalty=0.000 ||| -4.414
++goets_OOV eet_OOV cheez_OOV ||| tm_phrase_0=0.000 tm_glue_0=1.000 lm_0=-17.700 WordPenalty=-2.171 OOVPenalty=-7.749 ||| -20.629

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/joshua.config
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/joshua.config
index c35b267,0000000..7cef08e
mode 100644,000000..100644
--- a/joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/joshua.config
+++ b/joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/joshua.config
@@@ -1,23 -1,0 +1,23 @@@
- tm = moses pt 0 rules.1.gz
++tm = moses pt 0 src/test/resources/decoder/phrase/unique-hypotheses/rules.1.gz
 +default-non-terminal = X
 +goal-symbol = GOAL
- lm = kenlm 5 true false 100 lm.1.gz
++lm = kenlm 5 true false 100 src/test/resources/decoder/phrase/unique-hypotheses/lm.1.gz
 +mark-oovs = false
 +pop-limit = 100
 +top-n = 300
 +use-unique-nbest = true
 +output-format = %s
 +include-align-index = false
 +feature-function = OOVPenalty
 +feature-function = WordPenalty
 +feature_function = Distortion
 +feature_function = PhrasePenalty
 +lm_0 1.0
 +tm_pt_1 1.0
 +tm_pt_3 1.0
 +tm_pt_0 1.0
 +tm_pt_2 1.0
 +WordPenalty -2.844814
 +OOVPenalty 1.0
 +PhrasePenalty 1.0
 +Distortion 1.0

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/lm.1.gz
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/lm.1.gz
index 3655f03,0000000..3f4c453
mode 120000,000000..100644
Binary files differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/rules.1.gz
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/rules.1.gz
index a6183d9,0000000..14466e9
mode 120000,000000..100644
Binary files differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/rescoring/joshua.config
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/rescoring/joshua.config
index 0e4a277,0000000..56efbfa
mode 100644,000000..100644
--- a/joshua-core/src/test/resources/decoder/rescoring/joshua.config
+++ b/joshua-core/src/test/resources/decoder/rescoring/joshua.config
@@@ -1,31 -1,0 +1,31 @@@
 +rescore-forest = true
 +rescore-forest-weight = 100
 +
- lm = kenlm 5 false false 100 ../constrained/lm.gz
++lm = kenlm 5 false false 100 src/test/resources/decoder/rescoring/lm.gz
 +
- tm = thrax pt 12 grammar.gz
- tm = thrax glue -1 glue-grammar
++tm = thrax pt 12 src/test/resources/decoder/rescoring/grammar.gz
++tm = thrax glue -1 src/test/resources/decoder/rescoring/glue-grammar
 +
 +mark-oovs = true
 +
 +default-non-terminal = X
 +goalSymbol = GOAL
 +
 +#pruning config
 +pop-limit = 100
 +
- #output-format = %i %c %s
++output-format = %s ||| %f ||| %c
 +
 +#nbest config
 +use_unique_nbest = true
 +top_n = 2
 +
 +feature-function = WordPenalty
 +feature-function = OOVPenalty
 +
 +lm_0 1.2373676802179452
 +
 +tm_pt_0 1
 +tm_glue_0 1
 +WordPenalty -1
 +OOVPenalty 1.0

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/rescoring/lm.gz
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/rescoring/lm.gz
index 0000000,0000000..a26335e
new file mode 100644
Binary files differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/rescoring/output.gold
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/rescoring/output.gold
index 5d6600d,0000000..1ea4237
mode 100644,000000..100644
--- a/joshua-core/src/test/resources/decoder/rescoring/output.gold
+++ b/joshua-core/src/test/resources/decoder/rescoring/output.gold
@@@ -1,12 -1,0 +1,12 @@@
- 0 ||| the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
- 0 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
- 0 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
- 0 ||| the kid eated the cockroach ||| tm_pt_0=-15.000 tm_glue_0=5.000 lm_0=-20.053 WordPenalty=-3.040 OOVPenalty=0.000 ||| -31.773
- 0 ||| the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
- 0 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
- 1 ||| the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
- 1 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
- 1 ||| the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
- 1 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
- 1 ||| the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
- 1 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
++the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
++the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
++the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
++the kid eated the cockroach ||| tm_pt_0=-15.000 tm_glue_0=5.000 lm_0=-20.053 WordPenalty=-3.040 OOVPenalty=0.000 ||| -31.773
++the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
++the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
++the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
++the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
++the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
++the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
++the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
++the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/source-annotations/joshua.config
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/source-annotations/joshua.config
index ffd2f96,0000000..788505e
mode 100644,000000..100644
--- a/joshua-core/src/test/resources/decoder/source-annotations/joshua.config
+++ b/joshua-core/src/test/resources/decoder/source-annotations/joshua.config
@@@ -1,140 -1,0 +1,140 @@@
 +# This file is a template for the Joshua pipeline; variables enclosed
 +# in <angle-brackets> are substituted by the pipeline script as
 +# appropriate.  This file also serves to document Joshua's many
 +# parameters.
 +
 +# These are the grammar file specifications.  Joshua supports an
 +# arbitrary number of grammar files, each specified on its own line
 +# using the following format:
 +#
 +#   tm = TYPE OWNER LIMIT FILE
 +# 
 +# TYPE is "packed", "thrax", or "samt".  The latter denotes the format
 +# used in Zollmann and Venugopal's SAMT decoder
 +# (http://www.cs.cmu.edu/~zollmann/samt/).
 +# 
 +# OWNER is the "owner" of the rules in the grammar; this is used to
 +# determine which set of phrasal features apply to the grammar's
 +# rules.  Having different owners allows different features to be
 +# applied to different grammars, and for grammars to share features
 +# across files.
 +#
 +# LIMIT is the maximum input span permitted for the application of
 +# grammar rules found in the grammar file.  A value of -1 implies no limit.
 +#
 +# FILE is the grammar file (or directory when using packed grammars).
 +# The file can be compressed with gzip, which is determined by the
 +# presence or absence of a ".gz" file extension.
 +#
 +# By a convention defined by Chiang (2007), the grammars are split
 +# into two files: the main translation grammar containing all the
 +# learned translation rules, and a glue grammar which supports
 +# monotonic concatenation of hierarchical phrases. The glue grammar's
 +# main distinction from the regular grammar is that the span limit
 +# does not apply to it.  
 +
- tm = thrax pt 20 grammar
- tm = thrax glue -1 grammar.glue
++tm = thrax pt 20 src/test/resources/decoder/source-annotations/grammar
++tm = thrax glue -1 src/test/resources/decoder/source-annotations/grammar.glue
 +
 +# This symbol is used over unknown words in the source language
 +
 +default-non-terminal = X
 +
 +# This is the goal nonterminal, used to determine when a complete
 +# parse is found.  It should correspond to the root-level rules in the
 +# glue grammar.
 +
 +goal-symbol = GOAL
 +
 +# Language model config.
 +
 +# Multiple language models are supported.  For each language model,
 +# create a line in the following format, 
 +#
 +# lm = TYPE 5 false false 100 FILE
 +#
 +# where the six fields correspond to the following values:
 +# - LM type: one of "kenlm", "berkeleylm", "javalm" (not recommended), or "none"
 +# - LM order: the N of the N-gram language model
 +# - whether to use left equivalent state (currently not supported)
 +# - whether to use right equivalent state (currently not supported)
 +# - the ceiling cost of any n-gram (currently ignored)
 +# - LM file: the location of the language model file
 +# You also need to add a weight for each language model below.
 +
- lm = kenlm 5 false false 100 lm.kenlm
++lm = kenlm 5 false false 100 src/test/resources/decoder/source-annotations/lm.kenlm
 +
 +# The suffix _OOV is appended to unknown source-language words if this
 +# is set to true.
 +
 +mark-oovs = true
 +
 +# The pop-limit for decoding.  This determines how many hypotheses are
 +# considered over each span of the input.
 +
 +pop-limit = 100
 +
 +# How many hypotheses to output
 +
 +top-n = 1
 +
 +# Whether those hypotheses should be distinct strings
 +
 +use-unique-nbest = true
 +
 +# This is the default format of the output printed to STDOUT.  The variables that can be
 +# substituted are:
 +#
 +# %i: the sentence number (0-indexed)
 +# %s: the translated sentence
 +# %t: the derivation tree
 +# %f: the feature string
 +# %c: the model cost
 +
- output-format = %i ||| %s ||| %f ||| %c
++output-format = %s ||| %f ||| %c
 +
 +# When printing the trees (%t in 'output-format'), this controls whether the alignments
 +# are also printed.
 +
 +include-align-index = false
 +
 +
 +## Feature functions and weights.
 +#
 +# This is the location of the file containing model weights.
 +#
 +
 +
 +# For each language model line listed above, create a weight in the
 +# following format: the keyword "lm", a 0-based index, and the weight.
 +# lm_INDEX WEIGHT
 +
 +lm_0 1.0
 +
 +# The phrasal weights correspond to weights stored with each of the
 +# grammar rules.  The format is
 +#
 +#   tm_OWNER_COLUMN WEIGHT
 +#
 +# where COLUMN denotes the 0-based order of the parameter in the
 +# grammar file and WEIGHT is the corresponding weight.  In the future,
 +# we plan to add a sparse feature representation which will simplify
 +# this.
 +
 +tm_pt_0 0.049141264495762726
 +tm_glue_0 0.1663815584150378
 +
 +# The wordpenalty feature counts the number of words in each hypothesis.
 +
 +WordPenalty -1.5244636836685694
 +
 +# This feature counts the number of unknown words in the hypothesis.
 +
 +OOVPenalty 1
 +
 +# This feature weights paths through an input lattice.  It is only activated
 +# when decoding lattices.
 +
 +# And these are the feature functions to activate.
 +feature_function = OOVPenalty
 +feature_function = WordPenalty

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/tree-output/joshua.config
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/tree-output/joshua.config
index d68192d,0000000..7375cad
mode 100644,000000..100644
--- a/joshua-core/src/test/resources/decoder/tree-output/joshua.config
+++ b/joshua-core/src/test/resources/decoder/tree-output/joshua.config
@@@ -1,45 -1,0 +1,45 @@@
- lm = kenlm 5 false false 100 lm.gz
++lm = kenlm 5 false false 100 src/test/resources/decoder/tree-output/lm.gz
 +
- tm = thrax pt 12 grammar.gz
- tm = thrax glue -1 glue-grammar
++tm = thrax pt 12 src/test/resources/decoder/tree-output/grammar.gz
++tm = thrax glue -1 src/test/resources/decoder/tree-output/glue-grammar
 +
 +mark_oovs = false
 +
 +default-non-terminal = X
 +goalSymbol = GOAL
 +
 +#pruning config
 +pop-limit = 100
 +
- output-format = %i %t
++output-format = %t
 +
 +#nbest config
 +use_unique_nbest = true
 +top_n = 1
 +
 +feature-function = WordPenalty
 +feature-function = OOVPenalty
 +
 +
 +lm_0 1.2373676802179452
 +
 +tm_pt_0 -2.4497429277910214
 +tm_pt_1 0.7224581556224123
 +tm_pt_2 -0.31689069155153504
 +tm_pt_3 0.33861043967238036
 +tm_pt_4 0.03553113401320236
 +tm_pt_5 0.19138972284064748
 +tm_pt_6 0.3417994095521415
 +tm_pt_7 -0.9936312455671283
 +tm_pt_8 0.9070737587091975
 +tm_pt_9 0.8202511858619419
 +tm_pt_10 0.2593091306160006
 +tm_pt_11 0.25597137004462134
 +tm_pt_12 0.3538894647790496
 +tm_pt_13 -0.36212061186692646
 +tm_pt_14 -0.32923261148678096
 +tm_pt_15 0.5524863522177359
 +tm_pt_16 0.23451595442127693
 +tm_glue_0 1
 +WordPenalty -3.6942747832593694
 +OOVPenalty 1.0

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/tree-output/output.gold
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/tree-output/output.gold
index 68a1610,0000000..abbeb79
mode 100644,000000..100644
--- a/joshua-core/src/test/resources/decoder/tree-output/output.gold
+++ b/joshua-core/src/test/resources/decoder/tree-output/output.gold
@@@ -1,5 -1,0 +1,5 @@@
- 0 (GOAL{0-5} (GOAL{0-4} (GOAL{0-3} (GOAL{0-1} <s>) (A{1-3} (B{1-2} foo) (C{2-3} bar))) (D{3-4} baz)) </s>)
- 1 ()
- 2 (GOAL{0-3} (GOAL{0-2} (GOAL{0-1} <s>) (D{1-2} baz)) </s>)
- 3 (GOAL{0-4} (GOAL{0-3} (GOAL{0-1} <s>) (S{1-3} I AM)) </s>)
- 4 (GOAL{0-4} <s> (NP\DT{1-3} right (NN{2-3} xslot)) </s>)
++(GOAL{0-5} (GOAL{0-4} (GOAL{0-3} (GOAL{0-1} <s>) (A{1-3} (B{1-2} foo) (C{2-3} bar))) (D{3-4} baz)) </s>)
++()
++(GOAL{0-3} (GOAL{0-2} (GOAL{0-1} <s>) (D{1-2} baz)) </s>)
++(GOAL{0-4} (GOAL{0-3} (GOAL{0-1} <s>) (S{1-3} I AM)) </s>)
++(GOAL{0-4} <s> (NP\DT{1-3} right (NN{2-3} xslot)) </s>)