You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/09/15 13:44:13 UTC
[14/15] incubator-joshua git commit: Merge branch 'master' into 7
Merge branch 'master' into 7
Moved into proper location under joshua-core
# Conflicts:
# joshua-core/src/test/resources/decoder/num_translation_options/joshua.config.packed
# joshua-core/src/test/resources/decoder/num_translation_options/test.sh
# joshua-core/src/test/resources/decoder/oov-list/config
# joshua-core/src/test/resources/decoder/oov-list/test.sh
# joshua-core/src/test/resources/decoder/phrase/constrained/test.sh
# joshua-core/src/test/resources/decoder/phrase/decode/corpus.es
# joshua-core/src/test/resources/decoder/phrase/decode/lm.1.gz
# joshua-core/src/test/resources/decoder/phrase/decode/rules.1.gz
# joshua-core/src/test/resources/decoder/phrase/include-align-index/README
# joshua-core/src/test/resources/decoder/phrase/include-align-index/config
# joshua-core/src/test/resources/decoder/phrase/include-align-index/output.gold
# joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/README
# joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/corpus.es
# joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/lm.1.gz
# joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/output.gold
# joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/rules.1.gz
# joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/test.sh
# joshua-core/src/test/resources/decoder/rescoring/test.sh
# joshua-core/src/test/resources/decoder/segment-oovs/config
# joshua-core/src/test/resources/decoder/segment-oovs/input.txt
# joshua-core/src/test/resources/decoder/segment-oovs/output.expected
# joshua-core/src/test/resources/decoder/segment-oovs/test.sh
# joshua-core/src/test/resources/decoder/source-annotations/test.sh
# joshua-core/src/test/resources/decoder/target-bigram/out.gold
# joshua-core/src/test/resources/decoder/target-bigram/test.sh
# joshua-core/src/test/resources/decoder/too-long/output.gold
# joshua-core/src/test/resources/decoder/too-long/test.sh
# joshua-core/src/test/resources/decoder/tree-output/fragment-map.txt
# joshua-core/src/test/resources/decoder/tree-output/test.sh
# src/test/resources/decoder/num_translation_options/joshua-packed.config
# src/test/resources/decoder/num_translation_options/joshua.config.packed
# src/test/resources/decoder/oov-list/config
# src/test/resources/decoder/oov-list/joshua.config
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/93055fd5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/93055fd5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/93055fd5
Branch: refs/heads/7
Commit: 93055fd5692d068b3932f6ae20480f41d9fc8b91
Parents: 5f46639 5d69748
Author: Matt Post <po...@cs.jhu.edu>
Authored: Thu Sep 15 15:43:20 2016 +0200
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Thu Sep 15 15:43:20 2016 +0200
----------------------------------------------------------------------
.../org/apache/joshua/decoder/cky/NAryTest.java | 18 ++++
.../decoder/cky/NumTranslationOptionsTest.java | 106 +++++++++++++++++++
.../apache/joshua/decoder/cky/OOVListTest.java | 66 ++++++++++++
.../joshua/decoder/cky/RescoringTest.java | 67 ++++++++++++
.../decoder/cky/SourceAnnotationsTest.java | 70 ++++++++++++
.../apache/joshua/decoder/cky/TargetBigram.java | 75 +++++++++++++
.../apache/joshua/decoder/cky/TooLongTest.java | 86 +++++++++++++++
.../joshua/decoder/cky/TreeOutputTest.java | 65 ++++++++++++
.../decoder/cky/UniqueHypothesesTest.java | 74 +++++++++++++
.../joshua-packed.config | 30 ++++++
.../num_translation_options/joshua.config | 6 +-
.../output-no-dot-chart.gold | 4 +
.../num_translation_options/output-packed.gold | 4 +
.../decoder/num_translation_options/output.gold | 8 --
.../resources/decoder/oov-list/joshua.config | 31 ++++++
.../src/test/resources/decoder/oov-list/lm.gz | Bin 0 -> 2466496 bytes
.../test/resources/decoder/oov-list/output.gold | 6 +-
.../phrase/unique-hypotheses/joshua.config | 4 +-
.../decoder/phrase/unique-hypotheses/lm.1.gz | Bin 17 -> 2235 bytes
.../decoder/phrase/unique-hypotheses/rules.1.gz | Bin 20 -> 2998042 bytes
.../resources/decoder/rescoring/joshua.config | 8 +-
.../src/test/resources/decoder/rescoring/lm.gz | Bin 0 -> 2466496 bytes
.../resources/decoder/rescoring/output.gold | 24 ++---
.../decoder/source-annotations/joshua.config | 8 +-
.../resources/decoder/tree-output/joshua.config | 8 +-
.../resources/decoder/tree-output/output.gold | 10 +-
26 files changed, 733 insertions(+), 45 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/java/org/apache/joshua/decoder/cky/NAryTest.java
----------------------------------------------------------------------
diff --cc joshua-core/src/test/java/org/apache/joshua/decoder/cky/NAryTest.java
index 5440407,0000000..6a2071a
mode 100644,000000..100644
--- a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/NAryTest.java
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/NAryTest.java
@@@ -1,64 -1,0 +1,82 @@@
++/*
++ * Licensed to the Apache Software Foundation (ASF) under one
++ * or more contributor license agreements. See the NOTICE file
++ * distributed with this work for additional information
++ * regarding copyright ownership. The ASF licenses this file
++ * to you under the Apache License, Version 2.0 (the
++ * "License"); you may not use this file except in compliance
++ * with the License. You may obtain a copy of the License at
++ *
++ * http://www.apache.org/licenses/LICENSE-2.0
++ *
++ * Unless required by applicable law or agreed to in writing,
++ * software distributed under the License is distributed on an
++ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
++ * KIND, either express or implied. See the License for the
++ * specific language governing permissions and limitations
++ * under the License.
++ */
+package org.apache.joshua.decoder.cky;
+
+import static org.apache.joshua.decoder.cky.TestUtil.decodeList;
+import static org.apache.joshua.decoder.cky.TestUtil.loadStringsFromFile;
+import static org.testng.Assert.assertEquals;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+import java.util.List;
+
+import org.apache.joshua.decoder.Decoder;
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.util.io.KenLmTestUtil;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.Test;
+
+public class NAryTest {
+ private JoshuaConfiguration joshuaConfig;
+ private Decoder decoder;
+
+ @AfterMethod
+ public void tearDown() throws Exception {
+ if (decoder != null) {
+ decoder.cleanUp();
+ decoder = null;
+ }
+ }
+
+ @Test
+ public void givenInput_whenNAryDecoding_thenScoreAndTranslationCorrect() throws Exception {
+ // Given
+ List<String> inputStrings = loadStringsFromFile("src/test/resources/decoder/n-ary/input.txt");
+
+ // When
+ configureDecoder("src/test/resources/decoder/n-ary/joshua.config");
+ List<String> decodedStrings = decodeList(inputStrings, decoder, joshuaConfig);
+
+ // Then
+ List<String> goldStrings = loadStringsFromFile("src/test/resources/decoder/n-ary/output.gold");
+ assertEquals(decodedStrings, goldStrings);
+ }
+
+ public void configureDecoder(String pathToConfig) throws Exception {
+ joshuaConfig = new JoshuaConfiguration();
+ joshuaConfig.readConfigFile(pathToConfig);
+ KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig));
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/java/org/apache/joshua/decoder/cky/NumTranslationOptionsTest.java
----------------------------------------------------------------------
diff --cc joshua-core/src/test/java/org/apache/joshua/decoder/cky/NumTranslationOptionsTest.java
index 0000000,0000000..ec6f02d
new file mode 100644
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/NumTranslationOptionsTest.java
@@@ -1,0 -1,0 +1,106 @@@
++/*
++ * Licensed to the Apache Software Foundation (ASF) under one
++ * or more contributor license agreements. See the NOTICE file
++ * distributed with this work for additional information
++ * regarding copyright ownership. The ASF licenses this file
++ * to you under the Apache License, Version 2.0 (the
++ * "License"); you may not use this file except in compliance
++ * with the License. You may obtain a copy of the License at
++ *
++ * http://www.apache.org/licenses/LICENSE-2.0
++ *
++ * Unless required by applicable law or agreed to in writing,
++ * software distributed under the License is distributed on an
++ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
++ * KIND, either express or implied. See the License for the
++ * specific language governing permissions and limitations
++ * under the License.
++ */
++package org.apache.joshua.decoder.cky;
++
++import static org.apache.joshua.decoder.cky.TestUtil.decodeList;
++import static org.apache.joshua.decoder.cky.TestUtil.loadStringsFromFile;
++import static org.testng.Assert.assertEquals;
++
++import java.util.List;
++
++import org.apache.joshua.decoder.Decoder;
++import org.apache.joshua.decoder.JoshuaConfiguration;
++import org.apache.joshua.util.io.KenLmTestUtil;
++import org.testng.annotations.AfterMethod;
++import org.testng.annotations.Test;
++
++/**
++ * Tests that num_translation_options is enforced for hierarchical decoders
++ */
++public class NumTranslationOptionsTest {
++ private JoshuaConfiguration joshuaConfig;
++ private Decoder decoder;
++
++ @AfterMethod
++ public void tearDown() throws Exception {
++ if (decoder != null) {
++ decoder.cleanUp();
++ decoder = null;
++ }
++ }
++
++ @Test
++ public void givenInput_whenDecodingWithNumTranslationOptions3_thenScoreAndTranslationCorrect()
++ throws Exception {
++ // Given
++ List<String> inputStrings = loadStringsFromFile(
++ "src/test/resources/decoder/num_translation_options/input");
++
++ // When
++ configureDecoder("src/test/resources/decoder/num_translation_options/joshua.config", true);
++ List<String> decodedStrings = decodeList(inputStrings, decoder, joshuaConfig);
++
++ // Then
++ List<String> goldStrings = loadStringsFromFile(
++ "src/test/resources/decoder/num_translation_options/output.gold");
++ assertEquals(decodedStrings, goldStrings);
++ }
++
++ @Test
++ public void givenInput_whenDecodingWithNumTranslationOptions3AndNoDotChart_thenScoreAndTranslationCorrect()
++ throws Exception {
++ // Given
++ List<String> inputStrings = loadStringsFromFile(
++ "src/test/resources/decoder/num_translation_options/input");
++
++ // When
++ configureDecoder("src/test/resources/decoder/num_translation_options/joshua.config", false);
++ List<String> decodedStrings = decodeList(inputStrings, decoder, joshuaConfig);
++
++ // Then
++ List<String> goldStrings = loadStringsFromFile(
++ "src/test/resources/decoder/num_translation_options/output-no-dot-chart.gold");
++ assertEquals(decodedStrings, goldStrings);
++ }
++
++ @Test
++ public void givenInput_whenDecodingWithNumTranslationOptions3AndPacked_thenScoreAndTranslationCorrect()
++ throws Exception {
++ // Given
++ List<String> inputStrings = loadStringsFromFile(
++ "src/test/resources/decoder/num_translation_options/input");
++
++ // When
++ configureDecoder("src/test/resources/decoder/num_translation_options/joshua-packed.config",
++ true);
++ List<String> decodedStrings = decodeList(inputStrings, decoder, joshuaConfig);
++
++ // Then
++ List<String> goldStrings = loadStringsFromFile(
++ "src/test/resources/decoder/num_translation_options/output-packed.gold");
++ assertEquals(decodedStrings, goldStrings);
++ }
++
++ public void configureDecoder(String pathToConfig, boolean useDotChart) throws Exception {
++ joshuaConfig = new JoshuaConfiguration();
++ joshuaConfig.readConfigFile(pathToConfig);
++ joshuaConfig.use_dot_chart = useDotChart;
++ KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
++ }
++}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/java/org/apache/joshua/decoder/cky/OOVListTest.java
----------------------------------------------------------------------
diff --cc joshua-core/src/test/java/org/apache/joshua/decoder/cky/OOVListTest.java
index 0000000,0000000..29ec23e
new file mode 100644
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/OOVListTest.java
@@@ -1,0 -1,0 +1,66 @@@
++/*
++ * Licensed to the Apache Software Foundation (ASF) under one
++ * or more contributor license agreements. See the NOTICE file
++ * distributed with this work for additional information
++ * regarding copyright ownership. The ASF licenses this file
++ * to you under the Apache License, Version 2.0 (the
++ * "License"); you may not use this file except in compliance
++ * with the License. You may obtain a copy of the License at
++ *
++ * http://www.apache.org/licenses/LICENSE-2.0
++ *
++ * Unless required by applicable law or agreed to in writing,
++ * software distributed under the License is distributed on an
++ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
++ * KIND, either express or implied. See the License for the
++ * specific language governing permissions and limitations
++ * under the License.
++ */
++package org.apache.joshua.decoder.cky;
++
++import static org.apache.joshua.decoder.cky.TestUtil.decodeList;
++import static org.apache.joshua.decoder.cky.TestUtil.loadStringsFromFile;
++import static org.testng.Assert.assertEquals;
++
++import java.util.List;
++
++import org.apache.joshua.decoder.Decoder;
++import org.apache.joshua.decoder.JoshuaConfiguration;
++import org.apache.joshua.util.io.KenLmTestUtil;
++import org.testng.annotations.AfterMethod;
++import org.testng.annotations.Test;
++
++public class OOVListTest {
++ private JoshuaConfiguration joshuaConfig;
++ private Decoder decoder;
++
++ @AfterMethod
++ public void tearDown() throws Exception {
++ if (decoder != null) {
++ decoder.cleanUp();
++ decoder = null;
++ }
++ }
++
++ @Test
++ public void givenInput_whenDecodingWithOOVList_thenScoreAndTranslationCorrect() throws Exception {
++ // Given
++ List<String> inputStrings = loadStringsFromFile(
++ "src/test/resources/decoder/oov-list/input.txt");
++
++ // When
++ configureDecoder("src/test/resources/decoder/oov-list/joshua.config");
++ List<String> decodedStrings = decodeList(inputStrings, decoder, joshuaConfig);
++
++ // Then
++ List<String> goldStrings = loadStringsFromFile(
++ "src/test/resources/decoder/oov-list/output.gold");
++ assertEquals(decodedStrings, goldStrings);
++ }
++
++ public void configureDecoder(String pathToConfig) throws Exception {
++ joshuaConfig = new JoshuaConfiguration();
++ joshuaConfig.readConfigFile(pathToConfig);
++ KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
++ }
++}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/java/org/apache/joshua/decoder/cky/RescoringTest.java
----------------------------------------------------------------------
diff --cc joshua-core/src/test/java/org/apache/joshua/decoder/cky/RescoringTest.java
index 0000000,0000000..a12a47b
new file mode 100644
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/RescoringTest.java
@@@ -1,0 -1,0 +1,67 @@@
++/*
++ * Licensed to the Apache Software Foundation (ASF) under one
++ * or more contributor license agreements. See the NOTICE file
++ * distributed with this work for additional information
++ * regarding copyright ownership. The ASF licenses this file
++ * to you under the Apache License, Version 2.0 (the
++ * "License"); you may not use this file except in compliance
++ * with the License. You may obtain a copy of the License at
++ *
++ * http://www.apache.org/licenses/LICENSE-2.0
++ *
++ * Unless required by applicable law or agreed to in writing,
++ * software distributed under the License is distributed on an
++ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
++ * KIND, either express or implied. See the License for the
++ * specific language governing permissions and limitations
++ * under the License.
++ */
++package org.apache.joshua.decoder.cky;
++
++import static org.apache.joshua.decoder.cky.TestUtil.decodeList;
++import static org.apache.joshua.decoder.cky.TestUtil.loadStringsFromFile;
++import static org.testng.Assert.assertEquals;
++
++import java.util.List;
++
++import org.apache.joshua.decoder.Decoder;
++import org.apache.joshua.decoder.JoshuaConfiguration;
++import org.apache.joshua.util.io.KenLmTestUtil;
++import org.testng.annotations.AfterMethod;
++import org.testng.annotations.Test;
++
++public class RescoringTest {
++ private JoshuaConfiguration joshuaConfig;
++ private Decoder decoder;
++
++ @AfterMethod
++ public void tearDown() throws Exception {
++ if (decoder != null) {
++ decoder.cleanUp();
++ decoder = null;
++ }
++ }
++
++ @Test
++ public void givenInput_whenDecodingWithRescoring_thenScoreAndTranslationCorrect()
++ throws Exception {
++ // Given
++ List<String> inputStrings = loadStringsFromFile(
++ "src/test/resources/decoder/rescoring/input.txt");
++
++ // When
++ configureDecoder("src/test/resources/decoder/rescoring/joshua.config");
++ List<String> decodedStrings = decodeList(inputStrings, decoder, joshuaConfig);
++
++ // Then
++ List<String> goldStrings = loadStringsFromFile(
++ "src/test/resources/decoder/rescoring/output.gold");
++ assertEquals(decodedStrings, goldStrings);
++ }
++
++ public void configureDecoder(String pathToConfig) throws Exception {
++ joshuaConfig = new JoshuaConfiguration();
++ joshuaConfig.readConfigFile(pathToConfig);
++ KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
++ }
++}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/java/org/apache/joshua/decoder/cky/SourceAnnotationsTest.java
----------------------------------------------------------------------
diff --cc joshua-core/src/test/java/org/apache/joshua/decoder/cky/SourceAnnotationsTest.java
index 0000000,0000000..ce09506
new file mode 100644
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/SourceAnnotationsTest.java
@@@ -1,0 -1,0 +1,70 @@@
++/*
++ * Licensed to the Apache Software Foundation (ASF) under one
++ * or more contributor license agreements. See the NOTICE file
++ * distributed with this work for additional information
++ * regarding copyright ownership. The ASF licenses this file
++ * to you under the Apache License, Version 2.0 (the
++ * "License"); you may not use this file except in compliance
++ * with the License. You may obtain a copy of the License at
++ *
++ * http://www.apache.org/licenses/LICENSE-2.0
++ *
++ * Unless required by applicable law or agreed to in writing,
++ * software distributed under the License is distributed on an
++ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
++ * KIND, either express or implied. See the License for the
++ * specific language governing permissions and limitations
++ * under the License.
++ */
++package org.apache.joshua.decoder.cky;
++
++import static org.apache.joshua.decoder.cky.TestUtil.translate;
++import static org.testng.Assert.assertEquals;
++
++import org.apache.joshua.decoder.Decoder;
++import org.apache.joshua.decoder.JoshuaConfiguration;
++import org.apache.joshua.util.io.KenLmTestUtil;
++import org.testng.annotations.AfterMethod;
++import org.testng.annotations.Test;
++
++public class SourceAnnotationsTest {
++
++ private static final String INPUT = "mis[tag=ADJ;num=PL;class=OOV] amigos me llaman";
++ private static final String GOLD_WITHOUT_ANNOTATIONS = "my friends call me ||| tm_pt_0=-3.000 tm_glue_0=3.000 lm_0=-11.974 OOVPenalty=0.000 WordPenalty=-2.606 ||| -7.650";
++ private static final String GOLD_WITH_ANNOTATIONS = "my friends call me ||| tm_pt_0=-3.000 tm_glue_0=3.000 lm_0=-111.513 OOVPenalty=0.000 WordPenalty=-2.606 ||| -107.189";
++
++ private static final String JOSHUA_CONFIG_PATH = "src/test/resources/decoder/source-annotations/joshua.config";
++
++ private JoshuaConfiguration joshuaConfig;
++ private Decoder decoder;
++
++ @Test
++ public void givenInput_whenNotUsingSourceAnnotations_thenOutputCorrect() throws Exception {
++ setUp(false);
++ String output = translate(INPUT, decoder, joshuaConfig);
++ assertEquals(output.trim(), GOLD_WITHOUT_ANNOTATIONS);
++ }
++
++ @Test
++ public void givenInput_whenUsingSourceAnnotations_thenOutputCorrect() throws Exception {
++ setUp(true);
++ String output = translate(INPUT, decoder, joshuaConfig);
++ assertEquals(output.trim(), GOLD_WITH_ANNOTATIONS);
++ }
++
++ public void setUp(boolean sourceAnnotations) throws Exception {
++ joshuaConfig = new JoshuaConfiguration();
++ joshuaConfig.readConfigFile(JOSHUA_CONFIG_PATH);
++ joshuaConfig.source_annotations = sourceAnnotations;
++ KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
++ }
++
++ @AfterMethod
++ public void tearDown() throws Exception {
++ if (decoder != null) {
++ decoder.cleanUp();
++ decoder = null;
++ }
++ }
++
++}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/java/org/apache/joshua/decoder/cky/TargetBigram.java
----------------------------------------------------------------------
diff --cc joshua-core/src/test/java/org/apache/joshua/decoder/cky/TargetBigram.java
index 0000000,0000000..bce34ca
new file mode 100644
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/TargetBigram.java
@@@ -1,0 -1,0 +1,75 @@@
++/*
++ * Licensed to the Apache Software Foundation (ASF) under one
++ * or more contributor license agreements. See the NOTICE file
++ * distributed with this work for additional information
++ * regarding copyright ownership. The ASF licenses this file
++ * to you under the Apache License, Version 2.0 (the
++ * "License"); you may not use this file except in compliance
++ * with the License. You may obtain a copy of the License at
++ *
++ * http://www.apache.org/licenses/LICENSE-2.0
++ *
++ * Unless required by applicable law or agreed to in writing,
++ * software distributed under the License is distributed on an
++ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
++ * KIND, either express or implied. See the License for the
++ * specific language governing permissions and limitations
++ * under the License.
++ */
++package org.apache.joshua.decoder.cky;
++
++import static org.apache.joshua.decoder.cky.TestUtil.translate;
++import static org.testng.Assert.assertEquals;
++
++import org.apache.joshua.decoder.Decoder;
++import org.apache.joshua.decoder.JoshuaConfiguration;
++import org.testng.annotations.AfterMethod;
++import org.testng.annotations.Test;
++
++public class TargetBigram {
++
++ private static final String INPUT = "this is a test";
++ private static final String GOLD_TOPN2 = "this is a test ||| tm_glue_0=4.000 TargetBigram_<s>_this=1.000 TargetBigram_UNK_</s>=1.000 TargetBigram_UNK_UNK=1.000 TargetBigram_is_UNK=1.000 TargetBigram_this_is=1.000 ||| 0.000";
++ private static final String GOLD_TOPN3_THRESHOLD20 = "this is a test ||| tm_glue_0=4.000 TargetBigram_<s>_UNK=1.000 TargetBigram_UNK_</s>=1.000 TargetBigram_UNK_UNK=1.000 TargetBigram_UNK_a=1.000 TargetBigram_a_UNK=1.000 ||| 0.000";
++ private static final String GOLD_THRESHOLD10 = "this is a test ||| tm_glue_0=4.000 TargetBigram_<s>_UNK=1.000 TargetBigram_UNK_</s>=1.000 TargetBigram_UNK_is=1.000 TargetBigram_a_UNK=1.000 TargetBigram_is_a=1.000 ||| 0.000";
++
++ private static final String VOCAB_PATH = "src/test/resources/decoder/target-bigram/vocab";
++
++ private JoshuaConfiguration joshuaConfig;
++ private Decoder decoder;
++
++ @Test
++ public void givenInput_whenNotUsingSourceAnnotations_thenOutputCorrect() throws Exception {
++ setUp("TargetBigram -vocab " + VOCAB_PATH + " -top-n 2");
++ String output = translate(INPUT, decoder, joshuaConfig);
++ assertEquals(output.trim(), GOLD_TOPN2);
++ }
++
++ @Test
++ public void givenInput_whenUsingSourceAnnotations_thenOutputCorrect() throws Exception {
++ setUp("TargetBigram -vocab " + VOCAB_PATH + " -top-n 3 -threshold 20");
++ String output = translate(INPUT, decoder, joshuaConfig);
++ assertEquals(output.trim(), GOLD_TOPN3_THRESHOLD20);
++ }
++
++ @Test
++ public void givenInput_whenUsingSourceAnnotations_thenOutputCorrect2() throws Exception {
++ setUp("TargetBigram -vocab " + VOCAB_PATH + " -threshold 10");
++ String output = translate(INPUT, decoder, joshuaConfig);
++ assertEquals(output.trim(), GOLD_THRESHOLD10);
++ }
++
++ public void setUp(String featureFunction) throws Exception {
++ joshuaConfig = new JoshuaConfiguration();
++ joshuaConfig.features.add(featureFunction);
++ joshuaConfig.outputFormat = "%s ||| %f ||| %c";
++ decoder = new Decoder(joshuaConfig, "");
++ }
++
++ @AfterMethod
++ public void tearDown() throws Exception {
++ decoder.cleanUp();
++ decoder = null;
++ }
++
++}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/java/org/apache/joshua/decoder/cky/TooLongTest.java
----------------------------------------------------------------------
diff --cc joshua-core/src/test/java/org/apache/joshua/decoder/cky/TooLongTest.java
index 0000000,0000000..0d4f7ce
new file mode 100644
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/TooLongTest.java
@@@ -1,0 -1,0 +1,86 @@@
++/*
++ * Licensed to the Apache Software Foundation (ASF) under one
++ * or more contributor license agreements. See the NOTICE file
++ * distributed with this work for additional information
++ * regarding copyright ownership. The ASF licenses this file
++ * to you under the Apache License, Version 2.0 (the
++ * "License"); you may not use this file except in compliance
++ * with the License. You may obtain a copy of the License at
++ *
++ * http://www.apache.org/licenses/LICENSE-2.0
++ *
++ * Unless required by applicable law or agreed to in writing,
++ * software distributed under the License is distributed on an
++ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
++ * KIND, either express or implied. See the License for the
++ * specific language governing permissions and limitations
++ * under the License.
++ */
++package org.apache.joshua.decoder.cky;
++
++import static org.apache.joshua.decoder.cky.TestUtil.translate;
++import static org.testng.Assert.assertEquals;
++
++import org.apache.joshua.decoder.Decoder;
++import org.apache.joshua.decoder.JoshuaConfiguration;
++import org.testng.annotations.AfterMethod;
++import org.testng.annotations.Test;
++
++/**
++ * Ensures that the decoder trims inputs when and only when it should
++ */
++public class TooLongTest {
++ private static final String INPUT1 = "as kingfishers draw fire";
++ private static final String GOLD1 = "as kingfishers ||| tm_glue_0=2.000 ||| 0.000";
++ private static final String INPUT2 = "dragonflies draw flame";
++ private static final String GOLD2 = "dragonflies ||| tm_glue_0=1.000 ||| 0.000";
++ private static final String INPUT3 = "(((as tumbled over rim in roundy wells stones ring";
++ private static final String GOLD3 = "(((as tumbled over rim in roundy wells stones ||| tm_glue_0=8.000 ||| 0.000";
++ private static final String INPUT4 = "(((like each tucked string tells";
++ private static final String GOLD4 = "||| ||| 0.000";
++
++ private JoshuaConfiguration joshuaConfig;
++ private Decoder decoder;
++
++ @Test
++ public void givenInput_whenMaxLen2_thenOutputCorrect() throws Exception {
++ setUp(2, false);
++ String output = translate(INPUT1, decoder, joshuaConfig);
++ assertEquals(output.trim(), GOLD1);
++ }
++
++ @Test
++ public void givenInput_whenMaxLen1AndLatticeDecoding_thenOutputCorrect() throws Exception {
++ setUp(1, true);
++ String output = translate(INPUT2, decoder, joshuaConfig);
++ assertEquals(output.trim(), GOLD2);
++ }
++
++ @Test
++ public void givenInput_whenMaxLen8_thenOutputCorrect() throws Exception {
++ setUp(8, false);
++ String output = translate(INPUT3, decoder, joshuaConfig);
++ assertEquals(output.trim(), GOLD3);
++ }
++
++ @Test
++ public void givenInput_whenMaxLen3AndLatticeDecoding_thenOutputCorrect() throws Exception {
++ setUp(3, true);
++ String output = translate(INPUT4, decoder, joshuaConfig);
++ assertEquals(output.trim(), GOLD4);
++ }
++
++ public void setUp(int maxLen, boolean latticeDecoding) throws Exception {
++ joshuaConfig = new JoshuaConfiguration();
++ joshuaConfig.outputFormat = "%s ||| %f ||| %c";
++ joshuaConfig.maxlen = maxLen;
++ joshuaConfig.lattice_decoding = latticeDecoding;
++ decoder = new Decoder(joshuaConfig, "");
++ }
++
++ @AfterMethod
++ public void tearDown() throws Exception {
++ decoder.cleanUp();
++ decoder = null;
++ }
++}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/java/org/apache/joshua/decoder/cky/TreeOutputTest.java
----------------------------------------------------------------------
diff --cc joshua-core/src/test/java/org/apache/joshua/decoder/cky/TreeOutputTest.java
index 0000000,0000000..f5e1005
new file mode 100644
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/TreeOutputTest.java
@@@ -1,0 -1,0 +1,65 @@@
++/*
++ * Licensed to the Apache Software Foundation (ASF) under one
++ * or more contributor license agreements. See the NOTICE file
++ * distributed with this work for additional information
++ * regarding copyright ownership. The ASF licenses this file
++ * to you under the Apache License, Version 2.0 (the
++ * "License"); you may not use this file except in compliance
++ * with the License. You may obtain a copy of the License at
++ *
++ * http://www.apache.org/licenses/LICENSE-2.0
++ *
++ * Unless required by applicable law or agreed to in writing,
++ * software distributed under the License is distributed on an
++ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
++ * KIND, either express or implied. See the License for the
++ * specific language governing permissions and limitations
++ * under the License.
++ */
++package org.apache.joshua.decoder.cky;
++
++import static org.apache.joshua.decoder.cky.TestUtil.decodeList;
++import static org.apache.joshua.decoder.cky.TestUtil.loadStringsFromFile;
++import static org.testng.Assert.assertEquals;
++
++import java.util.List;
++
++import org.apache.joshua.decoder.Decoder;
++import org.apache.joshua.decoder.JoshuaConfiguration;
++import org.apache.joshua.util.io.KenLmTestUtil;
++import org.testng.annotations.AfterMethod;
++import org.testng.annotations.Test;
++
++public class TreeOutputTest {
++ private JoshuaConfiguration joshuaConfig;
++ private Decoder decoder;
++
++ @AfterMethod
++ public void tearDown() throws Exception {
++ if (decoder != null) {
++ decoder.cleanUp();
++ decoder = null;
++ }
++ }
++
++ @Test
++ public void givenInput_whenDecodingWithTreeOutput_thenOutputCorrect() throws Exception {
++ // Given
++ List<String> inputStrings = loadStringsFromFile("src/test/resources/decoder/tree-output/input");
++
++ // When
++ configureDecoder("src/test/resources/decoder/tree-output/joshua.config");
++ List<String> decodedStrings = decodeList(inputStrings, decoder, joshuaConfig);
++
++ // Then
++ List<String> goldStrings = loadStringsFromFile(
++ "src/test/resources/decoder/tree-output/output.gold");
++ assertEquals(decodedStrings, goldStrings);
++ }
++
++ public void configureDecoder(String pathToConfig) throws Exception {
++ joshuaConfig = new JoshuaConfiguration();
++ joshuaConfig.readConfigFile(pathToConfig);
++ KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
++ }
++}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/java/org/apache/joshua/decoder/cky/UniqueHypothesesTest.java
----------------------------------------------------------------------
diff --cc joshua-core/src/test/java/org/apache/joshua/decoder/cky/UniqueHypothesesTest.java
index 0000000,0000000..bf65c5e
new file mode 100644
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/UniqueHypothesesTest.java
@@@ -1,0 -1,0 +1,74 @@@
++/*
++ * Licensed to the Apache Software Foundation (ASF) under one
++ * or more contributor license agreements. See the NOTICE file
++ * distributed with this work for additional information
++ * regarding copyright ownership. The ASF licenses this file
++ * to you under the Apache License, Version 2.0 (the
++ * "License"); you may not use this file except in compliance
++ * with the License. You may obtain a copy of the License at
++ *
++ * http://www.apache.org/licenses/LICENSE-2.0
++ *
++ * Unless required by applicable law or agreed to in writing,
++ * software distributed under the License is distributed on an
++ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
++ * KIND, either express or implied. See the License for the
++ * specific language governing permissions and limitations
++ * under the License.
++ */
++package org.apache.joshua.decoder.cky;
++
++import static org.apache.joshua.decoder.cky.TestUtil.decodeList;
++import static org.testng.Assert.assertEquals;
++
++import java.util.Arrays;
++import java.util.HashSet;
++import java.util.List;
++import java.util.Set;
++
++import org.apache.joshua.decoder.Decoder;
++import org.apache.joshua.decoder.JoshuaConfiguration;
++import org.apache.joshua.util.io.KenLmTestUtil;
++import org.testng.annotations.AfterMethod;
++import org.testng.annotations.Test;
++
++/**
++ * Ensures that derivations are unique for the phrase-based decoder.
++ */
++public class UniqueHypothesesTest {
++
++ public static final String INPUT = "una estrategia republicana para obstaculizar la reelección de Obama";
++
++ private JoshuaConfiguration joshuaConfig = null;
++ private Decoder decoder = null;
++
++ @Test
++ public void givenInputSentence_whenDecodingWithUniqueHypotheses_thenAllHypothesesUnique()
++ throws Exception {
++ configureDecoder("src/test/resources/decoder/phrase/unique-hypotheses/joshua.config");
++ List<String> decodedStrings = decodeList(Arrays.asList(new String[] { INPUT }), decoder,
++ joshuaConfig);
++
++ assertEquals(decodedStrings.size(), 300);
++
++ // if all strings are unique then the set should have the same size as the
++ // list
++ Set<String> uniqueDecodedStrings = new HashSet<>(decodedStrings);
++ assertEquals(decodedStrings.size(), uniqueDecodedStrings.size());
++ }
++
++ public void configureDecoder(String pathToConfig) throws Exception {
++ joshuaConfig = new JoshuaConfiguration();
++ joshuaConfig.readConfigFile(pathToConfig);
++ KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
++ }
++
++ @AfterMethod
++ public void tearDown() throws Exception {
++ if (decoder != null) {
++ decoder.cleanUp();
++ decoder = null;
++ }
++ }
++
++}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/num_translation_options/joshua-packed.config
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/num_translation_options/joshua-packed.config
index 0000000,0000000..681c4d9
new file mode 100644
--- /dev/null
+++ b/joshua-core/src/test/resources/decoder/num_translation_options/joshua-packed.config
@@@ -1,0 -1,0 +1,30 @@@
++num_translation_options = 3
++
++lm = kenlm 5 false false 100 src/test/resources/decoder/num_translation_options/lm.gz
++
++tm = thrax pt 12 src/test/resources/decoder/num_translation_options/grammar.packed
++tm = thrax glue -1 src/test/resources/decoder/num_translation_options/glue-grammar
++
++mark_oovs = false
++
++default-non-terminal = X
++goalSymbol = GOAL
++
++#pruning config
++pop-limit = 100
++
++output-format = %c ||| %s ||| %f
++
++#nbest config
++use_unique_nbest = true
++top_n = 5
++
++feature-function = WordPenalty
++feature-function = OOVPenalty
++
++lm_0 1.2373676802179452
++
++tm_pt_0 1
++tm_glue_0 1
++WordPenalty 1
++OOVPenalty 1.0
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/num_translation_options/joshua.config
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/num_translation_options/joshua.config
index e37855c,0000000..88b0290
mode 100644,000000..100644
--- a/joshua-core/src/test/resources/decoder/num_translation_options/joshua.config
+++ b/joshua-core/src/test/resources/decoder/num_translation_options/joshua.config
@@@ -1,30 -1,0 +1,30 @@@
+num_translation_options = 3
+
- lm = kenlm 5 false false 100 lm.gz
++lm = kenlm 5 false false 100 src/test/resources/decoder/num_translation_options/lm.gz
+
- tm = thrax pt 12 grammar.gz
- tm = thrax glue -1 glue-grammar
++tm = thrax pt 12 src/test/resources/decoder/num_translation_options/grammar.gz
++tm = thrax glue -1 src/test/resources/decoder/num_translation_options/glue-grammar
+
+mark_oovs = false
+
+default-non-terminal = X
+goalSymbol = GOAL
+
+#pruning config
+pop-limit = 100
+
+output-format = %c ||| %s ||| %f
+
+#nbest config
+use_unique_nbest = true
+top_n = 5
+
+feature-function = WordPenalty
+feature-function = OOVPenalty
+
+lm_0 1.2373676802179452
+
+tm_pt_0 1
+tm_glue_0 1
+WordPenalty 1
+OOVPenalty 1.0
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/num_translation_options/output-no-dot-chart.gold
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/num_translation_options/output-no-dot-chart.gold
index 0000000,0000000..686122c
new file mode 100644
--- /dev/null
+++ b/joshua-core/src/test/resources/decoder/num_translation_options/output-no-dot-chart.gold
@@@ -1,0 -1,0 +1,4 @@@
++-19.196 ||| i like taco bell ||| tm_pt_0=4.000 tm_glue_0=1.000 lm_0=-17.449 WordPenalty=-2.606 OOVPenalty=0.000
++-19.733 ||| i love taco bell ||| tm_pt_0=5.000 tm_glue_0=1.000 lm_0=-18.690 WordPenalty=-2.606 OOVPenalty=0.000
++-22.883 ||| i appreciate taco bell ||| tm_pt_0=3.000 tm_glue_0=1.000 lm_0=-19.620 WordPenalty=-2.606 OOVPenalty=0.000
++-424.954 ||| yo quiero taco bell ||| tm_pt_0=0.000 tm_glue_0=4.000 lm_0=-21.293 WordPenalty=-2.606 OOVPenalty=-400.000
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/num_translation_options/output-packed.gold
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/num_translation_options/output-packed.gold
index 0000000,0000000..686122c
new file mode 100644
--- /dev/null
+++ b/joshua-core/src/test/resources/decoder/num_translation_options/output-packed.gold
@@@ -1,0 -1,0 +1,4 @@@
++-19.196 ||| i like taco bell ||| tm_pt_0=4.000 tm_glue_0=1.000 lm_0=-17.449 WordPenalty=-2.606 OOVPenalty=0.000
++-19.733 ||| i love taco bell ||| tm_pt_0=5.000 tm_glue_0=1.000 lm_0=-18.690 WordPenalty=-2.606 OOVPenalty=0.000
++-22.883 ||| i appreciate taco bell ||| tm_pt_0=3.000 tm_glue_0=1.000 lm_0=-19.620 WordPenalty=-2.606 OOVPenalty=0.000
++-424.954 ||| yo quiero taco bell ||| tm_pt_0=0.000 tm_glue_0=4.000 lm_0=-21.293 WordPenalty=-2.606 OOVPenalty=-400.000
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/num_translation_options/output.gold
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/num_translation_options/output.gold
index 4203822,0000000..686122c
mode 100644,000000..100644
--- a/joshua-core/src/test/resources/decoder/num_translation_options/output.gold
+++ b/joshua-core/src/test/resources/decoder/num_translation_options/output.gold
@@@ -1,12 -1,0 +1,4 @@@
+-19.196 ||| i like taco bell ||| tm_pt_0=4.000 tm_glue_0=1.000 lm_0=-17.449 WordPenalty=-2.606 OOVPenalty=0.000
+-19.733 ||| i love taco bell ||| tm_pt_0=5.000 tm_glue_0=1.000 lm_0=-18.690 WordPenalty=-2.606 OOVPenalty=0.000
+-22.883 ||| i appreciate taco bell ||| tm_pt_0=3.000 tm_glue_0=1.000 lm_0=-19.620 WordPenalty=-2.606 OOVPenalty=0.000
+-424.954 ||| yo quiero taco bell ||| tm_pt_0=0.000 tm_glue_0=4.000 lm_0=-21.293 WordPenalty=-2.606 OOVPenalty=-400.000
- -19.196 ||| i like taco bell ||| tm_pt_0=4.000 tm_glue_0=1.000 lm_0=-17.449 WordPenalty=-2.606 OOVPenalty=0.000
- -19.733 ||| i love taco bell ||| tm_pt_0=5.000 tm_glue_0=1.000 lm_0=-18.690 WordPenalty=-2.606 OOVPenalty=0.000
- -22.883 ||| i appreciate taco bell ||| tm_pt_0=3.000 tm_glue_0=1.000 lm_0=-19.620 WordPenalty=-2.606 OOVPenalty=0.000
- -424.954 ||| yo quiero taco bell ||| tm_pt_0=0.000 tm_glue_0=4.000 lm_0=-21.293 WordPenalty=-2.606 OOVPenalty=-400.000
- -19.196 ||| i like taco bell ||| tm_pt_0=4.000 tm_glue_0=1.000 lm_0=-17.449 WordPenalty=-2.606 OOVPenalty=0.000
- -19.733 ||| i love taco bell ||| tm_pt_0=5.000 tm_glue_0=1.000 lm_0=-18.690 WordPenalty=-2.606 OOVPenalty=0.000
- -22.883 ||| i appreciate taco bell ||| tm_pt_0=3.000 tm_glue_0=1.000 lm_0=-19.620 WordPenalty=-2.606 OOVPenalty=0.000
- -424.954 ||| yo quiero taco bell ||| tm_pt_0=0.000 tm_glue_0=4.000 lm_0=-21.293 WordPenalty=-2.606 OOVPenalty=-400.000
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/oov-list/joshua.config
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/oov-list/joshua.config
index 0000000,0000000..8809206
new file mode 100644
--- /dev/null
+++ b/joshua-core/src/test/resources/decoder/oov-list/joshua.config
@@@ -1,0 -1,0 +1,31 @@@
++lm = kenlm 5 false false 100 src/test/resources/decoder/oov-list/lm.gz
++
++tm = thrax phrase 20 src/test/resources/decoder/oov-list/grammar
++tm = thrax glue -1 src/test/resources/decoder/oov-list/glue-grammar
++
++mark_oovs = true
++
++default-non-terminal = X
++goalSymbol = GOAL
++
++#pruning config
++pop-limit = 100
++
++#nbest config
++use_unique_nbest = true
++use_tree_nbest = false
++top_n = 1
++
++oov-list = CD 0.0488752 JJ 0.186114 NN 0.291795 NNS 0.0894967 NP 0.117171 OOV 0.033015 VB 0.0313967 VBG 0.0404596 VBN 0.0317203
++
++output-format=%s ||| %f ||| %c
++
++feature-function = WordPenalty
++feature-function = OOVPenalty
++
++lm_0 1.2373676802179452
++
++tm_phrase_0 1
++tm_glue_0 1
++WordPenalty -3.6942747832593694
++OOVPenalty 1.0
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/oov-list/lm.gz
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/oov-list/lm.gz
index 0000000,0000000..a26335e
new file mode 100644
Binary files differ
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/oov-list/output.gold
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/oov-list/output.gold
index d911c52,0000000..ee44a51
mode 100644,000000..100644
--- a/joshua-core/src/test/resources/decoder/oov-list/output.gold
+++ b/joshua-core/src/test/resources/decoder/oov-list/output.gold
@@@ -1,3 -1,0 +1,3 @@@
- 0 ||| Goats eat cheese ||| tm_phrase_0=0.000 tm_glue_0=1.000 lm_0=-16.587 WordPenalty=-2.171 OOVPenalty=0.000 ||| -11.503
- 1 ||| i will go home ||| tm_phrase_0=0.000 tm_glue_0=1.000 lm_0=-12.155 WordPenalty=-2.606 OOVPenalty=0.000 ||| -4.414
- 2 ||| goets_OOV eet_OOV cheez_OOV ||| tm_phrase_0=0.000 tm_glue_0=1.000 lm_0=-17.700 WordPenalty=-2.171 OOVPenalty=-7.749 ||| -20.629
++Goats eat cheese ||| tm_phrase_0=0.000 tm_glue_0=1.000 lm_0=-16.587 WordPenalty=-2.171 OOVPenalty=0.000 ||| -11.503
++i will go home ||| tm_phrase_0=0.000 tm_glue_0=1.000 lm_0=-12.155 WordPenalty=-2.606 OOVPenalty=0.000 ||| -4.414
++goets_OOV eet_OOV cheez_OOV ||| tm_phrase_0=0.000 tm_glue_0=1.000 lm_0=-17.700 WordPenalty=-2.171 OOVPenalty=-7.749 ||| -20.629
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/joshua.config
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/joshua.config
index c35b267,0000000..7cef08e
mode 100644,000000..100644
--- a/joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/joshua.config
+++ b/joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/joshua.config
@@@ -1,23 -1,0 +1,23 @@@
- tm = moses pt 0 rules.1.gz
++tm = moses pt 0 src/test/resources/decoder/phrase/unique-hypotheses/rules.1.gz
+default-non-terminal = X
+goal-symbol = GOAL
- lm = kenlm 5 true false 100 lm.1.gz
++lm = kenlm 5 true false 100 src/test/resources/decoder/phrase/unique-hypotheses/lm.1.gz
+mark-oovs = false
+pop-limit = 100
+top-n = 300
+use-unique-nbest = true
+output-format = %s
+include-align-index = false
+feature-function = OOVPenalty
+feature-function = WordPenalty
+feature_function = Distortion
+feature_function = PhrasePenalty
+lm_0 1.0
+tm_pt_1 1.0
+tm_pt_3 1.0
+tm_pt_0 1.0
+tm_pt_2 1.0
+WordPenalty -2.844814
+OOVPenalty 1.0
+PhrasePenalty 1.0
+Distortion 1.0
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/lm.1.gz
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/lm.1.gz
index 3655f03,0000000..3f4c453
mode 120000,000000..100644
Binary files differ
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/rules.1.gz
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/rules.1.gz
index a6183d9,0000000..14466e9
mode 120000,000000..100644
Binary files differ
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/rescoring/joshua.config
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/rescoring/joshua.config
index 0e4a277,0000000..56efbfa
mode 100644,000000..100644
--- a/joshua-core/src/test/resources/decoder/rescoring/joshua.config
+++ b/joshua-core/src/test/resources/decoder/rescoring/joshua.config
@@@ -1,31 -1,0 +1,31 @@@
+rescore-forest = true
+rescore-forest-weight = 100
+
- lm = kenlm 5 false false 100 ../constrained/lm.gz
++lm = kenlm 5 false false 100 src/test/resources/decoder/rescoring/lm.gz
+
- tm = thrax pt 12 grammar.gz
- tm = thrax glue -1 glue-grammar
++tm = thrax pt 12 src/test/resources/decoder/rescoring/grammar.gz
++tm = thrax glue -1 src/test/resources/decoder/rescoring/glue-grammar
+
+mark-oovs = true
+
+default-non-terminal = X
+goalSymbol = GOAL
+
+#pruning config
+pop-limit = 100
+
- #output-format = %i %c %s
++output-format = %s ||| %f ||| %c
+
+#nbest config
+use_unique_nbest = true
+top_n = 2
+
+feature-function = WordPenalty
+feature-function = OOVPenalty
+
+lm_0 1.2373676802179452
+
+tm_pt_0 1
+tm_glue_0 1
+WordPenalty -1
+OOVPenalty 1.0
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/rescoring/lm.gz
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/rescoring/lm.gz
index 0000000,0000000..a26335e
new file mode 100644
Binary files differ
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/rescoring/output.gold
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/rescoring/output.gold
index 5d6600d,0000000..1ea4237
mode 100644,000000..100644
--- a/joshua-core/src/test/resources/decoder/rescoring/output.gold
+++ b/joshua-core/src/test/resources/decoder/rescoring/output.gold
@@@ -1,12 -1,0 +1,12 @@@
- 0 ||| the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
- 0 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
- 0 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
- 0 ||| the kid eated the cockroach ||| tm_pt_0=-15.000 tm_glue_0=5.000 lm_0=-20.053 WordPenalty=-3.040 OOVPenalty=0.000 ||| -31.773
- 0 ||| the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
- 0 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
- 1 ||| the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
- 1 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
- 1 ||| the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
- 1 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
- 1 ||| the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
- 1 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
++the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
++the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
++the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
++the kid eated the cockroach ||| tm_pt_0=-15.000 tm_glue_0=5.000 lm_0=-20.053 WordPenalty=-3.040 OOVPenalty=0.000 ||| -31.773
++the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
++the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
++the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
++the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
++the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
++the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
++the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
++the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/source-annotations/joshua.config
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/source-annotations/joshua.config
index ffd2f96,0000000..788505e
mode 100644,000000..100644
--- a/joshua-core/src/test/resources/decoder/source-annotations/joshua.config
+++ b/joshua-core/src/test/resources/decoder/source-annotations/joshua.config
@@@ -1,140 -1,0 +1,140 @@@
+# This file is a template for the Joshua pipeline; variables enclosed
+# in <angle-brackets> are substituted by the pipeline script as
+# appropriate. This file also serves to document Joshua's many
+# parameters.
+
+# These are the grammar file specifications. Joshua supports an
+# arbitrary number of grammar files, each specified on its own line
+# using the following format:
+#
+# tm = TYPE OWNER LIMIT FILE
+#
+# TYPE is "packed", "thrax", or "samt". The latter denotes the format
+# used in Zollmann and Venugopal's SAMT decoder
+# (http://www.cs.cmu.edu/~zollmann/samt/).
+#
+# OWNER is the "owner" of the rules in the grammar; this is used to
+# determine which set of phrasal features apply to the grammar's
+# rules. Having different owners allows different features to be
+# applied to different grammars, and for grammars to share features
+# across files.
+#
+# LIMIT is the maximum input span permitted for the application of
+# grammar rules found in the grammar file. A value of -1 implies no limit.
+#
+# FILE is the grammar file (or directory when using packed grammars).
+# The file can be compressed with gzip, which is determined by the
+# presence or absence of a ".gz" file extension.
+#
+# By a convention defined by Chiang (2007), the grammars are split
+# into two files: the main translation grammar containing all the
+# learned translation rules, and a glue grammar which supports
+# monotonic concatenation of hierarchical phrases. The glue grammar's
+# main distinction from the regular grammar is that the span limit
+# does not apply to it.
+
- tm = thrax pt 20 grammar
- tm = thrax glue -1 grammar.glue
++tm = thrax pt 20 src/test/resources/decoder/source-annotations/grammar
++tm = thrax glue -1 src/test/resources/decoder/source-annotations/grammar.glue
+
+# This symbol is used over unknown words in the source language
+
+default-non-terminal = X
+
+# This is the goal nonterminal, used to determine when a complete
+# parse is found. It should correspond to the root-level rules in the
+# glue grammar.
+
+goal-symbol = GOAL
+
+# Language model config.
+
+# Multiple language models are supported. For each language model,
+# create a line in the following format,
+#
+# lm = TYPE 5 false false 100 FILE
+#
+# where the six fields correspond to the following values:
+# - LM type: one of "kenlm", "berkeleylm", "javalm" (not recommended), or "none"
+# - LM order: the N of the N-gram language model
+# - whether to use left equivalent state (currently not supported)
+# - whether to use right equivalent state (currently not supported)
+# - the ceiling cost of any n-gram (currently ignored)
+# - LM file: the location of the language model file
+# You also need to add a weight for each language model below.
+
- lm = kenlm 5 false false 100 lm.kenlm
++lm = kenlm 5 false false 100 src/test/resources/decoder/source-annotations/lm.kenlm
+
+# The suffix _OOV is appended to unknown source-language words if this
+# is set to true.
+
+mark-oovs = true
+
+# The pop-limit for decoding. This determines how many hypotheses are
+# considered over each span of the input.
+
+pop-limit = 100
+
+# How many hypotheses to output
+
+top-n = 1
+
+# Whether those hypotheses should be distinct strings
+
+use-unique-nbest = true
+
+# This is the default format of the output printed to STDOUT. The variables that can be
+# substituted are:
+#
+# %i: the sentence number (0-indexed)
+# %s: the translated sentence
+# %t: the derivation tree
+# %f: the feature string
+# %c: the model cost
+
- output-format = %i ||| %s ||| %f ||| %c
++output-format = %s ||| %f ||| %c
+
+# When printing the trees (%t in 'output-format'), this controls whether the alignments
+# are also printed.
+
+include-align-index = false
+
+
+## Feature functions and weights.
+#
+# This is the location of the file containing model weights.
+#
+
+
+# For each language model line listed above, create a weight in the
+# following format: the keyword "lm", a 0-based index, and the weight.
+# lm_INDEX WEIGHT
+
+lm_0 1.0
+
+# The phrasal weights correspond to weights stored with each of the
+# grammar rules. The format is
+#
+# tm_OWNER_COLUMN WEIGHT
+#
+# where COLUMN denotes the 0-based order of the parameter in the
+# grammar file and WEIGHT is the corresponding weight. In the future,
+# we plan to add a sparse feature representation which will simplify
+# this.
+
+tm_pt_0 0.049141264495762726
+tm_glue_0 0.1663815584150378
+
+# The wordpenalty feature counts the number of words in each hypothesis.
+
+WordPenalty -1.5244636836685694
+
+# This feature counts the number of unknown words in the hypothesis.
+
+OOVPenalty 1
+
+# This feature weights paths through an input lattice. It is only activated
+# when decoding lattices.
+
+# And these are the feature functions to activate.
+feature_function = OOVPenalty
+feature_function = WordPenalty
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/tree-output/joshua.config
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/tree-output/joshua.config
index d68192d,0000000..7375cad
mode 100644,000000..100644
--- a/joshua-core/src/test/resources/decoder/tree-output/joshua.config
+++ b/joshua-core/src/test/resources/decoder/tree-output/joshua.config
@@@ -1,45 -1,0 +1,45 @@@
- lm = kenlm 5 false false 100 lm.gz
++lm = kenlm 5 false false 100 src/test/resources/decoder/tree-output/lm.gz
+
- tm = thrax pt 12 grammar.gz
- tm = thrax glue -1 glue-grammar
++tm = thrax pt 12 src/test/resources/decoder/tree-output/grammar.gz
++tm = thrax glue -1 src/test/resources/decoder/tree-output/glue-grammar
+
+mark_oovs = false
+
+default-non-terminal = X
+goalSymbol = GOAL
+
+#pruning config
+pop-limit = 100
+
- output-format = %i %t
++output-format = %t
+
+#nbest config
+use_unique_nbest = true
+top_n = 1
+
+feature-function = WordPenalty
+feature-function = OOVPenalty
+
+
+lm_0 1.2373676802179452
+
+tm_pt_0 -2.4497429277910214
+tm_pt_1 0.7224581556224123
+tm_pt_2 -0.31689069155153504
+tm_pt_3 0.33861043967238036
+tm_pt_4 0.03553113401320236
+tm_pt_5 0.19138972284064748
+tm_pt_6 0.3417994095521415
+tm_pt_7 -0.9936312455671283
+tm_pt_8 0.9070737587091975
+tm_pt_9 0.8202511858619419
+tm_pt_10 0.2593091306160006
+tm_pt_11 0.25597137004462134
+tm_pt_12 0.3538894647790496
+tm_pt_13 -0.36212061186692646
+tm_pt_14 -0.32923261148678096
+tm_pt_15 0.5524863522177359
+tm_pt_16 0.23451595442127693
+tm_glue_0 1
+WordPenalty -3.6942747832593694
+OOVPenalty 1.0
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/tree-output/output.gold
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/tree-output/output.gold
index 68a1610,0000000..abbeb79
mode 100644,000000..100644
--- a/joshua-core/src/test/resources/decoder/tree-output/output.gold
+++ b/joshua-core/src/test/resources/decoder/tree-output/output.gold
@@@ -1,5 -1,0 +1,5 @@@
- 0 (GOAL{0-5} (GOAL{0-4} (GOAL{0-3} (GOAL{0-1} <s>) (A{1-3} (B{1-2} foo) (C{2-3} bar))) (D{3-4} baz)) </s>)
- 1 ()
- 2 (GOAL{0-3} (GOAL{0-2} (GOAL{0-1} <s>) (D{1-2} baz)) </s>)
- 3 (GOAL{0-4} (GOAL{0-3} (GOAL{0-1} <s>) (S{1-3} I AM)) </s>)
- 4 (GOAL{0-4} <s> (NP\DT{1-3} right (NN{2-3} xslot)) </s>)
++(GOAL{0-5} (GOAL{0-4} (GOAL{0-3} (GOAL{0-1} <s>) (A{1-3} (B{1-2} foo) (C{2-3} bar))) (D{3-4} baz)) </s>)
++()
++(GOAL{0-3} (GOAL{0-2} (GOAL{0-1} <s>) (D{1-2} baz)) </s>)
++(GOAL{0-4} (GOAL{0-3} (GOAL{0-1} <s>) (S{1-3} I AM)) </s>)
++(GOAL{0-4} <s> (NP\DT{1-3} right (NN{2-3} xslot)) </s>)