You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/09/15 13:44:00 UTC

[01/15] incubator-joshua git commit: Moved regression test decoder/target-bigram to unit test. Cleaned up the corresponding directory.

Repository: incubator-joshua
Updated Branches:
  refs/heads/7 5f46639d8 -> 7e7baaffc


Moved regression test decoder/target-bigram to unit test. Cleaned up the corresponding directory.


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/b9e6ffac
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/b9e6ffac
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/b9e6ffac

Branch: refs/heads/7
Commit: b9e6ffac00fa15906f6642107b7b8140e6465cfe
Parents: b429cc7
Author: Michael A. Hedderich <mi...@users.noreply.github.com>
Authored: Thu Sep 15 14:19:30 2016 +0200
Committer: Michael A. Hedderich <mi...@users.noreply.github.com>
Committed: Thu Sep 15 14:21:05 2016 +0200

----------------------------------------------------------------------
 .../apache/joshua/decoder/cky/TargetBigram.java | 75 ++++++++++++++++++++
 .../resources/decoder/target-bigram/out.gold    |  3 -
 .../resources/decoder/target-bigram/test.sh     | 32 ---------
 3 files changed, 75 insertions(+), 35 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b9e6ffac/src/test/java/org/apache/joshua/decoder/cky/TargetBigram.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/cky/TargetBigram.java b/src/test/java/org/apache/joshua/decoder/cky/TargetBigram.java
new file mode 100644
index 0000000..bce34ca
--- /dev/null
+++ b/src/test/java/org/apache/joshua/decoder/cky/TargetBigram.java
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.cky;
+
+import static org.apache.joshua.decoder.cky.TestUtil.translate;
+import static org.testng.Assert.assertEquals;
+
+import org.apache.joshua.decoder.Decoder;
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Unit tests for the {@code TargetBigram} feature function, ported from the former
+ * {@code decoder/target-bigram} shell regression test.
+ *
+ * <p>Every test adds a single {@code TargetBigram} feature-function line, with a different
+ * combination of {@code -top-n} / {@code -threshold} options, to a fresh configuration,
+ * translates the same input sentence and compares the output with the matching gold string.
+ */
+public class TargetBigram {
+
+  private static final String INPUT = "this is a test";
+  private static final String GOLD_TOPN2 = "this is a test ||| tm_glue_0=4.000 TargetBigram_<s>_this=1.000 TargetBigram_UNK_</s>=1.000 TargetBigram_UNK_UNK=1.000 TargetBigram_is_UNK=1.000 TargetBigram_this_is=1.000 ||| 0.000";
+  private static final String GOLD_TOPN3_THRESHOLD20 = "this is a test ||| tm_glue_0=4.000 TargetBigram_<s>_UNK=1.000 TargetBigram_UNK_</s>=1.000 TargetBigram_UNK_UNK=1.000 TargetBigram_UNK_a=1.000 TargetBigram_a_UNK=1.000 ||| 0.000";
+  private static final String GOLD_THRESHOLD10 = "this is a test ||| tm_glue_0=4.000 TargetBigram_<s>_UNK=1.000 TargetBigram_UNK_</s>=1.000 TargetBigram_UNK_is=1.000 TargetBigram_a_UNK=1.000 TargetBigram_is_a=1.000 ||| 0.000";
+
+  private static final String VOCAB_PATH = "src/test/resources/decoder/target-bigram/vocab";
+
+  private JoshuaConfiguration joshuaConfig;
+  private Decoder decoder;
+
+  // NOTE: the "SourceAnnotations" wording in the test names below is historical; the three tests
+  // differ only in the -top-n / -threshold options passed to the feature function.
+
+  /** Expects {@code GOLD_TOPN2} when the feature is configured with {@code -top-n 2}. */
+  @Test
+  public void givenInput_whenNotUsingSourceAnnotations_thenOutputCorrect() throws Exception {
+    setUp("TargetBigram -vocab " + VOCAB_PATH + " -top-n 2");
+    String output = translate(INPUT, decoder, joshuaConfig);
+    assertEquals(output.trim(), GOLD_TOPN2);
+  }
+
+  /** Expects {@code GOLD_TOPN3_THRESHOLD20} with {@code -top-n 3 -threshold 20}. */
+  @Test
+  public void givenInput_whenUsingSourceAnnotations_thenOutputCorrect() throws Exception {
+    setUp("TargetBigram -vocab " + VOCAB_PATH + " -top-n 3 -threshold 20");
+    String output = translate(INPUT, decoder, joshuaConfig);
+    assertEquals(output.trim(), GOLD_TOPN3_THRESHOLD20);
+  }
+
+  /** Expects {@code GOLD_THRESHOLD10} when the feature is configured with {@code -threshold 10}. */
+  @Test
+  public void givenInput_whenUsingSourceAnnotations_thenOutputCorrect2() throws Exception {
+    setUp("TargetBigram -vocab " + VOCAB_PATH + " -threshold 10");
+    String output = translate(INPUT, decoder, joshuaConfig);
+    assertEquals(output.trim(), GOLD_THRESHOLD10);
+  }
+
+  /**
+   * Builds a fresh configuration and decoder for a single test.
+   *
+   * @param featureFunction feature-function line added to the configuration, e.g.
+   *     {@code "TargetBigram -vocab <path> -top-n 2"}
+   * @throws Exception if building the decoder fails
+   */
+  public void setUp(String featureFunction) throws Exception {
+    joshuaConfig = new JoshuaConfiguration();
+    joshuaConfig.features.add(featureFunction);
+    // Same three-field layout as the gold strings above, separated by " ||| ".
+    joshuaConfig.outputFormat = "%s ||| %f ||| %c";
+    decoder = new Decoder(joshuaConfig, "");
+  }
+
+  /**
+   * Releases the decoder created by the test that just ran.
+   *
+   * <p>{@code setUp} is invoked from inside each test method, so {@code decoder} is still
+   * {@code null} here when its construction failed. The guard keeps this method from throwing a
+   * {@code NullPointerException} on top of the original failure.
+   */
+  @AfterMethod
+  public void tearDown() throws Exception {
+    if (decoder != null) {
+      decoder.cleanUp();
+      decoder = null;
+    }
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b9e6ffac/src/test/resources/decoder/target-bigram/out.gold
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/target-bigram/out.gold b/src/test/resources/decoder/target-bigram/out.gold
deleted file mode 100644
index 8d53a28..0000000
--- a/src/test/resources/decoder/target-bigram/out.gold
+++ /dev/null
@@ -1,3 +0,0 @@
-0 ||| this is a test ||| tm_glue_0=4.000 TargetBigram_<s>_this=1.000 TargetBigram_UNK_</s>=1.000 TargetBigram_UNK_UNK=1.000 TargetBigram_is_UNK=1.000 TargetBigram_this_is=1.000 ||| 0.000
-0 ||| this is a test ||| tm_glue_0=4.000 TargetBigram_<s>_UNK=1.000 TargetBigram_UNK_</s>=1.000 TargetBigram_UNK_UNK=1.000 TargetBigram_UNK_a=1.000 TargetBigram_a_UNK=1.000 ||| 0.000
-0 ||| this is a test ||| tm_glue_0=4.000 TargetBigram_<s>_UNK=1.000 TargetBigram_UNK_</s>=1.000 TargetBigram_UNK_is=1.000 TargetBigram_a_UNK=1.000 TargetBigram_is_a=1.000 ||| 0.000

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b9e6ffac/src/test/resources/decoder/target-bigram/test.sh
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/target-bigram/test.sh b/src/test/resources/decoder/target-bigram/test.sh
deleted file mode 100755
index 10679b7..0000000
--- a/src/test/resources/decoder/target-bigram/test.sh
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-(echo "this is a test" | $JOSHUA/bin/joshua-decoder -feature-function "TargetBigram -vocab vocab -top-n 2";
-echo "this is a test" | $JOSHUA/bin/joshua-decoder -feature-function "TargetBigram -vocab vocab -top-n 3 -threshold 20";
-echo "this is a test" | $JOSHUA/bin/joshua-decoder -feature-function "TargetBigram -vocab vocab -threshold 10") 2>log > out
-
-# Compare
-diff -u out out.gold > diff
-
-if [ $? -eq 0 ]; then
-  rm -f diff out log
-  exit 0
-else
-  exit 1
-fi
-
-


[06/15] incubator-joshua git commit: Moved regression test decoder/rescoring to unit test. Regenerated gold output and cleaned up the directory. Copied the lm from constrained to remove dependency to other test resource directory.

Posted by mj...@apache.org.
Moved regression test decoder/rescoring to unit test. Regenerated gold output and cleaned up the directory. Copied the lm from constrained to remove dependency to other test resource directory.


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/11b87c38
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/11b87c38
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/11b87c38

Branch: refs/heads/7
Commit: 11b87c38ccfcdbe967d46b1f06834a5c5355fed3
Parents: 931a67d
Author: Michael A. Hedderich <mi...@users.noreply.github.com>
Authored: Thu Sep 15 11:58:33 2016 +0200
Committer: Michael A. Hedderich <mi...@users.noreply.github.com>
Committed: Thu Sep 15 14:21:05 2016 +0200

----------------------------------------------------------------------
 .../joshua/decoder/cky/RescoringTest.java       |  67 +++++++++++++++++++
 .../resources/decoder/rescoring/joshua.config   |   8 +--
 src/test/resources/decoder/rescoring/lm.gz      | Bin 0 -> 2466496 bytes
 .../resources/decoder/rescoring/output.gold     |  24 +++----
 src/test/resources/decoder/rescoring/test.sh    |  30 ---------
 5 files changed, 83 insertions(+), 46 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/11b87c38/src/test/java/org/apache/joshua/decoder/cky/RescoringTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/cky/RescoringTest.java b/src/test/java/org/apache/joshua/decoder/cky/RescoringTest.java
new file mode 100644
index 0000000..a12a47b
--- /dev/null
+++ b/src/test/java/org/apache/joshua/decoder/cky/RescoringTest.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.cky;
+
+import static org.apache.joshua.decoder.cky.TestUtil.decodeList;
+import static org.apache.joshua.decoder.cky.TestUtil.loadStringsFromFile;
+import static org.testng.Assert.assertEquals;
+
+import java.util.List;
+
+import org.apache.joshua.decoder.Decoder;
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.util.io.KenLmTestUtil;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.Test;
+
+/**
+ * End-to-end check of decoding with forest rescoring, ported from the former
+ * {@code decoder/rescoring} shell regression test.
+ */
+public class RescoringTest {
+
+  private static final String INPUT_PATH = "src/test/resources/decoder/rescoring/input.txt";
+  private static final String CONFIG_PATH = "src/test/resources/decoder/rescoring/joshua.config";
+  private static final String GOLD_PATH = "src/test/resources/decoder/rescoring/output.gold";
+
+  private JoshuaConfiguration joshuaConfig;
+  private Decoder decoder;
+
+  /** Decodes the strings loaded from the input file and expects exactly the gold strings. */
+  @Test
+  public void givenInput_whenDecodingWithRescoring_thenScoreAndTranslationCorrect()
+      throws Exception {
+    final List<String> sentences = loadStringsFromFile(INPUT_PATH);
+
+    configureDecoder(CONFIG_PATH);
+    final List<String> actual = decodeList(sentences, decoder, joshuaConfig);
+
+    final List<String> expected = loadStringsFromFile(GOLD_PATH);
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Reads the configuration file and builds the decoder from it.
+   *
+   * @param pathToConfig path of the Joshua configuration file to load
+   * @throws Exception if reading the configuration or building the decoder fails
+   */
+  public void configureDecoder(String pathToConfig) throws Exception {
+    this.joshuaConfig = new JoshuaConfiguration();
+    this.joshuaConfig.readConfigFile(pathToConfig);
+    // NOTE(review): presumably Guard copes with a missing KenLM library; confirm in KenLmTestUtil.
+    KenLmTestUtil.Guard(() -> this.decoder = new Decoder(this.joshuaConfig, ""));
+  }
+
+  /** Cleans up the decoder after each test; does nothing when none was created. */
+  @AfterMethod
+  public void tearDown() throws Exception {
+    if (decoder == null) {
+      return;
+    }
+    decoder.cleanUp();
+    decoder = null;
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/11b87c38/src/test/resources/decoder/rescoring/joshua.config
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/rescoring/joshua.config b/src/test/resources/decoder/rescoring/joshua.config
index 0e4a277..56efbfa 100644
--- a/src/test/resources/decoder/rescoring/joshua.config
+++ b/src/test/resources/decoder/rescoring/joshua.config
@@ -1,10 +1,10 @@
 rescore-forest = true
 rescore-forest-weight = 100
 
-lm = kenlm 5 false false 100 ../constrained/lm.gz
+lm = kenlm 5 false false 100 src/test/resources/decoder/rescoring/lm.gz
 
-tm = thrax pt 12 grammar.gz
-tm = thrax glue -1 glue-grammar
+tm = thrax pt 12 src/test/resources/decoder/rescoring/grammar.gz
+tm = thrax glue -1 src/test/resources/decoder/rescoring/glue-grammar
 
 mark-oovs = true
 
@@ -14,7 +14,7 @@ goalSymbol = GOAL
 #pruning config
 pop-limit = 100
 
-#output-format = %i %c %s
+output-format = %s ||| %f ||| %c
 
 #nbest config
 use_unique_nbest = true

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/11b87c38/src/test/resources/decoder/rescoring/lm.gz
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/rescoring/lm.gz b/src/test/resources/decoder/rescoring/lm.gz
new file mode 100644
index 0000000..a26335e
Binary files /dev/null and b/src/test/resources/decoder/rescoring/lm.gz differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/11b87c38/src/test/resources/decoder/rescoring/output.gold
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/rescoring/output.gold b/src/test/resources/decoder/rescoring/output.gold
index 5d6600d..1ea4237 100644
--- a/src/test/resources/decoder/rescoring/output.gold
+++ b/src/test/resources/decoder/rescoring/output.gold
@@ -1,12 +1,12 @@
-0 ||| the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
-0 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
-0 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
-0 ||| the kid eated the cockroach ||| tm_pt_0=-15.000 tm_glue_0=5.000 lm_0=-20.053 WordPenalty=-3.040 OOVPenalty=0.000 ||| -31.773
-0 ||| the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
-0 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
-1 ||| the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
-1 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
-1 ||| the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
-1 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
-1 ||| the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
-1 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
+the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
+the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
+the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
+the kid eated the cockroach ||| tm_pt_0=-15.000 tm_glue_0=5.000 lm_0=-20.053 WordPenalty=-3.040 OOVPenalty=0.000 ||| -31.773
+the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
+the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
+the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
+the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
+the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
+the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
+the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
+the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/11b87c38/src/test/resources/decoder/rescoring/test.sh
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/rescoring/test.sh b/src/test/resources/decoder/rescoring/test.sh
deleted file mode 100755
index 58f2d2d..0000000
--- a/src/test/resources/decoder/rescoring/test.sh
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-set -u
-
-cat input.txt | $JOSHUA/bin/joshua-decoder -m 1g -threads 1 -c joshua.config > output 2> log
-
-# Compare
-diff -u output output.gold > diff
-
-if [ $? -eq 0 ]; then
-	rm -f diff log output 
-	exit 0
-else
-	exit 1
-fi


[13/15] incubator-joshua git commit: Fixed failing unit test: Added missing KenLM guard.

Posted by mj...@apache.org.
Fixed failing unit test: Added missing KenLM guard.


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/5d697489
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/5d697489
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/5d697489

Branch: refs/heads/7
Commit: 5d69748957378e99377765d5ce20daaf207eaa64
Parents: 7fc205f
Author: Michael A. Hedderich <mi...@users.noreply.github.com>
Authored: Thu Sep 15 14:40:46 2016 +0200
Committer: Michael A. Hedderich <mi...@users.noreply.github.com>
Committed: Thu Sep 15 14:40:46 2016 +0200

----------------------------------------------------------------------
 .../apache/joshua/decoder/cky/SourceAnnotationsTest.java    | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5d697489/src/test/java/org/apache/joshua/decoder/cky/SourceAnnotationsTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/cky/SourceAnnotationsTest.java b/src/test/java/org/apache/joshua/decoder/cky/SourceAnnotationsTest.java
index 000ba7e..ce09506 100644
--- a/src/test/java/org/apache/joshua/decoder/cky/SourceAnnotationsTest.java
+++ b/src/test/java/org/apache/joshua/decoder/cky/SourceAnnotationsTest.java
@@ -23,6 +23,7 @@ import static org.testng.Assert.assertEquals;
 
 import org.apache.joshua.decoder.Decoder;
 import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.util.io.KenLmTestUtil;
 import org.testng.annotations.AfterMethod;
 import org.testng.annotations.Test;
 
@@ -55,13 +56,15 @@ public class SourceAnnotationsTest {
     joshuaConfig = new JoshuaConfiguration();
     joshuaConfig.readConfigFile(JOSHUA_CONFIG_PATH);
     joshuaConfig.source_annotations = sourceAnnotations;
-    decoder = new Decoder(joshuaConfig, "");
+    KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
   }
 
   @AfterMethod
   public void tearDown() throws Exception {
-    decoder.cleanUp();
-    decoder = null;
+    if (decoder != null) {
+      decoder.cleanUp();
+      decoder = null;
+    }
   }
 
 }


[05/15] incubator-joshua git commit: Removed resources of unused regression test decoder/segment-oovs

Posted by mj...@apache.org.
Removed resources of unused regression test decoder/segment-oovs


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/d9f34aa2
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/d9f34aa2
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/d9f34aa2

Branch: refs/heads/7
Commit: d9f34aa24b2eb884947a7c0113819cfdb0534559
Parents: 11b87c3
Author: Michael A. Hedderich <mi...@users.noreply.github.com>
Authored: Thu Sep 15 12:00:04 2016 +0200
Committer: Michael A. Hedderich <mi...@users.noreply.github.com>
Committed: Thu Sep 15 14:21:05 2016 +0200

----------------------------------------------------------------------
 src/test/resources/decoder/segment-oovs/config  | 41 ----------
 .../resources/decoder/segment-oovs/input.txt    |  1 -
 .../decoder/segment-oovs/output.expected        | 82 --------------------
 src/test/resources/decoder/segment-oovs/test.sh | 31 --------
 4 files changed, 155 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d9f34aa2/src/test/resources/decoder/segment-oovs/config
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/segment-oovs/config b/src/test/resources/decoder/segment-oovs/config
deleted file mode 100644
index 0541bee..0000000
--- a/src/test/resources/decoder/segment-oovs/config
+++ /dev/null
@@ -1,41 +0,0 @@
-tm = thrax pt 1 ../../lattice/grammar.test
-tm = thrax glue -1 ../../lattice/glue-grammar
-
-#lm config
-lm = kenlm 3 false false 100 ../../lattice/test.lm
-
-#tm config
-default_non_terminal=X
-goalSymbol=GOAL
-
-#pruning config
-pop-limit = 100
-
-#nbest config
-use_unique_nbest = true 
-include-align-index = false
-top_n = 300
-
-# this shouldn't apply to the lattice
-maxlen = 1
-
-feature-function = OOVPenalty
-feature-function = WordPenalty
-feature-function = SourcePath
-
-###### model weights
-#lm order weight
-lm_0 0.0
-
-#phrasemodel owner column(0-indexed) weight
-tm_pt_0 0.2
-tm_pt_1 0.3
-tm_pt_2 0.5
-
-tm_glue_0 0.0
-
-#wordpenalty weight
-WordPenalty -1.0
-SourcePath 1.0
-
-OOVPenalty 1.0

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d9f34aa2/src/test/resources/decoder/segment-oovs/input.txt
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/segment-oovs/input.txt b/src/test/resources/decoder/segment-oovs/input.txt
deleted file mode 100644
index 01f142f..0000000
--- a/src/test/resources/decoder/segment-oovs/input.txt
+++ /dev/null
@@ -1 +0,0 @@
-ein golfloch

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d9f34aa2/src/test/resources/decoder/segment-oovs/output.expected
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/segment-oovs/output.expected b/src/test/resources/decoder/segment-oovs/output.expected
deleted file mode 100644
index a2786c2..0000000
--- a/src/test/resources/decoder/segment-oovs/output.expected
+++ /dev/null
@@ -1,82 +0,0 @@
-0 ||| a g o lf ho le ||| lm_0=-503.040 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.000 tm_glue_0=5.000 WordPenalty=-3.474 SourcePath=0.000 ||| 3.406
-0 ||| a g o lf hole ||| lm_0=-403.040 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.000 tm_glue_0=4.000 WordPenalty=-3.040 SourcePath=0.000 ||| 2.972
-0 ||| a go lf ho le ||| lm_0=-403.040 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.000 tm_glue_0=4.000 WordPenalty=-3.040 SourcePath=0.000 ||| 2.972
-0 ||| a go lf hole ||| lm_0=-303.040 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.000 tm_glue_0=3.000 WordPenalty=-2.606 SourcePath=0.000 ||| 2.537
-0 ||| a golf hole ||| lm_0=-203.040 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.000 tm_glue_0=2.000 WordPenalty=-2.171 SourcePath=0.000 ||| 2.103
-0 ||| ein_OOV g o lf ho le ||| lm_0=-602.125 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-0.000 tm_glue_0=5.000 WordPenalty=-3.474 SourcePath=0.000 OOVPenalty=-100.000 ||| -96.526
-0 ||| a g_OOV o lf ho le ||| lm_0=-503.040 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.000 tm_glue_0=5.000 WordPenalty=-3.474 SourcePath=0.000 OOVPenalty=-100.000 ||| -96.594
-0 ||| a g o lf ho ch_OOV ||| lm_0=-503.040 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.000 tm_glue_0=5.000 WordPenalty=-3.474 SourcePath=0.000 OOVPenalty=-100.000 ||| -96.594
-0 ||| a g o_OOV lf ho le ||| lm_0=-503.040 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.000 tm_glue_0=5.000 WordPenalty=-3.474 SourcePath=0.000 OOVPenalty=-100.000 ||| -96.594
-0 ||| ein_OOV g o lf hole ||| lm_0=-502.125 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-0.000 tm_glue_0=4.000 WordPenalty=-3.040 SourcePath=0.000 OOVPenalty=-100.000 ||| -96.960
-0 ||| ein_OOV go lf ho le ||| lm_0=-502.125 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-0.000 tm_glue_0=4.000 WordPenalty=-3.040 SourcePath=0.000 OOVPenalty=-100.000 ||| -96.960
-0 ||| a go_OOV lf ho le ||| lm_0=-403.040 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.000 tm_glue_0=4.000 WordPenalty=-3.040 SourcePath=0.000 OOVPenalty=-100.000 ||| -97.028
-0 ||| a g_OOV o lf hole ||| lm_0=-403.040 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.000 tm_glue_0=4.000 WordPenalty=-3.040 SourcePath=0.000 OOVPenalty=-100.000 ||| -97.028
-0 ||| a go lf ho ch_OOV ||| lm_0=-403.040 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.000 tm_glue_0=4.000 WordPenalty=-3.040 SourcePath=0.000 OOVPenalty=-100.000 ||| -97.028
-0 ||| a g o lflo_OOV le ||| lm_0=-403.040 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.000 tm_glue_0=5.000 WordPenalty=-3.040 SourcePath=0.000 OOVPenalty=-100.000 ||| -97.028
-0 ||| a g o_OOV lf hole ||| lm_0=-403.040 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.000 tm_glue_0=4.000 WordPenalty=-3.040 SourcePath=0.000 OOVPenalty=-100.000 ||| -97.028
-0 ||| ein_OOV go lf hole ||| lm_0=-402.125 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-0.000 tm_glue_0=3.000 WordPenalty=-2.606 SourcePath=0.000 OOVPenalty=-100.000 ||| -97.394
-0 ||| a go_OOV lf hole ||| lm_0=-303.040 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.000 tm_glue_0=3.000 WordPenalty=-2.606 SourcePath=0.000 OOVPenalty=-100.000 ||| -97.463
-0 ||| a g o lfloch_OOV ||| lm_0=-303.040 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.000 tm_glue_0=4.000 WordPenalty=-2.606 SourcePath=0.000 OOVPenalty=-100.000 ||| -97.463
-0 ||| a go lflo_OOV le ||| lm_0=-303.040 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.000 tm_glue_0=4.000 WordPenalty=-2.606 SourcePath=0.000 OOVPenalty=-100.000 ||| -97.463
-0 ||| ein_OOV golf hole ||| lm_0=-302.125 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-0.000 tm_glue_0=3.000 WordPenalty=-2.171 SourcePath=0.000 OOVPenalty=-100.000 ||| -97.829
-0 ||| a golf_OOV hole ||| lm_0=-203.040 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.000 tm_glue_0=3.000 WordPenalty=-2.171 SourcePath=0.000 OOVPenalty=-100.000 ||| -97.897
-0 ||| a go lfloch_OOV ||| lm_0=-203.040 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.000 tm_glue_0=3.000 WordPenalty=-2.171 SourcePath=0.000 OOVPenalty=-100.000 ||| -97.897
-0 ||| a golf loch_OOV ||| lm_0=-203.040 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.000 tm_glue_0=2.000 WordPenalty=-2.171 SourcePath=0.000 OOVPenalty=-100.000 ||| -97.897
-0 ||| a golfloch_OOV ||| lm_0=-103.040 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.000 tm_glue_0=2.000 WordPenalty=-1.737 SourcePath=0.000 OOVPenalty=-100.000 ||| -98.331
-0 ||| ein_OOV g o lf ho ch_OOV ||| lm_0=-602.125 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-0.000 tm_glue_0=5.000 WordPenalty=-3.474 SourcePath=0.000 OOVPenalty=-200.000 ||| -196.526
-0 ||| ein_OOV g o_OOV lf ho le ||| lm_0=-602.125 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-0.000 tm_glue_0=5.000 WordPenalty=-3.474 SourcePath=0.000 OOVPenalty=-200.000 ||| -196.526
-0 ||| ein_OOV g_OOV o lf ho le ||| lm_0=-602.125 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-0.000 tm_glue_0=5.000 WordPenalty=-3.474 SourcePath=0.000 OOVPenalty=-200.000 ||| -196.526
-0 ||| a g_OOV o lf ho ch_OOV ||| lm_0=-503.040 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.000 tm_glue_0=5.000 WordPenalty=-3.474 SourcePath=0.000 OOVPenalty=-200.000 ||| -196.594
-0 ||| a g_OOV o_OOV lf ho le ||| lm_0=-503.040 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.000 tm_glue_0=5.000 WordPenalty=-3.474 SourcePath=0.000 OOVPenalty=-200.000 ||| -196.594
-0 ||| a g o_OOV lf ho ch_OOV ||| lm_0=-503.040 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.000 tm_glue_0=5.000 WordPenalty=-3.474 SourcePath=0.000 OOVPenalty=-200.000 ||| -196.594
-0 ||| ein_OOV g o lflo_OOV le ||| lm_0=-502.125 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-0.000 tm_glue_0=5.000 WordPenalty=-3.040 SourcePath=0.000 OOVPenalty=-200.000 ||| -196.960
-0 ||| ein_OOV go lf ho ch_OOV ||| lm_0=-502.125 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-0.000 tm_glue_0=4.000 WordPenalty=-3.040 SourcePath=0.000 OOVPenalty=-200.000 ||| -196.960
-0 ||| ein_OOV go_OOV lf ho le ||| lm_0=-502.125 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-0.000 tm_glue_0=4.000 WordPenalty=-3.040 SourcePath=0.000 OOVPenalty=-200.000 ||| -196.960
-0 ||| ein_OOV g o_OOV lf hole ||| lm_0=-502.125 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-0.000 tm_glue_0=4.000 WordPenalty=-3.040 SourcePath=0.000 OOVPenalty=-200.000 ||| -196.960
-0 ||| ein_OOV g_OOV o lf hole ||| lm_0=-502.125 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-0.000 tm_glue_0=4.000 WordPenalty=-3.040 SourcePath=0.000 OOVPenalty=-200.000 ||| -196.960
-0 ||| a g_OOV o lflo_OOV le ||| lm_0=-403.040 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.000 tm_glue_0=5.000 WordPenalty=-3.040 SourcePath=0.000 OOVPenalty=-200.000 ||| -197.028
-0 ||| a g o lflo_OOV ch_OOV ||| lm_0=-403.040 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.000 tm_glue_0=5.000 WordPenalty=-3.040 SourcePath=0.000 OOVPenalty=-200.000 ||| -197.028
-0 ||| a g o_OOV lflo_OOV le ||| lm_0=-403.040 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.000 tm_glue_0=5.000 WordPenalty=-3.040 SourcePath=0.000 OOVPenalty=-200.000 ||| -197.028
-0 ||| a go_OOV lf ho ch_OOV ||| lm_0=-403.040 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.000 tm_glue_0=4.000 WordPenalty=-3.040 SourcePath=0.000 OOVPenalty=-200.000 ||| -197.028
-0 ||| a g_OOV o_OOV lf hole ||| lm_0=-403.040 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.000 tm_glue_0=4.000 WordPenalty=-3.040 SourcePath=0.000 OOVPenalty=-200.000 ||| -197.028
-0 ||| ein_OOV go_OOV lf hole ||| lm_0=-402.125 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-0.000 tm_glue_0=3.000 WordPenalty=-2.606 SourcePath=0.000 OOVPenalty=-200.000 ||| -197.394
-0 ||| ein_OOV go lflo_OOV le ||| lm_0=-402.125 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-0.000 tm_glue_0=4.000 WordPenalty=-2.606 SourcePath=0.000 OOVPenalty=-200.000 ||| -197.394
-0 ||| ein_OOV g o lfloch_OOV ||| lm_0=-402.125 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-0.000 tm_glue_0=4.000 WordPenalty=-2.606 SourcePath=0.000 OOVPenalty=-200.000 ||| -197.394
-0 ||| a g_OOV o lfloch_OOV ||| lm_0=-303.040 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.000 tm_glue_0=4.000 WordPenalty=-2.606 SourcePath=0.000 OOVPenalty=-200.000 ||| -197.463
-0 ||| a go lflo_OOV ch_OOV ||| lm_0=-303.040 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.000 tm_glue_0=4.000 WordPenalty=-2.606 SourcePath=0.000 OOVPenalty=-200.000 ||| -197.463
-0 ||| a g o_OOV lfloch_OOV ||| lm_0=-303.040 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.000 tm_glue_0=4.000 WordPenalty=-2.606 SourcePath=0.000 OOVPenalty=-200.000 ||| -197.463
-0 ||| a go_OOV lflo_OOV le ||| lm_0=-303.040 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.000 tm_glue_0=4.000 WordPenalty=-2.606 SourcePath=0.000 OOVPenalty=-200.000 ||| -197.463
-0 ||| ein_OOV go lfloch_OOV ||| lm_0=-302.125 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-0.000 tm_glue_0=3.000 WordPenalty=-2.171 SourcePath=0.000 OOVPenalty=-200.000 ||| -197.829
-0 ||| ein_OOV golf loch_OOV ||| lm_0=-302.125 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-0.000 tm_glue_0=3.000 WordPenalty=-2.171 SourcePath=0.000 OOVPenalty=-200.000 ||| -197.829
-0 ||| ein_OOV golf_OOV hole ||| lm_0=-302.125 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-0.000 tm_glue_0=3.000 WordPenalty=-2.171 SourcePath=0.000 OOVPenalty=-200.000 ||| -197.829
-0 ||| a golf_OOV loch_OOV ||| lm_0=-203.040 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.000 tm_glue_0=3.000 WordPenalty=-2.171 SourcePath=0.000 OOVPenalty=-200.000 ||| -197.897
-0 ||| a go_OOV lfloch_OOV ||| lm_0=-203.040 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.000 tm_glue_0=3.000 WordPenalty=-2.171 SourcePath=0.000 OOVPenalty=-200.000 ||| -197.897
-0 ||| ein_OOV golfloch_OOV ||| lm_0=-202.125 tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=0.000 tm_glue_0=2.000 WordPenalty=-1.737 SourcePath=0.000 OOVPenalty=-200.000 ||| -198.263
-0 ||| ein_OOV g_OOV o_OOV lf ho le ||| lm_0=-602.125 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-0.000 tm_glue_0=5.000 WordPenalty=-3.474 SourcePath=0.000 OOVPenalty=-300.000 ||| -296.526
-0 ||| ein_OOV g o_OOV lf ho ch_OOV ||| lm_0=-602.125 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-0.000 tm_glue_0=5.000 WordPenalty=-3.474 SourcePath=0.000 OOVPenalty=-300.000 ||| -296.526
-0 ||| ein_OOV g_OOV o lf ho ch_OOV ||| lm_0=-602.125 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-0.000 tm_glue_0=5.000 WordPenalty=-3.474 SourcePath=0.000 OOVPenalty=-300.000 ||| -296.526
-0 ||| a g_OOV o_OOV lf ho ch_OOV ||| lm_0=-503.040 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.000 tm_glue_0=5.000 WordPenalty=-3.474 SourcePath=0.000 OOVPenalty=-300.000 ||| -296.594
-0 ||| ein_OOV g o lflo_OOV ch_OOV ||| lm_0=-502.125 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-0.000 tm_glue_0=5.000 WordPenalty=-3.040 SourcePath=0.000 OOVPenalty=-300.000 ||| -296.960
-0 ||| ein_OOV g_OOV o lflo_OOV le ||| lm_0=-502.125 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-0.000 tm_glue_0=5.000 WordPenalty=-3.040 SourcePath=0.000 OOVPenalty=-300.000 ||| -296.960
-0 ||| ein_OOV g_OOV o_OOV lf hole ||| lm_0=-502.125 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-0.000 tm_glue_0=4.000 WordPenalty=-3.040 SourcePath=0.000 OOVPenalty=-300.000 ||| -296.960
-0 ||| ein_OOV go_OOV lf ho ch_OOV ||| lm_0=-502.125 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-0.000 tm_glue_0=4.000 WordPenalty=-3.040 SourcePath=0.000 OOVPenalty=-300.000 ||| -296.960
-0 ||| ein_OOV g o_OOV lflo_OOV le ||| lm_0=-502.125 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-0.000 tm_glue_0=5.000 WordPenalty=-3.040 SourcePath=0.000 OOVPenalty=-300.000 ||| -296.960
-0 ||| a g_OOV o lflo_OOV ch_OOV ||| lm_0=-403.040 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.000 tm_glue_0=5.000 WordPenalty=-3.040 SourcePath=0.000 OOVPenalty=-300.000 ||| -297.028
-0 ||| a g_OOV o_OOV lflo_OOV le ||| lm_0=-403.040 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.000 tm_glue_0=5.000 WordPenalty=-3.040 SourcePath=0.000 OOVPenalty=-300.000 ||| -297.028
-0 ||| a g o_OOV lflo_OOV ch_OOV ||| lm_0=-403.040 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.000 tm_glue_0=5.000 WordPenalty=-3.040 SourcePath=0.000 OOVPenalty=-300.000 ||| -297.028
-0 ||| ein_OOV g_OOV o lfloch_OOV ||| lm_0=-402.125 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-0.000 tm_glue_0=4.000 WordPenalty=-2.606 SourcePath=0.000 OOVPenalty=-300.000 ||| -297.394
-0 ||| ein_OOV go lflo_OOV ch_OOV ||| lm_0=-402.125 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-0.000 tm_glue_0=4.000 WordPenalty=-2.606 SourcePath=0.000 OOVPenalty=-300.000 ||| -297.394
-0 ||| ein_OOV g o_OOV lfloch_OOV ||| lm_0=-402.125 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-0.000 tm_glue_0=4.000 WordPenalty=-2.606 SourcePath=0.000 OOVPenalty=-300.000 ||| -297.394
-0 ||| ein_OOV go_OOV lflo_OOV le ||| lm_0=-402.125 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-0.000 tm_glue_0=4.000 WordPenalty=-2.606 SourcePath=0.000 OOVPenalty=-300.000 ||| -297.394
-0 ||| a go_OOV lflo_OOV ch_OOV ||| lm_0=-303.040 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.000 tm_glue_0=4.000 WordPenalty=-2.606 SourcePath=0.000 OOVPenalty=-300.000 ||| -297.463
-0 ||| a g_OOV o_OOV lfloch_OOV ||| lm_0=-303.040 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.000 tm_glue_0=4.000 WordPenalty=-2.606 SourcePath=0.000 OOVPenalty=-300.000 ||| -297.463
-0 ||| ein_OOV golf_OOV loch_OOV ||| lm_0=-302.125 tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=0.000 tm_glue_0=3.000 WordPenalty=-2.171 SourcePath=0.000 OOVPenalty=-300.000 ||| -297.829
-0 ||| ein_OOV go_OOV lfloch_OOV ||| lm_0=-302.125 tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=0.000 tm_glue_0=3.000 WordPenalty=-2.171 SourcePath=0.000 OOVPenalty=-300.000 ||| -297.829
-0 ||| ein_OOV g_OOV o_OOV lf ho ch_OOV ||| lm_0=-602.125 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-0.000 tm_glue_0=5.000 WordPenalty=-3.474 SourcePath=0.000 OOVPenalty=-400.000 ||| -396.526
-0 ||| ein_OOV g_OOV o_OOV lflo_OOV le ||| lm_0=-502.125 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-0.000 tm_glue_0=5.000 WordPenalty=-3.040 SourcePath=0.000 OOVPenalty=-400.000 ||| -396.960
-0 ||| ein_OOV g_OOV o lflo_OOV ch_OOV ||| lm_0=-502.125 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-0.000 tm_glue_0=5.000 WordPenalty=-3.040 SourcePath=0.000 OOVPenalty=-400.000 ||| -396.960
-0 ||| ein_OOV g o_OOV lflo_OOV ch_OOV ||| lm_0=-502.125 tm_pt_0=-0.000 tm_pt_1=-0.000 tm_pt_2=-0.000 tm_glue_0=5.000 WordPenalty=-3.040 SourcePath=0.000 OOVPenalty=-400.000 ||| -396.960
-0 ||| a g_OOV o_OOV lflo_OOV ch_OOV ||| lm_0=-403.040 tm_pt_0=-0.000 tm_pt_1=-0.228 tm_pt_2=-0.000 tm_glue_0=5.000 WordPenalty=-3.040 SourcePath=0.000 OOVPenalty=-400.000 ||| -397.028
-0 ||| ein_OOV go_OOV lflo_OOV ch_OOV ||| lm_0=-402.125 tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=0.000 tm_glue_0=4.000 WordPenalty=-2.606 SourcePath=0.000 OOVPenalty=-400.000 ||| -397.394
-0 ||| ein_OOV g_OOV o_OOV lfloch_OOV ||| lm_0=-402.125 tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=0.000 tm_glue_0=4.000 WordPenalty=-2.606 SourcePath=0.000 OOVPenalty=-400.000 ||| -397.394
-0 ||| ein_OOV g_OOV o_OOV lflo_OOV ch_OOV ||| lm_0=-502.125 tm_pt_0=0.000 tm_pt_1=0.000 tm_pt_2=0.000 tm_glue_0=5.000 WordPenalty=-3.040 SourcePath=0.000 OOVPenalty=-500.000 ||| -496.960

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d9f34aa2/src/test/resources/decoder/segment-oovs/test.sh
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/segment-oovs/test.sh b/src/test/resources/decoder/segment-oovs/test.sh
deleted file mode 100644
index d941de4..0000000
--- a/src/test/resources/decoder/segment-oovs/test.sh
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/bin/sh
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-cat input.txt | $JOSHUA/bin/joshua-decoder -m 500m -c config -maxlen 10 -segment-oovs > output 2> log
-
-if [[ $? -ne 0 ]]; then
-	exit 1
-fi
-
-diff -u output output.expected > diff
-
-if [[ $? -eq 0 ]]; then
-  rm -f output log diff
-  exit 0
-else
-  exit 1
-fi


[10/15] incubator-joshua git commit: Moved regression test decoder/tree-output to unit test. Regenerated gold output and cleaned up the directory.

Posted by mj...@apache.org.
Moved regression test decoder/tree-output to unit test. Regenerated gold output and cleaned up the directory.


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/99b475b3
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/99b475b3
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/99b475b3

Branch: refs/heads/7
Commit: 99b475b3ffe08f2bfcc8105e906293dbd9d1c12f
Parents: 17ecec1
Author: Michael A. Hedderich <mi...@users.noreply.github.com>
Authored: Thu Sep 15 13:00:38 2016 +0200
Committer: Michael A. Hedderich <mi...@users.noreply.github.com>
Committed: Thu Sep 15 14:21:05 2016 +0200

----------------------------------------------------------------------
 .../joshua/decoder/cky/TreeOutputTest.java      | 65 ++++++++++++++++++++
 .../decoder/tree-output/fragment-map.txt        |  2 -
 .../resources/decoder/tree-output/joshua.config |  8 +--
 .../resources/decoder/tree-output/output.gold   | 10 +--
 src/test/resources/decoder/tree-output/test.sh  | 30 ---------
 5 files changed, 74 insertions(+), 41 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/99b475b3/src/test/java/org/apache/joshua/decoder/cky/TreeOutputTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/cky/TreeOutputTest.java b/src/test/java/org/apache/joshua/decoder/cky/TreeOutputTest.java
new file mode 100644
index 0000000..f5e1005
--- /dev/null
+++ b/src/test/java/org/apache/joshua/decoder/cky/TreeOutputTest.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.cky;
+
+import static org.apache.joshua.decoder.cky.TestUtil.decodeList;
+import static org.apache.joshua.decoder.cky.TestUtil.loadStringsFromFile;
+import static org.testng.Assert.assertEquals;
+
+import java.util.List;
+
+import org.apache.joshua.decoder.Decoder;
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.util.io.KenLmTestUtil;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.Test;
+
+public class TreeOutputTest {
+  private JoshuaConfiguration joshuaConfig;
+  private Decoder decoder;
+
+  @AfterMethod
+  public void tearDown() throws Exception {
+    if (decoder != null) {
+      decoder.cleanUp();
+      decoder = null;
+    }
+  }
+
+  @Test
+  public void givenInput_whenDecodingWithTreeOutput_thenOutputCorrect() throws Exception {
+    // Given
+    List<String> inputStrings = loadStringsFromFile("src/test/resources/decoder/tree-output/input");
+
+    // When
+    configureDecoder("src/test/resources/decoder/tree-output/joshua.config");
+    List<String> decodedStrings = decodeList(inputStrings, decoder, joshuaConfig);
+
+    // Then
+    List<String> goldStrings = loadStringsFromFile(
+        "src/test/resources/decoder/tree-output/output.gold");
+    assertEquals(decodedStrings, goldStrings);
+  }
+
+  public void configureDecoder(String pathToConfig) throws Exception {
+    joshuaConfig = new JoshuaConfiguration();
+    joshuaConfig.readConfigFile(pathToConfig);
+    KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/99b475b3/src/test/resources/decoder/tree-output/fragment-map.txt
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/tree-output/fragment-map.txt b/src/test/resources/decoder/tree-output/fragment-map.txt
deleted file mode 100644
index daed866..0000000
--- a/src/test/resources/decoder/tree-output/fragment-map.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-(S (NP (PRP "I")) (VP (VB "am"))) ||| I AM
-

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/99b475b3/src/test/resources/decoder/tree-output/joshua.config
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/tree-output/joshua.config b/src/test/resources/decoder/tree-output/joshua.config
index d68192d..7375cad 100644
--- a/src/test/resources/decoder/tree-output/joshua.config
+++ b/src/test/resources/decoder/tree-output/joshua.config
@@ -1,7 +1,7 @@
-lm = kenlm 5 false false 100 lm.gz
+lm = kenlm 5 false false 100 src/test/resources/decoder/tree-output/lm.gz
 
-tm = thrax pt 12 grammar.gz
-tm = thrax glue -1 glue-grammar
+tm = thrax pt 12 src/test/resources/decoder/tree-output/grammar.gz
+tm = thrax glue -1 src/test/resources/decoder/tree-output/glue-grammar
 
 mark_oovs = false
 
@@ -11,7 +11,7 @@ goalSymbol = GOAL
 #pruning config
 pop-limit = 100
 
-output-format = %i %t
+output-format = %t
 
 #nbest config
 use_unique_nbest = true

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/99b475b3/src/test/resources/decoder/tree-output/output.gold
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/tree-output/output.gold b/src/test/resources/decoder/tree-output/output.gold
index 68a1610..abbeb79 100644
--- a/src/test/resources/decoder/tree-output/output.gold
+++ b/src/test/resources/decoder/tree-output/output.gold
@@ -1,5 +1,5 @@
-0 (GOAL{0-5} (GOAL{0-4} (GOAL{0-3} (GOAL{0-1} <s>) (A{1-3} (B{1-2} foo) (C{2-3} bar))) (D{3-4} baz)) </s>)
-1 ()
-2 (GOAL{0-3} (GOAL{0-2} (GOAL{0-1} <s>) (D{1-2} baz)) </s>)
-3 (GOAL{0-4} (GOAL{0-3} (GOAL{0-1} <s>) (S{1-3} I AM)) </s>)
-4 (GOAL{0-4} <s> (NP\DT{1-3} right (NN{2-3} xslot)) </s>)
+(GOAL{0-5} (GOAL{0-4} (GOAL{0-3} (GOAL{0-1} <s>) (A{1-3} (B{1-2} foo) (C{2-3} bar))) (D{3-4} baz)) </s>)
+()
+(GOAL{0-3} (GOAL{0-2} (GOAL{0-1} <s>) (D{1-2} baz)) </s>)
+(GOAL{0-4} (GOAL{0-3} (GOAL{0-1} <s>) (S{1-3} I AM)) </s>)
+(GOAL{0-4} <s> (NP\DT{1-3} right (NN{2-3} xslot)) </s>)

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/99b475b3/src/test/resources/decoder/tree-output/test.sh
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/tree-output/test.sh b/src/test/resources/decoder/tree-output/test.sh
deleted file mode 100755
index ea2d126..0000000
--- a/src/test/resources/decoder/tree-output/test.sh
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-set -u
-
-cat input | $JOSHUA/bin/joshua-decoder -m 1g -threads 1 -c joshua.config > output 2> log
-
-# Compare
-diff -u output output.gold > diff
-
-if [ $? -eq 0 ]; then
-	rm -f diff log output output.scores
-	exit 0
-else
-	exit 1
-fi


[12/15] incubator-joshua git commit: Fixed failing unit test. Added null check for decoder.

Posted by mj...@apache.org.
Fixed failing unit test. Added null check for decoder.


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/7fc205fd
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/7fc205fd
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/7fc205fd

Branch: refs/heads/7
Commit: 7fc205fd3ce3dc68639ef0cdd7de784acf62550a
Parents: b09081a
Author: Michael A. Hedderich <mi...@users.noreply.github.com>
Authored: Thu Sep 15 14:34:02 2016 +0200
Committer: Michael A. Hedderich <mi...@users.noreply.github.com>
Committed: Thu Sep 15 14:34:02 2016 +0200

----------------------------------------------------------------------
 .../org/apache/joshua/decoder/cky/UniqueHypothesesTest.java    | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7fc205fd/src/test/java/org/apache/joshua/decoder/cky/UniqueHypothesesTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/cky/UniqueHypothesesTest.java b/src/test/java/org/apache/joshua/decoder/cky/UniqueHypothesesTest.java
index 8dc0e56..bf65c5e 100644
--- a/src/test/java/org/apache/joshua/decoder/cky/UniqueHypothesesTest.java
+++ b/src/test/java/org/apache/joshua/decoder/cky/UniqueHypothesesTest.java
@@ -65,8 +65,10 @@ public class UniqueHypothesesTest {
 
   @AfterMethod
   public void tearDown() throws Exception {
-    decoder.cleanUp();
-    decoder = null;
+    if (decoder != null) {
+      decoder.cleanUp();
+      decoder = null;
+    }
   }
 
 }


[08/15] incubator-joshua git commit: Removed resources of unused regression test decoder/phrase/include-align-index.

Posted by mj...@apache.org.
Removed resources of unused regression test decoder/phrase/include-align-index.


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/d9bb3e5a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/d9bb3e5a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/d9bb3e5a

Branch: refs/heads/7
Commit: d9bb3e5a841998c0bb8b87018c06ef28b46176c4
Parents: 99b475b
Author: Michael A. Hedderich <mi...@users.noreply.github.com>
Authored: Thu Sep 15 13:03:04 2016 +0200
Committer: Michael A. Hedderich <mi...@users.noreply.github.com>
Committed: Thu Sep 15 14:21:05 2016 +0200

----------------------------------------------------------------------
 .../decoder/phrase/include-align-index/README   |   2 --
 .../decoder/phrase/include-align-index/config   |  29 -----------------
 .../phrase/include-align-index/corpus.es        |   1 -
 .../decoder/phrase/include-align-index/lm.1.gz  | Bin 2235 -> 0 bytes
 .../phrase/include-align-index/output.gold      |   1 -
 .../phrase/include-align-index/rules.1.gz       | Bin 2998042 -> 0 bytes
 .../decoder/phrase/include-align-index/test.sh  |  32 -------------------
 7 files changed, 65 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d9bb3e5a/src/test/resources/decoder/phrase/include-align-index/README
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/include-align-index/README b/src/test/resources/decoder/phrase/include-align-index/README
deleted file mode 100644
index d0c0813..0000000
--- a/src/test/resources/decoder/phrase/include-align-index/README
+++ /dev/null
@@ -1,2 +0,0 @@
-Added non-functioning example that will test outputting phrase alignments if
-that ability is ever restored.

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d9bb3e5a/src/test/resources/decoder/phrase/include-align-index/config
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/include-align-index/config b/src/test/resources/decoder/phrase/include-align-index/config
deleted file mode 100644
index f30014d..0000000
--- a/src/test/resources/decoder/phrase/include-align-index/config
+++ /dev/null
@@ -1,29 +0,0 @@
-tm = moses -owner pt -maxspan 0 -path rules.1.gz -max-source-len 5
-feature-function = StateMinimizingLanguageModel -lm_order 5 -lm_file lm.1.gz
-
-search = stack
-
-mark-oovs = false
-pop-limit = 10
-top-n = 1
-
-output-format = %i ||| %s ||| %f ||| %c
-
-include-align-index = true
-reordering-limit = 6
-
-# And these are the feature functions to activate.
-feature-function = OOVPenalty
-feature-function = WordPenalty
-feature-function = Distortion
-feature-function = PhrasePenalty -owner pt
-
-OOVPenalty 1.0
-Distortion 0.114849
-WordPenalty -0.201544
-PhrasePenalty -0.236965
-tm_pt_0 0.0370068
-tm_pt_1 0.0495759
-tm_pt_2 0.196742
-tm_pt_3 0.0745423
-lm_0 0.204412452147565

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d9bb3e5a/src/test/resources/decoder/phrase/include-align-index/corpus.es
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/include-align-index/corpus.es b/src/test/resources/decoder/phrase/include-align-index/corpus.es
deleted file mode 100644
index 6e255f9..0000000
--- a/src/test/resources/decoder/phrase/include-align-index/corpus.es
+++ /dev/null
@@ -1 +0,0 @@
-una estrategia republicana para obstaculizar la reelección de Obama 

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d9bb3e5a/src/test/resources/decoder/phrase/include-align-index/lm.1.gz
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/include-align-index/lm.1.gz b/src/test/resources/decoder/phrase/include-align-index/lm.1.gz
deleted file mode 100644
index 3f4c453..0000000
Binary files a/src/test/resources/decoder/phrase/include-align-index/lm.1.gz and /dev/null differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d9bb3e5a/src/test/resources/decoder/phrase/include-align-index/output.gold
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/include-align-index/output.gold b/src/test/resources/decoder/phrase/include-align-index/output.gold
deleted file mode 100644
index 509a3de..0000000
--- a/src/test/resources/decoder/phrase/include-align-index/output.gold
+++ /dev/null
@@ -1 +0,0 @@
-0 ||| a strategy |0-1| republican |2-2| to hinder |3-4| reelection |5-6| Obama |7-8| ||| tm_pt_0=-9.702 tm_pt_1=-10.800 tm_pt_2=-7.543 tm_pt_3=-8.555 lm_0=-19.117 OOVPenalty=0.000 WordPenalty=-3.040 Distortion=0.000 PhrasePenalty=5.000 ||| -7.496

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d9bb3e5a/src/test/resources/decoder/phrase/include-align-index/rules.1.gz
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/include-align-index/rules.1.gz b/src/test/resources/decoder/phrase/include-align-index/rules.1.gz
deleted file mode 100644
index 14466e9..0000000
Binary files a/src/test/resources/decoder/phrase/include-align-index/rules.1.gz and /dev/null differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d9bb3e5a/src/test/resources/decoder/phrase/include-align-index/test.sh
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/include-align-index/test.sh b/src/test/resources/decoder/phrase/include-align-index/test.sh
deleted file mode 100644
index 7703aa4..0000000
--- a/src/test/resources/decoder/phrase/include-align-index/test.sh
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-set -u
-
-cat corpus.es | $JOSHUA/bin/joshua-decoder -threads 1 -c config > output 2> log
-
-# Compare
-diff -u output output.gold > diff
-
-if [ $? -eq 0 ]; then
-  rm -f diff output log
-  exit 0
-else
-  exit 1
-fi
-
-


[14/15] incubator-joshua git commit: Merge branch 'master' into 7

Posted by mj...@apache.org.
Merge branch 'master' into 7

Moved into proper location under joshua-core

# Conflicts:
#	joshua-core/src/test/resources/decoder/num_translation_options/joshua.config.packed
#	joshua-core/src/test/resources/decoder/num_translation_options/test.sh
#	joshua-core/src/test/resources/decoder/oov-list/config
#	joshua-core/src/test/resources/decoder/oov-list/test.sh
#	joshua-core/src/test/resources/decoder/phrase/constrained/test.sh
#	joshua-core/src/test/resources/decoder/phrase/decode/corpus.es
#	joshua-core/src/test/resources/decoder/phrase/decode/lm.1.gz
#	joshua-core/src/test/resources/decoder/phrase/decode/rules.1.gz
#	joshua-core/src/test/resources/decoder/phrase/include-align-index/README
#	joshua-core/src/test/resources/decoder/phrase/include-align-index/config
#	joshua-core/src/test/resources/decoder/phrase/include-align-index/output.gold
#	joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/README
#	joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/corpus.es
#	joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/lm.1.gz
#	joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/output.gold
#	joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/rules.1.gz
#	joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/test.sh
#	joshua-core/src/test/resources/decoder/rescoring/test.sh
#	joshua-core/src/test/resources/decoder/segment-oovs/config
#	joshua-core/src/test/resources/decoder/segment-oovs/input.txt
#	joshua-core/src/test/resources/decoder/segment-oovs/output.expected
#	joshua-core/src/test/resources/decoder/segment-oovs/test.sh
#	joshua-core/src/test/resources/decoder/source-annotations/test.sh
#	joshua-core/src/test/resources/decoder/target-bigram/out.gold
#	joshua-core/src/test/resources/decoder/target-bigram/test.sh
#	joshua-core/src/test/resources/decoder/too-long/output.gold
#	joshua-core/src/test/resources/decoder/too-long/test.sh
#	joshua-core/src/test/resources/decoder/tree-output/fragment-map.txt
#	joshua-core/src/test/resources/decoder/tree-output/test.sh
#	src/test/resources/decoder/num_translation_options/joshua-packed.config
#	src/test/resources/decoder/num_translation_options/joshua.config.packed
#	src/test/resources/decoder/oov-list/config
#	src/test/resources/decoder/oov-list/joshua.config


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/93055fd5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/93055fd5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/93055fd5

Branch: refs/heads/7
Commit: 93055fd5692d068b3932f6ae20480f41d9fc8b91
Parents: 5f46639 5d69748
Author: Matt Post <po...@cs.jhu.edu>
Authored: Thu Sep 15 15:43:20 2016 +0200
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Thu Sep 15 15:43:20 2016 +0200

----------------------------------------------------------------------
 .../org/apache/joshua/decoder/cky/NAryTest.java |  18 ++++
 .../decoder/cky/NumTranslationOptionsTest.java  | 106 +++++++++++++++++++
 .../apache/joshua/decoder/cky/OOVListTest.java  |  66 ++++++++++++
 .../joshua/decoder/cky/RescoringTest.java       |  67 ++++++++++++
 .../decoder/cky/SourceAnnotationsTest.java      |  70 ++++++++++++
 .../apache/joshua/decoder/cky/TargetBigram.java |  75 +++++++++++++
 .../apache/joshua/decoder/cky/TooLongTest.java  |  86 +++++++++++++++
 .../joshua/decoder/cky/TreeOutputTest.java      |  65 ++++++++++++
 .../decoder/cky/UniqueHypothesesTest.java       |  74 +++++++++++++
 .../joshua-packed.config                        |  30 ++++++
 .../num_translation_options/joshua.config       |   6 +-
 .../output-no-dot-chart.gold                    |   4 +
 .../num_translation_options/output-packed.gold  |   4 +
 .../decoder/num_translation_options/output.gold |   8 --
 .../resources/decoder/oov-list/joshua.config    |  31 ++++++
 .../src/test/resources/decoder/oov-list/lm.gz   | Bin 0 -> 2466496 bytes
 .../test/resources/decoder/oov-list/output.gold |   6 +-
 .../phrase/unique-hypotheses/joshua.config      |   4 +-
 .../decoder/phrase/unique-hypotheses/lm.1.gz    | Bin 17 -> 2235 bytes
 .../decoder/phrase/unique-hypotheses/rules.1.gz | Bin 20 -> 2998042 bytes
 .../resources/decoder/rescoring/joshua.config   |   8 +-
 .../src/test/resources/decoder/rescoring/lm.gz  | Bin 0 -> 2466496 bytes
 .../resources/decoder/rescoring/output.gold     |  24 ++---
 .../decoder/source-annotations/joshua.config    |   8 +-
 .../resources/decoder/tree-output/joshua.config |   8 +-
 .../resources/decoder/tree-output/output.gold   |  10 +-
 26 files changed, 733 insertions(+), 45 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/java/org/apache/joshua/decoder/cky/NAryTest.java
----------------------------------------------------------------------
diff --cc joshua-core/src/test/java/org/apache/joshua/decoder/cky/NAryTest.java
index 5440407,0000000..6a2071a
mode 100644,000000..100644
--- a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/NAryTest.java
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/NAryTest.java
@@@ -1,64 -1,0 +1,82 @@@
++/*
++ * Licensed to the Apache Software Foundation (ASF) under one
++ * or more contributor license agreements.  See the NOTICE file
++ * distributed with this work for additional information
++ * regarding copyright ownership.  The ASF licenses this file
++ * to you under the Apache License, Version 2.0 (the
++ * "License"); you may not use this file except in compliance
++ * with the License.  You may obtain a copy of the License at
++ *
++ *  http://www.apache.org/licenses/LICENSE-2.0
++ *
++ * Unless required by applicable law or agreed to in writing,
++ * software distributed under the License is distributed on an
++ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
++ * KIND, either express or implied.  See the License for the
++ * specific language governing permissions and limitations
++ * under the License.
++ */
 +package org.apache.joshua.decoder.cky;
 +
 +import static org.apache.joshua.decoder.cky.TestUtil.decodeList;
 +import static org.apache.joshua.decoder.cky.TestUtil.loadStringsFromFile;
 +import static org.testng.Assert.assertEquals;
 +
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +import java.util.List;
 +
 +import org.apache.joshua.decoder.Decoder;
 +import org.apache.joshua.decoder.JoshuaConfiguration;
 +import org.apache.joshua.util.io.KenLmTestUtil;
 +import org.testng.annotations.AfterMethod;
 +import org.testng.annotations.Test;
 +
 +public class NAryTest {
 +  private JoshuaConfiguration joshuaConfig;
 +  private Decoder decoder;
 +
 +  @AfterMethod
 +  public void tearDown() throws Exception {
 +    if (decoder != null) {
 +      decoder.cleanUp();
 +      decoder = null;
 +    }
 +  }
 +
 +  @Test
 +  public void givenInput_whenNAryDecoding_thenScoreAndTranslationCorrect() throws Exception {
 +    // Given
 +    List<String> inputStrings = loadStringsFromFile("src/test/resources/decoder/n-ary/input.txt");
 +
 +    // When
 +    configureDecoder("src/test/resources/decoder/n-ary/joshua.config");
 +    List<String> decodedStrings = decodeList(inputStrings, decoder, joshuaConfig);
 +
 +    // Then
 +    List<String> goldStrings = loadStringsFromFile("src/test/resources/decoder/n-ary/output.gold");
 +    assertEquals(decodedStrings, goldStrings);
 +  }
 +
 +  public void configureDecoder(String pathToConfig) throws Exception {
 +    joshuaConfig = new JoshuaConfiguration();
 +    joshuaConfig.readConfigFile(pathToConfig);
 +    KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig));
 +  }
 +}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/java/org/apache/joshua/decoder/cky/NumTranslationOptionsTest.java
----------------------------------------------------------------------
diff --cc joshua-core/src/test/java/org/apache/joshua/decoder/cky/NumTranslationOptionsTest.java
index 0000000,0000000..ec6f02d
new file mode 100644
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/NumTranslationOptionsTest.java
@@@ -1,0 -1,0 +1,106 @@@
++/*
++ * Licensed to the Apache Software Foundation (ASF) under one
++ * or more contributor license agreements.  See the NOTICE file
++ * distributed with this work for additional information
++ * regarding copyright ownership.  The ASF licenses this file
++ * to you under the Apache License, Version 2.0 (the
++ * "License"); you may not use this file except in compliance
++ * with the License.  You may obtain a copy of the License at
++ *
++ *  http://www.apache.org/licenses/LICENSE-2.0
++ *
++ * Unless required by applicable law or agreed to in writing,
++ * software distributed under the License is distributed on an
++ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
++ * KIND, either express or implied.  See the License for the
++ * specific language governing permissions and limitations
++ * under the License.
++ */
++package org.apache.joshua.decoder.cky;
++
++import static org.apache.joshua.decoder.cky.TestUtil.decodeList;
++import static org.apache.joshua.decoder.cky.TestUtil.loadStringsFromFile;
++import static org.testng.Assert.assertEquals;
++
++import java.util.List;
++
++import org.apache.joshua.decoder.Decoder;
++import org.apache.joshua.decoder.JoshuaConfiguration;
++import org.apache.joshua.util.io.KenLmTestUtil;
++import org.testng.annotations.AfterMethod;
++import org.testng.annotations.Test;
++
++/**
++ * Tests that num_translation_options is enforced for hierarchical decoders
++ */
++public class NumTranslationOptionsTest {
++  private JoshuaConfiguration joshuaConfig;
++  private Decoder decoder;
++
++  @AfterMethod
++  public void tearDown() throws Exception {
++    if (decoder != null) {
++      decoder.cleanUp();
++      decoder = null;
++    }
++  }
++
++  @Test
++  public void givenInput_whenDecodingWithNumTranslationOptions3_thenScoreAndTranslationCorrect()
++      throws Exception {
++    // Given
++    List<String> inputStrings = loadStringsFromFile(
++        "src/test/resources/decoder/num_translation_options/input");
++
++    // When
++    configureDecoder("src/test/resources/decoder/num_translation_options/joshua.config", true);
++    List<String> decodedStrings = decodeList(inputStrings, decoder, joshuaConfig);
++
++    // Then
++    List<String> goldStrings = loadStringsFromFile(
++        "src/test/resources/decoder/num_translation_options/output.gold");
++    assertEquals(decodedStrings, goldStrings);
++  }
++
++  @Test
++  public void givenInput_whenDecodingWithNumTranslationOptions3AndNoDotChart_thenScoreAndTranslationCorrect()
++      throws Exception {
++    // Given
++    List<String> inputStrings = loadStringsFromFile(
++        "src/test/resources/decoder/num_translation_options/input");
++
++    // When
++    configureDecoder("src/test/resources/decoder/num_translation_options/joshua.config", false);
++    List<String> decodedStrings = decodeList(inputStrings, decoder, joshuaConfig);
++
++    // Then
++    List<String> goldStrings = loadStringsFromFile(
++        "src/test/resources/decoder/num_translation_options/output-no-dot-chart.gold");
++    assertEquals(decodedStrings, goldStrings);
++  }
++
++  @Test
++  public void givenInput_whenDecodingWithNumTranslationOptions3AndPacked_thenScoreAndTranslationCorrect()
++      throws Exception {
++    // Given
++    List<String> inputStrings = loadStringsFromFile(
++        "src/test/resources/decoder/num_translation_options/input");
++
++    // When
++    configureDecoder("src/test/resources/decoder/num_translation_options/joshua-packed.config",
++        true);
++    List<String> decodedStrings = decodeList(inputStrings, decoder, joshuaConfig);
++
++    // Then
++    List<String> goldStrings = loadStringsFromFile(
++        "src/test/resources/decoder/num_translation_options/output-packed.gold");
++    assertEquals(decodedStrings, goldStrings);
++  }
++
++  public void configureDecoder(String pathToConfig, boolean useDotChart) throws Exception {
++    joshuaConfig = new JoshuaConfiguration();
++    joshuaConfig.readConfigFile(pathToConfig);
++    joshuaConfig.use_dot_chart = useDotChart;
++    KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
++  }
++}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/java/org/apache/joshua/decoder/cky/OOVListTest.java
----------------------------------------------------------------------
diff --cc joshua-core/src/test/java/org/apache/joshua/decoder/cky/OOVListTest.java
index 0000000,0000000..29ec23e
new file mode 100644
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/OOVListTest.java
@@@ -1,0 -1,0 +1,66 @@@
++/*
++ * Licensed to the Apache Software Foundation (ASF) under one
++ * or more contributor license agreements.  See the NOTICE file
++ * distributed with this work for additional information
++ * regarding copyright ownership.  The ASF licenses this file
++ * to you under the Apache License, Version 2.0 (the
++ * "License"); you may not use this file except in compliance
++ * with the License.  You may obtain a copy of the License at
++ *
++ *  http://www.apache.org/licenses/LICENSE-2.0
++ *
++ * Unless required by applicable law or agreed to in writing,
++ * software distributed under the License is distributed on an
++ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
++ * KIND, either express or implied.  See the License for the
++ * specific language governing permissions and limitations
++ * under the License.
++ */
++package org.apache.joshua.decoder.cky;
++
++import static org.apache.joshua.decoder.cky.TestUtil.decodeList;
++import static org.apache.joshua.decoder.cky.TestUtil.loadStringsFromFile;
++import static org.testng.Assert.assertEquals;
++
++import java.util.List;
++
++import org.apache.joshua.decoder.Decoder;
++import org.apache.joshua.decoder.JoshuaConfiguration;
++import org.apache.joshua.util.io.KenLmTestUtil;
++import org.testng.annotations.AfterMethod;
++import org.testng.annotations.Test;
++
++public class OOVListTest {
++  private JoshuaConfiguration joshuaConfig;
++  private Decoder decoder;
++
++  @AfterMethod
++  public void tearDown() throws Exception {
++    if (decoder != null) {
++      decoder.cleanUp();
++      decoder = null;
++    }
++  }
++
++  @Test
++  public void givenInput_whenDecodingWithOOVList_thenScoreAndTranslationCorrect() throws Exception {
++    // Given
++    List<String> inputStrings = loadStringsFromFile(
++        "src/test/resources/decoder/oov-list/input.txt");
++
++    // When
++    configureDecoder("src/test/resources/decoder/oov-list/joshua.config");
++    List<String> decodedStrings = decodeList(inputStrings, decoder, joshuaConfig);
++
++    // Then
++    List<String> goldStrings = loadStringsFromFile(
++        "src/test/resources/decoder/oov-list/output.gold");
++    assertEquals(decodedStrings, goldStrings);
++  }
++
++  public void configureDecoder(String pathToConfig) throws Exception {
++    joshuaConfig = new JoshuaConfiguration();
++    joshuaConfig.readConfigFile(pathToConfig);
++    KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
++  }
++}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/java/org/apache/joshua/decoder/cky/RescoringTest.java
----------------------------------------------------------------------
diff --cc joshua-core/src/test/java/org/apache/joshua/decoder/cky/RescoringTest.java
index 0000000,0000000..a12a47b
new file mode 100644
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/RescoringTest.java
@@@ -1,0 -1,0 +1,67 @@@
++/*
++ * Licensed to the Apache Software Foundation (ASF) under one
++ * or more contributor license agreements.  See the NOTICE file
++ * distributed with this work for additional information
++ * regarding copyright ownership.  The ASF licenses this file
++ * to you under the Apache License, Version 2.0 (the
++ * "License"); you may not use this file except in compliance
++ * with the License.  You may obtain a copy of the License at
++ *
++ *  http://www.apache.org/licenses/LICENSE-2.0
++ *
++ * Unless required by applicable law or agreed to in writing,
++ * software distributed under the License is distributed on an
++ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
++ * KIND, either express or implied.  See the License for the
++ * specific language governing permissions and limitations
++ * under the License.
++ */
++package org.apache.joshua.decoder.cky;
++
++import static org.apache.joshua.decoder.cky.TestUtil.decodeList;
++import static org.apache.joshua.decoder.cky.TestUtil.loadStringsFromFile;
++import static org.testng.Assert.assertEquals;
++
++import java.util.List;
++
++import org.apache.joshua.decoder.Decoder;
++import org.apache.joshua.decoder.JoshuaConfiguration;
++import org.apache.joshua.util.io.KenLmTestUtil;
++import org.testng.annotations.AfterMethod;
++import org.testng.annotations.Test;
++
++public class RescoringTest {
++  private JoshuaConfiguration joshuaConfig;
++  private Decoder decoder;
++
++  @AfterMethod
++  public void tearDown() throws Exception {
++    if (decoder != null) {
++      decoder.cleanUp();
++      decoder = null;
++    }
++  }
++
++  @Test
++  public void givenInput_whenDecodingWithRescoring_thenScoreAndTranslationCorrect()
++      throws Exception {
++    // Given
++    List<String> inputStrings = loadStringsFromFile(
++        "src/test/resources/decoder/rescoring/input.txt");
++
++    // When
++    configureDecoder("src/test/resources/decoder/rescoring/joshua.config");
++    List<String> decodedStrings = decodeList(inputStrings, decoder, joshuaConfig);
++
++    // Then
++    List<String> goldStrings = loadStringsFromFile(
++        "src/test/resources/decoder/rescoring/output.gold");
++    assertEquals(decodedStrings, goldStrings);
++  }
++
++  public void configureDecoder(String pathToConfig) throws Exception {
++    joshuaConfig = new JoshuaConfiguration();
++    joshuaConfig.readConfigFile(pathToConfig);
++    KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
++  }
++}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/java/org/apache/joshua/decoder/cky/SourceAnnotationsTest.java
----------------------------------------------------------------------
diff --cc joshua-core/src/test/java/org/apache/joshua/decoder/cky/SourceAnnotationsTest.java
index 0000000,0000000..ce09506
new file mode 100644
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/SourceAnnotationsTest.java
@@@ -1,0 -1,0 +1,70 @@@
++/*
++ * Licensed to the Apache Software Foundation (ASF) under one
++ * or more contributor license agreements.  See the NOTICE file
++ * distributed with this work for additional information
++ * regarding copyright ownership.  The ASF licenses this file
++ * to you under the Apache License, Version 2.0 (the
++ * "License"); you may not use this file except in compliance
++ * with the License.  You may obtain a copy of the License at
++ *
++ *  http://www.apache.org/licenses/LICENSE-2.0
++ *
++ * Unless required by applicable law or agreed to in writing,
++ * software distributed under the License is distributed on an
++ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
++ * KIND, either express or implied.  See the License for the
++ * specific language governing permissions and limitations
++ * under the License.
++ */
++package org.apache.joshua.decoder.cky;
++
++import static org.apache.joshua.decoder.cky.TestUtil.translate;
++import static org.testng.Assert.assertEquals;
++
++import org.apache.joshua.decoder.Decoder;
++import org.apache.joshua.decoder.JoshuaConfiguration;
++import org.apache.joshua.util.io.KenLmTestUtil;
++import org.testng.annotations.AfterMethod;
++import org.testng.annotations.Test;
++
++public class SourceAnnotationsTest {
++
++  private static final String INPUT = "mis[tag=ADJ;num=PL;class=OOV] amigos me llaman";
++  private static final String GOLD_WITHOUT_ANNOTATIONS = "my friends call me ||| tm_pt_0=-3.000 tm_glue_0=3.000 lm_0=-11.974 OOVPenalty=0.000 WordPenalty=-2.606 ||| -7.650";
++  private static final String GOLD_WITH_ANNOTATIONS = "my friends call me ||| tm_pt_0=-3.000 tm_glue_0=3.000 lm_0=-111.513 OOVPenalty=0.000 WordPenalty=-2.606 ||| -107.189";
++
++  private static final String JOSHUA_CONFIG_PATH = "src/test/resources/decoder/source-annotations/joshua.config";
++
++  private JoshuaConfiguration joshuaConfig;
++  private Decoder decoder;
++
++  @Test
++  public void givenInput_whenNotUsingSourceAnnotations_thenOutputCorrect() throws Exception {
++    setUp(false);
++    String output = translate(INPUT, decoder, joshuaConfig);
++    assertEquals(output.trim(), GOLD_WITHOUT_ANNOTATIONS);
++  }
++
++  @Test
++  public void givenInput_whenUsingSourceAnnotations_thenOutputCorrect() throws Exception {
++    setUp(true);
++    String output = translate(INPUT, decoder, joshuaConfig);
++    assertEquals(output.trim(), GOLD_WITH_ANNOTATIONS);
++  }
++
++  public void setUp(boolean sourceAnnotations) throws Exception {
++    joshuaConfig = new JoshuaConfiguration();
++    joshuaConfig.readConfigFile(JOSHUA_CONFIG_PATH);
++    joshuaConfig.source_annotations = sourceAnnotations;
++    KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
++  }
++
++  @AfterMethod
++  public void tearDown() throws Exception {
++    if (decoder != null) {
++      decoder.cleanUp();
++      decoder = null;
++    }
++  }
++
++}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/java/org/apache/joshua/decoder/cky/TargetBigram.java
----------------------------------------------------------------------
diff --cc joshua-core/src/test/java/org/apache/joshua/decoder/cky/TargetBigram.java
index 0000000,0000000..bce34ca
new file mode 100644
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/TargetBigram.java
@@@ -1,0 -1,0 +1,75 @@@
++/*
++ * Licensed to the Apache Software Foundation (ASF) under one
++ * or more contributor license agreements.  See the NOTICE file
++ * distributed with this work for additional information
++ * regarding copyright ownership.  The ASF licenses this file
++ * to you under the Apache License, Version 2.0 (the
++ * "License"); you may not use this file except in compliance
++ * with the License.  You may obtain a copy of the License at
++ *
++ *  http://www.apache.org/licenses/LICENSE-2.0
++ *
++ * Unless required by applicable law or agreed to in writing,
++ * software distributed under the License is distributed on an
++ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
++ * KIND, either express or implied.  See the License for the
++ * specific language governing permissions and limitations
++ * under the License.
++ */
++package org.apache.joshua.decoder.cky;
++
++import static org.apache.joshua.decoder.cky.TestUtil.translate;
++import static org.testng.Assert.assertEquals;
++
++import org.apache.joshua.decoder.Decoder;
++import org.apache.joshua.decoder.JoshuaConfiguration;
++import org.testng.annotations.AfterMethod;
++import org.testng.annotations.Test;
++
++public class TargetBigram {
++
++  private static final String INPUT = "this is a test";
++  private static final String GOLD_TOPN2 = "this is a test ||| tm_glue_0=4.000 TargetBigram_<s>_this=1.000 TargetBigram_UNK_</s>=1.000 TargetBigram_UNK_UNK=1.000 TargetBigram_is_UNK=1.000 TargetBigram_this_is=1.000 ||| 0.000";
++  private static final String GOLD_TOPN3_THRESHOLD20 = "this is a test ||| tm_glue_0=4.000 TargetBigram_<s>_UNK=1.000 TargetBigram_UNK_</s>=1.000 TargetBigram_UNK_UNK=1.000 TargetBigram_UNK_a=1.000 TargetBigram_a_UNK=1.000 ||| 0.000";
++  private static final String GOLD_THRESHOLD10 = "this is a test ||| tm_glue_0=4.000 TargetBigram_<s>_UNK=1.000 TargetBigram_UNK_</s>=1.000 TargetBigram_UNK_is=1.000 TargetBigram_a_UNK=1.000 TargetBigram_is_a=1.000 ||| 0.000";
++
++  private static final String VOCAB_PATH = "src/test/resources/decoder/target-bigram/vocab";
++
++  private JoshuaConfiguration joshuaConfig;
++  private Decoder decoder;
++
++  @Test
++  public void givenInput_whenNotUsingSourceAnnotations_thenOutputCorrect() throws Exception {
++    setUp("TargetBigram -vocab " + VOCAB_PATH + " -top-n 2");
++    String output = translate(INPUT, decoder, joshuaConfig);
++    assertEquals(output.trim(), GOLD_TOPN2);
++  }
++
++  @Test
++  public void givenInput_whenUsingSourceAnnotations_thenOutputCorrect() throws Exception {
++    setUp("TargetBigram -vocab " + VOCAB_PATH + " -top-n 3 -threshold 20");
++    String output = translate(INPUT, decoder, joshuaConfig);
++    assertEquals(output.trim(), GOLD_TOPN3_THRESHOLD20);
++  }
++
++  @Test
++  public void givenInput_whenUsingSourceAnnotations_thenOutputCorrect2() throws Exception {
++    setUp("TargetBigram -vocab " + VOCAB_PATH + " -threshold 10");
++    String output = translate(INPUT, decoder, joshuaConfig);
++    assertEquals(output.trim(), GOLD_THRESHOLD10);
++  }
++
++  public void setUp(String featureFunction) throws Exception {
++    joshuaConfig = new JoshuaConfiguration();
++    joshuaConfig.features.add(featureFunction);
++    joshuaConfig.outputFormat = "%s ||| %f ||| %c";
++    decoder = new Decoder(joshuaConfig, "");
++  }
++
++  @AfterMethod
++  public void tearDown() throws Exception {
++    decoder.cleanUp();
++    decoder = null;
++  }
++
++}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/java/org/apache/joshua/decoder/cky/TooLongTest.java
----------------------------------------------------------------------
diff --cc joshua-core/src/test/java/org/apache/joshua/decoder/cky/TooLongTest.java
index 0000000,0000000..0d4f7ce
new file mode 100644
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/TooLongTest.java
@@@ -1,0 -1,0 +1,86 @@@
++/*
++ * Licensed to the Apache Software Foundation (ASF) under one
++ * or more contributor license agreements.  See the NOTICE file
++ * distributed with this work for additional information
++ * regarding copyright ownership.  The ASF licenses this file
++ * to you under the Apache License, Version 2.0 (the
++ * "License"); you may not use this file except in compliance
++ * with the License.  You may obtain a copy of the License at
++ *
++ *  http://www.apache.org/licenses/LICENSE-2.0
++ *
++ * Unless required by applicable law or agreed to in writing,
++ * software distributed under the License is distributed on an
++ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
++ * KIND, either express or implied.  See the License for the
++ * specific language governing permissions and limitations
++ * under the License.
++ */
++package org.apache.joshua.decoder.cky;
++
++import static org.apache.joshua.decoder.cky.TestUtil.translate;
++import static org.testng.Assert.assertEquals;
++
++import org.apache.joshua.decoder.Decoder;
++import org.apache.joshua.decoder.JoshuaConfiguration;
++import org.testng.annotations.AfterMethod;
++import org.testng.annotations.Test;
++
++/**
++ * Ensures that the decoder trims inputs when and only when it should
++ */
++public class TooLongTest {
++  private static final String INPUT1 = "as kingfishers draw fire";
++  private static final String GOLD1 = "as kingfishers ||| tm_glue_0=2.000 ||| 0.000";
++  private static final String INPUT2 = "dragonflies draw flame";
++  private static final String GOLD2 = "dragonflies ||| tm_glue_0=1.000 ||| 0.000";
++  private static final String INPUT3 = "(((as tumbled over rim in roundy wells stones ring";
++  private static final String GOLD3 = "(((as tumbled over rim in roundy wells stones ||| tm_glue_0=8.000 ||| 0.000";
++  private static final String INPUT4 = "(((like each tucked string tells";
++  private static final String GOLD4 = "|||  ||| 0.000";
++
++  private JoshuaConfiguration joshuaConfig;
++  private Decoder decoder;
++
++  @Test
++  public void givenInput_whenMaxLen2_thenOutputCorrect() throws Exception {
++    setUp(2, false);
++    String output = translate(INPUT1, decoder, joshuaConfig);
++    assertEquals(output.trim(), GOLD1);
++  }
++
++  @Test
++  public void givenInput_whenMaxLen1AndLatticeDecoding_thenOutputCorrect() throws Exception {
++    setUp(1, true);
++    String output = translate(INPUT2, decoder, joshuaConfig);
++    assertEquals(output.trim(), GOLD2);
++  }
++
++  @Test
++  public void givenInput_whenMaxLen8_thenOutputCorrect() throws Exception {
++    setUp(8, false);
++    String output = translate(INPUT3, decoder, joshuaConfig);
++    assertEquals(output.trim(), GOLD3);
++  }
++
++  @Test
++  public void givenInput_whenMaxLen3AndLatticeDecoding_thenOutputCorrect() throws Exception {
++    setUp(3, true);
++    String output = translate(INPUT4, decoder, joshuaConfig);
++    assertEquals(output.trim(), GOLD4);
++  }
++
++  public void setUp(int maxLen, boolean latticeDecoding) throws Exception {
++    joshuaConfig = new JoshuaConfiguration();
++    joshuaConfig.outputFormat = "%s ||| %f ||| %c";
++    joshuaConfig.maxlen = maxLen;
++    joshuaConfig.lattice_decoding = latticeDecoding;
++    decoder = new Decoder(joshuaConfig, "");
++  }
++
++  @AfterMethod
++  public void tearDown() throws Exception {
++    decoder.cleanUp();
++    decoder = null;
++  }
++}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/java/org/apache/joshua/decoder/cky/TreeOutputTest.java
----------------------------------------------------------------------
diff --cc joshua-core/src/test/java/org/apache/joshua/decoder/cky/TreeOutputTest.java
index 0000000,0000000..f5e1005
new file mode 100644
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/TreeOutputTest.java
@@@ -1,0 -1,0 +1,65 @@@
++/*
++ * Licensed to the Apache Software Foundation (ASF) under one
++ * or more contributor license agreements.  See the NOTICE file
++ * distributed with this work for additional information
++ * regarding copyright ownership.  The ASF licenses this file
++ * to you under the Apache License, Version 2.0 (the
++ * "License"); you may not use this file except in compliance
++ * with the License.  You may obtain a copy of the License at
++ *
++ *  http://www.apache.org/licenses/LICENSE-2.0
++ *
++ * Unless required by applicable law or agreed to in writing,
++ * software distributed under the License is distributed on an
++ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
++ * KIND, either express or implied.  See the License for the
++ * specific language governing permissions and limitations
++ * under the License.
++ */
++package org.apache.joshua.decoder.cky;
++
++import static org.apache.joshua.decoder.cky.TestUtil.decodeList;
++import static org.apache.joshua.decoder.cky.TestUtil.loadStringsFromFile;
++import static org.testng.Assert.assertEquals;
++
++import java.util.List;
++
++import org.apache.joshua.decoder.Decoder;
++import org.apache.joshua.decoder.JoshuaConfiguration;
++import org.apache.joshua.util.io.KenLmTestUtil;
++import org.testng.annotations.AfterMethod;
++import org.testng.annotations.Test;
++
++public class TreeOutputTest {
++  private JoshuaConfiguration joshuaConfig;
++  private Decoder decoder;
++
++  @AfterMethod
++  public void tearDown() throws Exception {
++    if (decoder != null) {
++      decoder.cleanUp();
++      decoder = null;
++    }
++  }
++
++  @Test
++  public void givenInput_whenDecodingWithTreeOutput_thenOutputCorrect() throws Exception {
++    // Given
++    List<String> inputStrings = loadStringsFromFile("src/test/resources/decoder/tree-output/input");
++
++    // When
++    configureDecoder("src/test/resources/decoder/tree-output/joshua.config");
++    List<String> decodedStrings = decodeList(inputStrings, decoder, joshuaConfig);
++
++    // Then
++    List<String> goldStrings = loadStringsFromFile(
++        "src/test/resources/decoder/tree-output/output.gold");
++    assertEquals(decodedStrings, goldStrings);
++  }
++
++  public void configureDecoder(String pathToConfig) throws Exception {
++    joshuaConfig = new JoshuaConfiguration();
++    joshuaConfig.readConfigFile(pathToConfig);
++    KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
++  }
++}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/java/org/apache/joshua/decoder/cky/UniqueHypothesesTest.java
----------------------------------------------------------------------
diff --cc joshua-core/src/test/java/org/apache/joshua/decoder/cky/UniqueHypothesesTest.java
index 0000000,0000000..bf65c5e
new file mode 100644
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/UniqueHypothesesTest.java
@@@ -1,0 -1,0 +1,74 @@@
++/*
++ * Licensed to the Apache Software Foundation (ASF) under one
++ * or more contributor license agreements.  See the NOTICE file
++ * distributed with this work for additional information
++ * regarding copyright ownership.  The ASF licenses this file
++ * to you under the Apache License, Version 2.0 (the
++ * "License"); you may not use this file except in compliance
++ * with the License.  You may obtain a copy of the License at
++ *
++ *  http://www.apache.org/licenses/LICENSE-2.0
++ *
++ * Unless required by applicable law or agreed to in writing,
++ * software distributed under the License is distributed on an
++ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
++ * KIND, either express or implied.  See the License for the
++ * specific language governing permissions and limitations
++ * under the License.
++ */
++package org.apache.joshua.decoder.cky;
++
++import static org.apache.joshua.decoder.cky.TestUtil.decodeList;
++import static org.testng.Assert.assertEquals;
++
++import java.util.Arrays;
++import java.util.HashSet;
++import java.util.List;
++import java.util.Set;
++
++import org.apache.joshua.decoder.Decoder;
++import org.apache.joshua.decoder.JoshuaConfiguration;
++import org.apache.joshua.util.io.KenLmTestUtil;
++import org.testng.annotations.AfterMethod;
++import org.testng.annotations.Test;
++
++/**
++ * Ensures that derivations are unique for the phrase-based decoder.
++ */
++public class UniqueHypothesesTest {
++
++  public static final String INPUT = "una estrategia republicana para obstaculizar la reelección de Obama";
++
++  private JoshuaConfiguration joshuaConfig = null;
++  private Decoder decoder = null;
++
++  @Test
++  public void givenInputSentence_whenDecodingWithUniqueHypotheses_thenAllHypothesesUnique()
++      throws Exception {
++    configureDecoder("src/test/resources/decoder/phrase/unique-hypotheses/joshua.config");
++    List<String> decodedStrings = decodeList(Arrays.asList(new String[] { INPUT }), decoder,
++        joshuaConfig);
++
++    assertEquals(decodedStrings.size(), 300);
++
++    // if all strings are unique then the set should have the same size as the
++    // list
++    Set<String> uniqueDecodedStrings = new HashSet<>(decodedStrings);
++    assertEquals(decodedStrings.size(), uniqueDecodedStrings.size());
++  }
++
++  public void configureDecoder(String pathToConfig) throws Exception {
++    joshuaConfig = new JoshuaConfiguration();
++    joshuaConfig.readConfigFile(pathToConfig);
++    KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
++  }
++
++  @AfterMethod
++  public void tearDown() throws Exception {
++    if (decoder != null) {
++      decoder.cleanUp();
++      decoder = null;
++    }
++  }
++
++}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/num_translation_options/joshua-packed.config
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/num_translation_options/joshua-packed.config
index 0000000,0000000..681c4d9
new file mode 100644
--- /dev/null
+++ b/joshua-core/src/test/resources/decoder/num_translation_options/joshua-packed.config
@@@ -1,0 -1,0 +1,30 @@@
++num_translation_options = 3
++
++lm = kenlm 5 false false 100 src/test/resources/decoder/num_translation_options/lm.gz
++
++tm = thrax pt 12 src/test/resources/decoder/num_translation_options/grammar.packed
++tm = thrax glue -1 src/test/resources/decoder/num_translation_options/glue-grammar
++
++mark_oovs = false
++
++default-non-terminal = X
++goalSymbol = GOAL
++
++#pruning config
++pop-limit = 100
++
++output-format = %c ||| %s ||| %f
++
++#nbest config
++use_unique_nbest = true
++top_n = 5
++
++feature-function = WordPenalty
++feature-function = OOVPenalty
++
++lm_0 1.2373676802179452
++
++tm_pt_0 1
++tm_glue_0 1
++WordPenalty 1
++OOVPenalty 1.0

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/num_translation_options/joshua.config
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/num_translation_options/joshua.config
index e37855c,0000000..88b0290
mode 100644,000000..100644
--- a/joshua-core/src/test/resources/decoder/num_translation_options/joshua.config
+++ b/joshua-core/src/test/resources/decoder/num_translation_options/joshua.config
@@@ -1,30 -1,0 +1,30 @@@
 +num_translation_options = 3
 +
- lm = kenlm 5 false false 100 lm.gz
++lm = kenlm 5 false false 100 src/test/resources/decoder/num_translation_options/lm.gz
 +
- tm = thrax pt 12 grammar.gz
- tm = thrax glue -1 glue-grammar
++tm = thrax pt 12 src/test/resources/decoder/num_translation_options/grammar.gz
++tm = thrax glue -1 src/test/resources/decoder/num_translation_options/glue-grammar
 +
 +mark_oovs = false
 +
 +default-non-terminal = X
 +goalSymbol = GOAL
 +
 +#pruning config
 +pop-limit = 100
 +
 +output-format = %c ||| %s ||| %f
 +
 +#nbest config
 +use_unique_nbest = true
 +top_n = 5
 +
 +feature-function = WordPenalty
 +feature-function = OOVPenalty
 +
 +lm_0 1.2373676802179452
 +
 +tm_pt_0 1
 +tm_glue_0 1
 +WordPenalty 1
 +OOVPenalty 1.0

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/num_translation_options/output-no-dot-chart.gold
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/num_translation_options/output-no-dot-chart.gold
index 0000000,0000000..686122c
new file mode 100644
--- /dev/null
+++ b/joshua-core/src/test/resources/decoder/num_translation_options/output-no-dot-chart.gold
@@@ -1,0 -1,0 +1,4 @@@
++-19.196 ||| i like taco bell ||| tm_pt_0=4.000 tm_glue_0=1.000 lm_0=-17.449 WordPenalty=-2.606 OOVPenalty=0.000
++-19.733 ||| i love taco bell ||| tm_pt_0=5.000 tm_glue_0=1.000 lm_0=-18.690 WordPenalty=-2.606 OOVPenalty=0.000
++-22.883 ||| i appreciate taco bell ||| tm_pt_0=3.000 tm_glue_0=1.000 lm_0=-19.620 WordPenalty=-2.606 OOVPenalty=0.000
++-424.954 ||| yo quiero taco bell ||| tm_pt_0=0.000 tm_glue_0=4.000 lm_0=-21.293 WordPenalty=-2.606 OOVPenalty=-400.000

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/num_translation_options/output-packed.gold
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/num_translation_options/output-packed.gold
index 0000000,0000000..686122c
new file mode 100644
--- /dev/null
+++ b/joshua-core/src/test/resources/decoder/num_translation_options/output-packed.gold
@@@ -1,0 -1,0 +1,4 @@@
++-19.196 ||| i like taco bell ||| tm_pt_0=4.000 tm_glue_0=1.000 lm_0=-17.449 WordPenalty=-2.606 OOVPenalty=0.000
++-19.733 ||| i love taco bell ||| tm_pt_0=5.000 tm_glue_0=1.000 lm_0=-18.690 WordPenalty=-2.606 OOVPenalty=0.000
++-22.883 ||| i appreciate taco bell ||| tm_pt_0=3.000 tm_glue_0=1.000 lm_0=-19.620 WordPenalty=-2.606 OOVPenalty=0.000
++-424.954 ||| yo quiero taco bell ||| tm_pt_0=0.000 tm_glue_0=4.000 lm_0=-21.293 WordPenalty=-2.606 OOVPenalty=-400.000

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/num_translation_options/output.gold
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/num_translation_options/output.gold
index 4203822,0000000..686122c
mode 100644,000000..100644
--- a/joshua-core/src/test/resources/decoder/num_translation_options/output.gold
+++ b/joshua-core/src/test/resources/decoder/num_translation_options/output.gold
@@@ -1,12 -1,0 +1,4 @@@
 +-19.196 ||| i like taco bell ||| tm_pt_0=4.000 tm_glue_0=1.000 lm_0=-17.449 WordPenalty=-2.606 OOVPenalty=0.000
 +-19.733 ||| i love taco bell ||| tm_pt_0=5.000 tm_glue_0=1.000 lm_0=-18.690 WordPenalty=-2.606 OOVPenalty=0.000
 +-22.883 ||| i appreciate taco bell ||| tm_pt_0=3.000 tm_glue_0=1.000 lm_0=-19.620 WordPenalty=-2.606 OOVPenalty=0.000
 +-424.954 ||| yo quiero taco bell ||| tm_pt_0=0.000 tm_glue_0=4.000 lm_0=-21.293 WordPenalty=-2.606 OOVPenalty=-400.000
- -19.196 ||| i like taco bell ||| tm_pt_0=4.000 tm_glue_0=1.000 lm_0=-17.449 WordPenalty=-2.606 OOVPenalty=0.000
- -19.733 ||| i love taco bell ||| tm_pt_0=5.000 tm_glue_0=1.000 lm_0=-18.690 WordPenalty=-2.606 OOVPenalty=0.000
- -22.883 ||| i appreciate taco bell ||| tm_pt_0=3.000 tm_glue_0=1.000 lm_0=-19.620 WordPenalty=-2.606 OOVPenalty=0.000
- -424.954 ||| yo quiero taco bell ||| tm_pt_0=0.000 tm_glue_0=4.000 lm_0=-21.293 WordPenalty=-2.606 OOVPenalty=-400.000
- -19.196 ||| i like taco bell ||| tm_pt_0=4.000 tm_glue_0=1.000 lm_0=-17.449 WordPenalty=-2.606 OOVPenalty=0.000
- -19.733 ||| i love taco bell ||| tm_pt_0=5.000 tm_glue_0=1.000 lm_0=-18.690 WordPenalty=-2.606 OOVPenalty=0.000
- -22.883 ||| i appreciate taco bell ||| tm_pt_0=3.000 tm_glue_0=1.000 lm_0=-19.620 WordPenalty=-2.606 OOVPenalty=0.000
- -424.954 ||| yo quiero taco bell ||| tm_pt_0=0.000 tm_glue_0=4.000 lm_0=-21.293 WordPenalty=-2.606 OOVPenalty=-400.000

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/oov-list/joshua.config
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/oov-list/joshua.config
index 0000000,0000000..8809206
new file mode 100644
--- /dev/null
+++ b/joshua-core/src/test/resources/decoder/oov-list/joshua.config
@@@ -1,0 -1,0 +1,31 @@@
++lm = kenlm 5 false false 100 src/test/resources/decoder/oov-list/lm.gz
++
++tm = thrax phrase 20 src/test/resources/decoder/oov-list/grammar
++tm = thrax glue -1 src/test/resources/decoder/oov-list/glue-grammar
++
++mark_oovs = true
++
++default-non-terminal = X
++goalSymbol = GOAL
++
++#pruning config
++pop-limit = 100
++
++#nbest config
++use_unique_nbest = true
++use_tree_nbest = false
++top_n = 1
++
++oov-list = CD 0.0488752 JJ 0.186114 NN 0.291795 NNS 0.0894967 NP 0.117171 OOV 0.033015 VB 0.0313967 VBG 0.0404596 VBN 0.0317203 
++
++output-format=%s ||| %f ||| %c
++
++feature-function = WordPenalty
++feature-function = OOVPenalty
++
++lm_0 1.2373676802179452
++
++tm_phrase_0 1
++tm_glue_0 1
++WordPenalty -3.6942747832593694
++OOVPenalty 1.0

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/oov-list/lm.gz
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/oov-list/lm.gz
index 0000000,0000000..a26335e
new file mode 100644
Binary files differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/oov-list/output.gold
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/oov-list/output.gold
index d911c52,0000000..ee44a51
mode 100644,000000..100644
--- a/joshua-core/src/test/resources/decoder/oov-list/output.gold
+++ b/joshua-core/src/test/resources/decoder/oov-list/output.gold
@@@ -1,3 -1,0 +1,3 @@@
- 0 ||| Goats eat cheese ||| tm_phrase_0=0.000 tm_glue_0=1.000 lm_0=-16.587 WordPenalty=-2.171 OOVPenalty=0.000 ||| -11.503
- 1 ||| i will go home ||| tm_phrase_0=0.000 tm_glue_0=1.000 lm_0=-12.155 WordPenalty=-2.606 OOVPenalty=0.000 ||| -4.414
- 2 ||| goets_OOV eet_OOV cheez_OOV ||| tm_phrase_0=0.000 tm_glue_0=1.000 lm_0=-17.700 WordPenalty=-2.171 OOVPenalty=-7.749 ||| -20.629
++Goats eat cheese ||| tm_phrase_0=0.000 tm_glue_0=1.000 lm_0=-16.587 WordPenalty=-2.171 OOVPenalty=0.000 ||| -11.503
++i will go home ||| tm_phrase_0=0.000 tm_glue_0=1.000 lm_0=-12.155 WordPenalty=-2.606 OOVPenalty=0.000 ||| -4.414
++goets_OOV eet_OOV cheez_OOV ||| tm_phrase_0=0.000 tm_glue_0=1.000 lm_0=-17.700 WordPenalty=-2.171 OOVPenalty=-7.749 ||| -20.629

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/joshua.config
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/joshua.config
index c35b267,0000000..7cef08e
mode 100644,000000..100644
--- a/joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/joshua.config
+++ b/joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/joshua.config
@@@ -1,23 -1,0 +1,23 @@@
- tm = moses pt 0 rules.1.gz
++tm = moses pt 0 src/test/resources/decoder/phrase/unique-hypotheses/rules.1.gz
 +default-non-terminal = X
 +goal-symbol = GOAL
- lm = kenlm 5 true false 100 lm.1.gz
++lm = kenlm 5 true false 100 src/test/resources/decoder/phrase/unique-hypotheses/lm.1.gz
 +mark-oovs = false
 +pop-limit = 100
 +top-n = 300
 +use-unique-nbest = true
 +output-format = %s
 +include-align-index = false
 +feature-function = OOVPenalty
 +feature-function = WordPenalty
 +feature_function = Distortion
 +feature_function = PhrasePenalty
 +lm_0 1.0
 +tm_pt_1 1.0
 +tm_pt_3 1.0
 +tm_pt_0 1.0
 +tm_pt_2 1.0
 +WordPenalty -2.844814
 +OOVPenalty 1.0
 +PhrasePenalty 1.0
 +Distortion 1.0

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/lm.1.gz
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/lm.1.gz
index 3655f03,0000000..3f4c453
mode 120000,000000..100644
Binary files differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/rules.1.gz
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/rules.1.gz
index a6183d9,0000000..14466e9
mode 120000,000000..100644
Binary files differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/rescoring/joshua.config
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/rescoring/joshua.config
index 0e4a277,0000000..56efbfa
mode 100644,000000..100644
--- a/joshua-core/src/test/resources/decoder/rescoring/joshua.config
+++ b/joshua-core/src/test/resources/decoder/rescoring/joshua.config
@@@ -1,31 -1,0 +1,31 @@@
 +rescore-forest = true
 +rescore-forest-weight = 100
 +
- lm = kenlm 5 false false 100 ../constrained/lm.gz
++lm = kenlm 5 false false 100 src/test/resources/decoder/rescoring/lm.gz
 +
- tm = thrax pt 12 grammar.gz
- tm = thrax glue -1 glue-grammar
++tm = thrax pt 12 src/test/resources/decoder/rescoring/grammar.gz
++tm = thrax glue -1 src/test/resources/decoder/rescoring/glue-grammar
 +
 +mark-oovs = true
 +
 +default-non-terminal = X
 +goalSymbol = GOAL
 +
 +#pruning config
 +pop-limit = 100
 +
- #output-format = %i %c %s
++output-format = %s ||| %f ||| %c
 +
 +#nbest config
 +use_unique_nbest = true
 +top_n = 2
 +
 +feature-function = WordPenalty
 +feature-function = OOVPenalty
 +
 +lm_0 1.2373676802179452
 +
 +tm_pt_0 1
 +tm_glue_0 1
 +WordPenalty -1
 +OOVPenalty 1.0

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/rescoring/lm.gz
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/rescoring/lm.gz
index 0000000,0000000..a26335e
new file mode 100644
Binary files differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/rescoring/output.gold
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/rescoring/output.gold
index 5d6600d,0000000..1ea4237
mode 100644,000000..100644
--- a/joshua-core/src/test/resources/decoder/rescoring/output.gold
+++ b/joshua-core/src/test/resources/decoder/rescoring/output.gold
@@@ -1,12 -1,0 +1,12 @@@
- 0 ||| the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
- 0 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
- 0 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
- 0 ||| the kid eated the cockroach ||| tm_pt_0=-15.000 tm_glue_0=5.000 lm_0=-20.053 WordPenalty=-3.040 OOVPenalty=0.000 ||| -31.773
- 0 ||| the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
- 0 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
- 1 ||| the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
- 1 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
- 1 ||| the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
- 1 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
- 1 ||| the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
- 1 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
++the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
++the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
++the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
++the kid eated the cockroach ||| tm_pt_0=-15.000 tm_glue_0=5.000 lm_0=-20.053 WordPenalty=-3.040 OOVPenalty=0.000 ||| -31.773
++the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
++the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
++the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
++the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
++the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
++the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
++the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
++the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/source-annotations/joshua.config
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/source-annotations/joshua.config
index ffd2f96,0000000..788505e
mode 100644,000000..100644
--- a/joshua-core/src/test/resources/decoder/source-annotations/joshua.config
+++ b/joshua-core/src/test/resources/decoder/source-annotations/joshua.config
@@@ -1,140 -1,0 +1,140 @@@
 +# This file is a template for the Joshua pipeline; variables enclosed
 +# in <angle-brackets> are substituted by the pipeline script as
 +# appropriate.  This file also serves to document Joshua's many
 +# parameters.
 +
 +# These are the grammar file specifications.  Joshua supports an
 +# arbitrary number of grammar files, each specified on its own line
 +# using the following format:
 +#
 +#   tm = TYPE OWNER LIMIT FILE
 +# 
 +# TYPE is "packed", "thrax", or "samt".  The latter denotes the format
 +# used in Zollmann and Venugopal's SAMT decoder
 +# (http://www.cs.cmu.edu/~zollmann/samt/).
 +# 
 +# OWNER is the "owner" of the rules in the grammar; this is used to
 +# determine which set of phrasal features apply to the grammar's
 +# rules.  Having different owners allows different features to be
 +# applied to different grammars, and for grammars to share features
 +# across files.
 +#
 +# LIMIT is the maximum input span permitted for the application of
 +# grammar rules found in the grammar file.  A value of -1 implies no limit.
 +#
 +# FILE is the grammar file (or directory when using packed grammars).
 +# The file can be compressed with gzip, which is determined by the
 +# presence or absence of a ".gz" file extension.
 +#
 +# By a convention defined by Chiang (2007), the grammars are split
 +# into two files: the main translation grammar containing all the
 +# learned translation rules, and a glue grammar which supports
 +# monotonic concatenation of hierarchical phrases. The glue grammar's
 +# main distinction from the regular grammar is that the span limit
 +# does not apply to it.  
 +
- tm = thrax pt 20 grammar
- tm = thrax glue -1 grammar.glue
++tm = thrax pt 20 src/test/resources/decoder/source-annotations/grammar
++tm = thrax glue -1 src/test/resources/decoder/source-annotations/grammar.glue
 +
 +# This symbol is used over unknown words in the source language
 +
 +default-non-terminal = X
 +
 +# This is the goal nonterminal, used to determine when a complete
 +# parse is found.  It should correspond to the root-level rules in the
 +# glue grammar.
 +
 +goal-symbol = GOAL
 +
 +# Language model config.
 +
 +# Multiple language models are supported.  For each language model,
 +# create a line in the following format, 
 +#
 +# lm = TYPE 5 false false 100 FILE
 +#
 +# where the six fields correspond to the following values:
 +# - LM type: one of "kenlm", "berkeleylm", "javalm" (not recommended), or "none"
 +# - LM order: the N of the N-gram language model
 +# - whether to use left equivalent state (currently not supported)
 +# - whether to use right equivalent state (currently not supported)
 +# - the ceiling cost of any n-gram (currently ignored)
 +# - LM file: the location of the language model file
 +# You also need to add a weight for each language model below.
 +
- lm = kenlm 5 false false 100 lm.kenlm
++lm = kenlm 5 false false 100 src/test/resources/decoder/source-annotations/lm.kenlm
 +
 +# The suffix _OOV is appended to unknown source-language words if this
 +# is set to true.
 +
 +mark-oovs = true
 +
 +# The pop-limit for decoding.  This determines how many hypotheses are
 +# considered over each span of the input.
 +
 +pop-limit = 100
 +
 +# How many hypotheses to output
 +
 +top-n = 1
 +
 +# Whether those hypotheses should be distinct strings
 +
 +use-unique-nbest = true
 +
 +# This is the default format of the output printed to STDOUT.  The variables that can be
 +# substituted are:
 +#
 +# %i: the sentence number (0-indexed)
 +# %s: the translated sentence
 +# %t: the derivation tree
 +# %f: the feature string
 +# %c: the model cost
 +
- output-format = %i ||| %s ||| %f ||| %c
++output-format = %s ||| %f ||| %c
 +
 +# When printing the trees (%t in 'output-format'), this controls whether the alignments
 +# are also printed.
 +
 +include-align-index = false
 +
 +
 +## Feature functions and weights.
 +#
 +# This is the location of the file containing model weights.
 +#
 +
 +
 +# For each language model line listed above, create a weight in the
 +# following format: the keyword "lm", a 0-based index, and the weight.
 +# lm_INDEX WEIGHT
 +
 +lm_0 1.0
 +
 +# The phrasal weights correspond to weights stored with each of the
 +# grammar rules.  The format is
 +#
 +#   tm_OWNER_COLUMN WEIGHT
 +#
 +# where COLUMN denotes the 0-based order of the parameter in the
 +# grammar file and WEIGHT is the corresponding weight.  In the future,
 +# we plan to add a sparse feature representation which will simplify
 +# this.
 +
 +tm_pt_0 0.049141264495762726
 +tm_glue_0 0.1663815584150378
 +
 +# The wordpenalty feature counts the number of words in each hypothesis.
 +
 +WordPenalty -1.5244636836685694
 +
 +# This feature counts the number of unknown words in the hypothesis.
 +
 +OOVPenalty 1
 +
 +# This feature weights paths through an input lattice.  It is only activated
 +# when decoding lattices.
 +
 +# And these are the feature functions to activate.
 +feature_function = OOVPenalty
 +feature_function = WordPenalty

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/tree-output/joshua.config
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/tree-output/joshua.config
index d68192d,0000000..7375cad
mode 100644,000000..100644
--- a/joshua-core/src/test/resources/decoder/tree-output/joshua.config
+++ b/joshua-core/src/test/resources/decoder/tree-output/joshua.config
@@@ -1,45 -1,0 +1,45 @@@
- lm = kenlm 5 false false 100 lm.gz
++lm = kenlm 5 false false 100 src/test/resources/decoder/tree-output/lm.gz
 +
- tm = thrax pt 12 grammar.gz
- tm = thrax glue -1 glue-grammar
++tm = thrax pt 12 src/test/resources/decoder/tree-output/grammar.gz
++tm = thrax glue -1 src/test/resources/decoder/tree-output/glue-grammar
 +
 +mark_oovs = false
 +
 +default-non-terminal = X
 +goalSymbol = GOAL
 +
 +#pruning config
 +pop-limit = 100
 +
- output-format = %i %t
++output-format = %t
 +
 +#nbest config
 +use_unique_nbest = true
 +top_n = 1
 +
 +feature-function = WordPenalty
 +feature-function = OOVPenalty
 +
 +
 +lm_0 1.2373676802179452
 +
 +tm_pt_0 -2.4497429277910214
 +tm_pt_1 0.7224581556224123
 +tm_pt_2 -0.31689069155153504
 +tm_pt_3 0.33861043967238036
 +tm_pt_4 0.03553113401320236
 +tm_pt_5 0.19138972284064748
 +tm_pt_6 0.3417994095521415
 +tm_pt_7 -0.9936312455671283
 +tm_pt_8 0.9070737587091975
 +tm_pt_9 0.8202511858619419
 +tm_pt_10 0.2593091306160006
 +tm_pt_11 0.25597137004462134
 +tm_pt_12 0.3538894647790496
 +tm_pt_13 -0.36212061186692646
 +tm_pt_14 -0.32923261148678096
 +tm_pt_15 0.5524863522177359
 +tm_pt_16 0.23451595442127693
 +tm_glue_0 1
 +WordPenalty -3.6942747832593694
 +OOVPenalty 1.0

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/93055fd5/joshua-core/src/test/resources/decoder/tree-output/output.gold
----------------------------------------------------------------------
diff --cc joshua-core/src/test/resources/decoder/tree-output/output.gold
index 68a1610,0000000..abbeb79
mode 100644,000000..100644
--- a/joshua-core/src/test/resources/decoder/tree-output/output.gold
+++ b/joshua-core/src/test/resources/decoder/tree-output/output.gold
@@@ -1,5 -1,0 +1,5 @@@
- 0 (GOAL{0-5} (GOAL{0-4} (GOAL{0-3} (GOAL{0-1} <s>) (A{1-3} (B{1-2} foo) (C{2-3} bar))) (D{3-4} baz)) </s>)
- 1 ()
- 2 (GOAL{0-3} (GOAL{0-2} (GOAL{0-1} <s>) (D{1-2} baz)) </s>)
- 3 (GOAL{0-4} (GOAL{0-3} (GOAL{0-1} <s>) (S{1-3} I AM)) </s>)
- 4 (GOAL{0-4} <s> (NP\DT{1-3} right (NN{2-3} xslot)) </s>)
++(GOAL{0-5} (GOAL{0-4} (GOAL{0-3} (GOAL{0-1} <s>) (A{1-3} (B{1-2} foo) (C{2-3} bar))) (D{3-4} baz)) </s>)
++()
++(GOAL{0-3} (GOAL{0-2} (GOAL{0-1} <s>) (D{1-2} baz)) </s>)
++(GOAL{0-4} (GOAL{0-3} (GOAL{0-1} <s>) (S{1-3} I AM)) </s>)
++(GOAL{0-4} <s> (NP\DT{1-3} right (NN{2-3} xslot)) </s>)


[11/15] incubator-joshua git commit: Added missing license texts.

Posted by mj...@apache.org.
Added missing license texts.


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/b09081ae
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/b09081ae
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/b09081ae

Branch: refs/heads/7
Commit: b09081aede83f3fbe895bf0f4e4b79577a4c70a6
Parents: b9e6ffa
Author: Michael A. Hedderich <mi...@users.noreply.github.com>
Authored: Thu Sep 15 14:23:42 2016 +0200
Committer: Michael A. Hedderich <mi...@users.noreply.github.com>
Committed: Thu Sep 15 14:23:42 2016 +0200

----------------------------------------------------------------------
 .../org/apache/joshua/decoder/cky/NAryTest.java   | 18 ++++++++++++++++++
 .../decoder/cky/NumTranslationOptionsTest.java    | 18 ++++++++++++++++++
 .../apache/joshua/decoder/cky/TooLongTest.java    | 18 ++++++++++++++++++
 3 files changed, 54 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b09081ae/src/test/java/org/apache/joshua/decoder/cky/NAryTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/cky/NAryTest.java b/src/test/java/org/apache/joshua/decoder/cky/NAryTest.java
index 31a347a..e96a904 100644
--- a/src/test/java/org/apache/joshua/decoder/cky/NAryTest.java
+++ b/src/test/java/org/apache/joshua/decoder/cky/NAryTest.java
@@ -1,3 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
 package org.apache.joshua.decoder.cky;
 
 import static org.apache.joshua.decoder.cky.TestUtil.decodeList;

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b09081ae/src/test/java/org/apache/joshua/decoder/cky/NumTranslationOptionsTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/cky/NumTranslationOptionsTest.java b/src/test/java/org/apache/joshua/decoder/cky/NumTranslationOptionsTest.java
index bfb13e1..ec6f02d 100644
--- a/src/test/java/org/apache/joshua/decoder/cky/NumTranslationOptionsTest.java
+++ b/src/test/java/org/apache/joshua/decoder/cky/NumTranslationOptionsTest.java
@@ -1,3 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
 package org.apache.joshua.decoder.cky;
 
 import static org.apache.joshua.decoder.cky.TestUtil.decodeList;

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b09081ae/src/test/java/org/apache/joshua/decoder/cky/TooLongTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/cky/TooLongTest.java b/src/test/java/org/apache/joshua/decoder/cky/TooLongTest.java
index b7c22f6..0d4f7ce 100644
--- a/src/test/java/org/apache/joshua/decoder/cky/TooLongTest.java
+++ b/src/test/java/org/apache/joshua/decoder/cky/TooLongTest.java
@@ -1,3 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
 package org.apache.joshua.decoder.cky;
 
 import static org.apache.joshua.decoder.cky.TestUtil.translate;


[09/15] incubator-joshua git commit: Moved regression test decoder/too-long to unit test. Removed resources as they are no longer needed.

Posted by mj...@apache.org.
Moved regression test decoder/too-long to unit test. Removed resources as they are no longer needed.


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/17ecec13
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/17ecec13
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/17ecec13

Branch: refs/heads/7
Commit: 17ecec13e942e08855074611df5c9d36cb5b57d7
Parents: 32f2753
Author: Michael A. Hedderich <mi...@users.noreply.github.com>
Authored: Thu Sep 15 12:53:58 2016 +0200
Committer: Michael A. Hedderich <mi...@users.noreply.github.com>
Committed: Thu Sep 15 14:21:05 2016 +0200

----------------------------------------------------------------------
 .../apache/joshua/decoder/cky/TooLongTest.java  | 68 ++++++++++++++++++++
 src/test/resources/decoder/too-long/output.gold |  4 --
 src/test/resources/decoder/too-long/test.sh     | 36 -----------
 3 files changed, 68 insertions(+), 40 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/17ecec13/src/test/java/org/apache/joshua/decoder/cky/TooLongTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/cky/TooLongTest.java b/src/test/java/org/apache/joshua/decoder/cky/TooLongTest.java
new file mode 100644
index 0000000..b7c22f6
--- /dev/null
+++ b/src/test/java/org/apache/joshua/decoder/cky/TooLongTest.java
@@ -0,0 +1,68 @@
+package org.apache.joshua.decoder.cky;
+
+import static org.apache.joshua.decoder.cky.TestUtil.translate;
+import static org.testng.Assert.assertEquals;
+
+import org.apache.joshua.decoder.Decoder;
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Ensures that the decoder trims inputs when and only when it should
+ */
+public class TooLongTest {
+  private static final String INPUT1 = "as kingfishers draw fire";
+  private static final String GOLD1 = "as kingfishers ||| tm_glue_0=2.000 ||| 0.000";
+  private static final String INPUT2 = "dragonflies draw flame";
+  private static final String GOLD2 = "dragonflies ||| tm_glue_0=1.000 ||| 0.000";
+  private static final String INPUT3 = "(((as tumbled over rim in roundy wells stones ring";
+  private static final String GOLD3 = "(((as tumbled over rim in roundy wells stones ||| tm_glue_0=8.000 ||| 0.000";
+  private static final String INPUT4 = "(((like each tucked string tells";
+  private static final String GOLD4 = "|||  ||| 0.000";
+
+  private JoshuaConfiguration joshuaConfig;
+  private Decoder decoder;
+
+  @Test
+  public void givenInput_whenMaxLen2_thenOutputCorrect() throws Exception {
+    setUp(2, false);
+    String output = translate(INPUT1, decoder, joshuaConfig);
+    assertEquals(output.trim(), GOLD1);
+  }
+
+  @Test
+  public void givenInput_whenMaxLen1AndLatticeDecoding_thenOutputCorrect() throws Exception {
+    setUp(1, true);
+    String output = translate(INPUT2, decoder, joshuaConfig);
+    assertEquals(output.trim(), GOLD2);
+  }
+
+  @Test
+  public void givenInput_whenMaxLen8_thenOutputCorrect() throws Exception {
+    setUp(8, false);
+    String output = translate(INPUT3, decoder, joshuaConfig);
+    assertEquals(output.trim(), GOLD3);
+  }
+
+  @Test
+  public void givenInput_whenMaxLen3AndLatticeDecoding_thenOutputCorrect() throws Exception {
+    setUp(3, true);
+    String output = translate(INPUT4, decoder, joshuaConfig);
+    assertEquals(output.trim(), GOLD4);
+  }
+
+  public void setUp(int maxLen, boolean latticeDecoding) throws Exception {
+    joshuaConfig = new JoshuaConfiguration();
+    joshuaConfig.outputFormat = "%s ||| %f ||| %c";
+    joshuaConfig.maxlen = maxLen;
+    joshuaConfig.lattice_decoding = latticeDecoding;
+    decoder = new Decoder(joshuaConfig, "");
+  }
+
+  @AfterMethod
+  public void tearDown() throws Exception {
+    decoder.cleanUp();
+    decoder = null;
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/17ecec13/src/test/resources/decoder/too-long/output.gold
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/too-long/output.gold b/src/test/resources/decoder/too-long/output.gold
deleted file mode 100644
index 8773765..0000000
--- a/src/test/resources/decoder/too-long/output.gold
+++ /dev/null
@@ -1,4 +0,0 @@
-0 ||| as kingfishers ||| tm_glue_0=2.000 ||| 0.000
-0 ||| dragonflies ||| tm_glue_0=1.000 ||| 0.000
-0 ||| (((as tumbled over rim in roundy wells stones ||| tm_glue_0=8.000 ||| 0.000
-0 |||  |||  ||| 0.000

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/17ecec13/src/test/resources/decoder/too-long/test.sh
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/too-long/test.sh b/src/test/resources/decoder/too-long/test.sh
deleted file mode 100755
index 9491fd7..0000000
--- a/src/test/resources/decoder/too-long/test.sh
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-set -u
-
-# Ensures that the decoder trims inputs when and only when it should
-
-(
-echo as kingfishers draw fire | $JOSHUA/bin/joshua -maxlen 2
-echo dragonflies draw flame | $JOSHUA/bin/joshua -maxlen 1 -lattice-decoding
-echo "(((as tumbled over rim in roundy wells stones ring" | $JOSHUA/bin/joshua -maxlen 8
-echo "(((like each tucked string tells" | $JOSHUA/bin/joshua -maxlen 3 -lattice-decoding
-) > output 2> log
-
-diff -u output output.gold > diff
-
-if [ $? -eq 0 ]; then
-    rm -f log output diff
-    exit 0
-else
-    exit 1
-fi


[02/15] incubator-joshua git commit: Moved regression test decoder/phrase/unique-hypotheses to unit test. Replaced broken soft-links with actual files and cleaned up the directory.

Posted by mj...@apache.org.
Moved regression test decoder/phrase/unique-hypotheses to unit test. Replaced broken soft-links with actual files and cleaned up the directory.


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/b429cc7d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/b429cc7d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/b429cc7d

Branch: refs/heads/7
Commit: b429cc7d8802a67c0eb948339984d5c7f5bef24e
Parents: d9bb3e5
Author: Michael A. Hedderich <mi...@users.noreply.github.com>
Authored: Thu Sep 15 14:09:51 2016 +0200
Committer: Michael A. Hedderich <mi...@users.noreply.github.com>
Committed: Thu Sep 15 14:21:05 2016 +0200

----------------------------------------------------------------------
 .../decoder/cky/UniqueHypothesesTest.java       |  72 +++++
 .../decoder/phrase/unique-hypotheses/README     |   1 -
 .../decoder/phrase/unique-hypotheses/corpus.es  |   1 -
 .../phrase/unique-hypotheses/joshua.config      |   4 +-
 .../decoder/phrase/unique-hypotheses/lm.1.gz    | Bin 17 -> 2235 bytes
 .../phrase/unique-hypotheses/output.gold        | 300 -------------------
 .../decoder/phrase/unique-hypotheses/rules.1.gz | Bin 20 -> 2998042 bytes
 .../decoder/phrase/unique-hypotheses/test.sh    |  32 --
 8 files changed, 74 insertions(+), 336 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b429cc7d/src/test/java/org/apache/joshua/decoder/cky/UniqueHypothesesTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/cky/UniqueHypothesesTest.java b/src/test/java/org/apache/joshua/decoder/cky/UniqueHypothesesTest.java
new file mode 100644
index 0000000..8dc0e56
--- /dev/null
+++ b/src/test/java/org/apache/joshua/decoder/cky/UniqueHypothesesTest.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.cky;
+
+import static org.apache.joshua.decoder.cky.TestUtil.decodeList;
+import static org.testng.Assert.assertEquals;
+
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.joshua.decoder.Decoder;
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.util.io.KenLmTestUtil;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Ensures that derivations are unique for the phrase-based decoder.
+ */
+public class UniqueHypothesesTest {
+
+  public static final String INPUT = "una estrategia republicana para obstaculizar la reelección de Obama";
+
+  private JoshuaConfiguration joshuaConfig = null;
+  private Decoder decoder = null;
+
+  @Test
+  public void givenInputSentence_whenDecodingWithUniqueHypotheses_thenAllHypothesesUnique()
+      throws Exception {
+    configureDecoder("src/test/resources/decoder/phrase/unique-hypotheses/joshua.config");
+    List<String> decodedStrings = decodeList(Arrays.asList(new String[] { INPUT }), decoder,
+        joshuaConfig);
+
+    assertEquals(decodedStrings.size(), 300);
+
+    // if all strings are unique then the set should have the same size as the
+    // list
+    Set<String> uniqueDecodedStrings = new HashSet<>(decodedStrings);
+    assertEquals(decodedStrings.size(), uniqueDecodedStrings.size());
+  }
+
+  public void configureDecoder(String pathToConfig) throws Exception {
+    joshuaConfig = new JoshuaConfiguration();
+    joshuaConfig.readConfigFile(pathToConfig);
+    KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
+  }
+
+  @AfterMethod
+  public void tearDown() throws Exception {
+    decoder.cleanUp();
+    decoder = null;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b429cc7d/src/test/resources/decoder/phrase/unique-hypotheses/README
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/unique-hypotheses/README b/src/test/resources/decoder/phrase/unique-hypotheses/README
deleted file mode 100644
index 753f57e..0000000
--- a/src/test/resources/decoder/phrase/unique-hypotheses/README
+++ /dev/null
@@ -1 +0,0 @@
-Ensures that derivations are unique for the phrase-based decoder.

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b429cc7d/src/test/resources/decoder/phrase/unique-hypotheses/corpus.es
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/unique-hypotheses/corpus.es b/src/test/resources/decoder/phrase/unique-hypotheses/corpus.es
deleted file mode 120000
index 11373db..0000000
--- a/src/test/resources/decoder/phrase/unique-hypotheses/corpus.es
+++ /dev/null
@@ -1 +0,0 @@
-../decode/corpus.es
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b429cc7d/src/test/resources/decoder/phrase/unique-hypotheses/joshua.config
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/unique-hypotheses/joshua.config b/src/test/resources/decoder/phrase/unique-hypotheses/joshua.config
index c35b267..7cef08e 100644
--- a/src/test/resources/decoder/phrase/unique-hypotheses/joshua.config
+++ b/src/test/resources/decoder/phrase/unique-hypotheses/joshua.config
@@ -1,7 +1,7 @@
-tm = moses pt 0 rules.1.gz
+tm = moses pt 0 src/test/resources/decoder/phrase/unique-hypotheses/rules.1.gz
 default-non-terminal = X
 goal-symbol = GOAL
-lm = kenlm 5 true false 100 lm.1.gz
+lm = kenlm 5 true false 100 src/test/resources/decoder/phrase/unique-hypotheses/lm.1.gz
 mark-oovs = false
 pop-limit = 100
 top-n = 300

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b429cc7d/src/test/resources/decoder/phrase/unique-hypotheses/lm.1.gz
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/unique-hypotheses/lm.1.gz b/src/test/resources/decoder/phrase/unique-hypotheses/lm.1.gz
deleted file mode 120000
index 3655f03..0000000
--- a/src/test/resources/decoder/phrase/unique-hypotheses/lm.1.gz
+++ /dev/null
@@ -1 +0,0 @@
-../decode/lm.1.gz
\ No newline at end of file
diff --git a/src/test/resources/decoder/phrase/unique-hypotheses/lm.1.gz b/src/test/resources/decoder/phrase/unique-hypotheses/lm.1.gz
new file mode 100644
index 0000000..3f4c453
Binary files /dev/null and b/src/test/resources/decoder/phrase/unique-hypotheses/lm.1.gz differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b429cc7d/src/test/resources/decoder/phrase/unique-hypotheses/output.gold
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/unique-hypotheses/output.gold b/src/test/resources/decoder/phrase/unique-hypotheses/output.gold
deleted file mode 100644
index 0e5fb98..0000000
--- a/src/test/resources/decoder/phrase/unique-hypotheses/output.gold
+++ /dev/null
@@ -1,300 +0,0 @@
-a strategy republican for hinder the re @-@ election of Obama
-a strategy republican for hinder the reelection of Obama
-a strategy republican for obstruct the re @-@ election of Obama
-a strategy republican for obstruct the reelection of Obama
-a strategy republican for hamper the re @-@ election of Obama
-a strategy republican for hamper the reelection of Obama
-a strategy republican to obstruct the re @-@ election of Obama
-a strategy republican for hinder reelection of Obama
-a strategy republican to obstruct the reelection of Obama
-a strategy republican to hinder the re @-@ election of Obama
-a strategy republican to hinder the reelection of Obama
-a strategy republican for obstruct reelection of Obama
-a strategy republican for hinder the reelection Obama
-a strategy republican for hamper reelection of Obama
-a strategy republican for hindering the re @-@ election of Obama
-a strategy republican for obstruct the reelection Obama
-a strategy republican for hindering the reelection of Obama
-an strategy republican for hinder the re @-@ election of Obama
-a strategy republican for hamper the reelection Obama
-an strategy republican for hinder the reelection of Obama
-a strategy republican for obstructing the re @-@ election of Obama
-a strategy republican to hinder reelection of Obama
-a strategy republican hinder for the re @-@ election of Obama
-a strategy republican for obstructing the reelection of Obama
-an strategy republican for obstruct the re @-@ election of Obama
-a strategy republican for the hinder reelection of Obama
-a strategy republican hinder for the reelection of Obama
-a strategy republican for hinder reelection the of Obama
-an strategy republican for obstruct the reelection of Obama
-a strategy republican for hinder reelection Obama
-a strategy republican to obstruct the reelection Obama
-strategy a republican for hinder the re @-@ election of Obama
-a strategy republican obstruct for the re @-@ election of Obama
-a strategy republican for hinder the reelection Obama of
-a strategy republican to hinder the reelection Obama
-a strategy republican for hinder reelection of the Obama
-strategy a republican for hinder the reelection of Obama
-a strategy republican obstruct for the reelection of Obama
-an strategy republican for hamper the re @-@ election of Obama
-a strategy republican for obstruct reelection the of Obama
-a strategy republican for hinder the of reelection Obama
-a strategy republican to obstruct reelection of Obama
-a strategy republican for obstruct reelection Obama
-an strategy republican for hamper the reelection of Obama
-strategy a republican for obstruct the re @-@ election of Obama
-a strategy republican for obstruct the reelection Obama of
-a strategy republican hamper for the re @-@ election of Obama
-a strategy republican for obstruct reelection of the Obama
-strategy a republican for obstruct the reelection of Obama
-a strategy republican for obstruct the of reelection Obama
-a strategy republican for hinder the of Obama reelection
-a strategy republican for hindering reelection of Obama
-a strategy republican hamper for the reelection of Obama
-a strategy republican for hamper reelection the of Obama
-an strategy republican to obstruct the re @-@ election of Obama
-a strategy republican for hamper reelection Obama
-a strategy republican for hinder of the reelection Obama
-an strategy republican for hinder reelection of Obama
-strategy a republican for hamper the re @-@ election of Obama
-an strategy republican to obstruct the reelection of Obama
-an strategy republican to hinder the re @-@ election of Obama
-a strategy republican for hamper the reelection Obama of
-a strategy republican for hindering the reelection Obama
-a strategy republican for hinder the re @-@ election Obama
-a strategy republican for obstruct the of Obama reelection
-a strategy republican for hamper reelection of the Obama
-strategy a republican for hamper the reelection of Obama
-an strategy republican to hinder the reelection of Obama
-one strategy republican for hinder the re @-@ election of Obama
-a strategy republican for obstructing reelection of Obama
-a strategy republican for hamper the of reelection Obama
-a strategy republican for hinder the reelection from Obama
-a strategy republican for hinder reelection the Obama
-one strategy republican for hinder the reelection of Obama
-a strategy republican for obstruct of the reelection Obama
-a strategy republican for hinder the Obama reelection of
-an strategy republican for obstruct reelection of Obama
-a strategy republican for hinder reelection of Obama the
-a strategy republican for the reelection hinder of Obama
-a strategy republican for obstruct the re @-@ election Obama
-an strategy republican for hinder the reelection Obama
-a strategy republican to hinder reelection the of Obama
-a strategy republican for hinder &apos;s reelection Obama
-strategy a republican to obstruct the re @-@ election of Obama
-one strategy republican for obstruct the re @-@ election of Obama
-a strategy republican for obstruct the reelection from Obama
-a strategy republican for hamper the of Obama reelection
-a strategy republican for hinder reelection Obama of
-a strategy republican to hinder reelection Obama
-strategy a republican for hinder reelection of Obama
-a strategy republican to obstruct the reelection Obama of
-a strategy republican for obstruct reelection the Obama
-strategy a republican to obstruct the reelection of Obama
-one strategy republican for obstruct the reelection of Obama
-strategy a republican to hinder the re @-@ election of Obama
-a strategy republican for obstructing the reelection Obama
-a strategy republican for obstruct the Obama reelection of
-a strategy republican for obstruct reelection of Obama the
-a strategy republican to obstruct the of reelection Obama
-a strategy republican to hinder the reelection Obama of
-a strategy republican for the hinder reelection Obama
-a strategy republican hinder for the reelection Obama
-a strategy republican for hamper of the reelection Obama
-a strategy republican to hinder reelection of the Obama
-an strategy republican for hamper reelection of Obama
-a strategy republican for the reelection obstruct of Obama
-strategy a republican to hinder the reelection of Obama
-an strategy republican for hindering the re @-@ election of Obama
-an strategy republican for obstruct the reelection Obama
-a strategy republican for hinder the reelection Obama &apos;s
-a strategy republican to hinder the of reelection Obama
-a strategy republican for obstruct &apos;s reelection Obama
-a strategy republican for hinder the reelection for Obama
-a strategy republican for hamper the re @-@ election Obama
-a strategy republican for obstruct reelection Obama of
-an strategy republican for hindering the reelection of Obama
-strategy a republican for obstruct reelection of Obama
-one strategy republican for hamper the re @-@ election of Obama
-a strategy republican for hamper the reelection from Obama
-a strategy republican for hinder the Obama of reelection
-a strategy republican for hinder the Obama reelection
-a strategy republican for the re @-@ election of Obama hinder
-a strategy republican to obstruct reelection the of Obama
-a strategy republican for hinder of Obama the reelection
-a strategy republican to obstruct the of Obama reelection
-strategy a republican for hinder the reelection Obama
-a strategy republican for hamper reelection the Obama
-a strategy republican obstruct for the reelection Obama
-one strategy republican for hamper the reelection of Obama
-a strategy republican for hamper the Obama reelection of
-a strategy republican for hamper reelection of Obama the
-a strategy republican for the reelection of Obama hinder
-a strategy republican to obstruct reelection Obama
-a strategy republican for obstruct the reelection Obama &apos;s
-a strategy republican to hinder the of Obama reelection
-a strategy republican for obstruct the reelection for Obama
-an strategy republican for hamper the reelection Obama
-a strategy republican for hinder reelection Obama of the
-a strategy republican for hamper &apos;s reelection Obama
-a strategy republican to obstruct reelection of the Obama
-a strategy republican for obstruct the Obama of reelection
-a strategy republican for hamper reelection Obama of
-a strategy republican for obstruct the Obama reelection
-a strategy republican for hinder &apos;s re @-@ election Obama
-a strategy republican for the re @-@ election of Obama obstruct
-an strategy republican for obstructing the re @-@ election of Obama
-strategy a republican for hamper reelection of Obama
-a strategy republican to hinder of the reelection Obama
-a strategy republican for obstruct of Obama the reelection
-a strategy republican to obstruct the re @-@ election Obama
-strategy a republican for hindering the re @-@ election of Obama
-strategy a republican for obstruct the reelection Obama
-an strategy republican to hinder reelection of Obama
-a strategy republican for hindering reelection the of Obama
-a strategy republican for hinder reelection from Obama
-an strategy republican hinder for the re @-@ election of Obama
-strategy an republican for hinder the re @-@ election of Obama
-one strategy republican to obstruct the re @-@ election of Obama
-a strategy republican for the reelection of Obama obstruct
-a strategy republican to obstruct the reelection from Obama
-an strategy republican for obstructing the reelection of Obama
-a strategy republican for hindering the reelection Obama of
-a strategy republican for hinder the re @-@ election Obama of
-a strategy republican for hindering reelection Obama
-a strategy republican hamper for the reelection Obama
-strategy a republican for hindering the reelection of Obama
-a strategy republican to hinder the re @-@ election Obama
-one strategy republican for hinder reelection of Obama
-an strategy republican for the hinder reelection of Obama
-an strategy republican hinder for the reelection of Obama
-strategy an republican for hinder the reelection of Obama
-a strategy republican for hamper the reelection Obama &apos;s
-a strategy republican for hindering the of reelection Obama
-one strategy republican to obstruct the reelection of Obama
-one strategy republican to hinder the re @-@ election of Obama
-an strategy republican for hinder reelection the of Obama
-a strategy republican to hinder the reelection from Obama
-a strategy republican for hinder re @-@ election of the Obama
-a strategy republican to obstruct the Obama reelection of
-a strategy republican for obstruct reelection Obama of the
-a strategy republican for hamper the reelection for Obama
-a strategy republican for reelection hinder of Obama
-a strategy republican for hinder reelection the Obama of
-a strategy republican for hindering reelection of the Obama
-a strategy republican for obstruct &apos;s re @-@ election Obama
-an strategy republican for hinder reelection Obama
-a strategy republican to hinder reelection the Obama
-a strategy republican hinder the re @-@ election of Obama for
-one strategy republican to hinder the reelection of Obama
-a strategy republican for hinder of reelection the Obama
-a strategy republican to hinder the Obama reelection of
-an strategy republican to obstruct the reelection Obama
-a strategy republican to hinder reelection of Obama the
-a strategy republican for hamper the Obama of reelection
-a strategy republican for hinder the of re @-@ election Obama
-a strategy republican for hamper the Obama reelection
-a strategy republican for obstruct reelection from Obama
-an strategy republican obstruct for the re @-@ election of Obama
-a strategy republican for the re @-@ election of Obama hamper
-strategy an republican for obstruct the re @-@ election of Obama
-a strategy republican for obstructing reelection the of Obama
-a strategy republican for hinder of Obama reelection the
-a strategy republican for reelection of Obama hinder the
-a strategy republican for hamper of Obama the reelection
-a strategy republican hinder the reelection of Obama for
-strategy a republican for hamper the reelection Obama
-a strategy republican for obstruct the re @-@ election Obama of
-an strategy republican for hinder the reelection Obama of
-an strategy republican to hinder the reelection Obama
-an strategy republican for hinder reelection of the Obama
-one strategy republican for obstruct reelection of Obama
-a strategy republican for obstructing reelection Obama
-an strategy republican obstruct for the reelection of Obama
-a strategy republican for the reelection of Obama hamper
-strategy an republican for obstruct the reelection of Obama
-a strategy republican to hinder &apos;s reelection Obama
-a strategy republican to obstruct of the reelection Obama
-a strategy republican for hindering the of Obama reelection
-an strategy republican for obstruct reelection the of Obama
-a strategy republican for obstruct re @-@ election of the Obama
-an strategy republican for hinder the of reelection Obama
-strategy a republican for obstructing the re @-@ election of Obama
-an strategy republican to obstruct reelection of Obama
-a strategy republican to hinder reelection Obama of
-a strategy republican for reelection obstruct of Obama
-strategy a republican to hinder reelection of Obama
-one strategy republican for hinder the reelection Obama
-a strategy republican for obstruct reelection the Obama of
-a strategy republican to the reelection hinder of Obama
-strategy a republican hinder for the re @-@ election of Obama
-an strategy republican for obstruct reelection Obama
-a strategy republican for obstructing the reelection Obama of
-a strategy republican for hinder reelection Obama the
-a strategy republican for hinder reelection Obama &apos;s
-a strategy republican obstruct the re @-@ election of Obama for
-a strategy republican for hamper reelection Obama of the
-a strategy republican for reelection of hinder the Obama
-a strategy republican for obstructing reelection of the Obama
-a strategy republican for obstruct of reelection the Obama
-strategy a republican for obstructing the reelection of Obama
-a strategy republican to obstruct the reelection Obama &apos;s
-a strategy republican for obstruct the of re @-@ election Obama
-a strategy republican for hinder reelection for Obama
-a strategy republican for hamper &apos;s re @-@ election Obama
-a strategy republican for the hinder reelection Obama of
-a strategy republican hinder for the reelection Obama of
-a strategy republican for hinder of the re @-@ election Obama
-a strategy republican for the reelection hinder Obama
-a strategy republican to obstruct the reelection for Obama
-a strategy republican for hinder reelection Obama the of
-a strategy republican for obstructing the of reelection Obama
-a strategy republican for obstruct of Obama reelection the
-strategy a republican for the hinder reelection of Obama
-strategy a republican hinder for the reelection of Obama
-a strategy republican obstruct the reelection of Obama for
-a strategy republican for the reelection of hinder Obama
-an strategy republican for obstruct the reelection Obama of
-a strategy republican for hamper reelection from Obama
-an strategy republican hamper for the re @-@ election of Obama
-strategy a republican for hinder reelection the of Obama
-a strategy republican to hinder the reelection Obama &apos;s
-an strategy republican for obstruct reelection of the Obama
-strategy an republican for hamper the re @-@ election of Obama
-a strategy republican hinder for the of reelection Obama
-a strategy republican to obstruct reelection the Obama
-a strategy republican to obstruct the Obama of reelection
-a strategy republican to hinder the reelection for Obama
-a strategy republican for hamper the re @-@ election Obama of
-a strategy republican for hindering the re @-@ election Obama
-a strategy republican to obstruct the Obama reelection
-an strategy republican for obstruct the of reelection Obama
-a strategy republican for hindering of the reelection Obama
-a strategy republican to obstruct reelection of Obama the
-strategy a republican for hinder reelection Obama
-an strategy republican for hinder the of Obama reelection
-one strategy republican for hamper reelection of Obama
-an strategy republican for hindering reelection of Obama
-an strategy republican hamper for the reelection of Obama
-one strategy republican for hindering the re @-@ election of Obama
-strategy a republican to obstruct the reelection Obama
-strategy an republican for hamper the reelection of Obama
-one strategy republican for obstruct the reelection Obama
-a strategy republican for hindering the reelection from Obama
-a strategy republican to the reelection obstruct of Obama
-a strategy republican for reelection of Obama hinder
-a strategy republican for hinder of Obama reelection
-an strategy republican for hamper reelection the of Obama
-strategy a republican obstruct for the re @-@ election of Obama
-a strategy republican for hamper re @-@ election of the Obama
-a strategy republican for obstruct reelection Obama the
-a strategy republican for obstruct reelection Obama &apos;s
-a strategy republican to hinder the Obama of reelection
-a strategy republican to hinder the Obama reelection
-a strategy for hinder the re @-@ election of Obama republican
-a strategy republican for obstruct reelection for Obama
-strategy a republican for hinder the reelection Obama of
-a strategy republican for hamper reelection the Obama of
-a strategy republican obstruct for the reelection Obama of
-a strategy republican for obstruct of the re @-@ election Obama
-one strategy republican for hindering the reelection of Obama

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b429cc7d/src/test/resources/decoder/phrase/unique-hypotheses/rules.1.gz
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/unique-hypotheses/rules.1.gz b/src/test/resources/decoder/phrase/unique-hypotheses/rules.1.gz
deleted file mode 120000
index a6183d9..0000000
--- a/src/test/resources/decoder/phrase/unique-hypotheses/rules.1.gz
+++ /dev/null
@@ -1 +0,0 @@
-../decode/rules.1.gz
\ No newline at end of file
diff --git a/src/test/resources/decoder/phrase/unique-hypotheses/rules.1.gz b/src/test/resources/decoder/phrase/unique-hypotheses/rules.1.gz
new file mode 100644
index 0000000..14466e9
Binary files /dev/null and b/src/test/resources/decoder/phrase/unique-hypotheses/rules.1.gz differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b429cc7d/src/test/resources/decoder/phrase/unique-hypotheses/test.sh
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/unique-hypotheses/test.sh b/src/test/resources/decoder/phrase/unique-hypotheses/test.sh
deleted file mode 100755
index 6b25957..0000000
--- a/src/test/resources/decoder/phrase/unique-hypotheses/test.sh
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-set -u
-
-cat corpus.es | $JOSHUA/bin/joshua-decoder -threads 1 -c joshua.config > output 2> log
-
-# Compare
-num=$(sort -u output | wc -l)
-
-if [ $num -eq 300 ]; then
-  rm -f output log
-  exit 0
-else
-  exit 1
-fi
-
-


[07/15] incubator-joshua git commit: Moved regression test decoder/source-annotations to unit test. Regenerated gold output and cleaned up the directory.

Posted by mj...@apache.org.
Moved regression test decoder/source-annotations to unit test. Regenerated gold output and cleaned up the directory.


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/32f27536
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/32f27536
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/32f27536

Branch: refs/heads/7
Commit: 32f27536978879ecfb7be69c820cdfa33e7fc7f5
Parents: d9f34aa
Author: Michael A. Hedderich <mi...@users.noreply.github.com>
Authored: Thu Sep 15 12:34:44 2016 +0200
Committer: Michael A. Hedderich <mi...@users.noreply.github.com>
Committed: Thu Sep 15 14:21:05 2016 +0200

----------------------------------------------------------------------
 .../decoder/cky/SourceAnnotationsTest.java      | 67 ++++++++++++++++++++
 .../decoder/source-annotations/joshua.config    |  8 +--
 .../decoder/source-annotations/test.sh          | 36 -----------
 3 files changed, 71 insertions(+), 40 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/32f27536/src/test/java/org/apache/joshua/decoder/cky/SourceAnnotationsTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/cky/SourceAnnotationsTest.java b/src/test/java/org/apache/joshua/decoder/cky/SourceAnnotationsTest.java
new file mode 100644
index 0000000..000ba7e
--- /dev/null
+++ b/src/test/java/org/apache/joshua/decoder/cky/SourceAnnotationsTest.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.cky;
+
+import static org.apache.joshua.decoder.cky.TestUtil.translate;
+import static org.testng.Assert.assertEquals;
+
+import org.apache.joshua.decoder.Decoder;
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.Test;
+
+public class SourceAnnotationsTest {
+
+  private static final String INPUT = "mis[tag=ADJ;num=PL;class=OOV] amigos me llaman";
+  private static final String GOLD_WITHOUT_ANNOTATIONS = "my friends call me ||| tm_pt_0=-3.000 tm_glue_0=3.000 lm_0=-11.974 OOVPenalty=0.000 WordPenalty=-2.606 ||| -7.650";
+  private static final String GOLD_WITH_ANNOTATIONS = "my friends call me ||| tm_pt_0=-3.000 tm_glue_0=3.000 lm_0=-111.513 OOVPenalty=0.000 WordPenalty=-2.606 ||| -107.189";
+
+  private static final String JOSHUA_CONFIG_PATH = "src/test/resources/decoder/source-annotations/joshua.config";
+
+  private JoshuaConfiguration joshuaConfig;
+  private Decoder decoder;
+
+  @Test
+  public void givenInput_whenNotUsingSourceAnnotations_thenOutputCorrect() throws Exception {
+    setUp(false);
+    String output = translate(INPUT, decoder, joshuaConfig);
+    assertEquals(output.trim(), GOLD_WITHOUT_ANNOTATIONS);
+  }
+
+  @Test
+  public void givenInput_whenUsingSourceAnnotations_thenOutputCorrect() throws Exception {
+    setUp(true);
+    String output = translate(INPUT, decoder, joshuaConfig);
+    assertEquals(output.trim(), GOLD_WITH_ANNOTATIONS);
+  }
+
+  public void setUp(boolean sourceAnnotations) throws Exception {
+    joshuaConfig = new JoshuaConfiguration();
+    joshuaConfig.readConfigFile(JOSHUA_CONFIG_PATH);
+    joshuaConfig.source_annotations = sourceAnnotations;
+    decoder = new Decoder(joshuaConfig, "");
+  }
+
+  @AfterMethod
+  public void tearDown() throws Exception {
+    decoder.cleanUp();
+    decoder = null;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/32f27536/src/test/resources/decoder/source-annotations/joshua.config
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/source-annotations/joshua.config b/src/test/resources/decoder/source-annotations/joshua.config
index ffd2f96..788505e 100644
--- a/src/test/resources/decoder/source-annotations/joshua.config
+++ b/src/test/resources/decoder/source-annotations/joshua.config
@@ -33,8 +33,8 @@
 # main distinction from the regular grammar is that the span limit
 # does not apply to it.  
 
-tm = thrax pt 20 grammar
-tm = thrax glue -1 grammar.glue
+tm = thrax pt 20 src/test/resources/decoder/source-annotations/grammar
+tm = thrax glue -1 src/test/resources/decoder/source-annotations/grammar.glue
 
 # This symbol is used over unknown words in the source language
 
@@ -62,7 +62,7 @@ goal-symbol = GOAL
 # - LM file: the location of the language model file
 # You also need to add a weight for each language model below.
 
-lm = kenlm 5 false false 100 lm.kenlm
+lm = kenlm 5 false false 100 src/test/resources/decoder/source-annotations/lm.kenlm
 
 # The suffix _OOV is appended to unknown source-language words if this
 # is set to true.
@@ -91,7 +91,7 @@ use-unique-nbest = true
 # %f: the feature string
 # %c: the model cost
 
-output-format = %i ||| %s ||| %f ||| %c
+output-format = %s ||| %f ||| %c
 
 # When printing the trees (%t in 'output-format'), this controls whether the alignments
 # are also printed.

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/32f27536/src/test/resources/decoder/source-annotations/test.sh
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/source-annotations/test.sh b/src/test/resources/decoder/source-annotations/test.sh
deleted file mode 100755
index e352af3..0000000
--- a/src/test/resources/decoder/source-annotations/test.sh
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# Tests the language model code that uses the source-side projection instead of the word itself.
-# When translating a word, if there is a source-side annotation of the label "class", and
-# -source-annotations was added to the invocation, the LM will use that source-side class instead
-# of the translated word.
-
-set -u
-
-cat input.txt | $JOSHUA/bin/joshua-decoder -threads 1 -c joshua.config > output 2> log
-cat input.txt | $JOSHUA/bin/joshua-decoder -threads 1 -c joshua.config -source-annotations >> output 2>> log
-
-# Compare
-diff -u output output.gold > diff
-
-if [ $? -eq 0 ]; then
-  rm -f diff output log output.scores
-  exit 0
-else
-  exit 1
-fi


[04/15] incubator-joshua git commit: Moved regression test decoder/num_translation_options to unit test. Regenerated and split gold output into several files to be used by different test methods.

Posted by mj...@apache.org.
Moved regression test decoder/num_translation_options to unit test. Regenerated and split gold output into several files to be used by different test methods.


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/9cbc045f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/9cbc045f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/9cbc045f

Branch: refs/heads/7
Commit: 9cbc045fe8b058b3fac028180ad99390b9fbf709
Parents: 3ff30fe
Author: Michael A. Hedderich <mi...@users.noreply.github.com>
Authored: Wed Sep 14 19:45:50 2016 +0200
Committer: Michael A. Hedderich <mi...@users.noreply.github.com>
Committed: Thu Sep 15 14:21:05 2016 +0200

----------------------------------------------------------------------
 .../decoder/cky/NumTranslationOptionsTest.java  | 88 ++++++++++++++++++++
 .../joshua-packed.config                        | 30 +++++++
 .../num_translation_options/joshua.config       |  6 +-
 .../joshua.config.packed                        | 30 -------
 .../output-no-dot-chart.gold                    |  4 +
 .../num_translation_options/output-packed.gold  |  4 +
 .../decoder/num_translation_options/output.gold |  8 --
 .../decoder/num_translation_options/test.sh     | 17 ----
 8 files changed, 129 insertions(+), 58 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9cbc045f/src/test/java/org/apache/joshua/decoder/cky/NumTranslationOptionsTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/cky/NumTranslationOptionsTest.java b/src/test/java/org/apache/joshua/decoder/cky/NumTranslationOptionsTest.java
new file mode 100644
index 0000000..bfb13e1
--- /dev/null
+++ b/src/test/java/org/apache/joshua/decoder/cky/NumTranslationOptionsTest.java
@@ -0,0 +1,88 @@
+package org.apache.joshua.decoder.cky;
+
+import static org.apache.joshua.decoder.cky.TestUtil.decodeList;
+import static org.apache.joshua.decoder.cky.TestUtil.loadStringsFromFile;
+import static org.testng.Assert.assertEquals;
+
+import java.util.List;
+
+import org.apache.joshua.decoder.Decoder;
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.util.io.KenLmTestUtil;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Tests that num_translation_options is enforced for hierarchical decoders
+ */
+public class NumTranslationOptionsTest {
+  private JoshuaConfiguration joshuaConfig;
+  private Decoder decoder;
+
+  @AfterMethod
+  public void tearDown() throws Exception {
+    if (decoder != null) {
+      decoder.cleanUp();
+      decoder = null;
+    }
+  }
+
+  @Test
+  public void givenInput_whenDecodingWithNumTranslationOptions3_thenScoreAndTranslationCorrect()
+      throws Exception {
+    // Given
+    List<String> inputStrings = loadStringsFromFile(
+        "src/test/resources/decoder/num_translation_options/input");
+
+    // When
+    configureDecoder("src/test/resources/decoder/num_translation_options/joshua.config", true);
+    List<String> decodedStrings = decodeList(inputStrings, decoder, joshuaConfig);
+
+    // Then
+    List<String> goldStrings = loadStringsFromFile(
+        "src/test/resources/decoder/num_translation_options/output.gold");
+    assertEquals(decodedStrings, goldStrings);
+  }
+
+  @Test
+  public void givenInput_whenDecodingWithNumTranslationOptions3AndNoDotChart_thenScoreAndTranslationCorrect()
+      throws Exception {
+    // Given
+    List<String> inputStrings = loadStringsFromFile(
+        "src/test/resources/decoder/num_translation_options/input");
+
+    // When
+    configureDecoder("src/test/resources/decoder/num_translation_options/joshua.config", false);
+    List<String> decodedStrings = decodeList(inputStrings, decoder, joshuaConfig);
+
+    // Then
+    List<String> goldStrings = loadStringsFromFile(
+        "src/test/resources/decoder/num_translation_options/output-no-dot-chart.gold");
+    assertEquals(decodedStrings, goldStrings);
+  }
+
+  @Test
+  public void givenInput_whenDecodingWithNumTranslationOptions3AndPacked_thenScoreAndTranslationCorrect()
+      throws Exception {
+    // Given
+    List<String> inputStrings = loadStringsFromFile(
+        "src/test/resources/decoder/num_translation_options/input");
+
+    // When
+    configureDecoder("src/test/resources/decoder/num_translation_options/joshua-packed.config",
+        true);
+    List<String> decodedStrings = decodeList(inputStrings, decoder, joshuaConfig);
+
+    // Then
+    List<String> goldStrings = loadStringsFromFile(
+        "src/test/resources/decoder/num_translation_options/output-packed.gold");
+    assertEquals(decodedStrings, goldStrings);
+  }
+
+  public void configureDecoder(String pathToConfig, boolean useDotChart) throws Exception {
+    joshuaConfig = new JoshuaConfiguration();
+    joshuaConfig.readConfigFile(pathToConfig);
+    joshuaConfig.use_dot_chart = useDotChart;
+    KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9cbc045f/src/test/resources/decoder/num_translation_options/joshua-packed.config
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/num_translation_options/joshua-packed.config b/src/test/resources/decoder/num_translation_options/joshua-packed.config
new file mode 100644
index 0000000..681c4d9
--- /dev/null
+++ b/src/test/resources/decoder/num_translation_options/joshua-packed.config
@@ -0,0 +1,30 @@
+num_translation_options = 3
+
+lm = kenlm 5 false false 100 src/test/resources/decoder/num_translation_options/lm.gz
+
+tm = thrax pt 12 src/test/resources/decoder/num_translation_options/grammar.packed
+tm = thrax glue -1 src/test/resources/decoder/num_translation_options/glue-grammar
+
+mark_oovs = false
+
+default-non-terminal = X
+goalSymbol = GOAL
+
+#pruning config
+pop-limit = 100
+
+output-format = %c ||| %s ||| %f
+
+#nbest config
+use_unique_nbest = true
+top_n = 5
+
+feature-function = WordPenalty
+feature-function = OOVPenalty
+
+lm_0 1.2373676802179452
+
+tm_pt_0 1
+tm_glue_0 1
+WordPenalty 1
+OOVPenalty 1.0

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9cbc045f/src/test/resources/decoder/num_translation_options/joshua.config
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/num_translation_options/joshua.config b/src/test/resources/decoder/num_translation_options/joshua.config
index e37855c..88b0290 100644
--- a/src/test/resources/decoder/num_translation_options/joshua.config
+++ b/src/test/resources/decoder/num_translation_options/joshua.config
@@ -1,9 +1,9 @@
 num_translation_options = 3
 
-lm = kenlm 5 false false 100 lm.gz
+lm = kenlm 5 false false 100 src/test/resources/decoder/num_translation_options/lm.gz
 
-tm = thrax pt 12 grammar.gz
-tm = thrax glue -1 glue-grammar
+tm = thrax pt 12 src/test/resources/decoder/num_translation_options/grammar.gz
+tm = thrax glue -1 src/test/resources/decoder/num_translation_options/glue-grammar
 
 mark_oovs = false
 

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9cbc045f/src/test/resources/decoder/num_translation_options/joshua.config.packed
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/num_translation_options/joshua.config.packed b/src/test/resources/decoder/num_translation_options/joshua.config.packed
deleted file mode 100644
index 2d52db2..0000000
--- a/src/test/resources/decoder/num_translation_options/joshua.config.packed
+++ /dev/null
@@ -1,30 +0,0 @@
-num_translation_options = 3
-
-lm = kenlm 5 false false 100 lm.gz
-
-tm = thrax pt 12 grammar.packed
-tm = thrax glue -1 glue-grammar
-
-mark_oovs = false
-
-default-non-terminal = X
-goalSymbol = GOAL
-
-#pruning config
-pop-limit = 100
-
-output-format = %c ||| %s ||| %f
-
-#nbest config
-use_unique_nbest = true
-top_n = 5
-
-feature-function = WordPenalty
-feature-function = OOVPenalty
-
-lm_0 1.2373676802179452
-
-tm_pt_0 1
-tm_glue_0 1
-WordPenalty 1
-OOVPenalty 1.0

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9cbc045f/src/test/resources/decoder/num_translation_options/output-no-dot-chart.gold
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/num_translation_options/output-no-dot-chart.gold b/src/test/resources/decoder/num_translation_options/output-no-dot-chart.gold
new file mode 100644
index 0000000..686122c
--- /dev/null
+++ b/src/test/resources/decoder/num_translation_options/output-no-dot-chart.gold
@@ -0,0 +1,4 @@
+-19.196 ||| i like taco bell ||| tm_pt_0=4.000 tm_glue_0=1.000 lm_0=-17.449 WordPenalty=-2.606 OOVPenalty=0.000
+-19.733 ||| i love taco bell ||| tm_pt_0=5.000 tm_glue_0=1.000 lm_0=-18.690 WordPenalty=-2.606 OOVPenalty=0.000
+-22.883 ||| i appreciate taco bell ||| tm_pt_0=3.000 tm_glue_0=1.000 lm_0=-19.620 WordPenalty=-2.606 OOVPenalty=0.000
+-424.954 ||| yo quiero taco bell ||| tm_pt_0=0.000 tm_glue_0=4.000 lm_0=-21.293 WordPenalty=-2.606 OOVPenalty=-400.000

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9cbc045f/src/test/resources/decoder/num_translation_options/output-packed.gold
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/num_translation_options/output-packed.gold b/src/test/resources/decoder/num_translation_options/output-packed.gold
new file mode 100644
index 0000000..686122c
--- /dev/null
+++ b/src/test/resources/decoder/num_translation_options/output-packed.gold
@@ -0,0 +1,4 @@
+-19.196 ||| i like taco bell ||| tm_pt_0=4.000 tm_glue_0=1.000 lm_0=-17.449 WordPenalty=-2.606 OOVPenalty=0.000
+-19.733 ||| i love taco bell ||| tm_pt_0=5.000 tm_glue_0=1.000 lm_0=-18.690 WordPenalty=-2.606 OOVPenalty=0.000
+-22.883 ||| i appreciate taco bell ||| tm_pt_0=3.000 tm_glue_0=1.000 lm_0=-19.620 WordPenalty=-2.606 OOVPenalty=0.000
+-424.954 ||| yo quiero taco bell ||| tm_pt_0=0.000 tm_glue_0=4.000 lm_0=-21.293 WordPenalty=-2.606 OOVPenalty=-400.000

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9cbc045f/src/test/resources/decoder/num_translation_options/output.gold
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/num_translation_options/output.gold b/src/test/resources/decoder/num_translation_options/output.gold
index 4203822..686122c 100644
--- a/src/test/resources/decoder/num_translation_options/output.gold
+++ b/src/test/resources/decoder/num_translation_options/output.gold
@@ -2,11 +2,3 @@
 -19.733 ||| i love taco bell ||| tm_pt_0=5.000 tm_glue_0=1.000 lm_0=-18.690 WordPenalty=-2.606 OOVPenalty=0.000
 -22.883 ||| i appreciate taco bell ||| tm_pt_0=3.000 tm_glue_0=1.000 lm_0=-19.620 WordPenalty=-2.606 OOVPenalty=0.000
 -424.954 ||| yo quiero taco bell ||| tm_pt_0=0.000 tm_glue_0=4.000 lm_0=-21.293 WordPenalty=-2.606 OOVPenalty=-400.000
--19.196 ||| i like taco bell ||| tm_pt_0=4.000 tm_glue_0=1.000 lm_0=-17.449 WordPenalty=-2.606 OOVPenalty=0.000
--19.733 ||| i love taco bell ||| tm_pt_0=5.000 tm_glue_0=1.000 lm_0=-18.690 WordPenalty=-2.606 OOVPenalty=0.000
--22.883 ||| i appreciate taco bell ||| tm_pt_0=3.000 tm_glue_0=1.000 lm_0=-19.620 WordPenalty=-2.606 OOVPenalty=0.000
--424.954 ||| yo quiero taco bell ||| tm_pt_0=0.000 tm_glue_0=4.000 lm_0=-21.293 WordPenalty=-2.606 OOVPenalty=-400.000
--19.196 ||| i like taco bell ||| tm_pt_0=4.000 tm_glue_0=1.000 lm_0=-17.449 WordPenalty=-2.606 OOVPenalty=0.000
--19.733 ||| i love taco bell ||| tm_pt_0=5.000 tm_glue_0=1.000 lm_0=-18.690 WordPenalty=-2.606 OOVPenalty=0.000
--22.883 ||| i appreciate taco bell ||| tm_pt_0=3.000 tm_glue_0=1.000 lm_0=-19.620 WordPenalty=-2.606 OOVPenalty=0.000
--424.954 ||| yo quiero taco bell ||| tm_pt_0=0.000 tm_glue_0=4.000 lm_0=-21.293 WordPenalty=-2.606 OOVPenalty=-400.000

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9cbc045f/src/test/resources/decoder/num_translation_options/test.sh
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/num_translation_options/test.sh b/src/test/resources/decoder/num_translation_options/test.sh
deleted file mode 100755
index e413526..0000000
--- a/src/test/resources/decoder/num_translation_options/test.sh
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/bin/bash
-
-set -u
-
-cat input | $JOSHUA/bin/joshua-decoder -c joshua.config > output 2> log
-cat input | $JOSHUA/bin/joshua-decoder -c joshua.config -no-dot-chart >> output 2>> log
-cat input | $JOSHUA/bin/joshua-decoder -c joshua.config.packed >> output 2>> log
-
-# Compare
-diff -u output output.gold > diff
-
-if [ $? -eq 0 ]; then
-  rm -f diff log output output.scores
-  exit 0
-else
-  exit 1
-fi


[15/15] incubator-joshua git commit: Updated for new Decoder(config) constructor

Posted by mj...@apache.org.
Updated for new Decoder(config) constructor


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/7e7baaff
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/7e7baaff
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/7e7baaff

Branch: refs/heads/7
Commit: 7e7baaffcfeaaf3bc4d52f5a90e1663a49853f70
Parents: 93055fd
Author: Matt Post <po...@cs.jhu.edu>
Authored: Thu Sep 15 15:43:33 2016 +0200
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Thu Sep 15 15:43:33 2016 +0200

----------------------------------------------------------------------
 .../org/apache/joshua/decoder/cky/NumTranslationOptionsTest.java   | 2 +-
 .../src/test/java/org/apache/joshua/decoder/cky/OOVListTest.java   | 2 +-
 .../src/test/java/org/apache/joshua/decoder/cky/RescoringTest.java | 2 +-
 .../java/org/apache/joshua/decoder/cky/SourceAnnotationsTest.java  | 2 +-
 .../src/test/java/org/apache/joshua/decoder/cky/TargetBigram.java  | 2 +-
 .../src/test/java/org/apache/joshua/decoder/cky/TooLongTest.java   | 2 +-
 .../test/java/org/apache/joshua/decoder/cky/TreeOutputTest.java    | 2 +-
 .../java/org/apache/joshua/decoder/cky/UniqueHypothesesTest.java   | 2 +-
 8 files changed, 8 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7e7baaff/joshua-core/src/test/java/org/apache/joshua/decoder/cky/NumTranslationOptionsTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/NumTranslationOptionsTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/NumTranslationOptionsTest.java
index ec6f02d..7038241 100644
--- a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/NumTranslationOptionsTest.java
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/NumTranslationOptionsTest.java
@@ -101,6 +101,6 @@ public class NumTranslationOptionsTest {
     joshuaConfig = new JoshuaConfiguration();
     joshuaConfig.readConfigFile(pathToConfig);
     joshuaConfig.use_dot_chart = useDotChart;
-    KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
+    KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig));
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7e7baaff/joshua-core/src/test/java/org/apache/joshua/decoder/cky/OOVListTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/OOVListTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/OOVListTest.java
index 29ec23e..86282bf 100644
--- a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/OOVListTest.java
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/OOVListTest.java
@@ -61,6 +61,6 @@ public class OOVListTest {
   public void configureDecoder(String pathToConfig) throws Exception {
     joshuaConfig = new JoshuaConfiguration();
     joshuaConfig.readConfigFile(pathToConfig);
-    KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
+    KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig));
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7e7baaff/joshua-core/src/test/java/org/apache/joshua/decoder/cky/RescoringTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/RescoringTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/RescoringTest.java
index a12a47b..4e2ec05 100644
--- a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/RescoringTest.java
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/RescoringTest.java
@@ -62,6 +62,6 @@ public class RescoringTest {
   public void configureDecoder(String pathToConfig) throws Exception {
     joshuaConfig = new JoshuaConfiguration();
     joshuaConfig.readConfigFile(pathToConfig);
-    KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
+    KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig));
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7e7baaff/joshua-core/src/test/java/org/apache/joshua/decoder/cky/SourceAnnotationsTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/SourceAnnotationsTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/SourceAnnotationsTest.java
index ce09506..485cf9f 100644
--- a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/SourceAnnotationsTest.java
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/SourceAnnotationsTest.java
@@ -56,7 +56,7 @@ public class SourceAnnotationsTest {
     joshuaConfig = new JoshuaConfiguration();
     joshuaConfig.readConfigFile(JOSHUA_CONFIG_PATH);
     joshuaConfig.source_annotations = sourceAnnotations;
-    KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
+    KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig));
   }
 
   @AfterMethod

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7e7baaff/joshua-core/src/test/java/org/apache/joshua/decoder/cky/TargetBigram.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/TargetBigram.java b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/TargetBigram.java
index bce34ca..30e6d1a 100644
--- a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/TargetBigram.java
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/TargetBigram.java
@@ -63,7 +63,7 @@ public class TargetBigram {
     joshuaConfig = new JoshuaConfiguration();
     joshuaConfig.features.add(featureFunction);
     joshuaConfig.outputFormat = "%s ||| %f ||| %c";
-    decoder = new Decoder(joshuaConfig, "");
+    decoder = new Decoder(joshuaConfig);
   }
 
   @AfterMethod

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7e7baaff/joshua-core/src/test/java/org/apache/joshua/decoder/cky/TooLongTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/TooLongTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/TooLongTest.java
index 0d4f7ce..ca97b95 100644
--- a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/TooLongTest.java
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/TooLongTest.java
@@ -75,7 +75,7 @@ public class TooLongTest {
     joshuaConfig.outputFormat = "%s ||| %f ||| %c";
     joshuaConfig.maxlen = maxLen;
     joshuaConfig.lattice_decoding = latticeDecoding;
-    decoder = new Decoder(joshuaConfig, "");
+    decoder = new Decoder(joshuaConfig);
   }
 
   @AfterMethod

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7e7baaff/joshua-core/src/test/java/org/apache/joshua/decoder/cky/TreeOutputTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/TreeOutputTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/TreeOutputTest.java
index f5e1005..578790d 100644
--- a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/TreeOutputTest.java
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/TreeOutputTest.java
@@ -60,6 +60,6 @@ public class TreeOutputTest {
   public void configureDecoder(String pathToConfig) throws Exception {
     joshuaConfig = new JoshuaConfiguration();
     joshuaConfig.readConfigFile(pathToConfig);
-    KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
+    KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig));
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7e7baaff/joshua-core/src/test/java/org/apache/joshua/decoder/cky/UniqueHypothesesTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/UniqueHypothesesTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/UniqueHypothesesTest.java
index bf65c5e..825de45 100644
--- a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/UniqueHypothesesTest.java
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/UniqueHypothesesTest.java
@@ -60,7 +60,7 @@ public class UniqueHypothesesTest {
   public void configureDecoder(String pathToConfig) throws Exception {
     joshuaConfig = new JoshuaConfiguration();
     joshuaConfig.readConfigFile(pathToConfig);
-    KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
+    KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig));
   }
 
   @AfterMethod


[03/15] incubator-joshua git commit: Moved regression test decoder/oov-list to unit test. Regenerated gold output and cleaned up the directory. Copied the lm from n-ary to remove dependency to other test resource directory.

Posted by mj...@apache.org.
Moved regression test decoder/oov-list to unit test. Regenerated gold output and cleaned up the directory. Copied the lm from n-ary to remove dependency to other test resource directory.


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/931a67d7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/931a67d7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/931a67d7

Branch: refs/heads/7
Commit: 931a67d78c8f7de69695b400ed8c2db046d86784
Parents: 9cbc045
Author: Michael A. Hedderich <mi...@users.noreply.github.com>
Authored: Thu Sep 15 11:46:32 2016 +0200
Committer: Michael A. Hedderich <mi...@users.noreply.github.com>
Committed: Thu Sep 15 14:21:05 2016 +0200

----------------------------------------------------------------------
 .../apache/joshua/decoder/cky/OOVListTest.java  |  66 +++++++++++++++++++
 src/test/resources/decoder/oov-list/config      |  29 --------
 .../resources/decoder/oov-list/joshua.config    |  31 +++++++++
 src/test/resources/decoder/oov-list/lm.gz       | Bin 0 -> 2466496 bytes
 src/test/resources/decoder/oov-list/output.gold |   6 +-
 src/test/resources/decoder/oov-list/test.sh     |  30 ---------
 6 files changed, 100 insertions(+), 62 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/931a67d7/src/test/java/org/apache/joshua/decoder/cky/OOVListTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/cky/OOVListTest.java b/src/test/java/org/apache/joshua/decoder/cky/OOVListTest.java
new file mode 100644
index 0000000..29ec23e
--- /dev/null
+++ b/src/test/java/org/apache/joshua/decoder/cky/OOVListTest.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.cky;
+
+import static org.apache.joshua.decoder.cky.TestUtil.decodeList;
+import static org.apache.joshua.decoder.cky.TestUtil.loadStringsFromFile;
+import static org.testng.Assert.assertEquals;
+
+import java.util.List;
+
+import org.apache.joshua.decoder.Decoder;
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.util.io.KenLmTestUtil;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.Test;
+
+public class OOVListTest {
+  private JoshuaConfiguration joshuaConfig;
+  private Decoder decoder;
+
+  @AfterMethod
+  public void tearDown() throws Exception {
+    if (decoder != null) {
+      decoder.cleanUp();
+      decoder = null;
+    }
+  }
+
+  @Test
+  public void givenInput_whenDecodingWithOOVList_thenScoreAndTranslationCorrect() throws Exception {
+    // Given
+    List<String> inputStrings = loadStringsFromFile(
+        "src/test/resources/decoder/oov-list/input.txt");
+
+    // When
+    configureDecoder("src/test/resources/decoder/oov-list/joshua.config");
+    List<String> decodedStrings = decodeList(inputStrings, decoder, joshuaConfig);
+
+    // Then
+    List<String> goldStrings = loadStringsFromFile(
+        "src/test/resources/decoder/oov-list/output.gold");
+    assertEquals(decodedStrings, goldStrings);
+  }
+
+  public void configureDecoder(String pathToConfig) throws Exception {
+    joshuaConfig = new JoshuaConfiguration();
+    joshuaConfig.readConfigFile(pathToConfig);
+    KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/931a67d7/src/test/resources/decoder/oov-list/config
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/oov-list/config b/src/test/resources/decoder/oov-list/config
deleted file mode 100644
index 048b517..0000000
--- a/src/test/resources/decoder/oov-list/config
+++ /dev/null
@@ -1,29 +0,0 @@
-lm = kenlm 5 false false 100 ../n-ary/lm.gz
-
-tm = thrax phrase 20 grammar
-tm = thrax glue -1 glue-grammar
-
-mark_oovs = true
-
-default-non-terminal = X
-goalSymbol = GOAL
-
-#pruning config
-pop-limit = 100
-
-#nbest config
-use_unique_nbest = true
-use_tree_nbest = false
-top_n = 1
-
-oov-list = CD 0.0488752 JJ 0.186114 NN 0.291795 NNS 0.0894967 NP 0.117171 OOV 0.033015 VB 0.0313967 VBG 0.0404596 VBN 0.0317203 
-
-feature-function = WordPenalty
-feature-function = OOVPenalty
-
-lm_0 1.2373676802179452
-
-tm_phrase_0 1
-tm_glue_0 1
-WordPenalty -3.6942747832593694
-OOVPenalty 1.0

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/931a67d7/src/test/resources/decoder/oov-list/joshua.config
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/oov-list/joshua.config b/src/test/resources/decoder/oov-list/joshua.config
new file mode 100644
index 0000000..8809206
--- /dev/null
+++ b/src/test/resources/decoder/oov-list/joshua.config
@@ -0,0 +1,31 @@
+lm = kenlm 5 false false 100 src/test/resources/decoder/oov-list/lm.gz
+
+tm = thrax phrase 20 src/test/resources/decoder/oov-list/grammar
+tm = thrax glue -1 src/test/resources/decoder/oov-list/glue-grammar
+
+mark_oovs = true
+
+default-non-terminal = X
+goalSymbol = GOAL
+
+#pruning config
+pop-limit = 100
+
+#nbest config
+use_unique_nbest = true
+use_tree_nbest = false
+top_n = 1
+
+oov-list = CD 0.0488752 JJ 0.186114 NN 0.291795 NNS 0.0894967 NP 0.117171 OOV 0.033015 VB 0.0313967 VBG 0.0404596 VBN 0.0317203 
+
+output-format=%s ||| %f ||| %c
+
+feature-function = WordPenalty
+feature-function = OOVPenalty
+
+lm_0 1.2373676802179452
+
+tm_phrase_0 1
+tm_glue_0 1
+WordPenalty -3.6942747832593694
+OOVPenalty 1.0

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/931a67d7/src/test/resources/decoder/oov-list/lm.gz
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/oov-list/lm.gz b/src/test/resources/decoder/oov-list/lm.gz
new file mode 100644
index 0000000..a26335e
Binary files /dev/null and b/src/test/resources/decoder/oov-list/lm.gz differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/931a67d7/src/test/resources/decoder/oov-list/output.gold
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/oov-list/output.gold b/src/test/resources/decoder/oov-list/output.gold
index d911c52..ee44a51 100644
--- a/src/test/resources/decoder/oov-list/output.gold
+++ b/src/test/resources/decoder/oov-list/output.gold
@@ -1,3 +1,3 @@
-0 ||| Goats eat cheese ||| tm_phrase_0=0.000 tm_glue_0=1.000 lm_0=-16.587 WordPenalty=-2.171 OOVPenalty=0.000 ||| -11.503
-1 ||| i will go home ||| tm_phrase_0=0.000 tm_glue_0=1.000 lm_0=-12.155 WordPenalty=-2.606 OOVPenalty=0.000 ||| -4.414
-2 ||| goets_OOV eet_OOV cheez_OOV ||| tm_phrase_0=0.000 tm_glue_0=1.000 lm_0=-17.700 WordPenalty=-2.171 OOVPenalty=-7.749 ||| -20.629
+Goats eat cheese ||| tm_phrase_0=0.000 tm_glue_0=1.000 lm_0=-16.587 WordPenalty=-2.171 OOVPenalty=0.000 ||| -11.503
+i will go home ||| tm_phrase_0=0.000 tm_glue_0=1.000 lm_0=-12.155 WordPenalty=-2.606 OOVPenalty=0.000 ||| -4.414
+goets_OOV eet_OOV cheez_OOV ||| tm_phrase_0=0.000 tm_glue_0=1.000 lm_0=-17.700 WordPenalty=-2.171 OOVPenalty=-7.749 ||| -20.629

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/931a67d7/src/test/resources/decoder/oov-list/test.sh
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/oov-list/test.sh b/src/test/resources/decoder/oov-list/test.sh
deleted file mode 100755
index 38c1718..0000000
--- a/src/test/resources/decoder/oov-list/test.sh
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-set -u
-
-cat input.txt | $JOSHUA/bin/joshua-decoder -m 1g -threads 1 -c config > output 2> log
-
-# Compare
-diff -u output output.gold > diff
-
-if [ $? -eq 0 ]; then
-	rm -f diff log output output.scores
-	exit 0
-else
-	exit 1
-fi