You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/09/15 13:44:02 UTC

[03/15] incubator-joshua git commit: Moved regression test decoder/oov-list to a unit test. Regenerated gold output and cleaned up the directory. Copied the LM from n-ary to remove the dependency on another test resource directory.

Moved regression test decoder/oov-list to a unit test. Regenerated gold output and cleaned up the directory. Copied the LM from n-ary to remove the dependency on another test resource directory.


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/931a67d7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/931a67d7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/931a67d7

Branch: refs/heads/7
Commit: 931a67d78c8f7de69695b400ed8c2db046d86784
Parents: 9cbc045
Author: Michael A. Hedderich <mi...@users.noreply.github.com>
Authored: Thu Sep 15 11:46:32 2016 +0200
Committer: Michael A. Hedderich <mi...@users.noreply.github.com>
Committed: Thu Sep 15 14:21:05 2016 +0200

----------------------------------------------------------------------
 .../apache/joshua/decoder/cky/OOVListTest.java  |  66 +++++++++++++++++++
 src/test/resources/decoder/oov-list/config      |  29 --------
 .../resources/decoder/oov-list/joshua.config    |  31 +++++++++
 src/test/resources/decoder/oov-list/lm.gz       | Bin 0 -> 2466496 bytes
 src/test/resources/decoder/oov-list/output.gold |   6 +-
 src/test/resources/decoder/oov-list/test.sh     |  30 ---------
 6 files changed, 100 insertions(+), 62 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/931a67d7/src/test/java/org/apache/joshua/decoder/cky/OOVListTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/cky/OOVListTest.java b/src/test/java/org/apache/joshua/decoder/cky/OOVListTest.java
new file mode 100644
index 0000000..29ec23e
--- /dev/null
+++ b/src/test/java/org/apache/joshua/decoder/cky/OOVListTest.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.cky;
+
+import static org.apache.joshua.decoder.cky.TestUtil.decodeList;
+import static org.apache.joshua.decoder.cky.TestUtil.loadStringsFromFile;
+import static org.testng.Assert.assertEquals;
+
+import java.util.List;
+
+import org.apache.joshua.decoder.Decoder;
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.util.io.KenLmTestUtil;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.Test;
+
+public class OOVListTest {
+  private JoshuaConfiguration joshuaConfig;
+  private Decoder decoder;
+
+  @AfterMethod
+  public void tearDown() throws Exception {
+    if (decoder != null) {
+      decoder.cleanUp();
+      decoder = null;
+    }
+  }
+
+  @Test
+  public void givenInput_whenDecodingWithOOVList_thenScoreAndTranslationCorrect() throws Exception {
+    // Given
+    List<String> inputStrings = loadStringsFromFile(
+        "src/test/resources/decoder/oov-list/input.txt");
+
+    // When
+    configureDecoder("src/test/resources/decoder/oov-list/joshua.config");
+    List<String> decodedStrings = decodeList(inputStrings, decoder, joshuaConfig);
+
+    // Then
+    List<String> goldStrings = loadStringsFromFile(
+        "src/test/resources/decoder/oov-list/output.gold");
+    assertEquals(decodedStrings, goldStrings);
+  }
+
+  public void configureDecoder(String pathToConfig) throws Exception {
+    joshuaConfig = new JoshuaConfiguration();
+    joshuaConfig.readConfigFile(pathToConfig);
+    KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/931a67d7/src/test/resources/decoder/oov-list/config
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/oov-list/config b/src/test/resources/decoder/oov-list/config
deleted file mode 100644
index 048b517..0000000
--- a/src/test/resources/decoder/oov-list/config
+++ /dev/null
@@ -1,29 +0,0 @@
-lm = kenlm 5 false false 100 ../n-ary/lm.gz
-
-tm = thrax phrase 20 grammar
-tm = thrax glue -1 glue-grammar
-
-mark_oovs = true
-
-default-non-terminal = X
-goalSymbol = GOAL
-
-#pruning config
-pop-limit = 100
-
-#nbest config
-use_unique_nbest = true
-use_tree_nbest = false
-top_n = 1
-
-oov-list = CD 0.0488752 JJ 0.186114 NN 0.291795 NNS 0.0894967 NP 0.117171 OOV 0.033015 VB 0.0313967 VBG 0.0404596 VBN 0.0317203 
-
-feature-function = WordPenalty
-feature-function = OOVPenalty
-
-lm_0 1.2373676802179452
-
-tm_phrase_0 1
-tm_glue_0 1
-WordPenalty -3.6942747832593694
-OOVPenalty 1.0

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/931a67d7/src/test/resources/decoder/oov-list/joshua.config
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/oov-list/joshua.config b/src/test/resources/decoder/oov-list/joshua.config
new file mode 100644
index 0000000..8809206
--- /dev/null
+++ b/src/test/resources/decoder/oov-list/joshua.config
@@ -0,0 +1,31 @@
+lm = kenlm 5 false false 100 src/test/resources/decoder/oov-list/lm.gz
+
+tm = thrax phrase 20 src/test/resources/decoder/oov-list/grammar
+tm = thrax glue -1 src/test/resources/decoder/oov-list/glue-grammar
+
+mark_oovs = true
+
+default-non-terminal = X
+goalSymbol = GOAL
+
+#pruning config
+pop-limit = 100
+
+#nbest config
+use_unique_nbest = true
+use_tree_nbest = false
+top_n = 1
+
+oov-list = CD 0.0488752 JJ 0.186114 NN 0.291795 NNS 0.0894967 NP 0.117171 OOV 0.033015 VB 0.0313967 VBG 0.0404596 VBN 0.0317203 
+
+output-format=%s ||| %f ||| %c
+
+feature-function = WordPenalty
+feature-function = OOVPenalty
+
+lm_0 1.2373676802179452
+
+tm_phrase_0 1
+tm_glue_0 1
+WordPenalty -3.6942747832593694
+OOVPenalty 1.0

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/931a67d7/src/test/resources/decoder/oov-list/lm.gz
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/oov-list/lm.gz b/src/test/resources/decoder/oov-list/lm.gz
new file mode 100644
index 0000000..a26335e
Binary files /dev/null and b/src/test/resources/decoder/oov-list/lm.gz differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/931a67d7/src/test/resources/decoder/oov-list/output.gold
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/oov-list/output.gold b/src/test/resources/decoder/oov-list/output.gold
index d911c52..ee44a51 100644
--- a/src/test/resources/decoder/oov-list/output.gold
+++ b/src/test/resources/decoder/oov-list/output.gold
@@ -1,3 +1,3 @@
-0 ||| Goats eat cheese ||| tm_phrase_0=0.000 tm_glue_0=1.000 lm_0=-16.587 WordPenalty=-2.171 OOVPenalty=0.000 ||| -11.503
-1 ||| i will go home ||| tm_phrase_0=0.000 tm_glue_0=1.000 lm_0=-12.155 WordPenalty=-2.606 OOVPenalty=0.000 ||| -4.414
-2 ||| goets_OOV eet_OOV cheez_OOV ||| tm_phrase_0=0.000 tm_glue_0=1.000 lm_0=-17.700 WordPenalty=-2.171 OOVPenalty=-7.749 ||| -20.629
+Goats eat cheese ||| tm_phrase_0=0.000 tm_glue_0=1.000 lm_0=-16.587 WordPenalty=-2.171 OOVPenalty=0.000 ||| -11.503
+i will go home ||| tm_phrase_0=0.000 tm_glue_0=1.000 lm_0=-12.155 WordPenalty=-2.606 OOVPenalty=0.000 ||| -4.414
+goets_OOV eet_OOV cheez_OOV ||| tm_phrase_0=0.000 tm_glue_0=1.000 lm_0=-17.700 WordPenalty=-2.171 OOVPenalty=-7.749 ||| -20.629

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/931a67d7/src/test/resources/decoder/oov-list/test.sh
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/oov-list/test.sh b/src/test/resources/decoder/oov-list/test.sh
deleted file mode 100755
index 38c1718..0000000
--- a/src/test/resources/decoder/oov-list/test.sh
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-set -u
-
-cat input.txt | $JOSHUA/bin/joshua-decoder -m 1g -threads 1 -c config > output 2> log
-
-# Compare
-diff -u output output.gold > diff
-
-if [ $? -eq 0 ]; then
-	rm -f diff log output output.scores
-	exit 0
-else
-	exit 1
-fi