You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/09/15 13:31:31 UTC
[05/13] incubator-joshua git commit: Moved regression test
decoder/oov-list to a unit test. Regenerated the gold output and cleaned up the
directory. Copied the lm from n-ary to remove the dependency on another test
resource directory.
Moved regression test decoder/oov-list to a unit test. Regenerated the gold output and cleaned up the directory. Copied the lm from n-ary to remove the dependency on another test resource directory.
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/931a67d7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/931a67d7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/931a67d7
Branch: refs/heads/master
Commit: 931a67d78c8f7de69695b400ed8c2db046d86784
Parents: 9cbc045
Author: Michael A. Hedderich <mi...@users.noreply.github.com>
Authored: Thu Sep 15 11:46:32 2016 +0200
Committer: Michael A. Hedderich <mi...@users.noreply.github.com>
Committed: Thu Sep 15 14:21:05 2016 +0200
----------------------------------------------------------------------
.../apache/joshua/decoder/cky/OOVListTest.java | 66 +++++++++++++++++++
src/test/resources/decoder/oov-list/config | 29 --------
.../resources/decoder/oov-list/joshua.config | 31 +++++++++
src/test/resources/decoder/oov-list/lm.gz | Bin 0 -> 2466496 bytes
src/test/resources/decoder/oov-list/output.gold | 6 +-
src/test/resources/decoder/oov-list/test.sh | 30 ---------
6 files changed, 100 insertions(+), 62 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/931a67d7/src/test/java/org/apache/joshua/decoder/cky/OOVListTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/cky/OOVListTest.java b/src/test/java/org/apache/joshua/decoder/cky/OOVListTest.java
new file mode 100644
index 0000000..29ec23e
--- /dev/null
+++ b/src/test/java/org/apache/joshua/decoder/cky/OOVListTest.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.cky;
+
+import static org.apache.joshua.decoder.cky.TestUtil.decodeList;
+import static org.apache.joshua.decoder.cky.TestUtil.loadStringsFromFile;
+import static org.testng.Assert.assertEquals;
+
+import java.util.List;
+
+import org.apache.joshua.decoder.Decoder;
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.util.io.KenLmTestUtil;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.Test;
+
+public class OOVListTest { // Decodes the oov-list input with the oov-list joshua.config and compares the result with output.gold.
+ private JoshuaConfiguration joshuaConfig; // set by configureDecoder(); also passed to decodeList()
+ private Decoder decoder; // set by configureDecoder(); released in tearDown()
+
+ @AfterMethod
+ public void tearDown() throws Exception { // TestNG runs this after each test method
+ if (decoder != null) { // decoder is still null if configureDecoder() never assigned it
+ decoder.cleanUp();
+ decoder = null; // cleared so the null check above skips an already cleaned-up decoder
+ }
+ }
+
+ @Test
+ public void givenInput_whenDecodingWithOOVList_thenScoreAndTranslationCorrect() throws Exception {
+ // Given: the input lines loaded from the oov-list test resources
+ List<String> inputStrings = loadStringsFromFile(
+ "src/test/resources/decoder/oov-list/input.txt");
+
+ // When: a decoder is configured from the oov-list joshua.config and the input is decoded
+ configureDecoder("src/test/resources/decoder/oov-list/joshua.config");
+ List<String> decodedStrings = decodeList(inputStrings, decoder, joshuaConfig);
+
+ // Then: the decoded lines must equal the lines of the gold file
+ List<String> goldStrings = loadStringsFromFile(
+ "src/test/resources/decoder/oov-list/output.gold");
+ assertEquals(decodedStrings, goldStrings); // TestNG argument order: (actual, expected)
+ }
+
+ public void configureDecoder(String pathToConfig) throws Exception { // (re)initialises joshuaConfig and decoder from the config file at pathToConfig
+ joshuaConfig = new JoshuaConfiguration(); // fresh configuration on every call
+ joshuaConfig.readConfigFile(pathToConfig);
+ KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, "")); // NOTE(review): Guard presumably handles an unavailable KenLM library - confirm in KenLmTestUtil
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/931a67d7/src/test/resources/decoder/oov-list/config
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/oov-list/config b/src/test/resources/decoder/oov-list/config
deleted file mode 100644
index 048b517..0000000
--- a/src/test/resources/decoder/oov-list/config
+++ /dev/null
@@ -1,29 +0,0 @@
-lm = kenlm 5 false false 100 ../n-ary/lm.gz
-
-tm = thrax phrase 20 grammar
-tm = thrax glue -1 glue-grammar
-
-mark_oovs = true
-
-default-non-terminal = X
-goalSymbol = GOAL
-
-#pruning config
-pop-limit = 100
-
-#nbest config
-use_unique_nbest = true
-use_tree_nbest = false
-top_n = 1
-
-oov-list = CD 0.0488752 JJ 0.186114 NN 0.291795 NNS 0.0894967 NP 0.117171 OOV 0.033015 VB 0.0313967 VBG 0.0404596 VBN 0.0317203
-
-feature-function = WordPenalty
-feature-function = OOVPenalty
-
-lm_0 1.2373676802179452
-
-tm_phrase_0 1
-tm_glue_0 1
-WordPenalty -3.6942747832593694
-OOVPenalty 1.0
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/931a67d7/src/test/resources/decoder/oov-list/joshua.config
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/oov-list/joshua.config b/src/test/resources/decoder/oov-list/joshua.config
new file mode 100644
index 0000000..8809206
--- /dev/null
+++ b/src/test/resources/decoder/oov-list/joshua.config
@@ -0,0 +1,31 @@
+lm = kenlm 5 false false 100 src/test/resources/decoder/oov-list/lm.gz
+
+tm = thrax phrase 20 src/test/resources/decoder/oov-list/grammar
+tm = thrax glue -1 src/test/resources/decoder/oov-list/glue-grammar
+
+mark_oovs = true
+
+default-non-terminal = X
+goalSymbol = GOAL
+
+#pruning config
+pop-limit = 100
+
+#nbest config
+use_unique_nbest = true
+use_tree_nbest = false
+top_n = 1
+
+oov-list = CD 0.0488752 JJ 0.186114 NN 0.291795 NNS 0.0894967 NP 0.117171 OOV 0.033015 VB 0.0313967 VBG 0.0404596 VBN 0.0317203
+
+output-format=%s ||| %f ||| %c
+
+feature-function = WordPenalty
+feature-function = OOVPenalty
+
+lm_0 1.2373676802179452
+
+tm_phrase_0 1
+tm_glue_0 1
+WordPenalty -3.6942747832593694
+OOVPenalty 1.0
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/931a67d7/src/test/resources/decoder/oov-list/lm.gz
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/oov-list/lm.gz b/src/test/resources/decoder/oov-list/lm.gz
new file mode 100644
index 0000000..a26335e
Binary files /dev/null and b/src/test/resources/decoder/oov-list/lm.gz differ
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/931a67d7/src/test/resources/decoder/oov-list/output.gold
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/oov-list/output.gold b/src/test/resources/decoder/oov-list/output.gold
index d911c52..ee44a51 100644
--- a/src/test/resources/decoder/oov-list/output.gold
+++ b/src/test/resources/decoder/oov-list/output.gold
@@ -1,3 +1,3 @@
-0 ||| Goats eat cheese ||| tm_phrase_0=0.000 tm_glue_0=1.000 lm_0=-16.587 WordPenalty=-2.171 OOVPenalty=0.000 ||| -11.503
-1 ||| i will go home ||| tm_phrase_0=0.000 tm_glue_0=1.000 lm_0=-12.155 WordPenalty=-2.606 OOVPenalty=0.000 ||| -4.414
-2 ||| goets_OOV eet_OOV cheez_OOV ||| tm_phrase_0=0.000 tm_glue_0=1.000 lm_0=-17.700 WordPenalty=-2.171 OOVPenalty=-7.749 ||| -20.629
+Goats eat cheese ||| tm_phrase_0=0.000 tm_glue_0=1.000 lm_0=-16.587 WordPenalty=-2.171 OOVPenalty=0.000 ||| -11.503
+i will go home ||| tm_phrase_0=0.000 tm_glue_0=1.000 lm_0=-12.155 WordPenalty=-2.606 OOVPenalty=0.000 ||| -4.414
+goets_OOV eet_OOV cheez_OOV ||| tm_phrase_0=0.000 tm_glue_0=1.000 lm_0=-17.700 WordPenalty=-2.171 OOVPenalty=-7.749 ||| -20.629
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/931a67d7/src/test/resources/decoder/oov-list/test.sh
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/oov-list/test.sh b/src/test/resources/decoder/oov-list/test.sh
deleted file mode 100755
index 38c1718..0000000
--- a/src/test/resources/decoder/oov-list/test.sh
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-set -u
-
-cat input.txt | $JOSHUA/bin/joshua-decoder -m 1g -threads 1 -c config > output 2> log
-
-# Compare
-diff -u output output.gold > diff
-
-if [ $? -eq 0 ]; then
- rm -f diff log output output.scores
- exit 0
-else
- exit 1
-fi