You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/09/15 13:44:05 UTC

[06/15] incubator-joshua git commit: Moved the decoder/rescoring regression test to a unit test. Regenerated the gold output and cleaned up the directory. Copied the LM from the constrained test directory to remove the dependency on another test resource directory.

Moved the decoder/rescoring regression test to a unit test. Regenerated the gold output and cleaned up the directory. Copied the LM from the constrained test directory to remove the dependency on another test resource directory.


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/11b87c38
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/11b87c38
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/11b87c38

Branch: refs/heads/7
Commit: 11b87c38ccfcdbe967d46b1f06834a5c5355fed3
Parents: 931a67d
Author: Michael A. Hedderich <mi...@users.noreply.github.com>
Authored: Thu Sep 15 11:58:33 2016 +0200
Committer: Michael A. Hedderich <mi...@users.noreply.github.com>
Committed: Thu Sep 15 14:21:05 2016 +0200

----------------------------------------------------------------------
 .../joshua/decoder/cky/RescoringTest.java       |  67 +++++++++++++++++++
 .../resources/decoder/rescoring/joshua.config   |   8 +--
 src/test/resources/decoder/rescoring/lm.gz      | Bin 0 -> 2466496 bytes
 .../resources/decoder/rescoring/output.gold     |  24 +++----
 src/test/resources/decoder/rescoring/test.sh    |  30 ---------
 5 files changed, 83 insertions(+), 46 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/11b87c38/src/test/java/org/apache/joshua/decoder/cky/RescoringTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/cky/RescoringTest.java b/src/test/java/org/apache/joshua/decoder/cky/RescoringTest.java
new file mode 100644
index 0000000..a12a47b
--- /dev/null
+++ b/src/test/java/org/apache/joshua/decoder/cky/RescoringTest.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.cky;
+
+import static org.apache.joshua.decoder.cky.TestUtil.decodeList;
+import static org.apache.joshua.decoder.cky.TestUtil.loadStringsFromFile;
+import static org.testng.Assert.assertEquals;
+
+import java.util.List;
+
+import org.apache.joshua.decoder.Decoder;
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.util.io.KenLmTestUtil;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.Test;
+
+public class RescoringTest {
+  private JoshuaConfiguration joshuaConfig;
+  private Decoder decoder;
+
+  @AfterMethod
+  public void tearDown() throws Exception {
+    if (decoder != null) {
+      decoder.cleanUp();
+      decoder = null;
+    }
+  }
+
+  @Test
+  public void givenInput_whenDecodingWithRescoring_thenScoreAndTranslationCorrect()
+      throws Exception {
+    // Given: the input lines from the rescoring test resources
+    List<String> inputStrings = loadStringsFromFile(
+        "src/test/resources/decoder/rescoring/input.txt");
+
+    // When: a decoder built from the rescoring joshua.config decodes those lines
+    configureDecoder("src/test/resources/decoder/rescoring/joshua.config");
+    List<String> decodedStrings = decodeList(inputStrings, decoder, joshuaConfig);
+
+    // Then: the decoded lines must equal the lines of output.gold
+    List<String> goldStrings = loadStringsFromFile(
+        "src/test/resources/decoder/rescoring/output.gold");
+    assertEquals(decodedStrings, goldStrings);
+  }
+
+  public void configureDecoder(String pathToConfig) throws Exception {
+    joshuaConfig = new JoshuaConfiguration();
+    joshuaConfig.readConfigFile(pathToConfig);
+    KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/11b87c38/src/test/resources/decoder/rescoring/joshua.config
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/rescoring/joshua.config b/src/test/resources/decoder/rescoring/joshua.config
index 0e4a277..56efbfa 100644
--- a/src/test/resources/decoder/rescoring/joshua.config
+++ b/src/test/resources/decoder/rescoring/joshua.config
@@ -1,10 +1,10 @@
 rescore-forest = true
 rescore-forest-weight = 100
 
-lm = kenlm 5 false false 100 ../constrained/lm.gz
+lm = kenlm 5 false false 100 src/test/resources/decoder/rescoring/lm.gz
 
-tm = thrax pt 12 grammar.gz
-tm = thrax glue -1 glue-grammar
+tm = thrax pt 12 src/test/resources/decoder/rescoring/grammar.gz
+tm = thrax glue -1 src/test/resources/decoder/rescoring/glue-grammar
 
 mark-oovs = true
 
@@ -14,7 +14,7 @@ goalSymbol = GOAL
 #pruning config
 pop-limit = 100
 
-#output-format = %i %c %s
+output-format = %s ||| %f ||| %c
 
 #nbest config
 use_unique_nbest = true

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/11b87c38/src/test/resources/decoder/rescoring/lm.gz
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/rescoring/lm.gz b/src/test/resources/decoder/rescoring/lm.gz
new file mode 100644
index 0000000..a26335e
Binary files /dev/null and b/src/test/resources/decoder/rescoring/lm.gz differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/11b87c38/src/test/resources/decoder/rescoring/output.gold
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/rescoring/output.gold b/src/test/resources/decoder/rescoring/output.gold
index 5d6600d..1ea4237 100644
--- a/src/test/resources/decoder/rescoring/output.gold
+++ b/src/test/resources/decoder/rescoring/output.gold
@@ -1,12 +1,12 @@
-0 ||| the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
-0 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
-0 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
-0 ||| the kid eated the cockroach ||| tm_pt_0=-15.000 tm_glue_0=5.000 lm_0=-20.053 WordPenalty=-3.040 OOVPenalty=0.000 ||| -31.773
-0 ||| the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
-0 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
-1 ||| the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
-1 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
-1 ||| the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
-1 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
-1 ||| the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
-1 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
+the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
+the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
+the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
+the kid eated the cockroach ||| tm_pt_0=-15.000 tm_glue_0=5.000 lm_0=-20.053 WordPenalty=-3.040 OOVPenalty=0.000 ||| -31.773
+the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
+the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
+the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
+the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
+the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
+the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
+the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
+the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/11b87c38/src/test/resources/decoder/rescoring/test.sh
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/rescoring/test.sh b/src/test/resources/decoder/rescoring/test.sh
deleted file mode 100755
index 58f2d2d..0000000
--- a/src/test/resources/decoder/rescoring/test.sh
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-set -u
-
-cat input.txt | $JOSHUA/bin/joshua-decoder -m 1g -threads 1 -c joshua.config > output 2> log
-
-# Compare
-diff -u output output.gold > diff
-
-if [ $? -eq 0 ]; then
-	rm -f diff log output 
-	exit 0
-else
-	exit 1
-fi