You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/09/15 13:44:05 UTC
[06/15] incubator-joshua git commit: Moved regression test
decoder/rescoring to unit test. Regenerated gold output and cleaned up the
directory. Copied the lm from constrained to remove the dependency on another test
resource directory.
Moved regression test decoder/rescoring to unit test. Regenerated gold output and cleaned up the directory. Copied the lm from constrained to remove the dependency on another test resource directory.
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/11b87c38
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/11b87c38
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/11b87c38
Branch: refs/heads/7
Commit: 11b87c38ccfcdbe967d46b1f06834a5c5355fed3
Parents: 931a67d
Author: Michael A. Hedderich <mi...@users.noreply.github.com>
Authored: Thu Sep 15 11:58:33 2016 +0200
Committer: Michael A. Hedderich <mi...@users.noreply.github.com>
Committed: Thu Sep 15 14:21:05 2016 +0200
----------------------------------------------------------------------
.../joshua/decoder/cky/RescoringTest.java | 67 +++++++++++++++++++
.../resources/decoder/rescoring/joshua.config | 8 +--
src/test/resources/decoder/rescoring/lm.gz | Bin 0 -> 2466496 bytes
.../resources/decoder/rescoring/output.gold | 24 +++----
src/test/resources/decoder/rescoring/test.sh | 30 ---------
5 files changed, 83 insertions(+), 46 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/11b87c38/src/test/java/org/apache/joshua/decoder/cky/RescoringTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/cky/RescoringTest.java b/src/test/java/org/apache/joshua/decoder/cky/RescoringTest.java
new file mode 100644
index 0000000..a12a47b
--- /dev/null
+++ b/src/test/java/org/apache/joshua/decoder/cky/RescoringTest.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.cky;
+
+import static org.apache.joshua.decoder.cky.TestUtil.decodeList;
+import static org.apache.joshua.decoder.cky.TestUtil.loadStringsFromFile;
+import static org.testng.Assert.assertEquals;
+
+import java.util.List;
+
+import org.apache.joshua.decoder.Decoder;
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.util.io.KenLmTestUtil;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.Test;
+
+public class RescoringTest { // TestNG test: decodes the rescoring input file and compares the result with output.gold
+ private JoshuaConfiguration joshuaConfig; // assigned in configureDecoder
+ private Decoder decoder; // assigned in configureDecoder, released in tearDown
+
+ @AfterMethod
+ public void tearDown() throws Exception { // TestNG runs this after each test method
+ if (decoder != null) { // decoder is still null if configureDecoder never assigned it
+ decoder.cleanUp();
+ decoder = null; // drop the reference once it has been cleaned up
+ }
+ }
+
+ @Test
+ public void givenInput_whenDecodingWithRescoring_thenScoreAndTranslationCorrect()
+ throws Exception {
+ // Given: the input lines (relative path; presumably resolved against the project root - confirm)
+ List<String> inputStrings = loadStringsFromFile(
+ "src/test/resources/decoder/rescoring/input.txt");
+
+ // When: a decoder built from the rescoring config decodes every input line
+ configureDecoder("src/test/resources/decoder/rescoring/joshua.config");
+ List<String> decodedStrings = decodeList(inputStrings, decoder, joshuaConfig);
+
+ // Then: the decoded lines must equal the gold file
+ List<String> goldStrings = loadStringsFromFile(
+ "src/test/resources/decoder/rescoring/output.gold");
+ assertEquals(decodedStrings, goldStrings); // TestNG argument order: (actual, expected)
+ }
+
+ public void configureDecoder(String pathToConfig) throws Exception { // sets both joshuaConfig and decoder from the given config file
+ joshuaConfig = new JoshuaConfiguration();
+ joshuaConfig.readConfigFile(pathToConfig);
+ KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, "")); // NOTE(review): Guard presumably handles an unavailable KenLM library - confirm in KenLmTestUtil
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/11b87c38/src/test/resources/decoder/rescoring/joshua.config
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/rescoring/joshua.config b/src/test/resources/decoder/rescoring/joshua.config
index 0e4a277..56efbfa 100644
--- a/src/test/resources/decoder/rescoring/joshua.config
+++ b/src/test/resources/decoder/rescoring/joshua.config
@@ -1,10 +1,10 @@
rescore-forest = true
rescore-forest-weight = 100
-lm = kenlm 5 false false 100 ../constrained/lm.gz
+lm = kenlm 5 false false 100 src/test/resources/decoder/rescoring/lm.gz
-tm = thrax pt 12 grammar.gz
-tm = thrax glue -1 glue-grammar
+tm = thrax pt 12 src/test/resources/decoder/rescoring/grammar.gz
+tm = thrax glue -1 src/test/resources/decoder/rescoring/glue-grammar
mark-oovs = true
@@ -14,7 +14,7 @@ goalSymbol = GOAL
#pruning config
pop-limit = 100
-#output-format = %i %c %s
+output-format = %s ||| %f ||| %c
#nbest config
use_unique_nbest = true
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/11b87c38/src/test/resources/decoder/rescoring/lm.gz
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/rescoring/lm.gz b/src/test/resources/decoder/rescoring/lm.gz
new file mode 100644
index 0000000..a26335e
Binary files /dev/null and b/src/test/resources/decoder/rescoring/lm.gz differ
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/11b87c38/src/test/resources/decoder/rescoring/output.gold
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/rescoring/output.gold b/src/test/resources/decoder/rescoring/output.gold
index 5d6600d..1ea4237 100644
--- a/src/test/resources/decoder/rescoring/output.gold
+++ b/src/test/resources/decoder/rescoring/output.gold
@@ -1,12 +1,12 @@
-0 ||| the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
-0 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
-0 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
-0 ||| the kid eated the cockroach ||| tm_pt_0=-15.000 tm_glue_0=5.000 lm_0=-20.053 WordPenalty=-3.040 OOVPenalty=0.000 ||| -31.773
-0 ||| the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
-0 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
-1 ||| the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
-1 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
-1 ||| the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
-1 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
-1 ||| the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
-1 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
+the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
+the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
+the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
+the kid eated the cockroach ||| tm_pt_0=-15.000 tm_glue_0=5.000 lm_0=-20.053 WordPenalty=-3.040 OOVPenalty=0.000 ||| -31.773
+the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
+the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
+the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
+the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
+the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
+the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
+the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
+the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/11b87c38/src/test/resources/decoder/rescoring/test.sh
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/rescoring/test.sh b/src/test/resources/decoder/rescoring/test.sh
deleted file mode 100755
index 58f2d2d..0000000
--- a/src/test/resources/decoder/rescoring/test.sh
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-set -u # treat expansion of an unset variable (e.g. $JOSHUA) as an error
-
-cat input.txt | $JOSHUA/bin/joshua-decoder -m 1g -threads 1 -c joshua.config > output 2> log # stdout goes to "output", stderr to "log"
-
-# Compare the decoder output with the expected (gold) output
-diff -u output output.gold > diff
-
-if [ $? -eq 0 ]; then # diff exits with 0 only when the two files are identical
- rm -f diff log output # remove the scratch files on success
- exit 0
-else
- exit 1 # scratch files are not removed on this path
-fi