You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/09/15 13:31:33 UTC
[07/13] incubator-joshua git commit: Moved regression test
decoder/source-annotations to unit test. Regenerated gold output and cleaned
up the directory.
Moved regression test decoder/source-annotations to unit test. Regenerated gold output and cleaned up the directory.
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/32f27536
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/32f27536
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/32f27536
Branch: refs/heads/master
Commit: 32f27536978879ecfb7be69c820cdfa33e7fc7f5
Parents: d9f34aa
Author: Michael A. Hedderich <mi...@users.noreply.github.com>
Authored: Thu Sep 15 12:34:44 2016 +0200
Committer: Michael A. Hedderich <mi...@users.noreply.github.com>
Committed: Thu Sep 15 14:21:05 2016 +0200
----------------------------------------------------------------------
.../decoder/cky/SourceAnnotationsTest.java | 67 ++++++++++++++++++++
.../decoder/source-annotations/joshua.config | 8 +--
.../decoder/source-annotations/test.sh | 36 -----------
3 files changed, 71 insertions(+), 40 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/32f27536/src/test/java/org/apache/joshua/decoder/cky/SourceAnnotationsTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/cky/SourceAnnotationsTest.java b/src/test/java/org/apache/joshua/decoder/cky/SourceAnnotationsTest.java
new file mode 100644
index 0000000..000ba7e
--- /dev/null
+++ b/src/test/java/org/apache/joshua/decoder/cky/SourceAnnotationsTest.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.cky;
+
+import static org.apache.joshua.decoder.cky.TestUtil.translate;
+import static org.testng.Assert.assertEquals;
+
+import org.apache.joshua.decoder.Decoder;
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.Test;
+
+/** Checks decoder output for one annotated sentence with source annotations off and on. */
+public class SourceAnnotationsTest {
+  private static final String INPUT = "mis[tag=ADJ;num=PL;class=OOV] amigos me llaman";
+  private static final String GOLD_WITHOUT_ANNOTATIONS = "my friends call me ||| tm_pt_0=-3.000 tm_glue_0=3.000 lm_0=-11.974 OOVPenalty=0.000 WordPenalty=-2.606 ||| -7.650";
+  private static final String GOLD_WITH_ANNOTATIONS = "my friends call me ||| tm_pt_0=-3.000 tm_glue_0=3.000 lm_0=-111.513 OOVPenalty=0.000 WordPenalty=-2.606 ||| -107.189";
+  private static final String JOSHUA_CONFIG_PATH = "src/test/resources/decoder/source-annotations/joshua.config";
+
+  private JoshuaConfiguration joshuaConfig;
+  private Decoder decoder;
+
+  @Test
+  public void givenInput_whenNotUsingSourceAnnotations_thenOutputCorrect() throws Exception {
+    setUp(false);
+    String output = translate(INPUT, decoder, joshuaConfig);
+    assertEquals(output.trim(), GOLD_WITHOUT_ANNOTATIONS);
+  }
+
+  @Test
+  public void givenInput_whenUsingSourceAnnotations_thenOutputCorrect() throws Exception {
+    setUp(true);
+    String output = translate(INPUT, decoder, joshuaConfig);
+    assertEquals(output.trim(), GOLD_WITH_ANNOTATIONS);
+  }
+
+  public void setUp(boolean sourceAnnotations) throws Exception {
+    joshuaConfig = new JoshuaConfiguration();
+    joshuaConfig.readConfigFile(JOSHUA_CONFIG_PATH);
+    joshuaConfig.source_annotations = sourceAnnotations; // assigned after the file is read, so the test's value wins
+    decoder = new Decoder(joshuaConfig, "");
+  }
+
+  @AfterMethod
+  public void tearDown() throws Exception {
+    if (decoder != null) { // still null when setUp threw before the decoder was constructed
+      decoder.cleanUp();
+    }
+    decoder = null;
+  }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/32f27536/src/test/resources/decoder/source-annotations/joshua.config
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/source-annotations/joshua.config b/src/test/resources/decoder/source-annotations/joshua.config
index ffd2f96..788505e 100644
--- a/src/test/resources/decoder/source-annotations/joshua.config
+++ b/src/test/resources/decoder/source-annotations/joshua.config
@@ -33,8 +33,8 @@
# main distinction from the regular grammar is that the span limit
# does not apply to it.
-tm = thrax pt 20 grammar
-tm = thrax glue -1 grammar.glue
+tm = thrax pt 20 src/test/resources/decoder/source-annotations/grammar
+tm = thrax glue -1 src/test/resources/decoder/source-annotations/grammar.glue
# This symbol is used over unknown words in the source language
@@ -62,7 +62,7 @@ goal-symbol = GOAL
# - LM file: the location of the language model file
# You also need to add a weight for each language model below.
-lm = kenlm 5 false false 100 lm.kenlm
+lm = kenlm 5 false false 100 src/test/resources/decoder/source-annotations/lm.kenlm
# The suffix _OOV is appended to unknown source-language words if this
# is set to true.
@@ -91,7 +91,7 @@ use-unique-nbest = true
# %f: the feature string
# %c: the model cost
-output-format = %i ||| %s ||| %f ||| %c
+output-format = %s ||| %f ||| %c
# When printing the trees (%t in 'output-format'), this controls whether the alignments
# are also printed.
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/32f27536/src/test/resources/decoder/source-annotations/test.sh
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/source-annotations/test.sh b/src/test/resources/decoder/source-annotations/test.sh
deleted file mode 100755
index e352af3..0000000
--- a/src/test/resources/decoder/source-annotations/test.sh
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# Tests the language model code that uses the source-side projection instead of the word itself.
-# When translating a word, if there is a source-side annotation of the label "class", and
-# -source-annotations was added to the invocation, the LM will use that source-side class instead
-# of the translated word.
-
-set -u
-
-cat input.txt | $JOSHUA/bin/joshua-decoder -threads 1 -c joshua.config > output 2> log
-cat input.txt | $JOSHUA/bin/joshua-decoder -threads 1 -c joshua.config -source-annotations >> output 2>> log
-
-# Compare
-diff -u output output.gold > diff
-
-if [ $? -eq 0 ]; then
- rm -f diff output log output.scores
- exit 0
-else
- exit 1
-fi