You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/09/15 13:31:33 UTC

[07/13] incubator-joshua git commit: Moved regression test decoder/source-annotations to unit test. Regenerated gold output and cleaned up the directory.

Moved regression test decoder/source-annotations to unit test. Regenerated gold output and cleaned up the directory.


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/32f27536
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/32f27536
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/32f27536

Branch: refs/heads/master
Commit: 32f27536978879ecfb7be69c820cdfa33e7fc7f5
Parents: d9f34aa
Author: Michael A. Hedderich <mi...@users.noreply.github.com>
Authored: Thu Sep 15 12:34:44 2016 +0200
Committer: Michael A. Hedderich <mi...@users.noreply.github.com>
Committed: Thu Sep 15 14:21:05 2016 +0200

----------------------------------------------------------------------
 .../decoder/cky/SourceAnnotationsTest.java      | 67 ++++++++++++++++++++
 .../decoder/source-annotations/joshua.config    |  8 +--
 .../decoder/source-annotations/test.sh          | 36 -----------
 3 files changed, 71 insertions(+), 40 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/32f27536/src/test/java/org/apache/joshua/decoder/cky/SourceAnnotationsTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/cky/SourceAnnotationsTest.java b/src/test/java/org/apache/joshua/decoder/cky/SourceAnnotationsTest.java
new file mode 100644
index 0000000..000ba7e
--- /dev/null
+++ b/src/test/java/org/apache/joshua/decoder/cky/SourceAnnotationsTest.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.cky;
+
+import static org.apache.joshua.decoder.cky.TestUtil.translate;
+import static org.testng.Assert.assertEquals;
+
+import org.apache.joshua.decoder.Decoder;
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Unit test for source-side annotations, ported from the former
+ * {@code decoder/source-annotations} regression test.
+ *
+ * <p>The same annotated input sentence is decoded twice, once with
+ * {@code source_annotations} disabled and once with it enabled, and each
+ * result is compared against its own gold line. The two gold lines differ
+ * only in the {@code lm_0} feature value and the final model cost.
+ */
+public class SourceAnnotationsTest {
+
+  /** Input sentence; the first token carries a bracketed {@code key=value;...} annotation. */
+  private static final String INPUT = "mis[tag=ADJ;num=PL;class=OOV] amigos me llaman";
+  /** Expected decoder output when {@code source_annotations} is {@code false}. */
+  private static final String GOLD_WITHOUT_ANNOTATIONS = "my friends call me ||| tm_pt_0=-3.000 tm_glue_0=3.000 lm_0=-11.974 OOVPenalty=0.000 WordPenalty=-2.606 ||| -7.650";
+  /** Expected decoder output when {@code source_annotations} is {@code true}. */
+  private static final String GOLD_WITH_ANNOTATIONS = "my friends call me ||| tm_pt_0=-3.000 tm_glue_0=3.000 lm_0=-111.513 OOVPenalty=0.000 WordPenalty=-2.606 ||| -107.189";
+
+  /** Config file location, relative to the working directory the tests are run from. */
+  private static final String JOSHUA_CONFIG_PATH = "src/test/resources/decoder/source-annotations/joshua.config";
+
+  private JoshuaConfiguration joshuaConfig;
+  private Decoder decoder;
+
+  /**
+   * Decodes {@link #INPUT} with source annotations switched off and checks the
+   * trimmed output against {@link #GOLD_WITHOUT_ANNOTATIONS}.
+   *
+   * @throws Exception if the decoder cannot be set up or translation fails
+   */
+  @Test
+  public void givenInput_whenNotUsingSourceAnnotations_thenOutputCorrect() throws Exception {
+    setUp(false);
+    String output = translate(INPUT, decoder, joshuaConfig);
+    assertEquals(output.trim(), GOLD_WITHOUT_ANNOTATIONS);
+  }
+
+  /**
+   * Decodes {@link #INPUT} with source annotations switched on and checks the
+   * trimmed output against {@link #GOLD_WITH_ANNOTATIONS}.
+   *
+   * @throws Exception if the decoder cannot be set up or translation fails
+   */
+  @Test
+  public void givenInput_whenUsingSourceAnnotations_thenOutputCorrect() throws Exception {
+    setUp(true);
+    String output = translate(INPUT, decoder, joshuaConfig);
+    assertEquals(output.trim(), GOLD_WITH_ANNOTATIONS);
+  }
+
+  /**
+   * Reads the configuration from {@link #JOSHUA_CONFIG_PATH}, overrides its
+   * {@code source_annotations} flag and creates a new decoder from it.
+   *
+   * <p>Called explicitly by each test rather than by the test framework,
+   * because the flag value differs per test.
+   *
+   * @param sourceAnnotations value to assign to {@code source_annotations}
+   * @throws Exception if the configuration or the decoder cannot be created
+   */
+  public void setUp(boolean sourceAnnotations) throws Exception {
+    joshuaConfig = new JoshuaConfiguration();
+    joshuaConfig.readConfigFile(JOSHUA_CONFIG_PATH);
+    joshuaConfig.source_annotations = sourceAnnotations;
+    decoder = new Decoder(joshuaConfig, "");
+  }
+
+  /**
+   * Releases the decoder created by {@link #setUp(boolean)}, if any.
+   *
+   * @throws Exception if decoder clean-up fails
+   */
+  @AfterMethod
+  public void tearDown() throws Exception {
+    // setUp may have thrown before a decoder was assigned; without this guard
+    // the resulting NullPointerException would be reported in addition to the
+    // original failure.
+    if (decoder != null) {
+      decoder.cleanUp();
+      decoder = null;
+    }
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/32f27536/src/test/resources/decoder/source-annotations/joshua.config
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/source-annotations/joshua.config b/src/test/resources/decoder/source-annotations/joshua.config
index ffd2f96..788505e 100644
--- a/src/test/resources/decoder/source-annotations/joshua.config
+++ b/src/test/resources/decoder/source-annotations/joshua.config
@@ -33,8 +33,8 @@
 # main distinction from the regular grammar is that the span limit
 # does not apply to it.  
 
-tm = thrax pt 20 grammar
-tm = thrax glue -1 grammar.glue
+tm = thrax pt 20 src/test/resources/decoder/source-annotations/grammar
+tm = thrax glue -1 src/test/resources/decoder/source-annotations/grammar.glue
 
 # This symbol is used over unknown words in the source language
 
@@ -62,7 +62,7 @@ goal-symbol = GOAL
 # - LM file: the location of the language model file
 # You also need to add a weight for each language model below.
 
-lm = kenlm 5 false false 100 lm.kenlm
+lm = kenlm 5 false false 100 src/test/resources/decoder/source-annotations/lm.kenlm
 
 # The suffix _OOV is appended to unknown source-language words if this
 # is set to true.
@@ -91,7 +91,7 @@ use-unique-nbest = true
 # %f: the feature string
 # %c: the model cost
 
-output-format = %i ||| %s ||| %f ||| %c
+output-format = %s ||| %f ||| %c
 
 # When printing the trees (%t in 'output-format'), this controls whether the alignments
 # are also printed.

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/32f27536/src/test/resources/decoder/source-annotations/test.sh
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/source-annotations/test.sh b/src/test/resources/decoder/source-annotations/test.sh
deleted file mode 100755
index e352af3..0000000
--- a/src/test/resources/decoder/source-annotations/test.sh
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# Tests the language model code that uses the source-side projection instead of the word itself.
-# When translating a word, if there is a source-side annotation of the label "class", and
-# -source-annotations was added to the invocation, the LM will use that source-side class instead
-# of the translated word.
-
-set -u
-
-cat input.txt | $JOSHUA/bin/joshua-decoder -threads 1 -c joshua.config > output 2> log
-cat input.txt | $JOSHUA/bin/joshua-decoder -threads 1 -c joshua.config -source-annotations >> output 2>> log
-
-# Compare
-diff -u output output.gold > diff
-
-if [ $? -eq 0 ]; then
-  rm -f diff output log output.scores
-  exit 0
-else
-  exit 1
-fi