You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/09/19 10:15:29 UTC

[08/21] incubator-joshua git commit: Fixed UniqueHypothesesTest, SourceAnnotationsTest and OOVListTest to work with new configuration. Compiles but does not pass yet.

Fixed UniqueHypothesesTest, SourceAnnotationsTest and OOVListTest to work with new configuration. Compiles but does not pass yet.


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/3ed109af
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/3ed109af
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/3ed109af

Branch: refs/heads/7_confsystem
Commit: 3ed109afab7c91dc93a92a387053bc140029784b
Parents: 3b70a97
Author: Michael A. Hedderich <mi...@users.noreply.github.com>
Authored: Sun Sep 18 19:20:12 2016 +0200
Committer: Michael A. Hedderich <mi...@users.noreply.github.com>
Committed: Sun Sep 18 19:23:35 2016 +0200

----------------------------------------------------------------------
 .../apache/joshua/decoder/cky/OOVListTest.conf  | 28 ++++++++++++++++
 .../apache/joshua/decoder/cky/OOVListTest.gold  |  3 ++
 .../apache/joshua/decoder/cky/OOVListTest.in    |  3 ++
 .../apache/joshua/decoder/cky/OOVListTest.java  | 34 ++++++--------------
 .../cky/SourceAnnotationsNotUsingTest.conf      | 27 ++++++++++++++++
 .../decoder/cky/SourceAnnotationsTest.java      | 28 ++++++++--------
 .../decoder/cky/SourceAnnotationsUsingTest.conf | 28 ++++++++++++++++
 .../decoder/cky/UniqueHypothesesTest.config     | 32 ++++++++++++++++++
 .../decoder/cky/UniqueHypothesesTest.java       | 19 +++++------
 9 files changed, 152 insertions(+), 50 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/3ed109af/joshua-core/src/test/java/org/apache/joshua/decoder/cky/OOVListTest.conf
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/OOVListTest.conf b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/OOVListTest.conf
new file mode 100644
index 0000000..d045e22
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/OOVListTest.conf
@@ -0,0 +1,28 @@
+mark_oovs = true
+default_non_terminal = X
+goal_symbol = GOAL
+pop_limit = 100
+use_unique_nbest = true
+use_tree_nbest = false
+top_n = 1
+oov_list = CD 0.0488752 JJ 0.186114 NN 0.291795 NNS 0.0894967 NP 0.117171 OOV 0.033015 VB 0.0313967 VBG 0.0404596 VBN 0.0317203
+output_format = %s ||| %f ||| %c
+
+feature_functions = [
+  { class = LanguageModel, lm_type = kenlm, lm_order = 5, lm_file = src/test/resources/decoder/oov-list/lm.gz }
+  { class = WordPenalty }
+  { class = OOVPenalty }
+]
+
+grammars = [
+  { class = TextGrammar, owner = phrase, span_limit = 20, path = src/test/resources/decoder/oov-list/grammar }
+  { class = TextGrammar, owner = glue, span_limit = -1, path = src/test/resources/decoder/oov-list/glue-grammar }
+]
+
+weights = {
+  OOVPenalty = 1.0
+  WordPenalty = -3.6942747832593694
+  glue_0 = -1
+  lm_0 = 1.2373676802179452
+  phrase_0 = -1
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/3ed109af/joshua-core/src/test/java/org/apache/joshua/decoder/cky/OOVListTest.gold
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/OOVListTest.gold b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/OOVListTest.gold
new file mode 100644
index 0000000..7cebb7b
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/OOVListTest.gold
@@ -0,0 +1,3 @@
+Goats eat cheese ||| phrase_0=0.000000 lm_0=-16.587435 glue_0=-1.000000 WordPenalty=-2.171473 ||| -11.503
+i will go home ||| phrase_0=0.000000 lm_0=-12.155182 glue_0=-1.000000 WordPenalty=-2.605767 ||| -4.414
+goets_OOV eet_OOV cheez_OOV ||| phrase_0=0.000000 lm_0=-17.699974 glue_0=-1.000000 WordPenalty=-2.171473 OOVPenalty=-7.749294 ||| -20.629

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/3ed109af/joshua-core/src/test/java/org/apache/joshua/decoder/cky/OOVListTest.in
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/OOVListTest.in b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/OOVListTest.in
new file mode 100644
index 0000000..23fd45a
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/OOVListTest.in
@@ -0,0 +1,3 @@
+CHEESE GOATS EAT
+3 1 4 2
+goets eet cheez

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/3ed109af/joshua-core/src/test/java/org/apache/joshua/decoder/cky/OOVListTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/OOVListTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/OOVListTest.java
index 86282bf..394d806 100644
--- a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/OOVListTest.java
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/OOVListTest.java
@@ -18,20 +18,17 @@
  */
 package org.apache.joshua.decoder.cky;
 
-import static org.apache.joshua.decoder.cky.TestUtil.decodeList;
-import static org.apache.joshua.decoder.cky.TestUtil.loadStringsFromFile;
-import static org.testng.Assert.assertEquals;
-
-import java.util.List;
+import static com.typesafe.config.ConfigFactory.parseResources;
+import static org.apache.joshua.decoder.cky.TestUtil.decodeAndAssertDecodedOutputEqualsGold;
 
 import org.apache.joshua.decoder.Decoder;
-import org.apache.joshua.decoder.JoshuaConfiguration;
 import org.apache.joshua.util.io.KenLmTestUtil;
 import org.testng.annotations.AfterMethod;
 import org.testng.annotations.Test;
 
+import com.typesafe.config.Config;
+
 public class OOVListTest {
-  private JoshuaConfiguration joshuaConfig;
   private Decoder decoder;
 
   @AfterMethod
@@ -44,23 +41,12 @@ public class OOVListTest {
 
   @Test
   public void givenInput_whenDecodingWithOOVList_thenScoreAndTranslationCorrect() throws Exception {
-    // Given
-    List<String> inputStrings = loadStringsFromFile(
-        "src/test/resources/decoder/oov-list/input.txt");
-
-    // When
-    configureDecoder("src/test/resources/decoder/oov-list/joshua.config");
-    List<String> decodedStrings = decodeList(inputStrings, decoder, joshuaConfig);
-
-    // Then
-    List<String> goldStrings = loadStringsFromFile(
-        "src/test/resources/decoder/oov-list/output.gold");
-    assertEquals(decodedStrings, goldStrings);
-  }
+    String inputPath = this.getClass().getResource("OOVListTest.in").getFile();
+    String goldPath = this.getClass().getResource("OOVListTest.gold").getFile();
+    Config config = parseResources(this.getClass(), "OOVListTest.conf")
+        .withFallback(Decoder.getDefaultFlags());
+    KenLmTestUtil.Guard(() -> decoder = new Decoder(config));
 
-  public void configureDecoder(String pathToConfig) throws Exception {
-    joshuaConfig = new JoshuaConfiguration();
-    joshuaConfig.readConfigFile(pathToConfig);
-    KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig));
+    decodeAndAssertDecodedOutputEqualsGold(inputPath, decoder, goldPath);
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/3ed109af/joshua-core/src/test/java/org/apache/joshua/decoder/cky/SourceAnnotationsNotUsingTest.conf
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/SourceAnnotationsNotUsingTest.conf b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/SourceAnnotationsNotUsingTest.conf
new file mode 100644
index 0000000..a37e9b0
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/SourceAnnotationsNotUsingTest.conf
@@ -0,0 +1,27 @@
+default_non_terminal = X
+goal_symbol = GOAL
+mark_oovs = true
+pop_limit = 100
+top_n = 1
+use_unique_nbest = true
+output_format = %s ||| %f ||| %c
+include_align_index = false
+feature_function = OOVPenalty
+feature_function = WordPenalty
+
+feature_functions = [
+  { class = LanguageModel, lm_type = kenlm, lm_order = 5, lm_file = src/test/resources/decoder/source-annotations/lm.kenlm }
+]
+
+grammars = [
+  { class = TextGrammar, owner = pt, span_limit = 20, path = src/test/resources/decoder/source-annotations/grammar }
+  { class = TextGrammar, owner = glue, span_limit = -1, path = src/test/resources/decoder/source-annotations/grammar.glue }
+]
+
+weights = {
+  OOVPenalty = 1
+  WordPenalty = -1.5244636836685694
+  glue_0 = -0.1663815584150378
+  lm_0 = 1.0
+  pt_0 = -0.049141264495762726
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/3ed109af/joshua-core/src/test/java/org/apache/joshua/decoder/cky/SourceAnnotationsTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/SourceAnnotationsTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/SourceAnnotationsTest.java
index 28fc9e3..ee0f7aa 100644
--- a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/SourceAnnotationsTest.java
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/SourceAnnotationsTest.java
@@ -18,45 +18,43 @@
  */
 package org.apache.joshua.decoder.cky;
 
+import static com.typesafe.config.ConfigFactory.parseResources;
 import static org.apache.joshua.decoder.cky.TestUtil.translate;
 import static org.testng.Assert.assertEquals;
 
 import org.apache.joshua.decoder.Decoder;
-import org.apache.joshua.decoder.JoshuaConfiguration;
 import org.apache.joshua.util.io.KenLmTestUtil;
 import org.testng.annotations.AfterMethod;
 import org.testng.annotations.Test;
 
+import com.typesafe.config.Config;
+
 public class SourceAnnotationsTest {
 
   private static final String INPUT = "mis[tag=ADJ;num=PL;class=OOV] amigos me llaman";
   private static final String GOLD_WITHOUT_ANNOTATIONS = "my friends call me ||| pt_0=3.000000 lm_0=-11.973694 glue_0=-3.000000 WordPenalty=-2.605767 ||| -7.650";
   private static final String GOLD_WITH_ANNOTATIONS = "my friends call me ||| pt_0=3.000000 lm_0=-111.512733 glue_0=-3.000000 WordPenalty=-2.605767 ||| -107.189";
 
-  private static final String JOSHUA_CONFIG_PATH = "src/test/resources/decoder/source-annotations/joshua.config";
-
-  private JoshuaConfiguration joshuaConfig;
   private Decoder decoder;
 
   @Test
   public void givenInput_whenNotUsingSourceAnnotations_thenOutputCorrect() throws Exception {
-    setUp(false);
-    String output = translate(INPUT, decoder, joshuaConfig);
+    Config config = parseResources(this.getClass(), "SourceAnnotationsNotUsingTest.conf")
+        .withFallback(Decoder.getDefaultFlags());
+    KenLmTestUtil.Guard(() -> decoder = new Decoder(config));
+
+    String output = translate(INPUT, decoder);
     assertEquals(output.trim(), GOLD_WITHOUT_ANNOTATIONS);
   }
 
   @Test
   public void givenInput_whenUsingSourceAnnotations_thenOutputCorrect() throws Exception {
-    setUp(true);
-    String output = translate(INPUT, decoder, joshuaConfig);
-    assertEquals(output.trim(), GOLD_WITH_ANNOTATIONS);
-  }
+    Config config = parseResources(this.getClass(), "SourceAnnotationsUsingTest.conf")
+        .withFallback(Decoder.getDefaultFlags());
+    KenLmTestUtil.Guard(() -> decoder = new Decoder(config));
 
-  public void setUp(boolean sourceAnnotations) throws Exception {
-    joshuaConfig = new JoshuaConfiguration();
-    joshuaConfig.readConfigFile(JOSHUA_CONFIG_PATH);
-    joshuaConfig.source_annotations = sourceAnnotations;
-    KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig));
+    String output = translate(INPUT, decoder);
+    assertEquals(output.trim(), GOLD_WITH_ANNOTATIONS);
   }
 
   @AfterMethod

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/3ed109af/joshua-core/src/test/java/org/apache/joshua/decoder/cky/SourceAnnotationsUsingTest.conf
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/SourceAnnotationsUsingTest.conf b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/SourceAnnotationsUsingTest.conf
new file mode 100644
index 0000000..7533ee0
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/SourceAnnotationsUsingTest.conf
@@ -0,0 +1,28 @@
+default_non_terminal = X
+goal_symbol = GOAL
+mark_oovs = true
+pop_limit = 100
+top_n = 1
+use_unique_nbest = true
+output_format = %s ||| %f ||| %c
+include_align_index = false
+feature_function = OOVPenalty
+feature_function = WordPenalty
+source_annotations = true
+
+feature_functions = [
+  { class = LanguageModel, lm_type = kenlm, lm_order = 5, lm_file = src/test/resources/decoder/source-annotations/lm.kenlm }
+]
+
+grammars = [
+  { class = TextGrammar, owner = pt, span_limit = 20, path = src/test/resources/decoder/source-annotations/grammar }
+  { class = TextGrammar, owner = glue, span_limit = -1, path = src/test/resources/decoder/source-annotations/grammar.glue }
+]
+
+weights = {
+  OOVPenalty = 1
+  WordPenalty = -1.5244636836685694
+  glue_0 = -0.1663815584150378
+  lm_0 = 1.0
+  pt_0 = -0.049141264495762726
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/3ed109af/joshua-core/src/test/java/org/apache/joshua/decoder/cky/UniqueHypothesesTest.config
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/UniqueHypothesesTest.config b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/UniqueHypothesesTest.config
new file mode 100644
index 0000000..b7172ef
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/UniqueHypothesesTest.config
@@ -0,0 +1,32 @@
+default_non_terminal = X
+goal_symbol = GOAL
+mark_oovs = false
+pop_limit = 100
+top_n = 300
+use_unique_nbest = true
+output_format = %s
+include_align_index = false
+feature_function = Distortion
+feature_function = PhrasePenalty
+
+feature_functions = [
+  { class = StateMinimizingLanguageModel, lm_type = kenlm, lm_order = 5, lm_file = src/test/resources/decoder/phrase/unique-hypotheses/lm.1.gz }
+  { class = OOVPenalty }
+  { class = WordPenalty }
+]
+
+grammars = [
+  { class = TextGrammar, owner = pt, span_limit = 0, path = src/test/resources/decoder/phrase/unique-hypotheses/rules.1.gz }
+]
+
+weights = {
+  Distortion = 1.0
+  OOVPenalty = 1.0
+  PhrasePenalty = 1.0
+  WordPenalty = -2.844814
+  lm_0 = 1.0
+  tm_pt_0 = 1.0
+  tm_pt_1 = 1.0
+  tm_pt_2 = 1.0
+  tm_pt_3 = 1.0
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/3ed109af/joshua-core/src/test/java/org/apache/joshua/decoder/cky/UniqueHypothesesTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/UniqueHypothesesTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/UniqueHypothesesTest.java
index 825de45..6e163e5 100644
--- a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/UniqueHypothesesTest.java
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/UniqueHypothesesTest.java
@@ -18,6 +18,7 @@
  */
 package org.apache.joshua.decoder.cky;
 
+import static com.typesafe.config.ConfigFactory.parseResources;
 import static org.apache.joshua.decoder.cky.TestUtil.decodeList;
 import static org.testng.Assert.assertEquals;
 
@@ -27,11 +28,12 @@ import java.util.List;
 import java.util.Set;
 
 import org.apache.joshua.decoder.Decoder;
-import org.apache.joshua.decoder.JoshuaConfiguration;
 import org.apache.joshua.util.io.KenLmTestUtil;
 import org.testng.annotations.AfterMethod;
 import org.testng.annotations.Test;
 
+import com.typesafe.config.Config;
+
 /**
  * Ensures that derivations are unique for the phrase-based decoder.
  */
@@ -39,15 +41,16 @@ public class UniqueHypothesesTest {
 
   public static final String INPUT = "una estrategia republicana para obstaculizar la reelección de Obama";
 
-  private JoshuaConfiguration joshuaConfig = null;
   private Decoder decoder = null;
 
   @Test
   public void givenInputSentence_whenDecodingWithUniqueHypotheses_thenAllHypothesesUnique()
       throws Exception {
-    configureDecoder("src/test/resources/decoder/phrase/unique-hypotheses/joshua.config");
-    List<String> decodedStrings = decodeList(Arrays.asList(new String[] { INPUT }), decoder,
-        joshuaConfig);
+    Config config = parseResources(this.getClass(), "UniqueHypothesesTest.config")
+        .withFallback(Decoder.getDefaultFlags());
+    KenLmTestUtil.Guard(() -> decoder = new Decoder(config));
+
+    List<String> decodedStrings = decodeList(Arrays.asList(new String[] { INPUT }), decoder);
 
     assertEquals(decodedStrings.size(), 300);
 
@@ -57,12 +60,6 @@ public class UniqueHypothesesTest {
     assertEquals(decodedStrings.size(), uniqueDecodedStrings.size());
   }
 
-  public void configureDecoder(String pathToConfig) throws Exception {
-    joshuaConfig = new JoshuaConfiguration();
-    joshuaConfig.readConfigFile(pathToConfig);
-    KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig));
-  }
-
   @AfterMethod
   public void tearDown() throws Exception {
     if (decoder != null) {