You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/09/19 13:37:46 UTC

incubator-joshua git commit: UniqueDecoding fixed [Forced Update!]

Repository: incubator-joshua
Updated Branches:
  refs/heads/7_confsystem 039ecd59b -> 531ba99d9 (forced update)


UniqueDecoding fixed

converted rule file from Moses format, added search_algorithm = stack, changed TextGrammar → PhraseTable.

Also fixed the config file for search_algorithm


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/531ba99d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/531ba99d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/531ba99d

Branch: refs/heads/7_confsystem
Commit: 531ba99d9678afaf3b586b9d61835f4a1b7aab80
Parents: 0c28fef
Author: Matt Post <po...@cs.jhu.edu>
Authored: Mon Sep 19 09:35:25 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Mon Sep 19 09:37:24 2016 -0400

----------------------------------------------------------------------
 .../decoder/cky/UniqueHypothesesTest.conf       |  11 ++++++-----
 .../decoder/cky/UniqueHypothesesTest.java       |  15 +++++++++++----
 .../decoder/phrase/unique-hypotheses/rules.1.gz | Bin 2998042 -> 3799317 bytes
 scripts/compat/sevenize_my_conf_plz.py          |   2 +-
 4 files changed, 18 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/531ba99d/joshua-core/src/test/java/org/apache/joshua/decoder/cky/UniqueHypothesesTest.conf
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/UniqueHypothesesTest.conf b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/UniqueHypothesesTest.conf
index 2300ff8..4c881a6 100644
--- a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/UniqueHypothesesTest.conf
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/UniqueHypothesesTest.conf
@@ -6,6 +6,7 @@ top_n = 300
 use_unique_nbest = true
 output_format = %s
 include_align_index = false
+search_algorithm = stack
 
 feature_functions = [
   { class = StateMinimizingLanguageModel, lm_type = kenlm, lm_order = 5, lm_file = src/test/resources/decoder/phrase/unique-hypotheses/lm.1.gz }
@@ -16,7 +17,7 @@ feature_functions = [
 ]
 
 grammars = [
-  { class = TextGrammar, owner = pt, span_limit = 0, path = src/test/resources/decoder/phrase/unique-hypotheses/rules.1.gz }
+  { class = PhraseTable, owner = pt, span_limit = 0, path = src/test/resources/decoder/phrase/unique-hypotheses/rules.1.gz }
 ]
 
 weights = {
@@ -25,8 +26,8 @@ weights = {
   PhrasePenalty = 1.0
   WordPenalty = -2.844814
   lm_0 = 1.0
-  tm_pt_0 = 1.0
-  tm_pt_1 = 1.0
-  tm_pt_2 = 1.0
-  tm_pt_3 = 1.0
+  pt_0 = 1.0
+  pt_1 = 1.0
+  pt_2 = 1.0
+  pt_3 = 1.0
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/531ba99d/joshua-core/src/test/java/org/apache/joshua/decoder/cky/UniqueHypothesesTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/UniqueHypothesesTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/UniqueHypothesesTest.java
index 5ad4b64..7af8ec8 100644
--- a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/UniqueHypothesesTest.java
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/UniqueHypothesesTest.java
@@ -22,6 +22,7 @@ import static com.typesafe.config.ConfigFactory.parseResources;
 import static org.apache.joshua.decoder.cky.TestUtil.decodeList;
 import static org.testng.Assert.assertEquals;
 
+import java.io.File;
 import java.util.Arrays;
 import java.util.HashSet;
 import java.util.List;
@@ -30,9 +31,11 @@ import java.util.Set;
 import org.apache.joshua.decoder.Decoder;
 import org.apache.joshua.util.io.KenLmTestUtil;
 import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeMethod;
 import org.testng.annotations.Test;
 
 import com.typesafe.config.Config;
+import com.typesafe.config.ConfigValueFactory;
 
 /**
  * Ensures that derivations are unique for the phrase-based decoder.
@@ -42,14 +45,18 @@ public class UniqueHypothesesTest {
   public static final String INPUT = "una estrategia republicana para obstaculizar la reelecci�n de Obama";
 
   private Decoder decoder = null;
-
-  @Test
-  public void givenInputSentence_whenDecodingWithUniqueHypotheses_thenAllHypothesesUnique()
-      throws Exception {
+  
+  @BeforeMethod
+  public void setUp() throws Exception {
     Config config = parseResources(this.getClass(), "UniqueHypothesesTest.conf")
         .withFallback(Decoder.getDefaultFlags());
     KenLmTestUtil.Guard(() -> decoder = new Decoder(config));
+  }
 
+  @Test
+  public void givenInputSentence_whenDecodingWithUniqueHypotheses_thenAllHypothesesUnique()
+      throws Exception {
+    
     List<String> decodedStrings = decodeList(Arrays.asList(new String[] { INPUT }), decoder);
 
     assertEquals(decodedStrings.size(), 300);

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/531ba99d/joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/rules.1.gz
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/rules.1.gz b/joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/rules.1.gz
index 14466e9..57a9cb2 100644
Binary files a/joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/rules.1.gz and b/joshua-core/src/test/resources/decoder/phrase/unique-hypotheses/rules.1.gz differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/531ba99d/scripts/compat/sevenize_my_conf_plz.py
----------------------------------------------------------------------
diff --git a/scripts/compat/sevenize_my_conf_plz.py b/scripts/compat/sevenize_my_conf_plz.py
index 550872a..bd930dc 100755
--- a/scripts/compat/sevenize_my_conf_plz.py
+++ b/scripts/compat/sevenize_my_conf_plz.py
@@ -17,7 +17,7 @@ tms = []
 features = []
 
 def smooth_key(key):
-    return key.replace('-', '_').replace('maxspan', 'span_limit')
+    return key.replace('-', '_').replace('maxspan', 'span_limit').replace('search', 'search_algorithm')
 
 def moses_phrasetable_error():
     sys.stderr.write('MOSES phrase table format (tm keyword "moses") is no longer support')