You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/09/08 19:32:46 UTC

[2/3] incubator-joshua git commit: restored phrase-based constrained decoding

restored phrase-based constrained decoding


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/938ab944
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/938ab944
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/938ab944

Branch: refs/heads/master
Commit: 938ab944205f1171cecfa9ec9b4cbcdfe3eb22a4
Parents: cb37624
Author: Matt Post <po...@cs.jhu.edu>
Authored: Thu Sep 8 12:55:04 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Thu Sep 8 12:55:04 2016 -0400

----------------------------------------------------------------------
 .../org/apache/joshua/decoder/phrase/Stack.java | 24 ++++++++++------
 .../resources/decoder/phrase/constrained/config | 29 --------------------
 .../decoder/phrase/constrained/joshua.config    | 29 ++++++++++++++++++++
 .../decoder/phrase/constrained/test.sh          |  2 +-
 4 files changed, 45 insertions(+), 39 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/938ab944/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Stack.java b/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
index a16d9fe..0ff025f 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
@@ -26,6 +26,7 @@ import java.util.PriorityQueue;
 import java.util.Set;
 
 import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.ff.tm.Rule;
 import org.apache.joshua.decoder.segment_file.Sentence;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -143,24 +144,29 @@ public class Stack extends ArrayList<Hypothesis> {
 
     // Constrained decoding
     if (sentence.target() != null) {
-      throw new RuntimeException("* FATAL! Constrained decoding no longer works for the new phrase format");
-      // TODO: fix constrained decoding
-      /*
-      String oldWords = cand.getHypothesis().bestHyperedge.getRule().getEnglishWords().replace("[X,1] ",  "");
-      String newWords = cand.getRule().getEnglishWords().replace("[X,1] ",  "");
+
+      /* Get the rule. If it's a swap or monolingual rule, find the right backpointer */
+      Rule rule = cand.getHypothesis().getRule();
+      if (rule == Hypothesis.MONO_RULE)
+        rule = cand.getHypothesis().bestHyperedge.getTailNodes().get(1).bestHyperedge.getRule();
+      else if (rule == Hypothesis.SWAP_RULE)
+        rule = cand.getHypothesis().bestHyperedge.getTailNodes().get(0).bestHyperedge.getRule();
+      String oldWords = rule.getEnglishWords();
+
+      String newWords = cand.getPhraseRule().getEnglishWords();
           
+      boolean allowed = sentence.fullTarget().contains(oldWords + " " + newWords);
+      
       // If the string is not found in the target sentence, explore the cube neighbors
-      if (!sentence.fullTarget().contains(oldWords + " " + newWords)) {
+      if (! allowed) {
         Candidate next = cand.extendPhrase();
         if (next != null)
           addCandidate(next); 
+        
         return;
       }
-      */
     }
 
-    // TODO: sourcepath
-    
     candidates.add(cand);
   }
   

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/938ab944/src/test/resources/decoder/phrase/constrained/config
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/constrained/config b/src/test/resources/decoder/phrase/constrained/config
deleted file mode 100644
index be45e0a..0000000
--- a/src/test/resources/decoder/phrase/constrained/config
+++ /dev/null
@@ -1,29 +0,0 @@
-tm = moses pt 0 ../decode/rules.1.gz
-
-lm = kenlm 5 true false 100 ../decode/lm.1.gz
-
-mark-oovs = false
-pop-limit = 10
-top-n = 5
-
-output-format = %i ||| %s ||| %f ||| %c
-
-include-align-index = false
-reordering-limit = 10
-use-unique-nbest = false
-
-# And these are the feature functions to activate.
-feature-function = OOVPenalty
-feature-function = WordPenalty
-feature-function = Distortion
-feature-function = PhrasePenalty -owner pt
-
-OOVPenalty 1.0
-Distortion 0.114849
-WordPenalty -0.201544
-PhrasePenalty -0.236965
-tm_pt_0 0.0370068
-tm_pt_1 0.0495759
-tm_pt_2 0.196742
-tm_pt_3 0.0745423
-lm_0 0.204412452147565

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/938ab944/src/test/resources/decoder/phrase/constrained/joshua.config
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/constrained/joshua.config b/src/test/resources/decoder/phrase/constrained/joshua.config
new file mode 100644
index 0000000..be45e0a
--- /dev/null
+++ b/src/test/resources/decoder/phrase/constrained/joshua.config
@@ -0,0 +1,29 @@
+tm = moses pt 0 ../decode/rules.1.gz
+
+lm = kenlm 5 true false 100 ../decode/lm.1.gz
+
+mark-oovs = false
+pop-limit = 10
+top-n = 5
+
+output-format = %i ||| %s ||| %f ||| %c
+
+include-align-index = false
+reordering-limit = 10
+use-unique-nbest = false
+
+# And these are the feature functions to activate.
+feature-function = OOVPenalty
+feature-function = WordPenalty
+feature-function = Distortion
+feature-function = PhrasePenalty -owner pt
+
+OOVPenalty 1.0
+Distortion 0.114849
+WordPenalty -0.201544
+PhrasePenalty -0.236965
+tm_pt_0 0.0370068
+tm_pt_1 0.0495759
+tm_pt_2 0.196742
+tm_pt_3 0.0745423
+lm_0 0.204412452147565

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/938ab944/src/test/resources/decoder/phrase/constrained/test.sh
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/constrained/test.sh b/src/test/resources/decoder/phrase/constrained/test.sh
index 7703aa4..6bef145 100755
--- a/src/test/resources/decoder/phrase/constrained/test.sh
+++ b/src/test/resources/decoder/phrase/constrained/test.sh
@@ -17,7 +17,7 @@
 #
 set -u
 
-cat corpus.es | $JOSHUA/bin/joshua-decoder -threads 1 -c config > output 2> log
+cat corpus.es | $JOSHUA/bin/joshua-decoder -threads 1 -c joshua.config > output 2> log
 
 # Compare
 diff -u output output.gold > diff