You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/09/08 19:32:46 UTC
[2/3] incubator-joshua git commit: restored phrase-based constrained
decoding
restored phrase-based constrained decoding
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/938ab944
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/938ab944
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/938ab944
Branch: refs/heads/master
Commit: 938ab944205f1171cecfa9ec9b4cbcdfe3eb22a4
Parents: cb37624
Author: Matt Post <po...@cs.jhu.edu>
Authored: Thu Sep 8 12:55:04 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Thu Sep 8 12:55:04 2016 -0400
----------------------------------------------------------------------
.../org/apache/joshua/decoder/phrase/Stack.java | 24 ++++++++++------
.../resources/decoder/phrase/constrained/config | 29 --------------------
.../decoder/phrase/constrained/joshua.config | 29 ++++++++++++++++++++
.../decoder/phrase/constrained/test.sh | 2 +-
4 files changed, 45 insertions(+), 39 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/938ab944/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Stack.java b/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
index a16d9fe..0ff025f 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
@@ -26,6 +26,7 @@ import java.util.PriorityQueue;
import java.util.Set;
import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.ff.tm.Rule;
import org.apache.joshua.decoder.segment_file.Sentence;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -143,24 +144,29 @@ public class Stack extends ArrayList<Hypothesis> {
// Constrained decoding
if (sentence.target() != null) {
- throw new RuntimeException("* FATAL! Constrained decoding no longer works for the new phrase format");
- // TODO: fix constrained decoding
- /*
- String oldWords = cand.getHypothesis().bestHyperedge.getRule().getEnglishWords().replace("[X,1] ", "");
- String newWords = cand.getRule().getEnglishWords().replace("[X,1] ", "");
+
+ /* Get the rule. If it's a swap or monolingual rule, find the right backpointer */
+ Rule rule = cand.getHypothesis().getRule();
+ if (rule == Hypothesis.MONO_RULE)
+ rule = cand.getHypothesis().bestHyperedge.getTailNodes().get(1).bestHyperedge.getRule();
+ else if (rule == Hypothesis.SWAP_RULE)
+ rule = cand.getHypothesis().bestHyperedge.getTailNodes().get(0).bestHyperedge.getRule();
+ String oldWords = rule.getEnglishWords();
+
+ String newWords = cand.getPhraseRule().getEnglishWords();
+ boolean allowed = sentence.fullTarget().contains(oldWords + " " + newWords);
+
// If the string is not found in the target sentence, explore the cube neighbors
- if (!sentence.fullTarget().contains(oldWords + " " + newWords)) {
+ if (! allowed) {
Candidate next = cand.extendPhrase();
if (next != null)
addCandidate(next);
+
return;
}
- */
}
- // TODO: sourcepath
-
candidates.add(cand);
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/938ab944/src/test/resources/decoder/phrase/constrained/config
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/constrained/config b/src/test/resources/decoder/phrase/constrained/config
deleted file mode 100644
index be45e0a..0000000
--- a/src/test/resources/decoder/phrase/constrained/config
+++ /dev/null
@@ -1,29 +0,0 @@
-tm = moses pt 0 ../decode/rules.1.gz
-
-lm = kenlm 5 true false 100 ../decode/lm.1.gz
-
-mark-oovs = false
-pop-limit = 10
-top-n = 5
-
-output-format = %i ||| %s ||| %f ||| %c
-
-include-align-index = false
-reordering-limit = 10
-use-unique-nbest = false
-
-# And these are the feature functions to activate.
-feature-function = OOVPenalty
-feature-function = WordPenalty
-feature-function = Distortion
-feature-function = PhrasePenalty -owner pt
-
-OOVPenalty 1.0
-Distortion 0.114849
-WordPenalty -0.201544
-PhrasePenalty -0.236965
-tm_pt_0 0.0370068
-tm_pt_1 0.0495759
-tm_pt_2 0.196742
-tm_pt_3 0.0745423
-lm_0 0.204412452147565
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/938ab944/src/test/resources/decoder/phrase/constrained/joshua.config
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/constrained/joshua.config b/src/test/resources/decoder/phrase/constrained/joshua.config
new file mode 100644
index 0000000..be45e0a
--- /dev/null
+++ b/src/test/resources/decoder/phrase/constrained/joshua.config
@@ -0,0 +1,29 @@
+tm = moses pt 0 ../decode/rules.1.gz
+
+lm = kenlm 5 true false 100 ../decode/lm.1.gz
+
+mark-oovs = false
+pop-limit = 10
+top-n = 5
+
+output-format = %i ||| %s ||| %f ||| %c
+
+include-align-index = false
+reordering-limit = 10
+use-unique-nbest = false
+
+# And these are the feature functions to activate.
+feature-function = OOVPenalty
+feature-function = WordPenalty
+feature-function = Distortion
+feature-function = PhrasePenalty -owner pt
+
+OOVPenalty 1.0
+Distortion 0.114849
+WordPenalty -0.201544
+PhrasePenalty -0.236965
+tm_pt_0 0.0370068
+tm_pt_1 0.0495759
+tm_pt_2 0.196742
+tm_pt_3 0.0745423
+lm_0 0.204412452147565
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/938ab944/src/test/resources/decoder/phrase/constrained/test.sh
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/constrained/test.sh b/src/test/resources/decoder/phrase/constrained/test.sh
index 7703aa4..6bef145 100755
--- a/src/test/resources/decoder/phrase/constrained/test.sh
+++ b/src/test/resources/decoder/phrase/constrained/test.sh
@@ -17,7 +17,7 @@
#
set -u
-cat corpus.es | $JOSHUA/bin/joshua-decoder -threads 1 -c config > output 2> log
+cat corpus.es | $JOSHUA/bin/joshua-decoder -threads 1 -c joshua.config > output 2> log
# Compare
diff -u output output.gold > diff