You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/08/20 02:43:25 UTC
[01/15] incubator-joshua git commit: large commit converting
phrase-based decoding to new rule format
Repository: incubator-joshua
Updated Branches:
refs/heads/JOSHUA-284 [created] 16d5647be
large commit converting phrase-based decoding to new rule format
Not working yet, but much of the code has been redone and future estimates are now being computed correctly.
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/dcc7e7ee
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/dcc7e7ee
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/dcc7e7ee
Branch: refs/heads/JOSHUA-284
Commit: dcc7e7ee72228de08b70003a49344c2614eaedbe
Parents: fcaf0bf
Author: Matt Post <po...@cs.jhu.edu>
Authored: Tue Aug 16 18:13:06 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Tue Aug 16 18:13:06 2016 -0400
----------------------------------------------------------------------
.gitignore | 1 +
.../decoder/ff/tm/format/MosesFormatReader.java | 13 +--
.../apache/joshua/decoder/phrase/Candidate.java | 103 +++++++++++++------
.../apache/joshua/decoder/phrase/Future.java | 9 +-
.../apache/joshua/decoder/phrase/Header.java | 87 ----------------
.../joshua/decoder/phrase/Hypothesis.java | 48 ++++++---
.../joshua/decoder/phrase/PhraseTable.java | 8 +-
.../org/apache/joshua/decoder/phrase/Stack.java | 15 ++-
.../apache/joshua/decoder/phrase/Stacks.java | 18 +++-
.../joshua/decoder/phrase/TargetPhrases.java | 1 +
.../org/apache/joshua/tools/GrammarPacker.java | 8 +-
11 files changed, 136 insertions(+), 175 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dcc7e7ee/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
index d3d311e..0d42974 100644
--- a/.gitignore
+++ b/.gitignore
@@ -57,3 +57,4 @@ build
.classpath
/target/
.project
+/doc/
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dcc7e7ee/src/main/java/org/apache/joshua/decoder/ff/tm/format/MosesFormatReader.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/format/MosesFormatReader.java b/src/main/java/org/apache/joshua/decoder/ff/tm/format/MosesFormatReader.java
index 7811b3b..cdf2170 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/format/MosesFormatReader.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/format/MosesFormatReader.java
@@ -63,22 +63,15 @@ public class MosesFormatReader extends HieroFormatReader {
*
* becomes
*
- * [X] ||| [X,1] mots francaises ||| [X,1] French words ||| 1 2 3 ||| 0-1 1-0
+ * [X] ||| mots francaises ||| French words ||| 1 2 3 ||| 0-1 1-0
*
- * For thrax-extracted phrasal grammars, it transforms
- *
- * [X] ||| mots francaises ||| French words ||| 1 2 3 ||| 0-1 1-0
- *
- * into
- *
- * [X] ||| [X,1] mots francaises ||| [X,1] French words ||| 1 2 3 ||| 0-1 1-0
+ * For thrax-extracted phrasal grammars, no transformation is needed.
*/
@Override
public Rule parseLine(String line) {
String[] fields = line.split(Constants.fieldDelimiter);
- String nt = FormatUtils.cleanNonTerminal(Constants.defaultNT);
- StringBuffer hieroLine = new StringBuffer(Constants.defaultNT + " ||| [" + nt + ",1] " + fields[0] + " ||| [" + nt + ",1] " + fields[1] + " |||");
+ StringBuffer hieroLine = new StringBuffer(Constants.defaultNT + " ||| " + fields[0] + " ||| " + fields[1] + " |||");
String mosesFeatureString = fields[2];
for (String value: mosesFeatureString.split(" ")) {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dcc7e7ee/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java b/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
index ee8a2a9..2abe560 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
@@ -19,9 +19,17 @@
package org.apache.joshua.decoder.phrase;
/***
- * A candidate is basically a cube prune state. It contains a list of hypotheses and target
- * phrases, and an instantiated candidate is a pair of indices that index these two lists. This
- * is the "cube prune" position.
+ * A candidate represents a translation hypothesis that may possibly be added to the translation
+ * hypergraph. It groups together (a) a set of translation hypotheses all having the same coverage
+ * vector and (b) a set of compatible phrase extensions that all cover the same source span. A
+ * Candidate object therefore denotes a particular precise coverage vector. When a Candidate is
+ * instantiated, it has values in ranks[] that are indices into these two lists representing
+ * the current cube prune state.
+ *
+ * For any particular (previous hypothesis) x (translation option) combination (a selection from
+ * both lists), there is no guarantee about whether this is a (m)onotonic, (s)wap, or (d)iscontinuous
+ * rule application. This must be inferred from the span (recording the portion of the input being
+ * translated) and the last index of the previous hypothesis under consideration.
*/
import java.util.ArrayList;
@@ -30,27 +38,41 @@ import java.util.List;
import org.apache.joshua.corpus.Span;
import org.apache.joshua.decoder.chart_parser.ComputeNodeResult;
+import org.apache.joshua.decoder.ff.FeatureFunction;
import org.apache.joshua.decoder.ff.state_maintenance.DPState;
import org.apache.joshua.decoder.ff.tm.Rule;
import org.apache.joshua.decoder.hypergraph.HGNode;
+import org.apache.joshua.decoder.segment_file.Sentence;
public class Candidate {
-
+
+ private List<FeatureFunction> featureFunctions;
+ private Sentence sentence;
+
+ // source span of new phrase
+ public Span span;
+
// the set of hypotheses that can be paired with phrases from this span
private List<Hypothesis> hypotheses;
// the list of target phrases gathered from a span of the input
private TargetPhrases phrases;
-
- // source span of new phrase
- public Span span;
// future cost of applying phrases to hypotheses
- float future_delta;
+ private float future_delta;
// indices into the hypotheses and phrases arrays (used for cube pruning)
private int[] ranks;
+ // the reordering rule used by an instantiated Candidate
+ private Rule rule;
+
+ // the HGNode built over the current target side phrase
+ private HGNode phraseNode;
+
+ // the cost of the current configuration
+ private ComputeNodeResult computedResult;
+
// scoring and state information
private ComputeNodeResult result;
@@ -96,22 +118,27 @@ public class Candidate {
ranks[0], hypotheses.size(), ranks[1], phrases.size(),
getHypothesis(), getRule().getEnglishWords().replaceAll("\\[.*?\\] ",""), getSpan());
}
-
- public Candidate(List<Hypothesis> hypotheses, TargetPhrases phrases, Span span, float delta) {
- this.hypotheses = hypotheses;
- this.phrases = phrases;
- this.span = span;
- this.future_delta = delta;
- this.ranks = new int[] { 0, 0 };
- }
- public Candidate(List<Hypothesis> hypotheses, TargetPhrases phrases, Span span, float delta, int[] ranks) {
+ public Candidate(List<FeatureFunction> featureFunctions, Sentence sentence,
+ List<Hypothesis> hypotheses, TargetPhrases phrases, Span span, float delta, int[] ranks) {
this.hypotheses = hypotheses;
this.phrases = phrases;
this.span = span;
this.future_delta = delta;
this.ranks = ranks;
+ this.rule = isMonotonic() ? Hypothesis.MONO_RULE : Hypothesis.END_RULE;
// this.score = hypotheses.get(ranks[0]).score + phrases.get(ranks[1]).getEstimatedCost();
+ this.phraseNode = null;
+ }
+
+ /**
+ * Determines whether the current previous hypothesis extended with the currently selected
+ * phrase represents a straight or inverted rule application.
+ *
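+ * @return true if the current hypothesis's last source index is less than the start of the span (a straight application), false otherwise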
+ */
+ private boolean isMonotonic() {
+ return getHypothesis().getLastSourceIndex() < span.start;
}
/**
@@ -131,7 +158,7 @@ public class Candidate {
*/
public Candidate extendHypothesis() {
if (ranks[0] < hypotheses.size() - 1) {
- return new Candidate(hypotheses, phrases, span, future_delta, new int[] { ranks[0] + 1, ranks[1] });
+ return new Candidate(featureFunctions, sentence, hypotheses, phrases, span, future_delta, new int[] { ranks[0] + 1, ranks[1] });
}
return null;
}
@@ -143,7 +170,7 @@ public class Candidate {
*/
public Candidate extendPhrase() {
if (ranks[1] < phrases.size() - 1) {
- return new Candidate(hypotheses, phrases, span, future_delta, new int[] { ranks[0], ranks[1] + 1 });
+ return new Candidate(featureFunctions, sentence, hypotheses, phrases, span, future_delta, new int[] { ranks[0], ranks[1] + 1 });
}
return null;
@@ -170,13 +197,24 @@ public class Candidate {
}
/**
- * This returns the target side {@link org.apache.joshua.corpus.Phrase}, which is a {@link org.apache.joshua.decoder.ff.tm.Rule} object. This is just a
- * convenience function that works by returning the phrase indexed in ranks[1].
+ * This returns a new Hypothesis (HGNode) representing the phrase being added, i.e., a terminal
+ * production in the hypergraph. The score and DP state are computed only here on demand.
+ *
+ * @return a new hypergraph node representing the phrase translation
+ */
+ public HGNode getPhraseNode() {
+ ComputeNodeResult result = new ComputeNodeResult(featureFunctions, getRule(), null, span.start, span.end, null, sentence);
+ phraseNode = new HGNode(-1, span.end, rule.getLHS(), result.getDPStates(), null, result.getPruningEstimate());
+ return phraseNode;
+ }
+
+ /**
+ * This returns the rule being applied (straight or inverted)
*
* @return the phrase at position ranks[1]
*/
public Rule getRule() {
- return phrases.get(ranks[1]);
+ return this.rule;
}
/**
@@ -187,7 +225,13 @@ public class Candidate {
*/
public List<HGNode> getTailNodes() {
List<HGNode> tailNodes = new ArrayList<HGNode>();
- tailNodes.add(getHypothesis());
+ if (isMonotonic()) {
+ tailNodes.add(getHypothesis());
+ tailNodes.add(getPhraseNode());
+ } else {
+ tailNodes.add(getPhraseNode());
+ tailNodes.add(getHypothesis());
+ }
return tailNodes;
}
@@ -202,13 +246,8 @@ public class Candidate {
return cov;
}
- /**
- * Sets the result of a candidate (TODO should just be moved to the constructor).
- *
- * @param result todo
- */
- public void setResult(ComputeNodeResult result) {
- this.result = result;
+ public ComputeNodeResult getResult() {
+ return computedResult;
}
/**
@@ -234,8 +273,4 @@ public class Candidate {
public List<DPState> getStates() {
return result.getDPStates();
}
-
- public ComputeNodeResult getResult() {
- return result;
- }
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dcc7e7ee/src/main/java/org/apache/joshua/decoder/phrase/Future.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Future.java b/src/main/java/org/apache/joshua/decoder/phrase/Future.java
index 0ece4a3..572aa64 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Future.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Future.java
@@ -43,7 +43,7 @@ public class Future {
sentlen = chart.SentenceLength();
entries = new ChartSpan<Float>(sentlen + 1, Float.NEGATIVE_INFINITY);
-
+
/*
* The sentence is represented as a sequence of words, with the first and last words set
* to <s> and </s>. We start indexing at 1 because the first word (<s>) is always covered.
@@ -68,7 +68,7 @@ public class Future {
// All the phrases are in, now do minimum dynamic programming. Lengths 0 and 1 were already handled above.
for (int length = 2; length <= chart.SentenceLength(); length++) {
- for (int begin = 1; begin <= chart.SentenceLength() - length; begin++) {
+ for (int begin = 1; begin < chart.SentenceLength() - length; begin++) {
for (int division = begin + 1; division < begin + length; division++) {
setEntry(begin, begin + length, Math.max(getEntry(begin, begin + length), getEntry(begin, division) + getEntry(division, begin + length)));
}
@@ -106,14 +106,13 @@ public class Future {
private float getEntry(int begin, int end) {
assert end >= begin;
- assert end < this.sentlen;
+ assert end <= this.sentlen;
return entries.get(begin, end);
}
private void setEntry(int begin, int end, float value) {
assert end >= begin;
- assert end < this.sentlen;
- // System.err.println(String.format("future cost from %d to %d is %.5f", begin, end, value));
+ assert end <= this.sentlen;
entries.set(begin, end, value);
}
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dcc7e7ee/src/main/java/org/apache/joshua/decoder/phrase/Header.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Header.java b/src/main/java/org/apache/joshua/decoder/phrase/Header.java
deleted file mode 100644
index 30d771c..0000000
--- a/src/main/java/org/apache/joshua/decoder/phrase/Header.java
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.joshua.decoder.phrase;
-
-// PORT: done
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.util.Comparator;
-
-public class Header implements Comparable<Header>, Comparator<Header> {
-
- private static final Logger LOG = LoggerFactory.getLogger(Header.class);
-
- private float score;
- private int arity;
- private Note note;
-
- protected Header() {
- score = 0.0f;
- arity = 0;
- note = null;
- }
-
- protected Header(Header other) {
- this.score = other.GetScore();
- this.arity = other.GetArity();
- this.note = other.GetNote();
- }
-
- protected Header(int arity) {
- this.score = 0.0f;
- this.arity = arity;
- this.note = new Note();
- }
-
- public boolean Valid() {
- // C++: return base_;
- LOG.debug("Header::Valid(): {}", (note != null));
- return note != null;
- }
-
- public float GetScore() {
- return score;
- }
-
- public void SetScore(float score) {
- this.score = score;
- }
-
- public int GetArity() { return arity; }
-
- public Note GetNote() { return note; }
-
- public void SetNote(Note note) { this.note = note; }
-
- @Override
- public int compareTo(Header other) {
- if (this.GetScore() < other.GetScore())
- return -1;
- else if (this.GetScore() > other.GetScore())
- return 1;
- return 0;
- }
-
- @Override
- public int compare(Header arg0, Header arg1) {
- return arg0.compareTo(arg1);
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dcc7e7ee/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java b/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
index 71d3df9..f87b728 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
@@ -18,9 +18,8 @@
*/
package org.apache.joshua.decoder.phrase;
-import java.util.List;
+import java.util.List;
-import org.apache.joshua.corpus.Vocabulary;
import org.apache.joshua.decoder.ff.state_maintenance.DPState;
import org.apache.joshua.decoder.ff.tm.Rule;
import org.apache.joshua.decoder.ff.tm.format.HieroFormatReader;
@@ -28,10 +27,12 @@ import org.apache.joshua.decoder.hypergraph.HGNode;
import org.apache.joshua.decoder.hypergraph.HyperEdge;
/**
- * Represents a hypothesis, a translation of some coverage of the input. Extends {@link org.apache.joshua.decoder.hypergraph.HGNode},
- * through a bit of a hack. Whereas (i,j) represents the span of an {@link org.apache.joshua.decoder.hypergraph.HGNode}, i here is not used,
- * and j is overloaded to denote the span of the phrase being applied. The complete coverage vector
- * can be obtained by looking at the tail pointer and casting it.
+ * Represents a hypothesis, a translation of some subset of the input sentence. Extends
+ * {@link org.apache.joshua.decoder.hypergraph.HGNode}, through a bit of a hack. Whereas (i,j)
+ * represents the span of an {@link org.apache.joshua.decoder.hypergraph.HGNode}, i here is not used,
+ * and j is overloaded to denote the index into the source string of the end of the last phrase that
+ * was applied. The complete coverage vector can be obtained by looking at the tail pointer and
+ * casting it.
*
* @author Kenneth Heafield
* @author Matt Post post@cs.jhu.edu
@@ -41,9 +42,11 @@ public class Hypothesis extends HGNode implements Comparable<Hypothesis> {
// The hypothesis' coverage vector
private Coverage coverage;
- public static Rule BEGIN_RULE = new HieroFormatReader().parseLine("[X] ||| <s> ||| <s> ||| ||| 0-0");
- public static Rule END_RULE = new HieroFormatReader().parseLine("[GOAL] ||| [X,1] </s> ||| [X,1] </s> ||| ||| 0-0 1-1");
-
+ public static Rule BEGIN_RULE = new HieroFormatReader().parseLine("[GOAL] ||| <s> ||| <s> ||| ||| 0-0");
+ public static Rule END_RULE = new HieroFormatReader().parseLine("[GOAL] ||| [GOAL,1] </s> ||| [GOAL,1] </s> ||| ||| 0-0 1-1");
+ public static Rule MONO_RULE = new HieroFormatReader().parseLine("[GOAL] ||| [GOAL,1] [X,2] ||| [GOAL,1] [X,2] ||| ||| 0-0 1-1");
+ public static Rule SWAP_RULE = new HieroFormatReader().parseLine("[GOAL] ||| [X,1] [GOAL,2] ||| [GOAL,2] [X,1] ||| ||| 0-1 1-0");
+
public String toString() {
StringBuffer sb = new StringBuffer();
for (DPState state: getDPStates())
@@ -55,18 +58,25 @@ public class Hypothesis extends HGNode implements Comparable<Hypothesis> {
// Initialize root hypothesis. Provide the LM's BeginSentence.
public Hypothesis(List<DPState> states, float futureCost) {
- super(0, 1, Vocabulary.id("[X]"), states,
+ super(0, 1, BEGIN_RULE.getLHS(), states,
new HyperEdge(BEGIN_RULE, 0.0f, 0.0f, null, null), futureCost);
this.coverage = new Coverage(1);
}
+ /**
+ * This creates a hypothesis from a Candidate object
+ *
+ * @param cand the candidate
+ */
public Hypothesis(Candidate cand) {
// TODO: sourcepath
- super(-1, cand.span.end, Vocabulary.id("[X]"), cand.getStates(), new HyperEdge(
- cand.getRule(), cand.getResult().getViterbiCost(), cand.getResult().getTransitionCost(),
- cand.getTailNodes(), null), cand.score());
+ super(-1, cand.span.end, cand.getRule().getLHS(), cand.getStates(),
+ new HyperEdge(cand.getRule(), cand.getResult().getViterbiCost(),
+ cand.getResult().getTransitionCost(),
+ cand.getTailNodes(), null), cand.score());
this.coverage = cand.getCoverage();
}
+
// Extend a previous hypothesis.
public Hypothesis(List<DPState> states, float score, Hypothesis previous, int source_end, Rule target) {
@@ -74,6 +84,10 @@ public class Hypothesis extends HGNode implements Comparable<Hypothesis> {
this.coverage = previous.coverage;
}
+ public Hypothesis(int lastSourceIndex, int lhs, List<DPState> states) {
+ super(-1, lastSourceIndex, lhs, states, null, 0.0f);
+ }
+
public Coverage getCoverage() {
return coverage;
}
@@ -86,16 +100,16 @@ public class Hypothesis extends HGNode implements Comparable<Hypothesis> {
* HGNodes (designed for chart parsing) maintain a span (i,j). We overload j
* here to record the index of the last translated source word.
*
- * @return the int 'j' which is overloaded to denote the span of the phrase being applied
+ * @return the index of the last translated source word
*/
- public int LastSourceIndex() {
+ public int getLastSourceIndex() {
return j;
}
@Override
public int hashCode() {
int hash = 0;
- hash = 31 * LastSourceIndex() + 19 * getCoverage().hashCode();
+ hash = 31 * getLastSourceIndex() + 19 * getCoverage().hashCode();
if (null != dpStates && dpStates.size() > 0)
for (DPState dps: dpStates)
hash *= 57 + dps.hashCode();
@@ -112,7 +126,7 @@ public class Hypothesis extends HGNode implements Comparable<Hypothesis> {
if (obj instanceof Hypothesis) {
Hypothesis other = (Hypothesis) obj;
- if (LastSourceIndex() != other.LastSourceIndex() || ! getCoverage().equals(other.getCoverage()))
+ if (getLastSourceIndex() != other.getLastSourceIndex() || ! getCoverage().equals(other.getCoverage()))
return false;
if (dpStates == null)
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dcc7e7ee/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java b/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java
index 312781f..6b237a9 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java
@@ -18,8 +18,6 @@
*/
package org.apache.joshua.decoder.phrase;
-import static org.apache.joshua.decoder.ff.tm.OwnerMap.UNKNOWN_OWNER;
-
import java.io.File;
import java.io.IOException;
import java.util.List;
@@ -80,14 +78,13 @@ public class PhraseTable implements Grammar {
}
/**
- * Returns the longest source phrase read. Because phrases have a dummy nonterminal prepended to
- * them, we need to subtract 1.
+ * Returns the longest source phrase read.
*
* @return the longest source phrase read.
*/
@Override
public int getMaxSourcePhraseLength() {
- return this.backend.getMaxSourcePhraseLength() - 1;
+ return this.backend.getMaxSourcePhraseLength();
}
/**
@@ -99,7 +96,6 @@ public class PhraseTable implements Grammar {
public RuleCollection getPhrases(int[] sourceWords) {
if (sourceWords.length != 0) {
Trie pointer = getTrieRoot();
- pointer = pointer.match(Vocabulary.id("[X]"));
int i = 0;
while (pointer != null && i < sourceWords.length)
pointer = pointer.match(sourceWords[i++]);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dcc7e7ee/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Stack.java b/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
index d0ae2da..ad24a51 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
@@ -22,13 +22,10 @@ import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
-import java.util.List;
import java.util.PriorityQueue;
import java.util.Set;
import org.apache.joshua.decoder.JoshuaConfiguration;
-import org.apache.joshua.decoder.chart_parser.ComputeNodeResult;
-import org.apache.joshua.decoder.ff.FeatureFunction;
import org.apache.joshua.decoder.segment_file.Sentence;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -46,7 +43,6 @@ public class Stack extends ArrayList<Hypothesis> {
private HashMap<Coverage, ArrayList<Hypothesis>> coverages;
private Sentence sentence;
- private List<FeatureFunction> featureFunctions;
private JoshuaConfiguration config;
/* The list of states we've already visited. */
@@ -65,8 +61,7 @@ public class Stack extends ArrayList<Hypothesis> {
* @param sentence input for a {@link org.apache.joshua.lattice.Lattice}
* @param config populated {@link org.apache.joshua.decoder.JoshuaConfiguration}
*/
- public Stack(List<FeatureFunction> featureFunctions, Sentence sentence, JoshuaConfiguration config) {
- this.featureFunctions = featureFunctions;
+ public Stack(Sentence sentence, JoshuaConfiguration config) {
this.sentence = sentence;
this.config = config;
@@ -149,6 +144,9 @@ public class Stack extends ArrayList<Hypothesis> {
// Constrained decoding
if (sentence.target() != null) {
+ throw new RuntimeException("* FATAL! Constrained decoding no longer works for the new phrase format");
+ // TODO: fix constrained decoding
+ /*
String oldWords = cand.getHypothesis().bestHyperedge.getRule().getEnglishWords().replace("[X,1] ", "");
String newWords = cand.getRule().getEnglishWords().replace("[X,1] ", "");
@@ -159,12 +157,10 @@ public class Stack extends ArrayList<Hypothesis> {
addCandidate(next);
return;
}
+ */
}
// TODO: sourcepath
- ComputeNodeResult result = new ComputeNodeResult(this.featureFunctions, cand.getRule(),
- cand.getTailNodes(), -1, cand.getSpan().end, null, this.sentence);
- cand.setResult(result);
candidates.add(cand);
}
@@ -199,6 +195,7 @@ public class Stack extends ArrayList<Hypothesis> {
/**
* Adds a popped candidate to the chart / main stack. This is a candidate we have decided to
* keep around.
+ *
* @param complete a completely-initialized translation {@link org.apache.joshua.decoder.phrase.Candidate}
*
*/
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dcc7e7ee/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java b/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
index 8c092ec..dc1a692 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
@@ -126,13 +126,13 @@ public class Stacks {
// Initialize root hypothesis with <s> context and future cost for everything.
ComputeNodeResult result = new ComputeNodeResult(this.featureFunctions, Hypothesis.BEGIN_RULE,
null, -1, 1, null, this.sentence);
- Stack firstStack = new Stack(featureFunctions, sentence, config);
+ Stack firstStack = new Stack(sentence, config);
firstStack.add(new Hypothesis(result.getDPStates(), future.Full()));
stacks.add(firstStack);
// Decode with increasing numbers of source words.
for (int source_words = 2; source_words <= sentence.length(); ++source_words) {
- Stack targetStack = new Stack(featureFunctions, sentence, config);
+ Stack targetStack = new Stack(sentence, config);
stacks.add(targetStack);
// Iterate over stacks to continue from.
@@ -144,7 +144,13 @@ public class Stacks {
LOG.debug("WORDS {} MAX {} (STACK {} phrase_length {})", source_words,
chart.MaxSourcePhraseLength(), from_stack, phrase_length);
- // Iterate over antecedents in this stack.
+ /* Each from stack groups together lots of different coverage vectors that all cover the
+ * same number of words. We have the number of covered words from from_stack, and the length
+ * of the phrases we are going to add from (source_words - from_stack). We now iterate over
+ * all coverage vectors, finding the set of phrases that can extend each of them, given
+ * the two constraints: the phrase length, and the current coverage vector. These will all
+ * be grouped under the same target stack.
+ */
for (Coverage coverage: tailStack.getCoverages()) {
ArrayList<Hypothesis> hypotheses = tailStack.get(coverage);
@@ -161,6 +167,9 @@ public class Stacks {
continue;
}
+ /* We have found a permissible phrase start point and length, that fits with the current
+ * coverage vector. Record that in a Span.
+ */
Span span = new Span(begin, begin + phrase_length);
// Don't append </s> until the end
@@ -171,7 +180,6 @@ public class Stacks {
if (phrases == null)
continue;
-
LOG.debug("Applying {} target phrases over [{}, {}]",
phrases.size(), begin, begin + phrase_length);
@@ -185,7 +193,7 @@ public class Stacks {
* phrases from that span. The hypotheses are wrapped in HypoState objects, which
* augment the hypothesis score with a future cost.
*/
- Candidate cand = new Candidate(hypotheses, phrases, span, future_delta);
+ Candidate cand = new Candidate(featureFunctions, sentence, hypotheses, phrases, span, future_delta, new int[] {0, 0});
targetStack.addCandidate(cand);
}
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dcc7e7ee/src/main/java/org/apache/joshua/decoder/phrase/TargetPhrases.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/TargetPhrases.java b/src/main/java/org/apache/joshua/decoder/phrase/TargetPhrases.java
index 05a4b0a..ed1d577 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/TargetPhrases.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/TargetPhrases.java
@@ -59,6 +59,7 @@ public class TargetPhrases extends ArrayList<Rule> {
* Score the rules and sort them. Scoring is necessary because rules are only scored if they
* are used, in an effort to make reading in rules more efficient. This is starting to create
* some trouble and should probably be reworked.
+ *
* @param features a {@link java.util.List} of {@link org.apache.joshua.decoder.ff.FeatureFunction}'s
* @param weights a populated {@link org.apache.joshua.decoder.ff.FeatureVector}
* @param num_options the number of options
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dcc7e7ee/src/main/java/org/apache/joshua/tools/GrammarPacker.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/tools/GrammarPacker.java b/src/main/java/org/apache/joshua/tools/GrammarPacker.java
index b9208d2..b39b775 100644
--- a/src/main/java/org/apache/joshua/tools/GrammarPacker.java
+++ b/src/main/java/org/apache/joshua/tools/GrammarPacker.java
@@ -61,9 +61,13 @@ public class GrammarPacker {
* table packing that packed phrases without the [X,1] on the source and target sides, which
* then required special handling in the decoder to use for phrase-based decoding.
*
- *
+ * - 4 (August 2016). Phrase-based decoding rewritten to represent phrases without a builtin
+ * nonterminal. Instead, cost-less glue rules are used in phrase-based decoding. This eliminates
+ * the need for special handling of phrase grammars (except for having to add a LHS), and lets
+ * phrase grammars be used in both hierarchical and phrase-based decoding without conversion.
+ *
*/
- public static final int VERSION = 3;
+ public static final int VERSION = 4;
// Size limit for slice in bytes.
private static int DATA_SIZE_LIMIT = (int) (Integer.MAX_VALUE * 0.8);
[07/15] incubator-joshua git commit: changed order of assert() args
Posted by mj...@apache.org.
changed order of assert() args
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/2e746c18
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/2e746c18
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/2e746c18
Branch: refs/heads/JOSHUA-284
Commit: 2e746c1864ca7eb6be27f2fca3ab258c9ebe7adb
Parents: 1022699
Author: Matt Post <po...@cs.jhu.edu>
Authored: Fri Aug 19 13:14:18 2016 -0500
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Fri Aug 19 13:14:18 2016 -0500
----------------------------------------------------------------------
.../apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/2e746c18/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java b/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
index 9e9a415..e81b0de 100644
--- a/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
+++ b/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
@@ -64,7 +64,7 @@ public class PhraseDecodingTest {
public void givenInput_whenPhraseDecoding_thenOutputIsAsExpected() throws IOException {
final String translation = decode(INPUT).toString();
final String gold = new String(readAllBytes(GOLD_PATH), UTF_8);
- assertEquals(gold, translation);
+ assertEquals(translation, gold);
}
private Translation decode(String input) {
[12/15] incubator-joshua git commit: converted from span -> separate
i, j
Posted by mj...@apache.org.
converted from span -> separate i, j
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/eb002238
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/eb002238
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/eb002238
Branch: refs/heads/JOSHUA-284
Commit: eb00223870c7683cf8e557ab689a1979fb36ec1d
Parents: 5719c8c
Author: Matt Post <po...@cs.jhu.edu>
Authored: Fri Aug 19 19:43:58 2016 -0500
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Fri Aug 19 19:43:58 2016 -0500
----------------------------------------------------------------------
.../apache/joshua/decoder/phrase/Candidate.java | 33 +++++++++++---------
.../apache/joshua/decoder/phrase/Future.java | 6 ++--
.../joshua/decoder/phrase/Hypothesis.java | 6 +---
.../joshua/decoder/phrase/PhraseChart.java | 5 ++-
.../apache/joshua/decoder/phrase/Stacks.java | 4 ++-
.../joshua/decoder/phrase/TargetPhrases.java | 8 ++++-
6 files changed, 36 insertions(+), 26 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/eb002238/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java b/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
index bd581e3..cb9cd6d 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
@@ -50,9 +50,6 @@ public class Candidate {
private List<FeatureFunction> featureFunctions;
private Sentence sentence;
- // source span of new phrase
- public Span span;
-
// the set of hypotheses that can be paired with phrases from this span
private List<Hypothesis> hypotheses;
@@ -92,7 +89,7 @@ public class Candidate {
public boolean equals(Object obj) {
if (obj instanceof Candidate) {
Candidate other = (Candidate) obj;
- if (hypotheses != other.hypotheses || phrases != other.phrases || span != other.span)
+ if (hypotheses != other.hypotheses || phrases != other.phrases)
return false;
if (ranks.length != other.ranks.length)
@@ -111,7 +108,6 @@ public class Candidate {
public int hashCode() {
return 17 * hypotheses.size()
+ 23 * phrases.size()
- + 57 * span.hashCode()
+ 117 * Arrays.hashCode(ranks);
// return hypotheses.hashCode() * phrases.hashCode() * span.hashCode() * Arrays.hashCode(ranks);
}
@@ -124,12 +120,11 @@ public class Candidate {
}
public Candidate(List<FeatureFunction> featureFunctions, Sentence sentence,
- List<Hypothesis> hypotheses, TargetPhrases phrases, Span span, float delta, int[] ranks) {
+ List<Hypothesis> hypotheses, TargetPhrases phrases, float delta, int[] ranks) {
this.featureFunctions = featureFunctions;
this.sentence = sentence;
this.hypotheses = hypotheses;
this.phrases = phrases;
- this.span = span;
this.future_delta = delta;
this.ranks = ranks;
this.rule = isMonotonic() ? Hypothesis.MONO_RULE : Hypothesis.SWAP_RULE;
@@ -148,7 +143,9 @@ public class Candidate {
* @return
*/
private boolean isMonotonic() {
- return getHypothesis().getLastSourceIndex() < span.start;
+// System.err.println(String.format("isMonotonic(); %d < %d -> %s",
+// getLastCovered(), getPhraseEnd(), getLastCovered() < getPhraseEnd()));
+ return getLastCovered() < getPhraseEnd();
}
/**
@@ -168,7 +165,7 @@ public class Candidate {
*/
public Candidate extendHypothesis() {
if (ranks[0] < hypotheses.size() - 1) {
- return new Candidate(featureFunctions, sentence, hypotheses, phrases, span, future_delta, new int[] { ranks[0] + 1, ranks[1] });
+ return new Candidate(featureFunctions, sentence, hypotheses, phrases, future_delta, new int[] { ranks[0] + 1, ranks[1] });
}
return null;
}
@@ -180,7 +177,7 @@ public class Candidate {
*/
public Candidate extendPhrase() {
if (ranks[1] < phrases.size() - 1) {
- return new Candidate(featureFunctions, sentence, hypotheses, phrases, span, future_delta, new int[] { ranks[0], ranks[1] + 1 });
+ return new Candidate(featureFunctions, sentence, hypotheses, phrases, future_delta, new int[] { ranks[0], ranks[1] + 1 });
}
return null;
@@ -192,7 +189,7 @@ public class Candidate {
* @return the span object
*/
public Span getSpan() {
- return this.span;
+ return new Span(this.phrases.i, this.phrases.j);
}
/**
@@ -236,13 +233,13 @@ public class Candidate {
public ComputeNodeResult computeResult() {
if (computedResult == null) {
// add the phrase node
- ComputeNodeResult phraseResult = new ComputeNodeResult(featureFunctions, getPhraseRule(), null, span.start, span.end, null, sentence);
+ ComputeNodeResult phraseResult = new ComputeNodeResult(featureFunctions, getPhraseRule(), null, phrases.i, phrases.j, null, sentence);
HyperEdge edge = new HyperEdge(getPhraseRule(), phraseResult.getViterbiCost(), phraseResult.getTransitionCost(), null, null);
- phraseNode = new HGNode(-1, span.end, rule.getLHS(), phraseResult.getDPStates(), edge, phraseResult.getPruningEstimate());
+ phraseNode = new HGNode(phrases.i, phrases.j, rule.getLHS(), phraseResult.getDPStates(), edge, phraseResult.getPruningEstimate());
// add the rule
// TODO: sourcepath
- computedResult = new ComputeNodeResult(featureFunctions, getRule(), getTailNodes(), -1, span.end, null, sentence);
+ computedResult = new ComputeNodeResult(featureFunctions, getRule(), getTailNodes(), getLastCovered(), getPhraseEnd(), null, sentence);
}
return computedResult;
@@ -315,4 +312,12 @@ public class Candidate {
public List<DPState> getStates() {
return computeResult().getDPStates();
}
+
+ public int getLastCovered() {
+ return getHypothesis().getLastSourceIndex();
+ }
+
+ public int getPhraseEnd() {
+ return phrases.j;
+ }
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/eb002238/src/main/java/org/apache/joshua/decoder/phrase/Future.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Future.java b/src/main/java/org/apache/joshua/decoder/phrase/Future.java
index 572aa64..c411ccb 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Future.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Future.java
@@ -99,8 +99,9 @@ public class Future {
public float Change(Coverage coverage, int begin, int end) {
int left = coverage.leftOpening(begin);
int right = coverage.rightOpening(end, sentlen);
- // System.err.println(String.format("Future::Change(%s, %d, %d) left %d right %d %.3f %.3f %.3f", coverage, begin, end, left, right,
- // Entry(left, begin), Entry(end, right), Entry(left, right)));
+// System.err.println(String.format("Future.Change(%s, %d, %d) left %d right %d %.3f %.3f %.3f",
+// coverage, begin, end, left, right,
+// getEntry(left, begin), getEntry(end, right), getEntry(left, right)));
return getEntry(left, begin) + getEntry(end, right) - getEntry(left, right);
}
@@ -113,6 +114,7 @@ public class Future {
private void setEntry(int begin, int end, float value) {
assert end >= begin;
assert end <= this.sentlen;
+// System.err.println(String.format("Future.setEntry(%d, %d) = %f", begin, end, value));
entries.set(begin, end, value);
}
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/eb002238/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java b/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
index 132d62d..8ef5597 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
@@ -70,7 +70,7 @@ public class Hypothesis extends HGNode implements Comparable<Hypothesis> {
*/
public Hypothesis(Candidate cand) {
// TODO: sourcepath
- super(-1, cand.span.end, cand.getRule().getLHS(), cand.getStates(),
+ super(cand.getLastCovered(), cand.getPhraseEnd(), cand.getRule().getLHS(), cand.getStates(),
new HyperEdge(cand.getRule(), cand.computeResult().getViterbiCost(),
cand.computeResult().getTransitionCost(),
cand.getTailNodes(), null), cand.score());
@@ -84,10 +84,6 @@ public class Hypothesis extends HGNode implements Comparable<Hypothesis> {
this.coverage = previous.coverage;
}
- public Hypothesis(int lastSourceIndex, int lhs, List<DPState> states) {
- super(-1, lastSourceIndex, lhs, states, null, 0.0f);
- }
-
public Coverage getCoverage() {
return coverage;
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/eb002238/src/main/java/org/apache/joshua/decoder/phrase/PhraseChart.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/PhraseChart.java b/src/main/java/org/apache/joshua/decoder/phrase/PhraseChart.java
index 9803d9b..dd5872f 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/PhraseChart.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/PhraseChart.java
@@ -185,9 +185,8 @@ public class PhraseChart {
try {
int offset = offset(begin, end);
if (entries.get(offset) == null)
- entries.set(offset, new TargetPhrases(rules));
- else
- entries.get(offset).addAll(rules);
+ entries.set(offset, new TargetPhrases(begin, end));
+ entries.get(offset).addAll(rules);
} catch (java.lang.IndexOutOfBoundsException e) {
LOG.error("Whoops! {} [{}-{}] too long ({})", to, begin, end, entries.size());
LOG.error(e.getMessage(), e);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/eb002238/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java b/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
index dc1a692..c642226 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
@@ -193,7 +193,7 @@ public class Stacks {
* phrases from that span. The hypotheses are wrapped in HypoState objects, which
* augment the hypothesis score with a future cost.
*/
- Candidate cand = new Candidate(featureFunctions, sentence, hypotheses, phrases, span, future_delta, new int[] {0, 0});
+ Candidate cand = new Candidate(featureFunctions, sentence, hypotheses, phrases, future_delta, new int[] {0, 0});
targetStack.addCandidate(cand);
}
}
@@ -267,6 +267,8 @@ public class Stacks {
float finalTransitionScore = ComputeNodeResult.computeFinalCost(featureFunctions, tailNodes, 0, sentence.length(), null, sentence);
+ System.err.println(String.format("createGoalNode: final score: %f -> %f", score, finalTransitionScore));
+
if (null == this.end)
this.end = new Hypothesis(null, score + finalTransitionScore, hyp, sentence.length(), null);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/eb002238/src/main/java/org/apache/joshua/decoder/phrase/TargetPhrases.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/TargetPhrases.java b/src/main/java/org/apache/joshua/decoder/phrase/TargetPhrases.java
index ed1d577..5692d1a 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/TargetPhrases.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/TargetPhrases.java
@@ -37,9 +37,15 @@ import org.apache.joshua.decoder.ff.tm.Rule;
public class TargetPhrases extends ArrayList<Rule> {
private static final long serialVersionUID = 1L;
+
+ public int i = -2;
+ public int j = -2;
- public TargetPhrases() {
+ public TargetPhrases(int i, int j) {
super();
+
+ this.i = i;
+ this.j = j;
}
/**
[11/15] incubator-joshua git commit: fixed distortion computation to
work with new format
Posted by mj...@apache.org.
fixed distortion computation to work with new format
code now produces a translation on my test case, though it's not the correct one
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/5719c8cf
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/5719c8cf
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/5719c8cf
Branch: refs/heads/JOSHUA-284
Commit: 5719c8cff728499bffd1053462351340f1d91353
Parents: 9b73d61
Author: Matt Post <po...@cs.jhu.edu>
Authored: Fri Aug 19 13:17:21 2016 -0500
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Fri Aug 19 13:17:21 2016 -0500
----------------------------------------------------------------------
.../org/apache/joshua/decoder/ff/phrase/Distortion.java | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5719c8cf/src/main/java/org/apache/joshua/decoder/ff/phrase/Distortion.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/phrase/Distortion.java b/src/main/java/org/apache/joshua/decoder/ff/phrase/Distortion.java
index f9e6a29..072162b 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/phrase/Distortion.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/phrase/Distortion.java
@@ -56,12 +56,14 @@ public class Distortion extends StatelessFF {
public DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j, SourcePath sourcePath,
Sentence sentence, Accumulator acc) {
- if (rule != Hypothesis.BEGIN_RULE && rule != Hypothesis.END_RULE) {
- int start_point = j - rule.getFrench().length + rule.getArity();
+ if (rule == Hypothesis.MONO_RULE || rule == Hypothesis.SWAP_RULE) {
+// int start_point = j - rule.getFrench().length + rule.getArity();
+// int jump_size = Math.abs(tailNodes.get(0).j - start_point);
- int jump_size = Math.abs(tailNodes.get(0).j - start_point);
-// acc.add(name, -jump_size);
- acc.add(denseFeatureIndex, -jump_size);
+ int start_point = tailNodes.get(rule == Hypothesis.MONO_RULE ? 0 : 1).j;
+ int jump_size = Math.abs(j - start_point);
+
+ acc.add(denseFeatureIndex, -jump_size);
}
// System.err.println(String.format("DISTORTION(%d, %d) from %d = %d", i, j, tailNodes != null ? tailNodes.get(0).j : -1, jump_size));
[13/15] incubator-joshua git commit: off-by-one error in computing
future estimates
Posted by mj...@apache.org.
off-by-one error in computing future estimates
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/473b3016
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/473b3016
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/473b3016
Branch: refs/heads/JOSHUA-284
Commit: 473b3016562677671f70a19cd15d67a2bc1a5c83
Parents: eb00223
Author: Matt Post <po...@cs.jhu.edu>
Authored: Fri Aug 19 19:44:14 2016 -0500
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Fri Aug 19 19:44:14 2016 -0500
----------------------------------------------------------------------
src/main/java/org/apache/joshua/decoder/phrase/Future.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/473b3016/src/main/java/org/apache/joshua/decoder/phrase/Future.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Future.java b/src/main/java/org/apache/joshua/decoder/phrase/Future.java
index c411ccb..b1bdcc8 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Future.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Future.java
@@ -68,7 +68,7 @@ public class Future {
// All the phrases are in, now do minimum dynamic programming. Lengths 0 and 1 were already handled above.
for (int length = 2; length <= chart.SentenceLength(); length++) {
- for (int begin = 1; begin < chart.SentenceLength() - length; begin++) {
+ for (int begin = 1; begin <= chart.SentenceLength() - length; begin++) {
for (int division = begin + 1; division < begin + length; division++) {
setEntry(begin, begin + length, Math.max(getEntry(begin, begin + length), getEntry(begin, division) + getEntry(division, begin + length)));
}
[15/15] incubator-joshua git commit: fixed computation of distortion
Posted by mj...@apache.org.
fixed computation of distortion
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/16d5647b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/16d5647b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/16d5647b
Branch: refs/heads/JOSHUA-284
Commit: 16d5647bee30345ffa56b5b7d5bebc1021afa3fa
Parents: 574cb36
Author: Matt Post <po...@cs.jhu.edu>
Authored: Fri Aug 19 19:45:12 2016 -0500
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Fri Aug 19 19:45:12 2016 -0500
----------------------------------------------------------------------
.../joshua/decoder/ff/phrase/Distortion.java | 23 +++++++++++++++-----
1 file changed, 18 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/16d5647b/src/main/java/org/apache/joshua/decoder/ff/phrase/Distortion.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/phrase/Distortion.java b/src/main/java/org/apache/joshua/decoder/ff/phrase/Distortion.java
index 072162b..d4b49db 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/phrase/Distortion.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/phrase/Distortion.java
@@ -60,14 +60,27 @@ public class Distortion extends StatelessFF {
// int start_point = j - rule.getFrench().length + rule.getArity();
// int jump_size = Math.abs(tailNodes.get(0).j - start_point);
- int start_point = tailNodes.get(rule == Hypothesis.MONO_RULE ? 0 : 1).j;
- int jump_size = Math.abs(j - start_point);
+ if (rule == Hypothesis.MONO_RULE) {
+ int start_point = j - tailNodes.get(1).getHyperEdges().get(0).getRule().getFrench().length;
+ int last_point = tailNodes.get(0).j;
+ int jump_size = Math.abs(start_point - last_point);
+
+// System.err.println(String.format("DISTORTION_mono(%d -> %d) = %d",
+// last_point, start_point, jump_size));
- acc.add(denseFeatureIndex, -jump_size);
+ acc.add(denseFeatureIndex, -jump_size);
+ } else {
+ int start_point = j - tailNodes.get(0).getHyperEdges().get(0).getRule().getFrench().length;
+ int last_point = tailNodes.get(1).j;
+ int jump_size = Math.abs(start_point - last_point);
+
+// System.err.println(String.format("DISTORTION_swap(%d -> %d) = %d",
+// last_point, start_point, jump_size));
+
+ acc.add(denseFeatureIndex, -jump_size);
+ }
}
-// System.err.println(String.format("DISTORTION(%d, %d) from %d = %d", i, j, tailNodes != null ? tailNodes.get(0).j : -1, jump_size));
-
return null;
}
}
[05/15] incubator-joshua git commit: enabled test
Posted by mj...@apache.org.
enabled test
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/b1ec6271
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/b1ec6271
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/b1ec6271
Branch: refs/heads/JOSHUA-284
Commit: b1ec62711d15f3b692b6a7026752123f75522f6e
Parents: dac822d
Author: Matt Post <po...@cs.jhu.edu>
Authored: Wed Aug 17 06:24:07 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Wed Aug 17 06:24:07 2016 -0400
----------------------------------------------------------------------
.../apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b1ec6271/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java b/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
index f2fc6a7..9e9a415 100644
--- a/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
+++ b/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
@@ -60,7 +60,7 @@ public class PhraseDecodingTest {
decoder = null;
}
- @Test(enabled = false)
+ @Test(enabled = true)
public void givenInput_whenPhraseDecoding_thenOutputIsAsExpected() throws IOException {
final String translation = decode(INPUT).toString();
final String gold = new String(readAllBytes(GOLD_PATH), UTF_8);
@@ -69,6 +69,7 @@ public class PhraseDecodingTest {
private Translation decode(String input) {
final Sentence sentence = new Sentence(input, 0, joshuaConfig);
+ joshuaConfig.setVerbosity(2);
return decoder.decode(sentence);
}
[04/15] incubator-joshua git commit: missed file in commit
Posted by mj...@apache.org.
missed file in commit
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/dac822d1
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/dac822d1
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/dac822d1
Branch: refs/heads/JOSHUA-284
Commit: dac822d15145614c33f5fb12d2797e1f91825bb3
Parents: 48a9aad
Author: Matt Post <po...@cs.jhu.edu>
Authored: Wed Aug 17 06:23:57 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Wed Aug 17 06:23:57 2016 -0400
----------------------------------------------------------------------
.../apache/joshua/decoder/phrase/Candidate.java | 55 ++++++++++++++------
1 file changed, 40 insertions(+), 15 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dac822d1/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java b/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
index 2abe560..303749d 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
@@ -67,14 +67,17 @@ public class Candidate {
// the reordering rule used by an instantiated Candidate
private Rule rule;
- // the HGNode built over the current target side phrase
- private HGNode phraseNode;
-
- // the cost of the current configuration
+ /*
+ * Stores the inside cost of the current phrase, as well as the computed dynamic programming
+ * state. Expensive to compute so there is an option of delaying it.
+ */
private ComputeNodeResult computedResult;
- // scoring and state information
- private ComputeNodeResult result;
+ /*
+ * This is the HGNode built over the current target side phrase. It requires the computed results
+ * as part of its constructor, so we delay computing it unless needed.
+ */
+ private HGNode phraseNode;
/**
* When candidate objects are extended, the new one is initialized with the same underlying
@@ -121,6 +124,8 @@ public class Candidate {
public Candidate(List<FeatureFunction> featureFunctions, Sentence sentence,
List<Hypothesis> hypotheses, TargetPhrases phrases, Span span, float delta, int[] ranks) {
+ this.featureFunctions = featureFunctions;
+ this.sentence = sentence;
this.hypotheses = hypotheses;
this.phrases = phrases;
this.span = span;
@@ -128,7 +133,11 @@ public class Candidate {
this.ranks = ranks;
this.rule = isMonotonic() ? Hypothesis.MONO_RULE : Hypothesis.END_RULE;
// this.score = hypotheses.get(ranks[0]).score + phrases.get(ranks[1]).getEstimatedCost();
- this.phraseNode = null;
+
+ // TODO: compute this proactively or lazily according to a parameter
+ getResult();
+// this.phraseNode = null;
+// this.computedResult = null;
}
/**
@@ -203,10 +212,24 @@ public class Candidate {
* @return a new hypergraph node representing the phrase translation
*/
public HGNode getPhraseNode() {
- ComputeNodeResult result = new ComputeNodeResult(featureFunctions, getRule(), null, span.start, span.end, null, sentence);
- phraseNode = new HGNode(-1, span.end, rule.getLHS(), result.getDPStates(), null, result.getPruningEstimate());
+ getResult();
return phraseNode;
}
+
+ /**
+ * Ensures that the cost of applying the edge has been computed. This is tucked away in an
+ * accessor so that we can do it lazily if we wish.
+ *
+ * @return
+ */
+ public ComputeNodeResult getResult() {
+ if (computedResult == null) {
+ computedResult = new ComputeNodeResult(featureFunctions, getRule(), null, span.start, span.end, null, sentence);
+ phraseNode = new HGNode(-1, span.end, rule.getLHS(), computedResult.getDPStates(), null, computedResult.getPruningEstimate());
+ }
+
+ return computedResult;
+ }
/**
* This returns the rule being applied (straight or inverted)
@@ -246,10 +269,6 @@ public class Candidate {
return cov;
}
- public ComputeNodeResult getResult() {
- return computedResult;
- }
-
/**
* This returns the sum of two costs: the HypoState cost + the transition cost. The HypoState cost
* is in turn the sum of two costs: the Viterbi cost of the underlying hypothesis, and the adjustment
@@ -263,7 +282,13 @@ public class Candidate {
* @return the sum of two costs: the HypoState cost + the transition cost
*/
public float score() {
- return getHypothesis().getScore() + future_delta + result.getTransitionCost();
+ float score = getHypothesis().getScore() + future_delta;
+ /*
+ * TODO: you can add this if it's been computed.
+ */
+ if (computedResult != null)
+ score += computedResult.getTransitionCost();
+ return score;
}
public float getFutureEstimate() {
@@ -271,6 +296,6 @@ public class Candidate {
}
public List<DPState> getStates() {
- return result.getDPStates();
+ return getResult().getDPStates();
}
}
[02/15] incubator-joshua git commit: updated scripts to work with the
new format
Posted by mj...@apache.org.
updated scripts to work with the new format
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/32504c47
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/32504c47
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/32504c47
Branch: refs/heads/JOSHUA-284
Commit: 32504c47bbc90b3fd4a8d02298b9758fa8126a44
Parents: dcc7e7e
Author: Matt Post <po...@cs.jhu.edu>
Authored: Tue Aug 16 18:13:50 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Tue Aug 16 18:13:50 2016 -0400
----------------------------------------------------------------------
scripts/support/phrase2hiero.py | 22 ++++------------------
scripts/training/pipeline.pl | 8 ++------
2 files changed, 6 insertions(+), 24 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/32504c47/scripts/support/phrase2hiero.py
----------------------------------------------------------------------
diff --git a/scripts/support/phrase2hiero.py b/scripts/support/phrase2hiero.py
index e741564..581a823 100755
--- a/scripts/support/phrase2hiero.py
+++ b/scripts/support/phrase2hiero.py
@@ -1,13 +1,10 @@
#!/usr/bin/python
"""
-Prepends nonterminals to source and target side of phrase rules, and also
-increments the alignment points (if present) to match.
-This allows them to be used in the phrase-based decoder.
+Converts a Moses phrase table to a Joshua phrase table. The differences are
+(a) adding an LHS and (b) applying -log() to all the model weights.
-Usage: gzip -cd grammar.gz | phrase2hiero.py [-moses] | gzip -9n > grammar.new.gz
-
-If you specify "-moses", it will also apply -log() to each of the model weights.
+Usage: gzip -cd grammar.gz | phrase2hiero.py | gzip -9n > grammar.new.gz
Author: Matt Post <po...@cs.jhu.edu>
Date: June 2016
@@ -16,7 +13,6 @@ Date: June 2016
import sys
import math
import codecs
-import argparse
reload(sys)
sys.setdefaultencoding('utf-8')
@@ -24,11 +20,6 @@ sys.stdin = codecs.getreader('utf-8')(sys.stdin)
sys.stdout = codecs.getwriter('utf-8')(sys.stdout)
sys.stdout.encoding = 'utf-8'
-def incr(alignment):
- """Takes an alignment point (0-1) and increments both sides"""
- points = alignment.split('-')
- return '%d-%d' % (int(points[0]) + 1, int(points[1]) + 1)
-
def maybelog(value):
"""Takes a feature value and returns -log(x) if it is a scalar"""
try:
@@ -46,14 +37,9 @@ for line in sys.stdin:
# Get all the fields
tokens = line.split(r' ||| ')
- tokens[1] = '[X,1] ' + tokens[1]
- tokens[2] = '[X,1] ' + tokens[2]
# take the -log() of each input token
if moses and len(tokens) >= 4:
tokens[3] = ' '.join(map(maybelog, tokens[3].split(' ')))
- if len(tokens) >= 5:
- tokens[4] = ' '.join(map(incr, tokens[4].split(' ')))
-
- print ' ||| '.join(tokens)
+ print ' ||| '.join(tokens),
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/32504c47/scripts/training/pipeline.pl
----------------------------------------------------------------------
diff --git a/scripts/training/pipeline.pl b/scripts/training/pipeline.pl
index ea617bc..08933ec 100755
--- a/scripts/training/pipeline.pl
+++ b/scripts/training/pipeline.pl
@@ -1123,7 +1123,7 @@ if (! defined $GRAMMAR_FILE) {
# Convert the model to Joshua format
$cachepipe->cmd("convert-moses-to-joshua",
- "$CAT model/phrase-table.gz | $SCRIPTDIR/support/phrase2hiero.py | gzip -9n > grammar.gz",
+ "$CAT model/phrase-table.gz | $SCRIPTDIR/support/phrase2hiero.py -moses | gzip -9n > grammar.gz",
"model/phrase-table.gz",
"grammar.gz",
);
@@ -1165,10 +1165,6 @@ if (! defined $GRAMMAR_FILE) {
$GRAMMAR_FILE = "grammar.gz";
- # Convert phrase model to hiero format (Thrax should do this!)
- if ($GRAMMAR_TYPE eq "phrase") {
- system("mv grammar.gz grammar.tmp.gz; gzip -cd grammar.tmp.gz | $SCRIPTDIR/support/phrase2hiero.py | gzip -9n > grammar.gz; rm -rf grammar.tmp.gz");
- }
} else {
print STDERR "* FATAL: There was no way to build a grammar, and none was passed in\n";
@@ -1181,7 +1177,7 @@ if (! defined $GRAMMAR_FILE) {
}
# Pack the entire model! Saves filtering and repacking of tuning and test sets
-if ($DO_PACK_GRAMMARS and ! $DO_FILTER_TM) {
+if ($DO_PACK_GRAMMARS and ! $DO_FILTER_TM and ! -e "grammar.packed") {
$cachepipe->cmd("pack-grammar",
"$SCRIPTDIR/support/grammar-packer.pl -a -T $TMPDIR -m $PACKER_MEM -g $GRAMMAR_FILE -o $RUNDIR/grammar.packed",
"$RUNDIR/grammar.packed/vocabulary",
[10/15] incubator-joshua git commit: build two nodes over terminal
productions
Posted by mj...@apache.org.
build two nodes over terminal productions
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/9b73d614
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/9b73d614
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/9b73d614
Branch: refs/heads/JOSHUA-284
Commit: 9b73d6147a61580058cc57c86c1f623f44b7452a
Parents: af4ef88
Author: Matt Post <po...@cs.jhu.edu>
Authored: Fri Aug 19 13:16:47 2016 -0500
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Fri Aug 19 13:16:47 2016 -0500
----------------------------------------------------------------------
.../apache/joshua/decoder/phrase/Candidate.java | 37 ++++++++++++++------
.../joshua/decoder/phrase/Hypothesis.java | 4 +--
.../org/apache/joshua/decoder/phrase/Stack.java | 8 ++---
3 files changed, 33 insertions(+), 16 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9b73d614/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java b/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
index 303749d..bd581e3 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
@@ -42,6 +42,7 @@ import org.apache.joshua.decoder.ff.FeatureFunction;
import org.apache.joshua.decoder.ff.state_maintenance.DPState;
import org.apache.joshua.decoder.ff.tm.Rule;
import org.apache.joshua.decoder.hypergraph.HGNode;
+import org.apache.joshua.decoder.hypergraph.HyperEdge;
import org.apache.joshua.decoder.segment_file.Sentence;
public class Candidate {
@@ -119,7 +120,7 @@ public class Candidate {
public String toString() {
return String.format("CANDIDATE(hyp %d/%d, phr %d/%d) [%s] phrase=[%s] span=%s",
ranks[0], hypotheses.size(), ranks[1], phrases.size(),
- getHypothesis(), getRule().getEnglishWords().replaceAll("\\[.*?\\] ",""), getSpan());
+ getHypothesis(), getPhraseNode(), getSpan());
}
public Candidate(List<FeatureFunction> featureFunctions, Sentence sentence,
@@ -131,13 +132,13 @@ public class Candidate {
this.span = span;
this.future_delta = delta;
this.ranks = ranks;
- this.rule = isMonotonic() ? Hypothesis.MONO_RULE : Hypothesis.END_RULE;
+ this.rule = isMonotonic() ? Hypothesis.MONO_RULE : Hypothesis.SWAP_RULE;
// this.score = hypotheses.get(ranks[0]).score + phrases.get(ranks[1]).getEstimatedCost();
-
+
// TODO: compute this proactively or lazily according to a parameter
- getResult();
+ computeResult();
// this.phraseNode = null;
-// this.computedResult = null;
+// this.computedResult = null;
}
/**
@@ -206,13 +207,23 @@ public class Candidate {
}
/**
+ * A candidate is a (hypothesis, target phrase) pairing. The hypothesis and target phrase are
+ * drawn from a list that is indexed by (ranks[0], ranks[1]), respectively. This is a shortcut
+ * to return the rule representing the terminal phrase production of the candidate pair.
+ *
+ * @return the phrase rule at position ranks[1]
+ */
+ public Rule getPhraseRule() {
+ return this.phrases.get(ranks[1]);
+ }
+
+ /**
* This returns a new Hypothesis (HGNode) representing the phrase being added, i.e., a terminal
* production in the hypergraph. The score and DP state are computed only here on demand.
*
* @return a new hypergraph node representing the phrase translation
*/
public HGNode getPhraseNode() {
- getResult();
return phraseNode;
}
@@ -222,10 +233,16 @@ public class Candidate {
*
* @return
*/
- public ComputeNodeResult getResult() {
+ public ComputeNodeResult computeResult() {
if (computedResult == null) {
- computedResult = new ComputeNodeResult(featureFunctions, getRule(), null, span.start, span.end, null, sentence);
- phraseNode = new HGNode(-1, span.end, rule.getLHS(), computedResult.getDPStates(), null, computedResult.getPruningEstimate());
+ // add the phrase node
+ ComputeNodeResult phraseResult = new ComputeNodeResult(featureFunctions, getPhraseRule(), null, span.start, span.end, null, sentence);
+ HyperEdge edge = new HyperEdge(getPhraseRule(), phraseResult.getViterbiCost(), phraseResult.getTransitionCost(), null, null);
+ phraseNode = new HGNode(-1, span.end, rule.getLHS(), phraseResult.getDPStates(), edge, phraseResult.getPruningEstimate());
+
+ // add the rule
+ // TODO: sourcepath
+ computedResult = new ComputeNodeResult(featureFunctions, getRule(), getTailNodes(), -1, span.end, null, sentence);
}
return computedResult;
@@ -296,6 +313,6 @@ public class Candidate {
}
public List<DPState> getStates() {
- return getResult().getDPStates();
+ return computeResult().getDPStates();
}
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9b73d614/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java b/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
index 904634d..132d62d 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
@@ -71,8 +71,8 @@ public class Hypothesis extends HGNode implements Comparable<Hypothesis> {
public Hypothesis(Candidate cand) {
// TODO: sourcepath
super(-1, cand.span.end, cand.getRule().getLHS(), cand.getStates(),
- new HyperEdge(cand.getRule(), cand.getResult().getViterbiCost(),
- cand.getResult().getTransitionCost(),
+ new HyperEdge(cand.getRule(), cand.computeResult().getViterbiCost(),
+ cand.computeResult().getTransitionCost(),
cand.getTailNodes(), null), cand.score());
this.coverage = cand.getCoverage();
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9b73d614/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Stack.java b/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
index ad24a51..3b8a976 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
@@ -215,12 +215,12 @@ public class Stack extends ArrayList<Hypothesis> {
if (LOG.isDebugEnabled()) {
LOG.debug("{} from ( ... {} )", taskName, complete.getHypothesis().getRule().getEnglishWords());
- LOG.debug(" base score {}", complete.getResult().getBaseCost());
+ LOG.debug(" base score {}", complete.computeResult().getBaseCost());
LOG.debug(" covering {}-{}", complete.getSpan().start - 1, complete.getSpan().end - 2);
- LOG.debug(" translated as: {}", complete.getRule().getEnglishWords());
+ LOG.debug(" translated as: {}", complete.getPhraseRule().getEnglishWords());
LOG.debug(" score {} + future cost {} = {}",
- complete.getResult().getTransitionCost(), complete.getFutureEstimate(),
- complete.getResult().getTransitionCost() + complete.getFutureEstimate());
+ complete.computeResult().getTransitionCost(), complete.getFutureEstimate(),
+ complete.computeResult().getTransitionCost() + complete.getFutureEstimate());
}
}
}
[14/15] incubator-joshua git commit: bugfix: this is (probably)
supposed to return the pruning estimate
Posted by mj...@apache.org.
bugfix: this is (probably) supposed to return the pruning estimate
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/574cb36b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/574cb36b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/574cb36b
Branch: refs/heads/JOSHUA-284
Commit: 574cb36b5e1b610e37eda81d6d76b4318c141a4c
Parents: 473b301
Author: Matt Post <po...@cs.jhu.edu>
Authored: Fri Aug 19 19:44:44 2016 -0500
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Fri Aug 19 19:44:44 2016 -0500
----------------------------------------------------------------------
.../org/apache/joshua/decoder/phrase/Candidate.java | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/574cb36b/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java b/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
index cb9cd6d..9c7b3d1 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
@@ -296,12 +296,12 @@ public class Candidate {
* @return the sum of two costs: the HypoState cost + the transition cost
*/
public float score() {
- float score = getHypothesis().getScore() + future_delta;
- /*
- * TODO: you can add this if it's been computed.
- */
- if (computedResult != null)
- score += computedResult.getTransitionCost();
+ float score = computedResult.getPruningEstimate();
+
+// float score = getHypothesis().getScore() + future_delta;
+// if (computedResult != null)
+// score += computedResult.getTransitionCost();
+
return score;
}
[09/15] incubator-joshua git commit: added derived directories
Posted by mj...@apache.org.
added derived directories
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/af4ef88d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/af4ef88d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/af4ef88d
Branch: refs/heads/JOSHUA-284
Commit: af4ef88d5a6a6a1cc4167ec421b4b6bd1a91dc0a
Parents: 048b2e3
Author: Matt Post <po...@cs.jhu.edu>
Authored: Fri Aug 19 13:15:36 2016 -0500
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Fri Aug 19 13:15:36 2016 -0500
----------------------------------------------------------------------
.gitignore | 2 ++
1 file changed, 2 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/af4ef88d/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
index 0d42974..8e03044 100644
--- a/.gitignore
+++ b/.gitignore
@@ -58,3 +58,5 @@ build
/target/
.project
/doc/
+/ext.bak/
+/ext/
[03/15] incubator-joshua git commit: repacked the grammar
Posted by mj...@apache.org.
repacked the grammar
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/48a9aad7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/48a9aad7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/48a9aad7
Branch: refs/heads/JOSHUA-284
Commit: 48a9aad7873b969230aad90d6e0c61e13ae2d2b4
Parents: 32504c4
Author: Matt Post <po...@cs.jhu.edu>
Authored: Tue Aug 16 18:14:15 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Tue Aug 16 18:14:15 2016 -0400
----------------------------------------------------------------------
.../decoder/phrase/decode/rules.packed/config | 4 ++--
.../decode/rules.packed/slice_00000.features | Bin 4128858 -> 4128858 bytes
.../decode/rules.packed/slice_00000.source | Bin 1982244 -> 1982228 bytes
.../decode/rules.packed/slice_00000.target | Bin 2652936 -> 1463856 bytes
.../rules.packed/slice_00000.target.lookup | Bin 32 -> 28 bytes
.../phrase/decode/rules.packed/vocabulary | Bin 169236 -> 169225 bytes
6 files changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/48a9aad7/src/test/resources/decoder/phrase/decode/rules.packed/config
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/decode/rules.packed/config b/src/test/resources/decoder/phrase/decode/rules.packed/config
index 7bdb804..2251fe6 100644
--- a/src/test/resources/decoder/phrase/decode/rules.packed/config
+++ b/src/test/resources/decoder/phrase/decode/rules.packed/config
@@ -1,2 +1,2 @@
-version = 3
-max-source-len = 4
+version = 4
+max-source-len = 3
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/48a9aad7/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.features
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.features b/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.features
index c4127ff..27fa07d 100644
Binary files a/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.features and b/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.features differ
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/48a9aad7/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.source
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.source b/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.source
index 83d47dc..cdc98f6 100644
Binary files a/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.source and b/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.source differ
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/48a9aad7/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.target
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.target b/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.target
index 8094eef..fa82c0d 100644
Binary files a/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.target and b/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.target differ
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/48a9aad7/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.target.lookup
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.target.lookup b/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.target.lookup
index 1c6db18..3e8c294 100644
Binary files a/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.target.lookup and b/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.target.lookup differ
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/48a9aad7/src/test/resources/decoder/phrase/decode/rules.packed/vocabulary
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/decode/rules.packed/vocabulary b/src/test/resources/decoder/phrase/decode/rules.packed/vocabulary
index e9b0900..ff62042 100644
Binary files a/src/test/resources/decoder/phrase/decode/rules.packed/vocabulary and b/src/test/resources/decoder/phrase/decode/rules.packed/vocabulary differ
[06/15] incubator-joshua git commit: temporary commenting-out of very
verbose output
Posted by mj...@apache.org.
temporary commenting-out of very verbose output
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/1022699c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/1022699c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/1022699c
Branch: refs/heads/JOSHUA-284
Commit: 1022699cc744fa9fbc21f4b19122f51e3985a371
Parents: b1ec627
Author: Matt Post <po...@cs.jhu.edu>
Authored: Wed Aug 17 06:24:46 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Wed Aug 17 06:24:46 2016 -0400
----------------------------------------------------------------------
src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1022699c/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java b/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java
index 15fbec1..c11d46a 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java
@@ -380,10 +380,10 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
if (this.estimatedCost <= Float.NEGATIVE_INFINITY) {
this.estimatedCost = 0.0f; // weights.innerProduct(computeFeatures());
- LOG.debug("estimateCost({} ;; {})", getFrenchWords(), getEnglishWords());
+// LOG.debug("estimateCost({} ;; {})", getFrenchWords(), getEnglishWords());
for (FeatureFunction ff : models) {
float val = ff.estimateCost(this, null);
- LOG.debug(" FEATURE {} -> {}", ff.getName(), val);
+// LOG.debug(" FEATURE {} -> {}", ff.getName(), val);
this.estimatedCost += val;
}
}
[08/15] incubator-joshua git commit: removed RHS nonterminal
Posted by mj...@apache.org.
removed RHS nonterminal
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/048b2e30
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/048b2e30
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/048b2e30
Branch: refs/heads/JOSHUA-284
Commit: 048b2e30f849de3f1ac82e6017ea2aab299f6b8d
Parents: 2e746c1
Author: Matt Post <po...@cs.jhu.edu>
Authored: Fri Aug 19 13:15:18 2016 -0500
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Fri Aug 19 13:15:18 2016 -0500
----------------------------------------------------------------------
src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/048b2e30/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java b/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
index f87b728..904634d 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
@@ -43,9 +43,9 @@ public class Hypothesis extends HGNode implements Comparable<Hypothesis> {
private Coverage coverage;
public static Rule BEGIN_RULE = new HieroFormatReader().parseLine("[GOAL] ||| <s> ||| <s> ||| ||| 0-0");
- public static Rule END_RULE = new HieroFormatReader().parseLine("[GOAL] ||| [GOAL,1] </s> ||| [GOAL,1] </s> ||| ||| 0-0 1-1");
- public static Rule MONO_RULE = new HieroFormatReader().parseLine("[GOAL] ||| [GOAL,1] [X,2] ||| [GOAL,1] [X,2] ||| ||| 0-0 1-1");
- public static Rule SWAP_RULE = new HieroFormatReader().parseLine("[GOAL] ||| [X,1] [GOAL,2] ||| [GOAL,2] [X,1] ||| ||| 0-1 1-0");
+ public static Rule END_RULE = new HieroFormatReader().parseLine("[GOAL] ||| </s> ||| </s> ||| ||| 0-0");
+ public static Rule MONO_RULE = new HieroFormatReader().parseLine("[GOAL] ||| [GOAL,1] [X,2] ||| [GOAL,1] [X,2] ||| ||| 0-0 1-1");
+ public static Rule SWAP_RULE = new HieroFormatReader().parseLine("[GOAL] ||| [X,1] [GOAL,2] ||| [GOAL,2] [X,1] ||| ||| 0-1 1-0");
public String toString() {
StringBuffer sb = new StringBuffer();