You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/09/13 00:25:18 UTC
[1/3] incubator-joshua git commit: Fixed rule sorting for
phrase-based decoding
Repository: incubator-joshua
Updated Branches:
refs/heads/7 f0883156c -> 55caf2442
Fixed rule sorting for phrase-based decoding
The rules were sorted backwards due to an earlier fix that got rid of Joshua's
negating of phrase-table variables. The effect was that all values were computed
correctly, but the decoder retained the bottom config.num_translation_options rules
for each source, instead of the top. This doesn't affect Hiero decoding because
those rule lists are not truncated (note: they should be).
(This raises an idea: decoding could be made a lot faster if we sorted rules
using only stateless feature functions, and then trimmed the list to the top
config.num_translation_options items. This would add some loss of quality, likely
negligible, and would save language model costs on lots of items that are unlikely
to be used.)
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/d48cd56a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/d48cd56a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/d48cd56a
Branch: refs/heads/7
Commit: d48cd56ac721cd9dfad8b9f58a557135caf99ebc
Parents: f088315
Author: Matt Post <po...@cs.jhu.edu>
Authored: Mon Sep 12 23:07:17 2016 +0200
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Mon Sep 12 23:07:17 2016 +0200
----------------------------------------------------------------------
.../joshua/decoder/ff/tm/BasicRuleCollection.java | 3 ++-
.../java/org/apache/joshua/decoder/ff/tm/Rule.java | 15 +++------------
2 files changed, 5 insertions(+), 13 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d48cd56a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/BasicRuleCollection.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/BasicRuleCollection.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/BasicRuleCollection.java
index 4d577dc..848791b 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/BasicRuleCollection.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/BasicRuleCollection.java
@@ -88,7 +88,8 @@ public class BasicRuleCollection implements RuleCollection {
for (Rule rule: getRules())
rule.estimateRuleCost(models);
- Collections.sort(rules, Rule.EstimatedCostComparator);
+ // Sort using natural order
+ Collections.sort(rules);
this.sorted = true;
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d48cd56a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java
index 601dde7..1e998b6 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java
@@ -189,6 +189,7 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
result += val;
}
this.estimatedCost = result;
+ LOG.debug(" -> " + this.estimatedCost);
}
return estimatedCost;
@@ -383,21 +384,11 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
return getPattern().matcher(sentence.fullSource()).find();
}
- /**
- * This comparator is used for sorting the rules during cube pruning. An estimate of the cost
- * of each rule is computed and used to sort.
- */
- public static Comparator<Rule> EstimatedCostComparator = new Comparator<Rule>() {
- public int compare(Rule rule1, Rule rule2) {
- return Float.compare(rule1.getEstimatedCost(), rule2.getEstimatedCost());
- }
- };
-
public int compare(Rule rule1, Rule rule2) {
- return EstimatedCostComparator.compare(rule1, rule2);
+ return Float.compare(rule2.getEstimatedCost(), rule1.getEstimatedCost());
}
public int compareTo(Rule other) {
- return EstimatedCostComparator.compare(this, other);
+ return compare(this, other);
}
}
[3/3] incubator-joshua git commit: updated test case to new format
Posted by mj...@apache.org.
updated test case to new format
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/55caf244
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/55caf244
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/55caf244
Branch: refs/heads/7
Commit: 55caf2442e47e6acd0b164f268df1eff9b2819e0
Parents: 8193dc8
Author: Matt Post <po...@cs.jhu.edu>
Authored: Tue Sep 13 02:25:10 2016 +0200
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Tue Sep 13 02:25:10 2016 +0200
----------------------------------------------------------------------
bin/debug-joshua | 2 +-
.../joshua/decoder/phrase/decode/PhraseDecodingTest.java | 6 ++++--
2 files changed, 5 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/55caf244/bin/debug-joshua
----------------------------------------------------------------------
diff --git a/bin/debug-joshua b/bin/debug-joshua
index 0fd76f4..b453b22 100755
--- a/bin/debug-joshua
+++ b/bin/debug-joshua
@@ -44,5 +44,5 @@ JAR_PATH=$(ls -t $JOSHUA/target/joshua-*-jar-with-dependencies.jar | head -n1)
exec java -Xmx${mem} \
-Dfile.encoding=utf8 \
-Djava.library.path=$JOSHUA/lib \
- -cp $JOSHUA/target/classes:$JAR_PATH \
+ -cp $JOSHUA/joshua-core/target/classes:$JAR_PATH \
org.apache.joshua.decoder.JoshuaDecoder -v 2 "$@"
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/55caf244/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
index 66515de..b17799f 100644
--- a/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
@@ -38,8 +38,10 @@ public class PhraseDecodingTest {
private static final String CONFIG = "src/test/resources/phrase_decoder/config";
private static final String INPUT = "una estrategia republicana para obstaculizar la reelección de Obama";
- private static final String OUTPUT = "0 ||| a strategy republican to hinder reelection Obama ||| tm_pt_0=-9.702 tm_pt_1=-10.800 tm_pt_2=-7.543 tm_pt_3=-8.555 lm_0=-19.117 OOVPenalty=0.000 WordPenalty=-3.040 Distortion=0.000 PhrasePenalty=5.000 ||| -7.496";
- private static final String OUTPUT_WITH_ALIGNMENTS = "0 ||| a strategy |0-1| republican |2-2| to hinder |3-4| reelection |5-6| Obama |7-8| ||| tm_pt_0=-9.702 tm_pt_1=-10.800 tm_pt_2=-7.543 tm_pt_3=-8.555 lm_0=-19.117 OOVPenalty=0.000 WordPenalty=-3.040 Distortion=0.000 PhrasePenalty=5.000 ||| -7.496";
+
+
+ private static final String OUTPUT = "0 ||| a strategy republican to hinder reelection Obama ||| Distortion=0.000000 WordPenalty=-3.040061 PhrasePenalty=5.000000 pt_0=-9.702445 pt_1=-10.799793 pt_2=-7.542729 pt_3=-8.555386 lm_0=-19.116861 ||| -7.496";
+ private static final String OUTPUT_WITH_ALIGNMENTS = "0 ||| a strategy |0-1| republican |2-2| to hinder |3-4| reelection |5-6| Obama |7-8| ||| Distortion=0.000000 WordPenalty=-3.040061 PhrasePenalty=5.000000 pt_0=-9.702445 pt_1=-10.799793 pt_2=-7.542729 pt_3=-8.555386 lm_0=-19.116861 ||| -7.496";
private JoshuaConfiguration joshuaConfig = null;
private Decoder decoder = null;
[2/3] incubator-joshua git commit: removed explicit comparator in
favor of natural ordering
Posted by mj...@apache.org.
removed explicit comparator in favor of natural ordering
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/8193dc81
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/8193dc81
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/8193dc81
Branch: refs/heads/7
Commit: 8193dc810b586b44db85e1ceaa6a195f3e7cf7ee
Parents: d48cd56
Author: Matt Post <po...@cs.jhu.edu>
Authored: Tue Sep 13 02:24:56 2016 +0200
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Tue Sep 13 02:24:56 2016 +0200
----------------------------------------------------------------------
.../joshua/decoder/chart_parser/Cell.java | 3 ++-
.../joshua/decoder/hypergraph/HGNode.java | 27 +++++---------------
.../joshua/decoder/phrase/PhraseNodes.java | 9 ++-----
.../apache/joshua/decoder/phrase/Stacks.java | 4 +--
4 files changed, 12 insertions(+), 31 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8193dc81/joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/Cell.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/Cell.java b/joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/Cell.java
index cfcd06b..a771bec 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/Cell.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/Cell.java
@@ -21,6 +21,7 @@ package org.apache.joshua.decoder.chart_parser;
import static com.google.common.base.Preconditions.checkNotNull;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
@@ -268,7 +269,7 @@ class Cell {
}
// sort the node in an decreasing-LogP order
- this.sortedNodes.sort(HGNode.inverseLogPComparator);
+ Collections.sort(this.sortedNodes);
// TODO: we cannot create new SuperItem here because the DotItem link to them.
// Thus, we clear nodes from existing SuperNodes
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8193dc81/joshua-core/src/main/java/org/apache/joshua/decoder/hypergraph/HGNode.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/hypergraph/HGNode.java b/joshua-core/src/main/java/org/apache/joshua/decoder/hypergraph/HGNode.java
index c353a36..a7e112e 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/hypergraph/HGNode.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/hypergraph/HGNode.java
@@ -34,7 +34,7 @@ import org.apache.joshua.decoder.ff.state_maintenance.DPState;
// TODO: handle the case that the Hypergraph only maintains the one-best tree
-public class HGNode {
+public class HGNode implements Comparable<HGNode> {
public final int i;
public final int j;
@@ -241,16 +241,13 @@ public class HGNode {
}
}
- /*
- * this will called by the sorting in Cell.ensureSorted()
+ /**
+ * Sorts HGNodes by their score, in descending order.
+ *
+ * @return -1, 0, or 1 if this item is greater than, equal to, or less than the other HGNode
*/
- // sort by estTotalLogP: for pruning purpose
public int compareTo(HGNode anotherItem) {
- throw new RuntimeException("HGNode.compareTo(HGNode) is not implemented");
- /*
- * if (this.estTotalLogP > anotherItem.estTotalLogP) { return -1; } else if (this.estTotalLogP
- * == anotherItem.estTotalLogP) { return 0; } else { return 1; }
- */
+ return Float.compare(anotherItem.getScore(), getScore());
}
/**
@@ -272,18 +269,6 @@ public class HGNode {
}
};
- public static final Comparator<HGNode> inverseLogPComparator = (item1, item2) -> {
- float logp1 = item1.score;
- float logp2 = item2.score;
- if (logp1 > logp2) {
- return -1;
- } else if (logp1 == logp2) {
- return 0;
- } else {
- return 1;
- }
- };
-
public String toString() {
StringBuilder sb = new StringBuilder();
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8193dc81/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/PhraseNodes.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/PhraseNodes.java b/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/PhraseNodes.java
index c690dc3..6ed5534 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/PhraseNodes.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/PhraseNodes.java
@@ -45,14 +45,9 @@ public class PhraseNodes extends ArrayList<HGNode> {
}
/**
- * Score the rules and sort them. Scoring is necessary
- * because rules are only scored if they are used, in an
- * effort to make reading in rules more efficient.
- * This is starting to create some trouble and should
- * probably be reworked.
+ * Called after all the nodes have been added via add(). Sorts them using their natural order.
*/
public void finish() {
- Collections.sort(this, HGNode.inverseLogPComparator);
+ Collections.sort(this);
}
-
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8193dc81/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java b/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
index d02ad1a..230ed09 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
@@ -216,8 +216,8 @@ public class Stacks {
}
/**
- * Enforces reordering constraints. Our version of Moses' ReorderingConstraint::Check() and
- * SearchCubePruning::CheckDistortion().
+ * Enforces reordering constraints. Our version of Moses' ReorderingConstraint.Check() and
+ * SearchCubePruning.CheckDistortion().
*
* @param coverage
* @param begin