You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/09/13 00:25:18 UTC

[1/3] incubator-joshua git commit: Fixed rule sorting for phrase-based decoding

Repository: incubator-joshua
Updated Branches:
  refs/heads/7 f0883156c -> 55caf2442


Fixed rule sorting for phrase-based decoding

The rules were sorted backwards due to an earlier fix that got rid of Joshua's
negating of phrase-table variables. The effect was that all values were computed
correctly, but the decoder retained the bottom config.num_translation_options rules
for each source, instead of the top. This doesn't affect Hiero decoding because
those rule lists are not truncated (note: they should be).

(This raises an idea: decoding could be made a lot faster if we sorted rules
using only stateless feature functions, and then trimmed the list to the top
config.num_translation_options items. This would add some loss of quality, likely
negligible, and would save language model costs on lots of items that are unlikely
to be used.)


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/d48cd56a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/d48cd56a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/d48cd56a

Branch: refs/heads/7
Commit: d48cd56ac721cd9dfad8b9f58a557135caf99ebc
Parents: f088315
Author: Matt Post <po...@cs.jhu.edu>
Authored: Mon Sep 12 23:07:17 2016 +0200
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Mon Sep 12 23:07:17 2016 +0200

----------------------------------------------------------------------
 .../joshua/decoder/ff/tm/BasicRuleCollection.java    |  3 ++-
 .../java/org/apache/joshua/decoder/ff/tm/Rule.java   | 15 +++------------
 2 files changed, 5 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d48cd56a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/BasicRuleCollection.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/BasicRuleCollection.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/BasicRuleCollection.java
index 4d577dc..848791b 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/BasicRuleCollection.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/BasicRuleCollection.java
@@ -88,7 +88,8 @@ public class BasicRuleCollection implements RuleCollection {
       for (Rule rule: getRules())
         rule.estimateRuleCost(models);
 
-      Collections.sort(rules, Rule.EstimatedCostComparator);
+      // Sort using natural order
+      Collections.sort(rules);
       this.sorted = true;      
     }
     

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d48cd56a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java
index 601dde7..1e998b6 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java
@@ -189,6 +189,7 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
         result += val; 
       }
       this.estimatedCost = result;
+      LOG.debug("  -> " + this.estimatedCost);
     }
 
     return estimatedCost;
@@ -383,21 +384,11 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
     return getPattern().matcher(sentence.fullSource()).find();
   }
 
-  /**
-   * This comparator is used for sorting the rules during cube pruning. An estimate of the cost
-   * of each rule is computed and used to sort. 
-   */
-  public static Comparator<Rule> EstimatedCostComparator = new Comparator<Rule>() {
-    public int compare(Rule rule1, Rule rule2) {
-      return Float.compare(rule1.getEstimatedCost(),  rule2.getEstimatedCost());
-    }
-  };
-  
   public int compare(Rule rule1, Rule rule2) {
-    return EstimatedCostComparator.compare(rule1, rule2);
+    return Float.compare(rule2.getEstimatedCost(), rule1.getEstimatedCost());
   }
 
   public int compareTo(Rule other) {
-    return EstimatedCostComparator.compare(this, other);
+    return compare(this, other);
   }
 }


[3/3] incubator-joshua git commit: updated test case to new format

Posted by mj...@apache.org.
updated test case to new format


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/55caf244
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/55caf244
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/55caf244

Branch: refs/heads/7
Commit: 55caf2442e47e6acd0b164f268df1eff9b2819e0
Parents: 8193dc8
Author: Matt Post <po...@cs.jhu.edu>
Authored: Tue Sep 13 02:25:10 2016 +0200
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Tue Sep 13 02:25:10 2016 +0200

----------------------------------------------------------------------
 bin/debug-joshua                                               | 2 +-
 .../joshua/decoder/phrase/decode/PhraseDecodingTest.java       | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/55caf244/bin/debug-joshua
----------------------------------------------------------------------
diff --git a/bin/debug-joshua b/bin/debug-joshua
index 0fd76f4..b453b22 100755
--- a/bin/debug-joshua
+++ b/bin/debug-joshua
@@ -44,5 +44,5 @@ JAR_PATH=$(ls -t $JOSHUA/target/joshua-*-jar-with-dependencies.jar | head -n1)
 exec java -Xmx${mem} \
 	-Dfile.encoding=utf8 \
 	-Djava.library.path=$JOSHUA/lib \
-	-cp $JOSHUA/target/classes:$JAR_PATH \
+	-cp $JOSHUA/joshua-core/target/classes:$JAR_PATH \
 	org.apache.joshua.decoder.JoshuaDecoder -v 2 "$@"

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/55caf244/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
index 66515de..b17799f 100644
--- a/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
@@ -38,8 +38,10 @@ public class PhraseDecodingTest {
 
   private static final String CONFIG = "src/test/resources/phrase_decoder/config";
  private static final String INPUT = "una estrategia republicana para obstaculizar la reelección de Obama";
-  private static final String OUTPUT = "0 ||| a strategy republican to hinder reelection Obama ||| tm_pt_0=-9.702 tm_pt_1=-10.800 tm_pt_2=-7.543 tm_pt_3=-8.555 lm_0=-19.117 OOVPenalty=0.000 WordPenalty=-3.040 Distortion=0.000 PhrasePenalty=5.000 ||| -7.496";
-  private static final String OUTPUT_WITH_ALIGNMENTS = "0 ||| a strategy |0-1| republican |2-2| to hinder |3-4| reelection |5-6| Obama |7-8| ||| tm_pt_0=-9.702 tm_pt_1=-10.800 tm_pt_2=-7.543 tm_pt_3=-8.555 lm_0=-19.117 OOVPenalty=0.000 WordPenalty=-3.040 Distortion=0.000 PhrasePenalty=5.000 ||| -7.496";
+
+
+  private static final String OUTPUT = "0 ||| a strategy republican to hinder reelection Obama ||| Distortion=0.000000 WordPenalty=-3.040061 PhrasePenalty=5.000000 pt_0=-9.702445 pt_1=-10.799793 pt_2=-7.542729 pt_3=-8.555386 lm_0=-19.116861 ||| -7.496";
+  private static final String OUTPUT_WITH_ALIGNMENTS = "0 ||| a strategy |0-1| republican |2-2| to hinder |3-4| reelection |5-6| Obama |7-8| ||| Distortion=0.000000 WordPenalty=-3.040061 PhrasePenalty=5.000000 pt_0=-9.702445 pt_1=-10.799793 pt_2=-7.542729 pt_3=-8.555386 lm_0=-19.116861 ||| -7.496";
   
   private JoshuaConfiguration joshuaConfig = null;
   private Decoder decoder = null;


[2/3] incubator-joshua git commit: removed explicit comparator in favor of natural ordering

Posted by mj...@apache.org.
removed explicit comparator in favor of natural ordering


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/8193dc81
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/8193dc81
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/8193dc81

Branch: refs/heads/7
Commit: 8193dc810b586b44db85e1ceaa6a195f3e7cf7ee
Parents: d48cd56
Author: Matt Post <po...@cs.jhu.edu>
Authored: Tue Sep 13 02:24:56 2016 +0200
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Tue Sep 13 02:24:56 2016 +0200

----------------------------------------------------------------------
 .../joshua/decoder/chart_parser/Cell.java       |  3 ++-
 .../joshua/decoder/hypergraph/HGNode.java       | 27 +++++---------------
 .../joshua/decoder/phrase/PhraseNodes.java      |  9 ++-----
 .../apache/joshua/decoder/phrase/Stacks.java    |  4 +--
 4 files changed, 12 insertions(+), 31 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8193dc81/joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/Cell.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/Cell.java b/joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/Cell.java
index cfcd06b..a771bec 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/Cell.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/Cell.java
@@ -21,6 +21,7 @@ package org.apache.joshua.decoder.chart_parser;
 import static com.google.common.base.Preconditions.checkNotNull;
 
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.LinkedHashMap;
@@ -268,7 +269,7 @@ class Cell {
       }
 
       // sort the node in an decreasing-LogP order 
-      this.sortedNodes.sort(HGNode.inverseLogPComparator);
+      Collections.sort(this.sortedNodes);
 
       // TODO: we cannot create new SuperItem here because the DotItem link to them.
       // Thus, we clear nodes from existing SuperNodes

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8193dc81/joshua-core/src/main/java/org/apache/joshua/decoder/hypergraph/HGNode.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/hypergraph/HGNode.java b/joshua-core/src/main/java/org/apache/joshua/decoder/hypergraph/HGNode.java
index c353a36..a7e112e 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/hypergraph/HGNode.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/hypergraph/HGNode.java
@@ -34,7 +34,7 @@ import org.apache.joshua.decoder.ff.state_maintenance.DPState;
 
 // TODO: handle the case that the Hypergraph only maintains the one-best tree
 
-public class HGNode {
+public class HGNode implements Comparable<HGNode> {
 
   public final int i;
   public final int j;
@@ -241,16 +241,13 @@ public class HGNode {
     }
   }
 
-  /*
-   * this will called by the sorting in Cell.ensureSorted()
+  /**
+   * Sorts HGNodes by their score, in descending order.
+   *
+   * @return -1, 0, or 1 if this item is greater than, equal to, or less than the other HGNode
    */
-  // sort by estTotalLogP: for pruning purpose
   public int compareTo(HGNode anotherItem) {
-    throw new RuntimeException("HGNode.compareTo(HGNode) is not implemented");
-    /*
-     * if (this.estTotalLogP > anotherItem.estTotalLogP) { return -1; } else if (this.estTotalLogP
-     * == anotherItem.estTotalLogP) { return 0; } else { return 1; }
-     */
+    return Float.compare(anotherItem.getScore(), getScore());
   }
 
   /**
@@ -272,18 +269,6 @@ public class HGNode {
     }
   };
 
-  public static final Comparator<HGNode> inverseLogPComparator = (item1, item2) -> {
-    float logp1 = item1.score;
-    float logp2 = item2.score;
-    if (logp1 > logp2) {
-      return -1;
-    } else if (logp1 == logp2) {
-      return 0;
-    } else {
-      return 1;
-    }
-  };
-
   public String toString() {
     StringBuilder sb = new StringBuilder();
 

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8193dc81/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/PhraseNodes.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/PhraseNodes.java b/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/PhraseNodes.java
index c690dc3..6ed5534 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/PhraseNodes.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/PhraseNodes.java
@@ -45,14 +45,9 @@ public class PhraseNodes extends ArrayList<HGNode> {
   }
   
   /**
-   * Score the rules and sort them. Scoring is necessary 
-   * because rules are only scored if they are used, in an 
-   * effort to make reading in rules more efficient. 
-   * This is starting to create some trouble and should 
-   * probably be reworked.
+   * Called after all the nodes have been added via add(). Sorts them using their natural order.
    */
   public void finish() {
-    Collections.sort(this, HGNode.inverseLogPComparator);    
+    Collections.sort(this);
   }
-
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8193dc81/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java b/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
index d02ad1a..230ed09 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
@@ -216,8 +216,8 @@ public class Stacks {
   }
     
   /**
-   * Enforces reordering constraints. Our version of Moses' ReorderingConstraint::Check() and
-   * SearchCubePruning::CheckDistortion(). 
+   * Enforces reordering constraints. Our version of Moses' ReorderingConstraint.Check() and
+   * SearchCubePruning.CheckDistortion(). 
    * 
    * @param coverage
    * @param begin