You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/06/03 21:01:44 UTC

[13/14] incubator-joshua git commit: bugfix in iterator, moved k-best logic to constructor

Bug fix in the KBestExtractor iterator; moved the k-best limit (topN) logic out of the calling loops and into the KBestExtractor constructor.


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/29d9197b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/29d9197b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/29d9197b

Branch: refs/heads/joshua_api
Commit: 29d9197ba3877d02351c677d75b7ad227a0ff1d4
Parents: 46a8c87
Author: Matt Post <po...@cs.jhu.edu>
Authored: Wed May 25 17:34:56 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Wed May 25 17:34:56 2016 -0400

----------------------------------------------------------------------
 src/joshua/decoder/Decoder.java                 | 35 ++++++++++----------
 .../decoder/hypergraph/KBestExtractor.java      | 32 ++++++++++--------
 2 files changed, 35 insertions(+), 32 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/29d9197b/src/joshua/decoder/Decoder.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/Decoder.java b/src/joshua/decoder/Decoder.java
index 652f25d..4175e36 100644
--- a/src/joshua/decoder/Decoder.java
+++ b/src/joshua/decoder/Decoder.java
@@ -465,7 +465,7 @@ public class Decoder {
       Sentence sentence = hg.sentence;
       
       if (config.input_type == INPUT_TYPE.json || config.server_type == SERVER_TYPE.HTTP) {
-        KBestExtractor extractor = new KBestExtractor(sentence, hg, featureFunctions, weights, false, config);
+        KBestExtractor extractor = new KBestExtractor(sentence, hg, featureFunctions, weights, false, config, config.topN);
         JSONMessage message = JSONMessage.buildMessage(sentence, extractor, featureFunctions, config);
         out.write(message.toString().getBytes());
         
@@ -476,47 +476,45 @@ public class Decoder {
          * Moses expects the simple translation on STDOUT and the n-best list in a file with a fixed
          * format.
          */
-        String text;
+        String bestOutput;
         if (config.moses) {
-          KBestExtractor extractor = new KBestExtractor(sentence, hg, featureFunctions, weights, false, config);
+          KBestExtractor extractor = new KBestExtractor(sentence, hg, featureFunctions, weights, false, config, config.topN);
           
           final String mosesFormat = "%i ||| %s ||| %f ||| %c"; 
           
-          int k = 1;
+          boolean firstPass = true;
           for (DerivationState derivation: extractor) {
-            if (k > config.topN || derivation == null)
-              break;
-            
 
             TranslationBuilder factory = new TranslationBuilder(sentence, derivation, featureFunctions, config);
             Translation translation = factory.formattedTranslation(mosesFormat).translation();
-            text = translation.getFormattedTranslation().replaceAll("=",  "= ");
+            String text = translation.getFormattedTranslation().replaceAll("=",  "= ");
             // Write the complete formatted string to STDOUT
             if (config.n_best_file != null)
               nbest_out.write(text + "\n");
-            
-            k++;
-          }
+
+            if (firstPass) {
+              bestOutput = translation.toString();
+              firstPass = false;
+            }
+          } 
         }
 
-        KBestExtractor extractor = new KBestExtractor(sentence, hg, featureFunctions, weights, false, config);
-        int k = 1;
+        KBestExtractor extractor = new KBestExtractor(sentence, hg, featureFunctions, weights, false, config, config.topN);
+        boolean firstPass = true;
         for (DerivationState derivation: extractor) {
-          if (k > config.topN || derivation == null)
-            break;
 
           Translation t = new TranslationBuilder(sentence, derivation, featureFunctions, config)
               .formattedTranslation(config.outputFormat)
               .translation();
           
-          if (k == 1)
+          if (firstPass) {
             Decoder.LOG(1, String.format("Translation %d: %.3f %s", sentence.id(), t.score(), t.toString()));
+            firstPass = false;
+          }
 
           String bestString = t.getFormattedTranslation();
           out.write(bestString.getBytes());
           out.write("\n".getBytes());
-          
-          k++;
         }
       }
       out.flush();
@@ -532,6 +530,7 @@ public class Decoder {
           break;
         }
       }
+
     }
     
     if (config.n_best_file != null)

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/29d9197b/src/joshua/decoder/hypergraph/KBestExtractor.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/hypergraph/KBestExtractor.java b/src/joshua/decoder/hypergraph/KBestExtractor.java
index 4d40639..8997790 100644
--- a/src/joshua/decoder/hypergraph/KBestExtractor.java
+++ b/src/joshua/decoder/hypergraph/KBestExtractor.java
@@ -114,6 +114,7 @@ public class KBestExtractor implements Iterator<DerivationState>, Iterable<Deriv
   private HyperGraph hyperGraph;
   private DerivationState nextDerivation = null;
   private int derivationCounter;
+  private int maxDerivations;
 
   public KBestExtractor(
       Sentence sentence,
@@ -121,7 +122,8 @@ public class KBestExtractor implements Iterator<DerivationState>, Iterable<Deriv
       List<FeatureFunction> featureFunctions,
       FeatureVector weights,
       boolean isMonolingual,
-      JoshuaConfiguration joshuaConfiguration) {
+      JoshuaConfiguration joshuaConfiguration,
+      int k) {
 
     this.featureFunctions = featureFunctions;
     this.hyperGraph = hyperGraph;
@@ -134,7 +136,8 @@ public class KBestExtractor implements Iterator<DerivationState>, Iterable<Deriv
     
     // initialize the iterator
     this.derivationCounter = 0;
-    this.nextDerivation = getViterbiDerivation();
+    this.nextDerivation = null;
+    this.maxDerivations = k;
   }
 
   /**
@@ -146,12 +149,11 @@ public class KBestExtractor implements Iterator<DerivationState>, Iterable<Deriv
    */
   public DerivationState getViterbiDerivation() {
     
-    /* TODO: this is just a short-cut to get this working. Instead of triggering the k-best extraction,
-     * it would be better to have a shortcut function that can construction a {@link DerivationState object}
-     * from the hypergraph directly, which would be a lot cheaper.
+    /* TODO: the viterbi derivation is often needed, but triggering all the k-best mechanisms
+     * to extract it is expensive. There should be a way to get the 1-best DerivationState object
+     * very quickly, so that we can fit it into this framework. 
      */
-    hasNext();
-    return this.nextDerivation;
+    throw new RuntimeException("Not yet implemented! We need a fast way to get the Viterbi DerivationState!");
   }
 
   
@@ -649,14 +651,16 @@ public class KBestExtractor implements Iterator<DerivationState>, Iterable<Deriv
 
   @Override
   public boolean hasNext() {
-    if (this.nextDerivation != null)
-      return true;
-
-    derivationCounter++;
+    if (this.nextDerivation == null) {
+      this.derivationCounter++;
+      if (this.derivationCounter <= this.maxDerivations) {
+        VirtualNode virtualNode = getVirtualNode(hyperGraph.goalNode);
+        this.nextDerivation = virtualNode.lazyKBestExtractOnNode(this, derivationCounter);
+      }
+      return this.nextDerivation != null;
+    }
     
-    VirtualNode virtualNode = getVirtualNode(hyperGraph.goalNode);
-    this.nextDerivation = virtualNode.lazyKBestExtractOnNode(this, derivationCounter);
-    return this.nextDerivation != null;
+    return true;
   }
 
   @Override