You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/06/03 21:01:43 UTC

[12/14] incubator-joshua git commit: renaming factory to builder

renaming factory to builder


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/46a8c874
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/46a8c874
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/46a8c874

Branch: refs/heads/joshua_api
Commit: 46a8c87485a5e8ae00127499c13df341895daac0
Parents: 31c66f2
Author: Matt Post <po...@cs.jhu.edu>
Authored: Wed May 25 17:18:10 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Wed May 25 17:20:23 2016 -0400

----------------------------------------------------------------------
 src/joshua/decoder/Decoder.java            |   7 +-
 src/joshua/decoder/TranslationBuilder.java | 136 ++++++++++++++++++++++++
 src/joshua/decoder/TranslationFactory.java | 127 ----------------------
 src/joshua/decoder/io/JSONMessage.java     |   8 +-
 4 files changed, 145 insertions(+), 133 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/46a8c874/src/joshua/decoder/Decoder.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/Decoder.java b/src/joshua/decoder/Decoder.java
index c3850a3..652f25d 100644
--- a/src/joshua/decoder/Decoder.java
+++ b/src/joshua/decoder/Decoder.java
@@ -466,7 +466,7 @@ public class Decoder {
       
       if (config.input_type == INPUT_TYPE.json || config.server_type == SERVER_TYPE.HTTP) {
         KBestExtractor extractor = new KBestExtractor(sentence, hg, featureFunctions, weights, false, config);
-        JSONMessage message = JSONMessage.buildMessage(sentence, extractor, config);
+        JSONMessage message = JSONMessage.buildMessage(sentence, extractor, featureFunctions, config);
         out.write(message.toString().getBytes());
         
       } else {
@@ -487,7 +487,8 @@ public class Decoder {
             if (k > config.topN || derivation == null)
               break;
             
-            TranslationFactory factory = new TranslationFactory(sentence, derivation, config);
+
+            TranslationBuilder factory = new TranslationBuilder(sentence, derivation, featureFunctions, config);
             Translation translation = factory.formattedTranslation(mosesFormat).translation();
             text = translation.getFormattedTranslation().replaceAll("=",  "= ");
             // Write the complete formatted string to STDOUT
@@ -504,7 +505,7 @@ public class Decoder {
           if (k > config.topN || derivation == null)
             break;
 
-          Translation t = new TranslationFactory(sentence, derivation, config)
+          Translation t = new TranslationBuilder(sentence, derivation, featureFunctions, config)
               .formattedTranslation(config.outputFormat)
               .translation();
           

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/46a8c874/src/joshua/decoder/TranslationBuilder.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/TranslationBuilder.java b/src/joshua/decoder/TranslationBuilder.java
new file mode 100644
index 0000000..0ac6737
--- /dev/null
+++ b/src/joshua/decoder/TranslationBuilder.java
@@ -0,0 +1,136 @@
+package joshua.decoder;
+
+import joshua.decoder.ff.FeatureFunction;
+import joshua.decoder.ff.FeatureVector;
+import joshua.decoder.ff.lm.StateMinimizingLanguageModel;
+import joshua.decoder.hypergraph.DerivationState;
+import joshua.decoder.hypergraph.KBestExtractor.Side;
+import joshua.decoder.io.DeNormalize;
+import joshua.decoder.segment_file.Sentence;
+import joshua.decoder.segment_file.Token;
+import joshua.util.FormatUtils;
+
+import java.util.List;
+
+public class TranslationBuilder {
+
+  private final Sentence sentence;
+  private final JoshuaConfiguration config;
+  private final DerivationState derivation;
+  private final List<FeatureFunction> featureFunctions;
+  
+  private Translation translation;
+  
+  public TranslationBuilder(Sentence sentence, DerivationState derivation, 
+      List<FeatureFunction> featureFunctions, JoshuaConfiguration config) {
+    this.sentence = sentence;
+    this.derivation = derivation;
+    this.featureFunctions = featureFunctions;
+    this.config = config;
+    
+    if (this.derivation != null) {
+      this.translation = new Translation(sentence, derivation.getHypothesis(), derivation.getCost());
+    } else {
+      this.translation = new Translation(sentence, null, 0.0f);
+    }
+  }
+  
+  /**
+   * Returns the underlying translation object that was being built. Once this is called,
+   * the TranslationBuilder object assumes that the hypergraph is no longer needed.
+   * 
+   * @return the built Translation object
+   */
+  public Translation translation() {
+    return this.translation;
+  }
+
+  public TranslationBuilder formattedTranslation(String format) {
+
+    // TODO: instead of calling replace() a million times, walk through yourself and find the
+    // special characters, and then replace them.  If you do this from the right side the index
+    // replacement should be a lot more efficient than what we're doing here, particularly since
+    // all these arguments get evaluated whether they're used or not
+
+    String output = format
+        .replace("%s", translation.toString())
+        .replace("%e", derivation.getHypothesis(Side.SOURCE))
+        .replace("%S", DeNormalize.processSingleLine(translation.toString()))
+        .replace("%c", String.format("%.3f", translation.score()))
+        .replace("%i", Integer.toString(sentence.id()));
+
+    if (output.contains("%a")) {
+      this.withAlignments().translation();
+      output = output.replace("%a", translation.getWordAlignment().toString());
+    }
+
+    if (config.outputFormat.contains("%f")) {
+      this.withFeatures();
+      final FeatureVector features = translation.getFeatures();
+      output = output.replace("%f", config.moses ? features.mosesString() : features.toString());
+    }
+    
+    if (output.contains("%t")) {
+      // TODO: also store in Translation object
+      output = output.replace("%t", derivation.getTree());
+    }
+
+    /* %d causes a derivation with rules one per line to be output */
+    if (output.contains("%d")) {
+      // TODO: also store in Translation object
+      output = output.replace("%d", derivation.getDerivation());
+    }
+
+    translation.setFormattedTranslation(maybeProjectCase(derivation, output));
+    return this;
+  }
+
+  /** 
+   * Stores the derivation's features on the translation being built.
+   * 
+   * @return this builder, so that calls can be chained
+   */
+  public TranslationBuilder withFeatures() {
+    translation.setFeatures(derivation.getFeatures());
+    return this;
+  }
+  
+  public TranslationBuilder withAlignments() {
+    translation.setWordAlignment(derivation.getWordAlignment());
+    return this;
+  }
+  
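+  /**
+   * If requested (config.project_case), projects source-side lettercase onto the target-side
+   * tokens, using the derivation's word alignment to find each target token's source tokens.
+   * 
+   * @param derivation the derivation whose word alignment is consulted
+   * @param hypothesis the output string whose whitespace-separated tokens are recased
+   * @return the recased string, or the input unchanged when case projection is disabled
+   */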
+  private String maybeProjectCase(DerivationState derivation, String hypothesis) {
+    String output = hypothesis;
+
+    if (config.project_case) {
+      String[] tokens = hypothesis.split("\\s+");
+      List<List<Integer>> points = derivation.getWordAlignment().toFinalList();
+      for (int i = 0; i < points.size(); i++) {
+        List<Integer> target = points.get(i);
+        for (int source: target) {
+          Token token = sentence.getTokens().get(source + 1); // skip <s>
+          String annotation = "";
+          if (token != null && token.getAnnotation("lettercase") != null)
+            annotation = token.getAnnotation("lettercase");
+          if (source != 0 && annotation.equals("upper"))
+            tokens[i] = FormatUtils.capitalize(tokens[i]);
+          else if (annotation.equals("all-upper"))
+            tokens[i] = tokens[i].toUpperCase();
+        }
+      }
+
+      output = String.join(" ",  tokens);
+    }
+
+    return output;
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/46a8c874/src/joshua/decoder/TranslationFactory.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/TranslationFactory.java b/src/joshua/decoder/TranslationFactory.java
deleted file mode 100644
index 9d1953e..0000000
--- a/src/joshua/decoder/TranslationFactory.java
+++ /dev/null
@@ -1,127 +0,0 @@
-package joshua.decoder;
-
-import joshua.decoder.ff.FeatureVector;
-import joshua.decoder.hypergraph.DerivationState;
-import joshua.decoder.hypergraph.KBestExtractor.Side;
-import joshua.decoder.io.DeNormalize;
-import joshua.decoder.segment_file.Sentence;
-import joshua.decoder.segment_file.Token;
-import joshua.util.FormatUtils;
-
-import java.util.List;
-
-public class TranslationFactory {
-
-  private final Sentence sentence;
-  private final JoshuaConfiguration config;
-
-  private DerivationState derivation;
-  private Translation translation;
-
-  public TranslationFactory(Sentence sentence, DerivationState derivation, JoshuaConfiguration config) {
-    this.sentence = sentence;
-    this.derivation = derivation;
-    this.config = config;
-    
-    if (this.derivation != null) {
-      this.translation = new Translation(sentence, derivation.getHypothesis(), derivation.getCost());
-    } else {
-      this.translation = new Translation(sentence, null, 0.0f);
-    }
-  }
-  
-  public Translation translation() {
-    return this.translation;
-  }
-
-  public TranslationFactory formattedTranslation(String format) {
-
-    // TODO: instead of calling replace() a million times, walk through yourself and find the
-    // special characters, and then replace them.  If you do this from the right side the index
-    // replacement should be a lot more efficient than what we're doing here, particularly since
-    // all these arguments get evaluated whether they're used or not
-
-    String output = format
-        .replace("%s", translation.toString())
-        .replace("%e", derivation.getHypothesis(Side.SOURCE))
-        .replace("%S", DeNormalize.processSingleLine(translation.toString()))
-        .replace("%c", String.format("%.3f", translation.score()))
-        .replace("%i", Integer.toString(sentence.id()));
-
-    if (output.contains("%a")) {
-      this.alignments().translation();
-      output = output.replace("%a", translation.getWordAlignment().toString());
-    }
-
-    if (config.outputFormat.contains("%f")) {
-      this.features();
-      final FeatureVector features = translation.getFeatures();
-      output = output.replace("%f", config.moses ? features.mosesString() : features.toString());
-    }
-    
-    if (output.contains("%t")) {
-      // TODO: also store in Translation objection
-      output = output.replace("%t", derivation.getTree());
-    }
-
-    /* %d causes a derivation with rules one per line to be output */
-    if (output.contains("%d")) {
-      // TODO: also store in Translation objection
-      output = output.replace("%d", derivation.getDerivation());
-    }
-
-    translation.setFormattedTranslation(maybeProjectCase(derivation, output));
-    return this;
-  }
-
-  /** 
-   * Stores the features
-   * 
-   * @return
-   */
-  public TranslationFactory features() {
-    translation.setFeatures(derivation.getFeatures());
-    return this;
-  }
-  
-  public TranslationFactory alignments() {
-    // TODO: write this
-    //    this.translation.setAlignments(getViterbiWordAlignmentList(derivation);
-    translation.setWordAlignment(derivation.getWordAlignment());
-    return this;
-  }
-  
-  /**
-   * If requested, projects source-side lettercase to target, and appends the alignment from
-   * to the source-side sentence in ||s.
-   * 
-   * @param hypothesis
-   * @param state
-   * @return
-   */
-  private String maybeProjectCase(DerivationState derivation, String hypothesis) {
-    String output = hypothesis;
-
-    if (config.project_case) {
-      String[] tokens = hypothesis.split("\\s+");
-      List<List<Integer>> points = derivation.getWordAlignment().toFinalList();
-      for (int i = 0; i < points.size(); i++) {
-        List<Integer> target = points.get(i);
-        for (int source: target) {
-          Token token = sentence.getTokens().get(source + 1); // skip <s>
-          String annotation = "";
-          if (token != null && token.getAnnotation("lettercase") != null)
-            annotation = token.getAnnotation("lettercase");
-          if (source != 0 && annotation.equals("upper"))
-            tokens[i] = FormatUtils.capitalize(tokens[i]);
-          else if (annotation.equals("all-upper"))
-            tokens[i] = tokens[i].toUpperCase();
-        }
-      }
-
-      output = String.join(" ",  tokens);
-    }
-
-    return output;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/46a8c874/src/joshua/decoder/io/JSONMessage.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/io/JSONMessage.java b/src/joshua/decoder/io/JSONMessage.java
index bf75133..fbefbd7 100644
--- a/src/joshua/decoder/io/JSONMessage.java
+++ b/src/joshua/decoder/io/JSONMessage.java
@@ -26,7 +26,8 @@ import com.google.gson.GsonBuilder;
 
 import joshua.decoder.JoshuaConfiguration;
 import joshua.decoder.Translation;
-import joshua.decoder.TranslationFactory;
+import joshua.decoder.TranslationBuilder;
+import joshua.decoder.ff.FeatureFunction;
 import joshua.decoder.hypergraph.DerivationState;
 import joshua.decoder.hypergraph.KBestExtractor;
 import joshua.decoder.segment_file.Sentence;
@@ -91,7 +92,8 @@ public class JSONMessage {
     }
   }
 
-  public static JSONMessage buildMessage(Sentence sentence, KBestExtractor extractor, JoshuaConfiguration config) {
+  public static JSONMessage buildMessage(Sentence sentence, KBestExtractor extractor,
+      List<FeatureFunction> featureFunctions, JoshuaConfiguration config) {
     JSONMessage message = new JSONMessage();
     
     final String mosesFormat = "%i ||| %s ||| %f ||| %c"; 
@@ -101,7 +103,7 @@ public class JSONMessage {
       if (k > config.topN)
         break;
       
-      TranslationFactory factory = new TranslationFactory(sentence, derivation, config);
+      TranslationBuilder factory = new TranslationBuilder(sentence, derivation, featureFunctions, config);
       Translation translation = factory.formattedTranslation(mosesFormat).translation();
 
       JSONMessage.TranslationItem item = message.addTranslation(translation.toString());