You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/06/03 21:01:43 UTC
[12/14] incubator-joshua git commit: renaming factory to builder
renaming factory to builder
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/46a8c874
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/46a8c874
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/46a8c874
Branch: refs/heads/joshua_api
Commit: 46a8c87485a5e8ae00127499c13df341895daac0
Parents: 31c66f2
Author: Matt Post <po...@cs.jhu.edu>
Authored: Wed May 25 17:18:10 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Wed May 25 17:20:23 2016 -0400
----------------------------------------------------------------------
src/joshua/decoder/Decoder.java | 7 +-
src/joshua/decoder/TranslationBuilder.java | 136 ++++++++++++++++++++++++
src/joshua/decoder/TranslationFactory.java | 127 ----------------------
src/joshua/decoder/io/JSONMessage.java | 8 +-
4 files changed, 145 insertions(+), 133 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/46a8c874/src/joshua/decoder/Decoder.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/Decoder.java b/src/joshua/decoder/Decoder.java
index c3850a3..652f25d 100644
--- a/src/joshua/decoder/Decoder.java
+++ b/src/joshua/decoder/Decoder.java
@@ -466,7 +466,7 @@ public class Decoder {
if (config.input_type == INPUT_TYPE.json || config.server_type == SERVER_TYPE.HTTP) {
KBestExtractor extractor = new KBestExtractor(sentence, hg, featureFunctions, weights, false, config);
- JSONMessage message = JSONMessage.buildMessage(sentence, extractor, config);
+ JSONMessage message = JSONMessage.buildMessage(sentence, extractor, featureFunctions, config);
out.write(message.toString().getBytes());
} else {
@@ -487,7 +487,8 @@ public class Decoder {
if (k > config.topN || derivation == null)
break;
- TranslationFactory factory = new TranslationFactory(sentence, derivation, config);
+
+ TranslationBuilder factory = new TranslationBuilder(sentence, derivation, featureFunctions, config);
Translation translation = factory.formattedTranslation(mosesFormat).translation();
text = translation.getFormattedTranslation().replaceAll("=", "= ");
// Write the complete formatted string to STDOUT
@@ -504,7 +505,7 @@ public class Decoder {
if (k > config.topN || derivation == null)
break;
- Translation t = new TranslationFactory(sentence, derivation, config)
+ Translation t = new TranslationBuilder(sentence, derivation, featureFunctions, config)
.formattedTranslation(config.outputFormat)
.translation();
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/46a8c874/src/joshua/decoder/TranslationBuilder.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/TranslationBuilder.java b/src/joshua/decoder/TranslationBuilder.java
new file mode 100644
index 0000000..0ac6737
--- /dev/null
+++ b/src/joshua/decoder/TranslationBuilder.java
@@ -0,0 +1,136 @@
+package joshua.decoder;
+
+import joshua.decoder.ff.FeatureFunction;
+import joshua.decoder.ff.FeatureVector;
+import joshua.decoder.ff.lm.StateMinimizingLanguageModel;
+import joshua.decoder.hypergraph.DerivationState;
+import joshua.decoder.hypergraph.KBestExtractor.Side;
+import joshua.decoder.io.DeNormalize;
+import joshua.decoder.segment_file.Sentence;
+import joshua.decoder.segment_file.Token;
+import joshua.util.FormatUtils;
+
+import java.util.List;
+
+/**
+ * Builds a {@link Translation} for one derivation of a sentence, optionally attaching features,
+ * word alignments, and a formatted output string.
+ */
+public class TranslationBuilder {
+
+  private final Sentence sentence;
+  private final JoshuaConfiguration config;
+  private final DerivationState derivation;
+  // Stored for later use; no method in this class reads it yet.
+  private final List<FeatureFunction> featureFunctions;
+
+  private Translation translation;
+
+  /**
+   * Creates a builder whose translation starts out with the derivation's hypothesis and cost, or
+   * with a null hypothesis and a cost of 0.0 when {@code derivation} is null.
+   *
+   * @param sentence the sentence the translation belongs to
+   * @param derivation the derivation to build from; may be null here, but
+   *        {@link #formattedTranslation(String)}, {@link #withFeatures()} and
+   *        {@link #withAlignments()} all dereference it
+   * @param featureFunctions kept on the builder
+   * @param config supplies the {@code project_case} and {@code moses} settings
+   */
+  public TranslationBuilder(Sentence sentence, DerivationState derivation,
+      List<FeatureFunction> featureFunctions, JoshuaConfiguration config) {
+    this.sentence = sentence;
+    this.derivation = derivation;
+    this.featureFunctions = featureFunctions;
+    this.config = config;
+
+    if (this.derivation != null) {
+      this.translation =
+          new Translation(sentence, derivation.getHypothesis(), derivation.getCost());
+    } else {
+      this.translation = new Translation(sentence, null, 0.0f);
+    }
+  }
+
+  /**
+   * Returns the translation object that this TranslationBuilder has been building.
+   *
+   * @return the built Translation object
+   */
+  public Translation translation() {
+    return this.translation;
+  }
+
+  /**
+   * Expands the placeholders in {@code format} and stores the result, after optional case
+   * projection, as the translation's formatted string.
+   *
+   * <p>Placeholders: {@code %s} translation string, {@code %e} source-side hypothesis,
+   * {@code %S} denormalized translation string, {@code %c} score with three decimals,
+   * {@code %i} sentence id, {@code %a} word alignment, {@code %f} features (Moses-style when
+   * {@code config.moses} is set), {@code %t} tree, {@code %d} derivation. Expanding {@code %a}
+   * or {@code %f} also stores the alignment or the features on the translation.
+   *
+   * @param format the output template
+   * @return this builder
+   */
+  public TranslationBuilder formattedTranslation(String format) {
+
+    // TODO: instead of calling replace() a million times, walk through yourself and find the
+    // special characters, and then replace them. If you do this from the right side the index
+    // replacement should be a lot more efficient than what we're doing here, particularly since
+    // all these arguments get evaluated whether they're used or not
+
+    String output = format
+        .replace("%s", translation.toString())
+        .replace("%e", derivation.getHypothesis(Side.SOURCE))
+        .replace("%S", DeNormalize.processSingleLine(translation.toString()))
+        .replace("%c", String.format("%.3f", translation.score()))
+        .replace("%i", Integer.toString(sentence.id()));
+
+    if (output.contains("%a")) {
+      withAlignments();
+      output = output.replace("%a", translation.getWordAlignment().toString());
+    }
+
+    // Test the string being expanded, not config.outputFormat: callers may pass a template
+    // other than the configured one, and its %f would otherwise be left unexpanded.
+    if (output.contains("%f")) {
+      withFeatures();
+      final FeatureVector features = translation.getFeatures();
+      output = output.replace("%f", config.moses ? features.mosesString() : features.toString());
+    }
+
+    if (output.contains("%t")) {
+      // TODO: also store in Translation object
+      output = output.replace("%t", derivation.getTree());
+    }
+
+    /* %d causes a derivation with rules one per line to be output */
+    if (output.contains("%d")) {
+      // TODO: also store in Translation object
+      output = output.replace("%d", derivation.getDerivation());
+    }
+
+    translation.setFormattedTranslation(maybeProjectCase(derivation, output));
+    return this;
+  }
+
+  /**
+   * Stores the derivation's features on the translation.
+   *
+   * @return this builder
+   */
+  public TranslationBuilder withFeatures() {
+    translation.setFeatures(derivation.getFeatures());
+    return this;
+  }
+
+  /**
+   * Stores the derivation's word alignment on the translation.
+   *
+   * @return this builder
+   */
+  public TranslationBuilder withAlignments() {
+    translation.setWordAlignment(derivation.getWordAlignment());
+    return this;
+  }
+
+  /**
+   * If {@code config.project_case} is set, projects the source-side "lettercase" annotations
+   * onto the whitespace-separated tokens of {@code hypothesis}, using the derivation's word
+   * alignment; otherwise returns {@code hypothesis} unchanged.
+   *
+   * @param derivation the derivation whose word alignment links the tokens to source positions
+   * @param hypothesis the string whose tokens are recased
+   * @return the recased tokens joined by single spaces, or {@code hypothesis} unchanged
+   */
+  private String maybeProjectCase(DerivationState derivation, String hypothesis) {
+    if (!config.project_case) {
+      return hypothesis;
+    }
+
+    String[] tokens = hypothesis.split("\\s+");
+    List<List<Integer>> points = derivation.getWordAlignment().toFinalList();
+
+    // Nothing guarantees one alignment entry per token of the formatted string, so never index
+    // past the end of the token array.
+    final int limit = Math.min(points.size(), tokens.length);
+    for (int i = 0; i < limit; i++) {
+      for (int source : points.get(i)) {
+        Token token = sentence.getTokens().get(source + 1); // skip <s>
+        String annotation = "";
+        if (token != null && token.getAnnotation("lettercase") != null) {
+          annotation = token.getAnnotation("lettercase");
+        }
+        if (source != 0 && annotation.equals("upper")) {
+          tokens[i] = FormatUtils.capitalize(tokens[i]);
+        } else if (annotation.equals("all-upper")) {
+          tokens[i] = tokens[i].toUpperCase();
+        }
+      }
+    }
+
+    return String.join(" ", tokens);
+  }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/46a8c874/src/joshua/decoder/TranslationFactory.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/TranslationFactory.java b/src/joshua/decoder/TranslationFactory.java
deleted file mode 100644
index 9d1953e..0000000
--- a/src/joshua/decoder/TranslationFactory.java
+++ /dev/null
@@ -1,127 +0,0 @@
-package joshua.decoder;
-
-import joshua.decoder.ff.FeatureVector;
-import joshua.decoder.hypergraph.DerivationState;
-import joshua.decoder.hypergraph.KBestExtractor.Side;
-import joshua.decoder.io.DeNormalize;
-import joshua.decoder.segment_file.Sentence;
-import joshua.decoder.segment_file.Token;
-import joshua.util.FormatUtils;
-
-import java.util.List;
-
-public class TranslationFactory {
-
- private final Sentence sentence;
- private final JoshuaConfiguration config;
-
- private DerivationState derivation;
- private Translation translation;
-
- public TranslationFactory(Sentence sentence, DerivationState derivation, JoshuaConfiguration config) {
- this.sentence = sentence;
- this.derivation = derivation;
- this.config = config;
-
- if (this.derivation != null) {
- this.translation = new Translation(sentence, derivation.getHypothesis(), derivation.getCost());
- } else {
- this.translation = new Translation(sentence, null, 0.0f);
- }
- }
-
- public Translation translation() {
- return this.translation;
- }
-
- public TranslationFactory formattedTranslation(String format) {
-
- // TODO: instead of calling replace() a million times, walk through yourself and find the
- // special characters, and then replace them. If you do this from the right side the index
- // replacement should be a lot more efficient than what we're doing here, particularly since
- // all these arguments get evaluated whether they're used or not
-
- String output = format
- .replace("%s", translation.toString())
- .replace("%e", derivation.getHypothesis(Side.SOURCE))
- .replace("%S", DeNormalize.processSingleLine(translation.toString()))
- .replace("%c", String.format("%.3f", translation.score()))
- .replace("%i", Integer.toString(sentence.id()));
-
- if (output.contains("%a")) {
- this.alignments().translation();
- output = output.replace("%a", translation.getWordAlignment().toString());
- }
-
- if (config.outputFormat.contains("%f")) {
- this.features();
- final FeatureVector features = translation.getFeatures();
- output = output.replace("%f", config.moses ? features.mosesString() : features.toString());
- }
-
- if (output.contains("%t")) {
- // TODO: also store in Translation objection
- output = output.replace("%t", derivation.getTree());
- }
-
- /* %d causes a derivation with rules one per line to be output */
- if (output.contains("%d")) {
- // TODO: also store in Translation objection
- output = output.replace("%d", derivation.getDerivation());
- }
-
- translation.setFormattedTranslation(maybeProjectCase(derivation, output));
- return this;
- }
-
- /**
- * Stores the features
- *
- * @return
- */
- public TranslationFactory features() {
- translation.setFeatures(derivation.getFeatures());
- return this;
- }
-
- public TranslationFactory alignments() {
- // TODO: write this
- // this.translation.setAlignments(getViterbiWordAlignmentList(derivation);
- translation.setWordAlignment(derivation.getWordAlignment());
- return this;
- }
-
- /**
- * If requested, projects source-side lettercase to target, and appends the alignment from
- * to the source-side sentence in ||s.
- *
- * @param hypothesis
- * @param state
- * @return
- */
- private String maybeProjectCase(DerivationState derivation, String hypothesis) {
- String output = hypothesis;
-
- if (config.project_case) {
- String[] tokens = hypothesis.split("\\s+");
- List<List<Integer>> points = derivation.getWordAlignment().toFinalList();
- for (int i = 0; i < points.size(); i++) {
- List<Integer> target = points.get(i);
- for (int source: target) {
- Token token = sentence.getTokens().get(source + 1); // skip <s>
- String annotation = "";
- if (token != null && token.getAnnotation("lettercase") != null)
- annotation = token.getAnnotation("lettercase");
- if (source != 0 && annotation.equals("upper"))
- tokens[i] = FormatUtils.capitalize(tokens[i]);
- else if (annotation.equals("all-upper"))
- tokens[i] = tokens[i].toUpperCase();
- }
- }
-
- output = String.join(" ", tokens);
- }
-
- return output;
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/46a8c874/src/joshua/decoder/io/JSONMessage.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/io/JSONMessage.java b/src/joshua/decoder/io/JSONMessage.java
index bf75133..fbefbd7 100644
--- a/src/joshua/decoder/io/JSONMessage.java
+++ b/src/joshua/decoder/io/JSONMessage.java
@@ -26,7 +26,8 @@ import com.google.gson.GsonBuilder;
import joshua.decoder.JoshuaConfiguration;
import joshua.decoder.Translation;
-import joshua.decoder.TranslationFactory;
+import joshua.decoder.TranslationBuilder;
+import joshua.decoder.ff.FeatureFunction;
import joshua.decoder.hypergraph.DerivationState;
import joshua.decoder.hypergraph.KBestExtractor;
import joshua.decoder.segment_file.Sentence;
@@ -91,7 +92,8 @@ public class JSONMessage {
}
}
- public static JSONMessage buildMessage(Sentence sentence, KBestExtractor extractor, JoshuaConfiguration config) {
+ public static JSONMessage buildMessage(Sentence sentence, KBestExtractor extractor,
+ List<FeatureFunction> featureFunctions, JoshuaConfiguration config) {
JSONMessage message = new JSONMessage();
final String mosesFormat = "%i ||| %s ||| %f ||| %c";
@@ -101,7 +103,7 @@ public class JSONMessage {
if (k > config.topN)
break;
- TranslationFactory factory = new TranslationFactory(sentence, derivation, config);
+ TranslationBuilder factory = new TranslationBuilder(sentence, derivation, featureFunctions, config);
Translation translation = factory.formattedTranslation(mosesFormat).translation();
JSONMessage.TranslationItem item = message.addTranslation(translation.toString());