You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/06/03 21:01:44 UTC
[13/14] incubator-joshua git commit: bugfix in iterator, moved k-best logic to constructor
bugfix in iterator, moved k-best logic to constructor
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/29d9197b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/29d9197b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/29d9197b
Branch: refs/heads/joshua_api
Commit: 29d9197ba3877d02351c677d75b7ad227a0ff1d4
Parents: 46a8c87
Author: Matt Post <po...@cs.jhu.edu>
Authored: Wed May 25 17:34:56 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Wed May 25 17:34:56 2016 -0400
----------------------------------------------------------------------
src/joshua/decoder/Decoder.java | 35 ++++++++++----------
.../decoder/hypergraph/KBestExtractor.java | 32 ++++++++++--------
2 files changed, 35 insertions(+), 32 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/29d9197b/src/joshua/decoder/Decoder.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/Decoder.java b/src/joshua/decoder/Decoder.java
index 652f25d..4175e36 100644
--- a/src/joshua/decoder/Decoder.java
+++ b/src/joshua/decoder/Decoder.java
@@ -465,7 +465,7 @@ public class Decoder {
Sentence sentence = hg.sentence;
if (config.input_type == INPUT_TYPE.json || config.server_type == SERVER_TYPE.HTTP) {
- KBestExtractor extractor = new KBestExtractor(sentence, hg, featureFunctions, weights, false, config);
+ KBestExtractor extractor = new KBestExtractor(sentence, hg, featureFunctions, weights, false, config, config.topN);
JSONMessage message = JSONMessage.buildMessage(sentence, extractor, featureFunctions, config);
out.write(message.toString().getBytes());
@@ -476,47 +476,45 @@ public class Decoder {
* Moses expects the simple translation on STDOUT and the n-best list in a file with a fixed
* format.
*/
- String text;
+ String bestOutput;
if (config.moses) {
- KBestExtractor extractor = new KBestExtractor(sentence, hg, featureFunctions, weights, false, config);
+ KBestExtractor extractor = new KBestExtractor(sentence, hg, featureFunctions, weights, false, config, config.topN);
final String mosesFormat = "%i ||| %s ||| %f ||| %c";
- int k = 1;
+ boolean firstPass = true;
for (DerivationState derivation: extractor) {
- if (k > config.topN || derivation == null)
- break;
-
TranslationBuilder factory = new TranslationBuilder(sentence, derivation, featureFunctions, config);
Translation translation = factory.formattedTranslation(mosesFormat).translation();
- text = translation.getFormattedTranslation().replaceAll("=", "= ");
+ String text = translation.getFormattedTranslation().replaceAll("=", "= ");
// Write the complete formatted string to STDOUT
if (config.n_best_file != null)
nbest_out.write(text + "\n");
-
- k++;
- }
+
+ if (firstPass) {
+ bestOutput = translation.toString();
+ firstPass = false;
+ }
+ }
}
- KBestExtractor extractor = new KBestExtractor(sentence, hg, featureFunctions, weights, false, config);
- int k = 1;
+ KBestExtractor extractor = new KBestExtractor(sentence, hg, featureFunctions, weights, false, config, config.topN);
+ boolean firstPass = true;
for (DerivationState derivation: extractor) {
- if (k > config.topN || derivation == null)
- break;
Translation t = new TranslationBuilder(sentence, derivation, featureFunctions, config)
.formattedTranslation(config.outputFormat)
.translation();
- if (k == 1)
+ if (firstPass) {
Decoder.LOG(1, String.format("Translation %d: %.3f %s", sentence.id(), t.score(), t.toString()));
+ firstPass = false;
+ }
String bestString = t.getFormattedTranslation();
out.write(bestString.getBytes());
out.write("\n".getBytes());
-
- k++;
}
}
out.flush();
@@ -532,6 +530,7 @@ public class Decoder {
break;
}
}
+
}
if (config.n_best_file != null)
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/29d9197b/src/joshua/decoder/hypergraph/KBestExtractor.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/hypergraph/KBestExtractor.java b/src/joshua/decoder/hypergraph/KBestExtractor.java
index 4d40639..8997790 100644
--- a/src/joshua/decoder/hypergraph/KBestExtractor.java
+++ b/src/joshua/decoder/hypergraph/KBestExtractor.java
@@ -114,6 +114,7 @@ public class KBestExtractor implements Iterator<DerivationState>, Iterable<Deriv
private HyperGraph hyperGraph;
private DerivationState nextDerivation = null;
private int derivationCounter;
+ private int maxDerivations;
public KBestExtractor(
Sentence sentence,
@@ -121,7 +122,8 @@ public class KBestExtractor implements Iterator<DerivationState>, Iterable<Deriv
List<FeatureFunction> featureFunctions,
FeatureVector weights,
boolean isMonolingual,
- JoshuaConfiguration joshuaConfiguration) {
+ JoshuaConfiguration joshuaConfiguration,
+ int k) {
this.featureFunctions = featureFunctions;
this.hyperGraph = hyperGraph;
@@ -134,7 +136,8 @@ public class KBestExtractor implements Iterator<DerivationState>, Iterable<Deriv
// initialize the iterator
this.derivationCounter = 0;
- this.nextDerivation = getViterbiDerivation();
+ this.nextDerivation = null;
+ this.maxDerivations = k;
}
/**
@@ -146,12 +149,11 @@ public class KBestExtractor implements Iterator<DerivationState>, Iterable<Deriv
*/
public DerivationState getViterbiDerivation() {
- /* TODO: this is just a short-cut to get this working. Instead of triggering the k-best extraction,
- * it would be better to have a shortcut function that can construction a {@link DerivationState object}
- * from the hypergraph directly, which would be a lot cheaper.
+ /* TODO: the viterbi derivation is often needed, but triggering all the k-best mechanisms
+ * to extract it is expensive. There should be a way to get the 1-best DerivationState object
+ * very quickly, so that we can fit it into this framework.
*/
- hasNext();
- return this.nextDerivation;
+ throw new RuntimeException("Not yet implemented! We need a fast way to get the Viterbi DerivationState!");
}
@@ -649,14 +651,16 @@ public class KBestExtractor implements Iterator<DerivationState>, Iterable<Deriv
@Override
public boolean hasNext() {
- if (this.nextDerivation != null)
- return true;
-
- derivationCounter++;
+ if (this.nextDerivation == null) {
+ this.derivationCounter++;
+ if (this.derivationCounter <= this.maxDerivations) {
+ VirtualNode virtualNode = getVirtualNode(hyperGraph.goalNode);
+ this.nextDerivation = virtualNode.lazyKBestExtractOnNode(this, derivationCounter);
+ }
+ return this.nextDerivation != null;
+ }
- VirtualNode virtualNode = getVirtualNode(hyperGraph.goalNode);
- this.nextDerivation = virtualNode.lazyKBestExtractOnNode(this, derivationCounter);
- return this.nextDerivation != null;
+ return true;
}
@Override