You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/04/24 22:53:34 UTC
[13/18] incubator-joshua git commit: Revert "moved files that were
strangely under joshua-6"
Revert "moved files that were strangely under joshua-6"
This reverts commit bc83a1a6d31bc034ec546f79ed00cc5598349c69.
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/4f2bec7c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/4f2bec7c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/4f2bec7c
Branch: refs/heads/morph
Commit: 4f2bec7c00803029cc4cb187fa7f567d7e6a1f22
Parents: bc83a1a
Author: Matt Post <po...@cs.jhu.edu>
Authored: Sat Apr 23 19:14:25 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Sat Apr 23 19:14:25 2016 -0400
----------------------------------------------------------------------
.../joshua/decoder/StructuredTranslation.java | 143 +++++++++++++++++++
.../ViterbiFeatureVectorWalkerFunction.java | 44 ++++++
.../ViterbiOutputStringWalkerFunction.java | 96 +++++++++++++
src/joshua/decoder/StructuredTranslation.java | 143 -------------------
.../ViterbiFeatureVectorWalkerFunction.java | 44 ------
.../ViterbiOutputStringWalkerFunction.java | 96 -------------
6 files changed, 283 insertions(+), 283 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/4f2bec7c/joshua-6/src/joshua/decoder/StructuredTranslation.java
----------------------------------------------------------------------
diff --git a/joshua-6/src/joshua/decoder/StructuredTranslation.java b/joshua-6/src/joshua/decoder/StructuredTranslation.java
new file mode 100644
index 0000000..1939ea0
--- /dev/null
+++ b/joshua-6/src/joshua/decoder/StructuredTranslation.java
@@ -0,0 +1,143 @@
+package joshua.decoder;
+
+import static java.util.Arrays.asList;
+import static java.util.Collections.emptyList;
+import static java.util.Collections.emptyMap;
+import static joshua.decoder.hypergraph.ViterbiExtractor.walk;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import joshua.decoder.ff.FeatureFunction;
+import joshua.decoder.hypergraph.HyperGraph;
+import joshua.decoder.hypergraph.ViterbiFeatureVectorWalkerFunction;
+import joshua.decoder.hypergraph.ViterbiOutputStringWalkerFunction;
+import joshua.decoder.hypergraph.WalkerFunction;
+import joshua.decoder.hypergraph.WordAlignmentExtractor;
+import joshua.decoder.segment_file.Sentence;
+
+/**
+ * StructuredTranslation provides more structured access to translation
+ * results than the Translation class.
+ * Members of instances of this class can be used upstream.
+ * <br/>
+ * TODO:
+ * Enable K-Best extraction.
+ *
+ * @author fhieber
+ */
+public class StructuredTranslation {
+
+ private final Sentence sourceSentence;
+ private final List<FeatureFunction> featureFunctions;
+
+ private final String translationString;
+ private final List<String> translationTokens;
+ private final float translationScore;
+ private List<List<Integer>> translationWordAlignments;
+ private Map<String,Float> translationFeatures;
+ private final float extractionTime;
+
+ public StructuredTranslation(final Sentence sourceSentence,
+ final HyperGraph hypergraph,
+ final List<FeatureFunction> featureFunctions) {
+
+ final long startTime = System.currentTimeMillis();
+
+ this.sourceSentence = sourceSentence;
+ this.featureFunctions = featureFunctions;
+ this.translationString = extractViterbiString(hypergraph);
+ this.translationTokens = extractTranslationTokens();
+ this.translationScore = extractTranslationScore(hypergraph);
+ this.translationFeatures = extractViterbiFeatures(hypergraph);
+ this.translationWordAlignments = extractViterbiWordAlignment(hypergraph);
+ this.extractionTime = (System.currentTimeMillis() - startTime) / 1000.0f;
+ }
+
+ private Map<String,Float> extractViterbiFeatures(final HyperGraph hypergraph) {
+ if (hypergraph == null) {
+ return emptyMap();
+ } else {
+ ViterbiFeatureVectorWalkerFunction viterbiFeatureVectorWalker = new ViterbiFeatureVectorWalkerFunction(featureFunctions, sourceSentence);
+ walk(hypergraph.goalNode, viterbiFeatureVectorWalker);
+ return new HashMap<String,Float>(viterbiFeatureVectorWalker.getFeaturesMap());
+ }
+ }
+
+ private List<List<Integer>> extractViterbiWordAlignment(final HyperGraph hypergraph) {
+ if (hypergraph == null) {
+ return emptyList();
+ } else {
+ final WordAlignmentExtractor wordAlignmentWalker = new WordAlignmentExtractor();
+ walk(hypergraph.goalNode, wordAlignmentWalker);
+ return wordAlignmentWalker.getFinalWordAlignments();
+ }
+ }
+
+ private float extractTranslationScore(final HyperGraph hypergraph) {
+ if (hypergraph == null) {
+ return 0;
+ } else {
+ return hypergraph.goalNode.getScore();
+ }
+ }
+
+ private String extractViterbiString(final HyperGraph hypergraph) {
+ if (hypergraph == null) {
+ return sourceSentence.source();
+ } else {
+ final WalkerFunction viterbiOutputStringWalker = new ViterbiOutputStringWalkerFunction();
+ walk(hypergraph.goalNode, viterbiOutputStringWalker);
+ return viterbiOutputStringWalker.toString();
+ }
+ }
+
+ private List<String> extractTranslationTokens() {
+ if (translationString.isEmpty()) {
+ return emptyList();
+ } else {
+ return asList(translationString.split("\\s+"));
+ }
+ }
+
+ // Getters to use upstream
+
+ public Sentence getSourceSentence() {
+ return sourceSentence;
+ }
+
+ public int getSentenceId() {
+ return sourceSentence.id();
+ }
+
+ public String getTranslationString() {
+ return translationString;
+ }
+
+ public List<String> getTranslationTokens() {
+ return translationTokens;
+ }
+
+ public float getTranslationScore() {
+ return translationScore;
+ }
+
+ /**
+ * Returns the list of target-to-source word alignments.
+ */
+ public List<List<Integer>> getTranslationWordAlignments() {
+ return translationWordAlignments;
+ }
+
+ public Map<String,Float> getTranslationFeatures() {
+ return translationFeatures;
+ }
+
+ /**
+ * Time, in seconds, taken to build output information from the hypergraph.
+ */
+ public Float getExtractionTime() {
+ return extractionTime;
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/4f2bec7c/joshua-6/src/joshua/decoder/hypergraph/ViterbiFeatureVectorWalkerFunction.java
----------------------------------------------------------------------
diff --git a/joshua-6/src/joshua/decoder/hypergraph/ViterbiFeatureVectorWalkerFunction.java b/joshua-6/src/joshua/decoder/hypergraph/ViterbiFeatureVectorWalkerFunction.java
new file mode 100644
index 0000000..5af6c4d
--- /dev/null
+++ b/joshua-6/src/joshua/decoder/hypergraph/ViterbiFeatureVectorWalkerFunction.java
@@ -0,0 +1,44 @@
+package joshua.decoder.hypergraph;
+
+import static joshua.decoder.chart_parser.ComputeNodeResult.computeTransitionFeatures;
+
+import java.util.List;
+import java.util.Map;
+
+import joshua.decoder.ff.FeatureFunction;
+import joshua.decoder.ff.FeatureVector;
+import joshua.decoder.segment_file.Sentence;
+
+public class ViterbiFeatureVectorWalkerFunction implements WalkerFunction {
+
+ private final FeatureVector features;
+ private final List<FeatureFunction> featureFunctions;
+ private final Sentence sourceSentence;
+
+ public ViterbiFeatureVectorWalkerFunction(
+ final List<FeatureFunction> featureFunctions,
+ final Sentence sourceSentence) {
+ this.features = new FeatureVector();
+ this.featureFunctions = featureFunctions;
+ this.sourceSentence = sourceSentence;
+ }
+
+ /**
+ * Recomputes the feature values of the node's best hyperedge and adds them to the accumulated features.
+ */
+ @Override
+ public void apply(HGNode node) {
+ final FeatureVector edgeFeatures = computeTransitionFeatures(
+ featureFunctions, node.bestHyperedge, node.i, node.j, sourceSentence);
+ features.add(edgeFeatures);
+ }
+
+ public FeatureVector getFeatures() {
+ return features;
+ }
+
+ public Map<String,Float> getFeaturesMap() {
+ return features.getMap();
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/4f2bec7c/joshua-6/src/joshua/decoder/hypergraph/ViterbiOutputStringWalkerFunction.java
----------------------------------------------------------------------
diff --git a/joshua-6/src/joshua/decoder/hypergraph/ViterbiOutputStringWalkerFunction.java b/joshua-6/src/joshua/decoder/hypergraph/ViterbiOutputStringWalkerFunction.java
new file mode 100644
index 0000000..0c84375
--- /dev/null
+++ b/joshua-6/src/joshua/decoder/hypergraph/ViterbiOutputStringWalkerFunction.java
@@ -0,0 +1,96 @@
+package joshua.decoder.hypergraph;
+
+import static java.lang.Integer.MAX_VALUE;
+import static joshua.corpus.Vocabulary.getWords;
+import static joshua.corpus.Vocabulary.nt;
+
+import java.util.Stack;
+
+import joshua.decoder.ff.tm.Rule;
+
+public class ViterbiOutputStringWalkerFunction implements WalkerFunction {
+
+ private Stack<int[]> viterbiWords = new Stack<int[]>();
+
+ @Override
+ public void apply(HGNode node) {
+ final Rule rule = node.bestHyperedge.getRule();
+ if (rule != null) {
+ merge(rule.getEnglish());
+ }
+ }
+
+ private boolean containsNonTerminals(final int[] ids) {
+ boolean hasNonTerminals = false;
+ for (int i = 0; i < ids.length; i++) {
+ if (nt(ids[i])) {
+ hasNonTerminals = true;
+ break;
+ }
+ }
+ return hasNonTerminals;
+ }
+
+ /**
+ * Returns the index of the next non-terminal slot to fill (0 if ids has none).
+ * Since non-terminals in right hand sides of rules are indexed by
+ * their order on the source side, this function looks for the largest
+ * negative id in ids and returns its index.
+ */
+ private int getNextNonTerminalIndexToFill(final int[] ids) {
+ int nextIndex = 0;
+ int nextNonTerminal = -MAX_VALUE;
+ for (int i = 0; i < ids.length; i++) {
+ if (nt(ids[i]) && ids[i] > nextNonTerminal) {
+ nextIndex = i;
+ nextNonTerminal = ids[i];
+ }
+ }
+ return nextIndex;
+ }
+
+ private int[] substituteNonTerminal(final int[] parentWords, final int[] childWords) {
+ final int ntIndex = getNextNonTerminalIndexToFill(parentWords);
+ final int[] result = new int[parentWords.length + childWords.length - 1];
+ int resultIndex = 0;
+ for (int i = 0; i < ntIndex; i++) {
+ result[resultIndex++] = parentWords[i];
+ }
+ for (int i = 0; i < childWords.length; i++) {
+ result[resultIndex++] = childWords[i];
+ }
+ for (int i = ntIndex + 1; i < parentWords.length; i++) {
+ result[resultIndex++] = parentWords[i];
+ }
+ return result;
+ }
+
+ private void merge(final int[] words) {
+ if (!containsNonTerminals(words)
+ && !viterbiWords.isEmpty()
+ && containsNonTerminals(viterbiWords.peek())) {
+ merge(substituteNonTerminal(viterbiWords.pop(), words));
+ } else {
+ viterbiWords.add(words);
+ }
+ }
+
+ @Override
+ public String toString() {
+ if (viterbiWords.isEmpty()) {
+ return "";
+ }
+
+ if (viterbiWords.size() != 1) {
+ throw new RuntimeException(
+ String.format(
+ "Stack of ViterbiOutputStringWalker should contain only a single (last) element, but was size %d", viterbiWords.size()));
+ }
+
+ String result = getWords(viterbiWords.peek());
+ // strip off sentence markers (<s>,</s>); NOTE(review): substring throws StringIndexOutOfBoundsException if result has fewer than two spaces
+ result = result.substring(result.indexOf(' ') + 1, result.lastIndexOf(' '));
+ return result.trim();
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/4f2bec7c/src/joshua/decoder/StructuredTranslation.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/StructuredTranslation.java b/src/joshua/decoder/StructuredTranslation.java
deleted file mode 100644
index 1939ea0..0000000
--- a/src/joshua/decoder/StructuredTranslation.java
+++ /dev/null
@@ -1,143 +0,0 @@
-package joshua.decoder;
-
-import static java.util.Arrays.asList;
-import static java.util.Collections.emptyList;
-import static java.util.Collections.emptyMap;
-import static joshua.decoder.hypergraph.ViterbiExtractor.walk;
-
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import joshua.decoder.ff.FeatureFunction;
-import joshua.decoder.hypergraph.HyperGraph;
-import joshua.decoder.hypergraph.ViterbiFeatureVectorWalkerFunction;
-import joshua.decoder.hypergraph.ViterbiOutputStringWalkerFunction;
-import joshua.decoder.hypergraph.WalkerFunction;
-import joshua.decoder.hypergraph.WordAlignmentExtractor;
-import joshua.decoder.segment_file.Sentence;
-
-/**
- * structuredTranslation provides a more structured access to translation
- * results than the Translation class.
- * Members of instances of this class can be used upstream.
- * <br/>
- * TODO:
- * Enable K-Best extraction.
- *
- * @author fhieber
- */
-public class StructuredTranslation {
-
- private final Sentence sourceSentence;
- private final List<FeatureFunction> featureFunctions;
-
- private final String translationString;
- private final List<String> translationTokens;
- private final float translationScore;
- private List<List<Integer>> translationWordAlignments;
- private Map<String,Float> translationFeatures;
- private final float extractionTime;
-
- public StructuredTranslation(final Sentence sourceSentence,
- final HyperGraph hypergraph,
- final List<FeatureFunction> featureFunctions) {
-
- final long startTime = System.currentTimeMillis();
-
- this.sourceSentence = sourceSentence;
- this.featureFunctions = featureFunctions;
- this.translationString = extractViterbiString(hypergraph);
- this.translationTokens = extractTranslationTokens();
- this.translationScore = extractTranslationScore(hypergraph);
- this.translationFeatures = extractViterbiFeatures(hypergraph);
- this.translationWordAlignments = extractViterbiWordAlignment(hypergraph);
- this.extractionTime = (System.currentTimeMillis() - startTime) / 1000.0f;
- }
-
- private Map<String,Float> extractViterbiFeatures(final HyperGraph hypergraph) {
- if (hypergraph == null) {
- return emptyMap();
- } else {
- ViterbiFeatureVectorWalkerFunction viterbiFeatureVectorWalker = new ViterbiFeatureVectorWalkerFunction(featureFunctions, sourceSentence);
- walk(hypergraph.goalNode, viterbiFeatureVectorWalker);
- return new HashMap<String,Float>(viterbiFeatureVectorWalker.getFeaturesMap());
- }
- }
-
- private List<List<Integer>> extractViterbiWordAlignment(final HyperGraph hypergraph) {
- if (hypergraph == null) {
- return emptyList();
- } else {
- final WordAlignmentExtractor wordAlignmentWalker = new WordAlignmentExtractor();
- walk(hypergraph.goalNode, wordAlignmentWalker);
- return wordAlignmentWalker.getFinalWordAlignments();
- }
- }
-
- private float extractTranslationScore(final HyperGraph hypergraph) {
- if (hypergraph == null) {
- return 0;
- } else {
- return hypergraph.goalNode.getScore();
- }
- }
-
- private String extractViterbiString(final HyperGraph hypergraph) {
- if (hypergraph == null) {
- return sourceSentence.source();
- } else {
- final WalkerFunction viterbiOutputStringWalker = new ViterbiOutputStringWalkerFunction();
- walk(hypergraph.goalNode, viterbiOutputStringWalker);
- return viterbiOutputStringWalker.toString();
- }
- }
-
- private List<String> extractTranslationTokens() {
- if (translationString.isEmpty()) {
- return emptyList();
- } else {
- return asList(translationString.split("\\s+"));
- }
- }
-
- // Getters to use upstream
-
- public Sentence getSourceSentence() {
- return sourceSentence;
- }
-
- public int getSentenceId() {
- return sourceSentence.id();
- }
-
- public String getTranslationString() {
- return translationString;
- }
-
- public List<String> getTranslationTokens() {
- return translationTokens;
- }
-
- public float getTranslationScore() {
- return translationScore;
- }
-
- /**
- * Returns a list of target to source alignments.
- */
- public List<List<Integer>> getTranslationWordAlignments() {
- return translationWordAlignments;
- }
-
- public Map<String,Float> getTranslationFeatures() {
- return translationFeatures;
- }
-
- /**
- * Time taken to build output information from the hypergraph.
- */
- public Float getExtractionTime() {
- return extractionTime;
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/4f2bec7c/src/joshua/decoder/hypergraph/ViterbiFeatureVectorWalkerFunction.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/hypergraph/ViterbiFeatureVectorWalkerFunction.java b/src/joshua/decoder/hypergraph/ViterbiFeatureVectorWalkerFunction.java
deleted file mode 100644
index 5af6c4d..0000000
--- a/src/joshua/decoder/hypergraph/ViterbiFeatureVectorWalkerFunction.java
+++ /dev/null
@@ -1,44 +0,0 @@
-package joshua.decoder.hypergraph;
-
-import static joshua.decoder.chart_parser.ComputeNodeResult.computeTransitionFeatures;
-
-import java.util.List;
-import java.util.Map;
-
-import joshua.decoder.ff.FeatureFunction;
-import joshua.decoder.ff.FeatureVector;
-import joshua.decoder.segment_file.Sentence;
-
-public class ViterbiFeatureVectorWalkerFunction implements WalkerFunction {
-
- private final FeatureVector features;
- private final List<FeatureFunction> featureFunctions;
- private final Sentence sourceSentence;
-
- public ViterbiFeatureVectorWalkerFunction(
- final List<FeatureFunction> featureFunctions,
- final Sentence sourceSentence) {
- this.features = new FeatureVector();
- this.featureFunctions = featureFunctions;
- this.sourceSentence = sourceSentence;
- }
-
- /**
- * Recompute feature values for each Viterbi edge and add to features.
- */
- @Override
- public void apply(HGNode node) {
- final FeatureVector edgeFeatures = computeTransitionFeatures(
- featureFunctions, node.bestHyperedge, node.i, node.j, sourceSentence);
- features.add(edgeFeatures);
- }
-
- public FeatureVector getFeatures() {
- return features;
- }
-
- public Map<String,Float> getFeaturesMap() {
- return features.getMap();
- }
-
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/4f2bec7c/src/joshua/decoder/hypergraph/ViterbiOutputStringWalkerFunction.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/hypergraph/ViterbiOutputStringWalkerFunction.java b/src/joshua/decoder/hypergraph/ViterbiOutputStringWalkerFunction.java
deleted file mode 100644
index 0c84375..0000000
--- a/src/joshua/decoder/hypergraph/ViterbiOutputStringWalkerFunction.java
+++ /dev/null
@@ -1,96 +0,0 @@
-package joshua.decoder.hypergraph;
-
-import static java.lang.Integer.MAX_VALUE;
-import static joshua.corpus.Vocabulary.getWords;
-import static joshua.corpus.Vocabulary.nt;
-
-import java.util.Stack;
-
-import joshua.decoder.ff.tm.Rule;
-
-public class ViterbiOutputStringWalkerFunction implements WalkerFunction {
-
- private Stack<int[]> viterbiWords = new Stack<int[]>();
-
- @Override
- public void apply(HGNode node) {
- final Rule rule = node.bestHyperedge.getRule();
- if (rule != null) {
- merge(rule.getEnglish());
- }
- }
-
- private boolean containsNonTerminals(final int[] ids) {
- boolean hasNonTerminals = false;
- for (int i = 0; i < ids.length; i++) {
- if (nt(ids[i])) {
- hasNonTerminals = true;
- break;
- }
- }
- return hasNonTerminals;
- }
-
- /**
- * Returns the index of the next non-terminal slot to fill.
- * Since non-terminals in right hand sides of rules are indexed by
- * their order on the source side, this function looks for the largest
- * negative id in ids and returns its index.
- */
- private int getNextNonTerminalIndexToFill(final int[] ids) {
- int nextIndex = 0;
- int nextNonTerminal = -MAX_VALUE;
- for (int i = 0; i < ids.length; i++) {
- if (nt(ids[i]) && ids[i] > nextNonTerminal) {
- nextIndex = i;
- nextNonTerminal = ids[i];
- }
- }
- return nextIndex;
- }
-
- private int[] substituteNonTerminal(final int[] parentWords, final int[] childWords) {
- final int ntIndex = getNextNonTerminalIndexToFill(parentWords);
- final int[] result = new int[parentWords.length + childWords.length - 1];
- int resultIndex = 0;
- for (int i = 0; i < ntIndex; i++) {
- result[resultIndex++] = parentWords[i];
- }
- for (int i = 0; i < childWords.length; i++) {
- result[resultIndex++] = childWords[i];
- }
- for (int i = ntIndex + 1; i < parentWords.length; i++) {
- result[resultIndex++] = parentWords[i];
- }
- return result;
- }
-
- private void merge(final int[] words) {
- if (!containsNonTerminals(words)
- && !viterbiWords.isEmpty()
- && containsNonTerminals(viterbiWords.peek())) {
- merge(substituteNonTerminal(viterbiWords.pop(), words));
- } else {
- viterbiWords.add(words);
- }
- }
-
- @Override
- public String toString() {
- if (viterbiWords.isEmpty()) {
- return "";
- }
-
- if (viterbiWords.size() != 1) {
- throw new RuntimeException(
- String.format(
- "Stack of ViterbiOutputStringWalker should contain only a single (last) element, but was size %d", viterbiWords.size()));
- }
-
- String result = getWords(viterbiWords.peek());
- // strip of sentence markers (<s>,</s>)
- result = result.substring(result.indexOf(' ') + 1, result.lastIndexOf(' '));
- return result.trim();
- }
-
-}
\ No newline at end of file