You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/05/27 00:34:24 UTC
[18/32] incubator-joshua git commit: Replaced redundant
Vocabulary.nt() with FormatUtils.isNonterminal(), for clarity and consolidation
Replaced redundant Vocabulary.nt() with FormatUtils.isNonterminal(), for clarity and consolidation
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/868b3409
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/868b3409
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/868b3409
Branch: refs/heads/JOSHUA-252
Commit: 868b340949f324b12d810d03c09c25fd5877d3dc
Parents: 366f408
Author: Matt Post <po...@cs.jhu.edu>
Authored: Wed May 25 05:50:16 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Wed May 25 05:50:16 2016 -0400
----------------------------------------------------------------------
src/joshua/corpus/TerminalIterator.java | 4 +++-
src/joshua/corpus/Vocabulary.java | 10 ---------
src/joshua/decoder/BLEU.java | 1 -
src/joshua/decoder/Decoder.java | 1 -
src/joshua/decoder/ff/TargetBigram.java | 3 ++-
src/joshua/decoder/ff/lm/LanguageModelFF.java | 5 +++--
.../ff/lm/StateMinimizingLanguageModel.java | 5 +++--
.../GrammarBuilderWalkerFunction.java | 22 ++++++++++----------
.../hypergraph/OutputStringExtractor.java | 8 +++----
src/joshua/decoder/segment_file/Sentence.java | 1 -
src/joshua/metrics/MinimumChangeBLEU.java | 1 -
src/joshua/metrics/Precis.java | 1 -
src/joshua/oracle/OracleExtractionHG.java | 3 ++-
src/joshua/util/FormatUtils.java | 8 +++++++
14 files changed, 36 insertions(+), 37 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/868b3409/src/joshua/corpus/TerminalIterator.java
----------------------------------------------------------------------
diff --git a/src/joshua/corpus/TerminalIterator.java b/src/joshua/corpus/TerminalIterator.java
index 29544fb..12cb16c 100644
--- a/src/joshua/corpus/TerminalIterator.java
+++ b/src/joshua/corpus/TerminalIterator.java
@@ -21,6 +21,8 @@ package joshua.corpus;
import java.util.Iterator;
import java.util.NoSuchElementException;
+import joshua.util.FormatUtils;
+
/**
* Iterator capable of iterating over those word identifiers in a phrase which represent terminals.
* <p>
@@ -49,7 +51,7 @@ public class TerminalIterator implements Iterator<Integer> {
/* See Javadoc for java.util.Iterator#next(). */
public boolean hasNext() {
- while (dirty || Vocabulary.nt(next)) {
+ while (dirty || FormatUtils.isNonterminal(next)) {
nextIndex++;
if (nextIndex < words.length) {
next = words[nextIndex];
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/868b3409/src/joshua/corpus/Vocabulary.java
----------------------------------------------------------------------
diff --git a/src/joshua/corpus/Vocabulary.java b/src/joshua/corpus/Vocabulary.java
index 6f72ad8..74f6a47 100644
--- a/src/joshua/corpus/Vocabulary.java
+++ b/src/joshua/corpus/Vocabulary.java
@@ -227,16 +227,6 @@ public class Vocabulary {
return UNKNOWN_WORD;
}
- /**
- * Returns true if the Vocabulary ID represents a nonterminal.
- *
- * @param id
- * @return
- */
- public static boolean nt(int id) {
- return (id < 0);
- }
-
public static int size() {
long lock_stamp = lock.readLock();
try {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/868b3409/src/joshua/decoder/BLEU.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/BLEU.java b/src/joshua/decoder/BLEU.java
index 1b3e3f8..2fd3287 100644
--- a/src/joshua/decoder/BLEU.java
+++ b/src/joshua/decoder/BLEU.java
@@ -20,7 +20,6 @@ package joshua.decoder;
import java.util.ArrayList;
import java.util.HashMap;
-import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/868b3409/src/joshua/decoder/Decoder.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/Decoder.java b/src/joshua/decoder/Decoder.java
index 0057f87..22ed8b9 100644
--- a/src/joshua/decoder/Decoder.java
+++ b/src/joshua/decoder/Decoder.java
@@ -28,7 +28,6 @@ import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.lang.reflect.Constructor;
import java.util.ArrayList;
-import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/868b3409/src/joshua/decoder/ff/TargetBigram.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/TargetBigram.java b/src/joshua/decoder/ff/TargetBigram.java
index 846273d..defaaf1 100644
--- a/src/joshua/decoder/ff/TargetBigram.java
+++ b/src/joshua/decoder/ff/TargetBigram.java
@@ -31,6 +31,7 @@ import joshua.decoder.ff.state_maintenance.NgramDPState;
import joshua.decoder.ff.tm.Rule;
import joshua.decoder.hypergraph.HGNode;
import joshua.decoder.segment_file.Sentence;
+import joshua.util.FormatUtils;
import joshua.util.io.LineReader;
/***
@@ -112,7 +113,7 @@ public class TargetBigram extends StatefulFF {
for (int c = 0; c < enWords.length; c++) {
int curID = enWords[c];
- if (Vocabulary.nt(curID)) {
+ if (FormatUtils.isNonterminal(curID)) {
int index = -(curID + 1);
NgramDPState state = (NgramDPState) tailNodes.get(index).getDPState(stateIndex);
int[] leftContext = state.getLeftLMStateWords();
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/868b3409/src/joshua/decoder/ff/lm/LanguageModelFF.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/lm/LanguageModelFF.java b/src/joshua/decoder/ff/lm/LanguageModelFF.java
index a002de7..984ce97 100644
--- a/src/joshua/decoder/ff/lm/LanguageModelFF.java
+++ b/src/joshua/decoder/ff/lm/LanguageModelFF.java
@@ -40,6 +40,7 @@ import joshua.decoder.ff.state_maintenance.NgramDPState;
import joshua.decoder.ff.tm.Rule;
import joshua.decoder.hypergraph.HGNode;
import joshua.decoder.segment_file.Sentence;
+import joshua.util.FormatUtils;
/**
* This class performs the following:
@@ -318,7 +319,7 @@ public class LanguageModelFF extends StatefulFF {
*/
for (int c = 0; c < enWords.length; c++) {
int currentWord = enWords[c];
- if (Vocabulary.nt(currentWord)) {
+ if (FormatUtils.isNonterminal(currentWord)) {
estimate += scoreChunkLogP(words, considerIncompleteNgrams, skipStart);
words.clear();
skipStart = false;
@@ -376,7 +377,7 @@ public class LanguageModelFF extends StatefulFF {
for (int c = 0; c < enWords.length; c++) {
int curID = enWords[c];
- if (Vocabulary.nt(curID)) {
+ if (FormatUtils.isNonterminal(curID)) {
int index = -(curID + 1);
NgramDPState state = (NgramDPState) tailNodes.get(index).getDPState(stateIndex);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/868b3409/src/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java b/src/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
index f07b668..1f7e818 100644
--- a/src/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
+++ b/src/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
@@ -33,6 +33,7 @@ import joshua.decoder.ff.state_maintenance.KenLMState;
import joshua.decoder.ff.tm.Rule;
import joshua.decoder.hypergraph.HGNode;
import joshua.decoder.segment_file.Sentence;
+import joshua.util.FormatUtils;
/**
* Wrapper for KenLM LMs with left-state minimization. We inherit from the regular
@@ -99,7 +100,7 @@ public class StateMinimizingLanguageModel extends LanguageModelFF {
for (int x = 0; x < ruleWords.length; x++) {
int id = ruleWords[x];
- if (Vocabulary.nt(id)) {
+ if (FormatUtils.isNonterminal(id)) {
// For the estimate, we can just mark negative values
words[x] = -1;
@@ -131,7 +132,7 @@ public class StateMinimizingLanguageModel extends LanguageModelFF {
for (int x = 0; x < ruleWords.length; x++) {
int id = ruleWords[x];
- if (Vocabulary.nt(id)) {
+ if (FormatUtils.isNonterminal(id)) {
// Nonterminal: retrieve the KenLM long that records the state
int index = -(id + 1);
KenLMState state = (KenLMState) tailNodes.get(index).getDPState(stateIndex);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/868b3409/src/joshua/decoder/hypergraph/GrammarBuilderWalkerFunction.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/hypergraph/GrammarBuilderWalkerFunction.java b/src/joshua/decoder/hypergraph/GrammarBuilderWalkerFunction.java
index a4df7e5..d4b11df 100644
--- a/src/joshua/decoder/hypergraph/GrammarBuilderWalkerFunction.java
+++ b/src/joshua/decoder/hypergraph/GrammarBuilderWalkerFunction.java
@@ -115,16 +115,16 @@ public class GrammarBuilderWalkerFunction implements WalkerFunction {
int[] result = new int[english.length];
for (int i = 0; i < english.length; i++) {
int curr = english[i];
- if (!Vocabulary.nt(curr)) {
- // If it's a terminal symbol, we just copy it into the new rule.
+ if (! FormatUtils.isNonterminal(curr)) {
+ // If it's a terminal symbol, we just copy it into the new rule.
result[i] = curr;
} else {
- // If it's a nonterminal, its value is -N, where N is the index
- // of the nonterminal on the source side.
- //
- // That is, if we would call a nonterminal "[X,2]", the value of
- // curr at this point is -2. And the tail node that it points at
- // is #1 (since getTailNodes() is 0-indexed).
+ // If it's a nonterminal, its value is -N, where N is the index
+ // of the nonterminal on the source side.
+ //
+ // That is, if we would call a nonterminal "[X,2]", the value of
+ // curr at this point is -2. And the tail node that it points at
+ // is #1 (since getTailNodes() is 0-indexed).
int index = -curr - 1;
result[i] = getLabelWithSpan(edge.getTailNodes().get(index));
}
@@ -135,12 +135,12 @@ public class GrammarBuilderWalkerFunction implements WalkerFunction {
private static int[] getNewTargetFromSource(int[] source) {
int[] result = new int[source.length];
- int currNT = -1; // value to stick into NT slots
+ int currNT = -1; // value to stick into NT slots
for (int i = 0; i < source.length; i++) {
result[i] = source[i];
- if (Vocabulary.nt(result[i])) {
+ if (FormatUtils.isNonterminal(result[i])) {
result[i] = currNT;
- currNT--;
+ currNT--;
}
}
// System.err.printf("target: %s\n", result);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/868b3409/src/joshua/decoder/hypergraph/OutputStringExtractor.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/hypergraph/OutputStringExtractor.java b/src/joshua/decoder/hypergraph/OutputStringExtractor.java
index acb2e17..f67a9df 100644
--- a/src/joshua/decoder/hypergraph/OutputStringExtractor.java
+++ b/src/joshua/decoder/hypergraph/OutputStringExtractor.java
@@ -20,13 +20,13 @@ package joshua.decoder.hypergraph;
import static java.lang.Math.min;
import static joshua.corpus.Vocabulary.getWords;
-import static joshua.corpus.Vocabulary.nt;
import java.util.Stack;
import joshua.decoder.ff.tm.Rule;
import joshua.decoder.hypergraph.KBestExtractor.DerivationState;
import joshua.decoder.hypergraph.KBestExtractor.DerivationVisitor;
+import joshua.util.FormatUtils;
public class OutputStringExtractor implements WalkerFunction, DerivationVisitor {
@@ -66,7 +66,7 @@ public class OutputStringExtractor implements WalkerFunction, DerivationVisitor
private static int getSourceNonTerminalPosition(final int[] words, int nonTerminalIndex) {
int nonTerminalsSeen = 0;
for (int i = 0; i < words.length; i++) {
- if (nt(words[i])) {
+ if (FormatUtils.isNonterminal(words[i])) {
nonTerminalsSeen++;
if (nonTerminalsSeen == nonTerminalIndex) {
return i;
@@ -89,7 +89,7 @@ public class OutputStringExtractor implements WalkerFunction, DerivationVisitor
*/
private static int getTargetNonTerminalPosition(int[] words, int nonTerminalIndex) {
for (int pos = 0; pos < words.length; pos++) {
- if (nt(words[pos]) && -(words[pos] + 1) == nonTerminalIndex) {
+ if (FormatUtils.isNonterminal(words[pos]) && -(words[pos] + 1) == nonTerminalIndex) {
return pos;
}
}
@@ -174,7 +174,7 @@ public class OutputStringExtractor implements WalkerFunction, DerivationVisitor
* of child and the arity of this.
*/
private void substituteNonTerminalAtPosition(final int[] words, final int position) {
- assert(nt(this.words[position]));
+ assert(FormatUtils.isNonterminal(this.words[position]));
final int[] result = new int[words.length + this.words.length - 1];
int resultIndex = 0;
for (int i = 0; i < position; i++) {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/868b3409/src/joshua/decoder/segment_file/Sentence.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/segment_file/Sentence.java b/src/joshua/decoder/segment_file/Sentence.java
index 588850b..c77b51c 100644
--- a/src/joshua/decoder/segment_file/Sentence.java
+++ b/src/joshua/decoder/segment_file/Sentence.java
@@ -19,7 +19,6 @@
package joshua.decoder.segment_file;
import static joshua.util.FormatUtils.addSentenceMarkers;
-import static joshua.util.FormatUtils.escapeSpecialSymbols;
import java.util.ArrayList;
import java.util.HashSet;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/868b3409/src/joshua/metrics/MinimumChangeBLEU.java
----------------------------------------------------------------------
diff --git a/src/joshua/metrics/MinimumChangeBLEU.java b/src/joshua/metrics/MinimumChangeBLEU.java
index fa764c3..91ee81c 100644
--- a/src/joshua/metrics/MinimumChangeBLEU.java
+++ b/src/joshua/metrics/MinimumChangeBLEU.java
@@ -19,7 +19,6 @@
package joshua.metrics;
import java.util.HashMap;
-import java.util.Iterator;
import java.util.Map;
import java.util.logging.Logger;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/868b3409/src/joshua/metrics/Precis.java
----------------------------------------------------------------------
diff --git a/src/joshua/metrics/Precis.java b/src/joshua/metrics/Precis.java
index 82f4106..84279ac 100644
--- a/src/joshua/metrics/Precis.java
+++ b/src/joshua/metrics/Precis.java
@@ -19,7 +19,6 @@
package joshua.metrics;
import java.util.HashMap;
-import java.util.Iterator;
import java.util.Map;
import java.util.logging.Logger;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/868b3409/src/joshua/oracle/OracleExtractionHG.java
----------------------------------------------------------------------
diff --git a/src/joshua/oracle/OracleExtractionHG.java b/src/joshua/oracle/OracleExtractionHG.java
index 7e7fcb8..8a688e3 100644
--- a/src/joshua/oracle/OracleExtractionHG.java
+++ b/src/joshua/oracle/OracleExtractionHG.java
@@ -35,6 +35,7 @@ import joshua.decoder.hypergraph.HyperEdge;
import joshua.decoder.hypergraph.HyperGraph;
import joshua.decoder.hypergraph.KBestExtractor;
import joshua.util.FileUtility;
+import joshua.util.FormatUtils;
import joshua.util.io.LineReader;
/**
@@ -384,7 +385,7 @@ public class OracleExtractionHG extends SplitHg {
// #### get left_state_sequence, right_state_sequence, total_hyp_len, num_ngram_match
for (int c = 0; c < en_words.length; c++) {
int c_id = en_words[c];
- if (Vocabulary.nt(c_id)) {
+ if (FormatUtils.isNonterminal(c_id)) {
int index = -(c_id + 1);
DPStateOracle ant_state = (DPStateOracle) l_ant_virtual_item.get(index).dp_state;
total_hyp_len += ant_state.best_len;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/868b3409/src/joshua/util/FormatUtils.java
----------------------------------------------------------------------
diff --git a/src/joshua/util/FormatUtils.java b/src/joshua/util/FormatUtils.java
index 568d3ca..f6cfcd7 100644
--- a/src/joshua/util/FormatUtils.java
+++ b/src/joshua/util/FormatUtils.java
@@ -44,6 +44,14 @@ public class FormatUtils {
public static boolean isNonterminal(String token) {
return (token.length() >=3 && token.charAt(0) == '[') && (token.charAt(token.length() - 1) == ']');
}
+
+ /**
+ * Determines whether the ID represents a nonterminal. This is a trivial check, since nonterminal
+ * IDs are simply negative ones.
+ */
+ public static boolean isNonterminal(int id) {
+ return id < 0;
+ }
/**
* Nonterminals are stored in the vocabulary in square brackets. This removes them when you