You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/05/27 00:34:24 UTC

[18/32] incubator-joshua git commit: Replaced redundant Vocabulary.nt() with FormatUtils.isNonterminal(), for clarity and consolidation

Replaced redundant Vocabulary.nt() with FormatUtils.isNonterminal(), for clarity and consolidation


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/868b3409
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/868b3409
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/868b3409

Branch: refs/heads/JOSHUA-252
Commit: 868b340949f324b12d810d03c09c25fd5877d3dc
Parents: 366f408
Author: Matt Post <po...@cs.jhu.edu>
Authored: Wed May 25 05:50:16 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Wed May 25 05:50:16 2016 -0400

----------------------------------------------------------------------
 src/joshua/corpus/TerminalIterator.java         |  4 +++-
 src/joshua/corpus/Vocabulary.java               | 10 ---------
 src/joshua/decoder/BLEU.java                    |  1 -
 src/joshua/decoder/Decoder.java                 |  1 -
 src/joshua/decoder/ff/TargetBigram.java         |  3 ++-
 src/joshua/decoder/ff/lm/LanguageModelFF.java   |  5 +++--
 .../ff/lm/StateMinimizingLanguageModel.java     |  5 +++--
 .../GrammarBuilderWalkerFunction.java           | 22 ++++++++++----------
 .../hypergraph/OutputStringExtractor.java       |  8 +++----
 src/joshua/decoder/segment_file/Sentence.java   |  1 -
 src/joshua/metrics/MinimumChangeBLEU.java       |  1 -
 src/joshua/metrics/Precis.java                  |  1 -
 src/joshua/oracle/OracleExtractionHG.java       |  3 ++-
 src/joshua/util/FormatUtils.java                |  8 +++++++
 14 files changed, 36 insertions(+), 37 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/868b3409/src/joshua/corpus/TerminalIterator.java
----------------------------------------------------------------------
diff --git a/src/joshua/corpus/TerminalIterator.java b/src/joshua/corpus/TerminalIterator.java
index 29544fb..12cb16c 100644
--- a/src/joshua/corpus/TerminalIterator.java
+++ b/src/joshua/corpus/TerminalIterator.java
@@ -21,6 +21,8 @@ package joshua.corpus;
 import java.util.Iterator;
 import java.util.NoSuchElementException;
 
+import joshua.util.FormatUtils;
+
 /**
  * Iterator capable of iterating over those word identifiers in a phrase which represent terminals.
  * <p>
@@ -49,7 +51,7 @@ public class TerminalIterator implements Iterator<Integer> {
   /* See Javadoc for java.util.Iterator#next(). */
   public boolean hasNext() {
 
-    while (dirty || Vocabulary.nt(next)) {
+    while (dirty || FormatUtils.isNonterminal(next)) {
       nextIndex++;
       if (nextIndex < words.length) {
         next = words[nextIndex];

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/868b3409/src/joshua/corpus/Vocabulary.java
----------------------------------------------------------------------
diff --git a/src/joshua/corpus/Vocabulary.java b/src/joshua/corpus/Vocabulary.java
index 6f72ad8..74f6a47 100644
--- a/src/joshua/corpus/Vocabulary.java
+++ b/src/joshua/corpus/Vocabulary.java
@@ -227,16 +227,6 @@ public class Vocabulary {
     return UNKNOWN_WORD;
   }
 
-  /**
-   * Returns true if the Vocabulary ID represents a nonterminal.
-   *
-   * @param id
-   * @return
-   */
-  public static boolean nt(int id) {
-    return (id < 0);
-  }
-
   public static int size() {
     long lock_stamp = lock.readLock();
     try {

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/868b3409/src/joshua/decoder/BLEU.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/BLEU.java b/src/joshua/decoder/BLEU.java
index 1b3e3f8..2fd3287 100644
--- a/src/joshua/decoder/BLEU.java
+++ b/src/joshua/decoder/BLEU.java
@@ -20,7 +20,6 @@ package joshua.decoder;
 
 import java.util.ArrayList;
 import java.util.HashMap;
-import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/868b3409/src/joshua/decoder/Decoder.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/Decoder.java b/src/joshua/decoder/Decoder.java
index 0057f87..22ed8b9 100644
--- a/src/joshua/decoder/Decoder.java
+++ b/src/joshua/decoder/Decoder.java
@@ -28,7 +28,6 @@ import java.io.FileNotFoundException;
 import java.io.FileWriter;
 import java.lang.reflect.Constructor;
 import java.util.ArrayList;
-import java.util.Collection;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/868b3409/src/joshua/decoder/ff/TargetBigram.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/TargetBigram.java b/src/joshua/decoder/ff/TargetBigram.java
index 846273d..defaaf1 100644
--- a/src/joshua/decoder/ff/TargetBigram.java
+++ b/src/joshua/decoder/ff/TargetBigram.java
@@ -31,6 +31,7 @@ import joshua.decoder.ff.state_maintenance.NgramDPState;
 import joshua.decoder.ff.tm.Rule;
 import joshua.decoder.hypergraph.HGNode;
 import joshua.decoder.segment_file.Sentence;
+import joshua.util.FormatUtils;
 import joshua.util.io.LineReader;
 
 /***
@@ -112,7 +113,7 @@ public class TargetBigram extends StatefulFF {
     for (int c = 0; c < enWords.length; c++) {
       int curID = enWords[c];
 
-      if (Vocabulary.nt(curID)) {
+      if (FormatUtils.isNonterminal(curID)) {
         int index = -(curID + 1);
         NgramDPState state = (NgramDPState) tailNodes.get(index).getDPState(stateIndex);
         int[] leftContext = state.getLeftLMStateWords();

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/868b3409/src/joshua/decoder/ff/lm/LanguageModelFF.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/lm/LanguageModelFF.java b/src/joshua/decoder/ff/lm/LanguageModelFF.java
index a002de7..984ce97 100644
--- a/src/joshua/decoder/ff/lm/LanguageModelFF.java
+++ b/src/joshua/decoder/ff/lm/LanguageModelFF.java
@@ -40,6 +40,7 @@ import joshua.decoder.ff.state_maintenance.NgramDPState;
 import joshua.decoder.ff.tm.Rule;
 import joshua.decoder.hypergraph.HGNode;
 import joshua.decoder.segment_file.Sentence;
+import joshua.util.FormatUtils;
 
 /**
  * This class performs the following:
@@ -318,7 +319,7 @@ public class LanguageModelFF extends StatefulFF {
      */
     for (int c = 0; c < enWords.length; c++) {
       int currentWord = enWords[c];
-      if (Vocabulary.nt(currentWord)) {
+      if (FormatUtils.isNonterminal(currentWord)) {
         estimate += scoreChunkLogP(words, considerIncompleteNgrams, skipStart);
         words.clear();
         skipStart = false;
@@ -376,7 +377,7 @@ public class LanguageModelFF extends StatefulFF {
     for (int c = 0; c < enWords.length; c++) {
       int curID = enWords[c];
 
-      if (Vocabulary.nt(curID)) {
+      if (FormatUtils.isNonterminal(curID)) {
         int index = -(curID + 1);
 
         NgramDPState state = (NgramDPState) tailNodes.get(index).getDPState(stateIndex);

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/868b3409/src/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java b/src/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
index f07b668..1f7e818 100644
--- a/src/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
+++ b/src/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
@@ -33,6 +33,7 @@ import joshua.decoder.ff.state_maintenance.KenLMState;
 import joshua.decoder.ff.tm.Rule;
 import joshua.decoder.hypergraph.HGNode;
 import joshua.decoder.segment_file.Sentence;
+import joshua.util.FormatUtils;
 
 /**
  * Wrapper for KenLM LMs with left-state minimization. We inherit from the regular
@@ -99,7 +100,7 @@ public class StateMinimizingLanguageModel extends LanguageModelFF {
     for (int x = 0; x < ruleWords.length; x++) {
       int id = ruleWords[x];
 
-      if (Vocabulary.nt(id)) {
+      if (FormatUtils.isNonterminal(id)) {
         // For the estimate, we can just mark negative values
         words[x] = -1;
 
@@ -131,7 +132,7 @@ public class StateMinimizingLanguageModel extends LanguageModelFF {
     for (int x = 0; x < ruleWords.length; x++) {
       int id = ruleWords[x];
 
-      if (Vocabulary.nt(id)) {
+      if (FormatUtils.isNonterminal(id)) {
         // Nonterminal: retrieve the KenLM long that records the state
         int index = -(id + 1);
         KenLMState state = (KenLMState) tailNodes.get(index).getDPState(stateIndex);

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/868b3409/src/joshua/decoder/hypergraph/GrammarBuilderWalkerFunction.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/hypergraph/GrammarBuilderWalkerFunction.java b/src/joshua/decoder/hypergraph/GrammarBuilderWalkerFunction.java
index a4df7e5..d4b11df 100644
--- a/src/joshua/decoder/hypergraph/GrammarBuilderWalkerFunction.java
+++ b/src/joshua/decoder/hypergraph/GrammarBuilderWalkerFunction.java
@@ -115,16 +115,16 @@ public class GrammarBuilderWalkerFunction implements WalkerFunction {
     int[] result = new int[english.length];
     for (int i = 0; i < english.length; i++) {
       int curr = english[i];
-      if (!Vocabulary.nt(curr)) {
-				// If it's a terminal symbol, we just copy it into the new rule.
+      if (! FormatUtils.isNonterminal(curr)) {
+        // If it's a terminal symbol, we just copy it into the new rule.
         result[i] = curr;
       } else {
-				// If it's a nonterminal, its value is -N, where N is the index
-				// of the nonterminal on the source side.
-				//
-				// That is, if we would call a nonterminal "[X,2]", the value of
-				// curr at this point is -2. And the tail node that it points at
-				// is #1 (since getTailNodes() is 0-indexed).
+        // If it's a nonterminal, its value is -N, where N is the index
+        // of the nonterminal on the source side.
+        //
+        // That is, if we would call a nonterminal "[X,2]", the value of
+        // curr at this point is -2. And the tail node that it points at
+        // is #1 (since getTailNodes() is 0-indexed).
         int index = -curr - 1;
         result[i] = getLabelWithSpan(edge.getTailNodes().get(index));
       }
@@ -135,12 +135,12 @@ public class GrammarBuilderWalkerFunction implements WalkerFunction {
 
   private static int[] getNewTargetFromSource(int[] source) {
     int[] result = new int[source.length];
-		int currNT = -1; // value to stick into NT slots
+    int currNT = -1; // value to stick into NT slots
     for (int i = 0; i < source.length; i++) {
       result[i] = source[i];
-      if (Vocabulary.nt(result[i])) {
+      if (FormatUtils.isNonterminal(result[i])) {
         result[i] = currNT;
-				currNT--;
+        currNT--;
       }
     }
     // System.err.printf("target: %s\n", result);

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/868b3409/src/joshua/decoder/hypergraph/OutputStringExtractor.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/hypergraph/OutputStringExtractor.java b/src/joshua/decoder/hypergraph/OutputStringExtractor.java
index acb2e17..f67a9df 100644
--- a/src/joshua/decoder/hypergraph/OutputStringExtractor.java
+++ b/src/joshua/decoder/hypergraph/OutputStringExtractor.java
@@ -20,13 +20,13 @@ package joshua.decoder.hypergraph;
 
 import static java.lang.Math.min;
 import static joshua.corpus.Vocabulary.getWords;
-import static joshua.corpus.Vocabulary.nt;
 
 import java.util.Stack;
 
 import joshua.decoder.ff.tm.Rule;
 import joshua.decoder.hypergraph.KBestExtractor.DerivationState;
 import joshua.decoder.hypergraph.KBestExtractor.DerivationVisitor;
+import joshua.util.FormatUtils;
 
 public class OutputStringExtractor implements WalkerFunction, DerivationVisitor {
   
@@ -66,7 +66,7 @@ public class OutputStringExtractor implements WalkerFunction, DerivationVisitor
   private static int getSourceNonTerminalPosition(final int[] words, int nonTerminalIndex) {
     int nonTerminalsSeen = 0;
     for (int i = 0; i < words.length; i++) {
-      if (nt(words[i])) {
+      if (FormatUtils.isNonterminal(words[i])) {
         nonTerminalsSeen++;
         if (nonTerminalsSeen == nonTerminalIndex) {
           return i;
@@ -89,7 +89,7 @@ public class OutputStringExtractor implements WalkerFunction, DerivationVisitor
    */
   private static int getTargetNonTerminalPosition(int[] words, int nonTerminalIndex) {
     for (int pos = 0; pos < words.length; pos++) {
-      if (nt(words[pos]) && -(words[pos] + 1) == nonTerminalIndex) {
+      if (FormatUtils.isNonterminal(words[pos]) && -(words[pos] + 1) == nonTerminalIndex) {
         return pos;
       }
     }
@@ -174,7 +174,7 @@ public class OutputStringExtractor implements WalkerFunction, DerivationVisitor
      * of child and the arity of this.
      */
     private void substituteNonTerminalAtPosition(final int[] words, final int position) {
-      assert(nt(this.words[position]));
+      assert(FormatUtils.isNonterminal(this.words[position]));
       final int[] result = new int[words.length + this.words.length - 1];
       int resultIndex = 0;
       for (int i = 0; i < position; i++) {

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/868b3409/src/joshua/decoder/segment_file/Sentence.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/segment_file/Sentence.java b/src/joshua/decoder/segment_file/Sentence.java
index 588850b..c77b51c 100644
--- a/src/joshua/decoder/segment_file/Sentence.java
+++ b/src/joshua/decoder/segment_file/Sentence.java
@@ -19,7 +19,6 @@
 package joshua.decoder.segment_file;
 
 import static joshua.util.FormatUtils.addSentenceMarkers;
-import static joshua.util.FormatUtils.escapeSpecialSymbols;
 
 import java.util.ArrayList;
 import java.util.HashSet;

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/868b3409/src/joshua/metrics/MinimumChangeBLEU.java
----------------------------------------------------------------------
diff --git a/src/joshua/metrics/MinimumChangeBLEU.java b/src/joshua/metrics/MinimumChangeBLEU.java
index fa764c3..91ee81c 100644
--- a/src/joshua/metrics/MinimumChangeBLEU.java
+++ b/src/joshua/metrics/MinimumChangeBLEU.java
@@ -19,7 +19,6 @@
 package joshua.metrics;
 
 import java.util.HashMap;
-import java.util.Iterator;
 import java.util.Map;
 import java.util.logging.Logger;
 

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/868b3409/src/joshua/metrics/Precis.java
----------------------------------------------------------------------
diff --git a/src/joshua/metrics/Precis.java b/src/joshua/metrics/Precis.java
index 82f4106..84279ac 100644
--- a/src/joshua/metrics/Precis.java
+++ b/src/joshua/metrics/Precis.java
@@ -19,7 +19,6 @@
 package joshua.metrics;
 
 import java.util.HashMap;
-import java.util.Iterator;
 import java.util.Map;
 import java.util.logging.Logger;
 

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/868b3409/src/joshua/oracle/OracleExtractionHG.java
----------------------------------------------------------------------
diff --git a/src/joshua/oracle/OracleExtractionHG.java b/src/joshua/oracle/OracleExtractionHG.java
index 7e7fcb8..8a688e3 100644
--- a/src/joshua/oracle/OracleExtractionHG.java
+++ b/src/joshua/oracle/OracleExtractionHG.java
@@ -35,6 +35,7 @@ import joshua.decoder.hypergraph.HyperEdge;
 import joshua.decoder.hypergraph.HyperGraph;
 import joshua.decoder.hypergraph.KBestExtractor;
 import joshua.util.FileUtility;
+import joshua.util.FormatUtils;
 import joshua.util.io.LineReader;
 
 /**
@@ -384,7 +385,7 @@ public class OracleExtractionHG extends SplitHg {
     // #### get left_state_sequence, right_state_sequence, total_hyp_len, num_ngram_match
     for (int c = 0; c < en_words.length; c++) {
       int c_id = en_words[c];
-      if (Vocabulary.nt(c_id)) {
+      if (FormatUtils.isNonterminal(c_id)) {
         int index = -(c_id + 1);
         DPStateOracle ant_state = (DPStateOracle) l_ant_virtual_item.get(index).dp_state;
         total_hyp_len += ant_state.best_len;

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/868b3409/src/joshua/util/FormatUtils.java
----------------------------------------------------------------------
diff --git a/src/joshua/util/FormatUtils.java b/src/joshua/util/FormatUtils.java
index 568d3ca..f6cfcd7 100644
--- a/src/joshua/util/FormatUtils.java
+++ b/src/joshua/util/FormatUtils.java
@@ -44,6 +44,14 @@ public class FormatUtils {
   public static boolean isNonterminal(String token) {
     return (token.length() >=3 && token.charAt(0) == '[') && (token.charAt(token.length() - 1) == ']');
   }
+  
+  /**
+   * Determines whether the ID represents a nonterminal. This is a trivial check, since nonterminal
+   * IDs are simply negative ones.
+   */
+  public static boolean isNonterminal(int id) {
+    return id < 0;
+  }
 
   /**
    * Nonterminals are stored in the vocabulary in square brackets. This removes them when you